◊(Local Yarn Code "crystalize.rkt at [1346740b]")

File crystalize.rkt artifact 43b9fb1b part of check-in 1346740b


#lang racket/base

; SPDX-License-Identifier: BlueOak-1.0.0
; This file is licensed under the Blue Oak Model License 1.0.0.

;; Provides functions for fast preserving and fetching of article/series data.
;;  → Docs and metas go in (saved to SQLite database)
;;    HTML comes out →
;; Calling sites have no notion of the database or schema.

;; The functions provided by sqlite-tools.rkt are not safe for user-provided
;; data; a maliciously crafted input could bomb the database. This is acceptable
;; since the database is merely a disposable cache, and since all the input
;; will be coming from me.

(require pollen/setup
         pollen/core
         pollen/template
         racket/string
         racket/function
         racket/list
         txexpr
         db/base
         "sqlite-tools.rkt"
         "snippets-html.rkt"
         "dust.rkt")

;; ~~~ Provides ~~~

(provide spell-of-summoning!
         crystalize-article!
         crystalize-series!
         crystalize-index-entries!
         article-plain-title
         list/articles
         list/articles+notes
         listing<>-short/articles
         listing<>-full/articles
         listing<>-full/articles+notes
         unfence
         sqltools:dbc
         preheat-series!)

;; ~~~ Private use ~~~

(define DBFILE (build-path (current-project-root) "vitreous.sqlite"))

;; Since the DB exists to serve as a high-speed cache, the tables are constructed so that
;; the most commonly needed data can be grabbed quickly with extremely simple queries. In
;; the even that you want to do something fancy and custom rather than using the pre-cooked
;; HTML, enough info is provided in the other columns to allow you to do so.
;;
(define table_articles-fields
  '(pagenode
    title_plain
    title_html_flow
    title_specified
    published
    updated
    author
    conceal
    series_pagenode
    noun_singular 
    note_count
    doc_html
    disposition
    disposition_note_id
    listing_full_html    ; Contains full content in default HTML format, but without notes
    listing_excerpt_html ; Not used for now
    listing_short_html)) ; Date and title only

(define table_notes-fields
  '(pagenode
    note_id
    title_html_flow
    author
    author_url
    date
    disposition
    content_html
    series_pagenode
    listing_full_html
    listing_excerpt_html  ; Not used for now
    listing_short_html))

(define table_series-fields
  '(pagenode
    title
    published
    noun_plural
    noun_singular))

(define table_keywordindex-fields
  '(entry
    subentry
    pagenode
    anchor))

(define table_articles (make-table-schema "articles" table_articles-fields))
(define table_notes (make-table-schema "notes" table_notes-fields #:primary-key-cols '(pagenode note_id)))
(define table_series (make-table-schema "series" table_series-fields))
(define table_keywordindex (make-table-schema "keywordindex"
                                              table_keywordindex-fields
                                              #:primary-key-cols '(pagenode anchor)))

;; ~~~ Provided functions: Initializing; Saving posts and notes

;; Initialize the database connection, creating the database if it doesn’t
;; exist, and executing the table schema queries
;;
(define (spell-of-summoning!)
  (init-db! DBFILE table_articles table_notes table_series table_keywordindex))

;; Save an article and its notes (if any) to the database, and return the
;; rendered HTML of the complete article.
;;
(define (crystalize-article! pagenode doc)
  (define-values
    (doc2 maybe-title) (splitf-txexpr doc (make-tag-predicate 'title)))
  (define-values 
    (body-txpr note-txprs) (splitf-txexpr doc2 (make-tag-predicate 'note)))
  (define-values (disposition disp-note-id)
    (notes->last-disposition-values note-txprs))
  
  (let* ([pubdate (select-from-metas 'published (current-metas))]
         [doc-html    (->html body-txpr #:splice? #t)]
         [title-specified? (not (equal? '() maybe-title))]
         [title-val   (if (not (null? maybe-title)) (car maybe-title) (check-for-poem-title doc))]
         [title-tx    (make-article-title pagenode title-val body-txpr disposition disp-note-id)]
         [title-html  (->html title-tx #:splice? #t)]
         [title-plain (tx-strs title-tx)]
         [series-node (series-pagenode)]
         [header      (html$-article-open pagenode title-specified? title-tx pubdate)]
         [footertext (make-article-footertext pagenode series-node disposition disp-note-id (length note-txprs))]
         [footer (html$-article-close footertext)]
         [notes-section-html (crystalize-notes! pagenode title-plain note-txprs)])

    (crystalize-index-entries! pagenode doc) ; Note the original doc is used here

    ;; Values must come in the order defined in table_article_fields
    (define article-record
      (list (symbol->string pagenode)
            title-plain
            title-html
            (bool->int title-specified?)
            pubdate
            (maybe-meta 'updated)
            (maybe-meta 'author default-authorname)
            (maybe-meta 'conceal)
            (symbol->string series-node)
            (maybe-meta 'noun (series-noun))
            (length note-txprs)
            doc-html
            disposition
            disp-note-id
            (string-append header doc-html footer)
            "" ; listing_excerpt_html: Not yet used
            (html$-article-listing-short pagenode pubdate title-html)))

    (apply query! (make-insert/replace-query 'articles table_articles-fields) article-record)
          
    (string-append header doc-html notes-section-html footer)))

;; ~~~ Retrieve listings of articles and notes ~~~
;; ~~~ (Mainly for use on Series pages         ~~~

;; (private) Create a WHERE clause matching a single series or list of series
(define (where/series s)
  (cond [(list? s)
         (let ([series (map (curry (format "~a/~a.html" series-folder)) s)])
           (format "WHERE `series_pagenode` IN ~a" (list->sql-values series)))]
        [(string? s)
         (format "WHERE `series_pagenode` IS \"~a/~a.html\"" series-folder s)]
        [(equal? s #t)
         (format "WHERE `series_pagenode` IS \"~a\"" (here-output-path))]
        [else ""]))

;; Return a combined list of articles and notes sorted by date
(define (list/articles+notes type #:series [s #t] #:limit [limit -1] [order "DESC"])
  (define select #<<@@@@@
     SELECT `~a` FROM
       (SELECT `~a`, `published`, `series_pagenode` FROM `articles`
        UNION SELECT
        `~a`,`date` AS `published`, `series_pagenode` FROM `notes`)
        ~a ORDER BY `published` ~a LIMIT ~a
@@@@@
    )
  (query-list (sqltools:dbc) (format select type type type (where/series s) order limit)))

;; Return a list of articles only, sorted by date
(define (list/articles type #:series [s #t] #:limit [limit -1] [order "DESC"])
  (define select "SELECT `~a` FROM `articles` ~a ORDER BY `published` ~a LIMIT ~a")
  (query-list (sqltools:dbc) (format select type (where/series s) order limit)))

;; ~~~~
;; Return cached HTML of articles and/or notes, fenced within a style txexpr to prevent it being
;; escaped by ->html. See also: definition of `unfence`

(define (listing<>-short/articles #:series [s #t] #:limit [limit -1] [order "DESC"])
  `(style "<ul class=\"article-list\">"
          ,@(list/articles "listing_short_html" #:series s #:limit limit order)
          "</ul>"))

(define (listing<>-full/articles #:series [s #t] #:limit [limit -1] [order "DESC"])
  `(style ,@(list/articles "listing_full_html" #:series s #:limit limit order)))

;; Return a combined list of articles and notes (“full content” version) sorted by date
(define (listing<>-full/articles+notes #:series [s #t] #:limit [limit -1] [order "DESC"])
  `(style ,@(list/articles+notes "listing_full_html" #:series s #:limit limit order)))

;; Remove "<style>" and "</style>" introduced by using ->html on docs containing output from
;; listing functions
(define (unfence html-str)
  (regexp-replace* #px"<[\\/]{0,1}style>" html-str ""))

;; ~~~ Article-related helper functions ~~~
;;

;; If the first element is a titled poem, the poem’s title can be used for the article title.
(define (check-for-poem-title doc)
  (define e1 (car (get-elements doc)))
  (define e2 (if (null? (get-elements e1))
                 '()
                 (car (get-elements e1))))
  (cond
    [(and (txexpr? e1)
          (equal? 'div (get-tag e1))
          (attrs-have-key? e1 'class)
          (string=? "poem" (attr-ref e1 'class))
          (not (null? e2))
          (txexpr? e2)
          (equal? 'p (get-tag e2))
          (attrs-have-key? e2 'class)
          (string=? "verse-heading" (attr-ref e2 'class)))
     `(title (span [[class "smallcaps"]] "‘" ,@(get-elements e2) "’"))]
    [else '()]))

;; Return a title txexpr for the current article, constructing a default if no title text was specified.
(define (make-article-title pagenode supplied-title body-tx disposition disp-note-id)
  (define title-elems
    (cond [(null? supplied-title) (list (default-title (get-elements body-tx)))]
          [else (get-elements supplied-title)]))
  
  (define disposition-part
    (cond [(non-empty-string? disposition)
           (define-values (mark _) (disposition-values disposition))
           `(a [[class "disposition-mark"] 
                [href ,(format "~a~a#~a" web-root pagenode disp-note-id)]] 
               ,mark)]
          [else ""]))
  ;; Returns a txexpr, the tag will be discarded by the template/snippets
  `(title ,@title-elems ,disposition-part))
  
;; Convert a bunch of information about an article into some nice English and links.
(define (make-article-footertext pagenode series disposition disp-note-id note-count)
  (define s-title (series-title))
  (define s-noun (series-noun))
  (define series-part
    (cond [(non-empty-string? s-title)
           (format "<span class=\"series-part\">This is ~a, part of <a href=\"/~a\">‘~a’</a>.</span>"
                   s-noun
                   series
                   s-title)]
          [else ""]))
  (define disp-part
    (cond [(non-empty-string? disposition)
           (define-values (mark verb) (disposition-values disposition))
           (format "Now considered <a href=\"/~a#~a\">~a</a>."
                   pagenode
                   disp-note-id
                   verb)]
          [else ""]))
  (define notes-part
    (cond [(note-count . > . 1)
           (format "There are <a href=\"/~a#furthernotes\">~a notes</a> appended."
                   pagenode
                   note-count)]
          [(and (note-count . > . 0) (string=? disposition ""))
           (format "There is <a href=\"/~a#furthernotes\">a note</a> appended."
                   pagenode)]
          [else ""]))
  
  (cond [(ormap non-empty-string? (list series-part disp-part notes-part))
         (string-join (list series-part disp-part notes-part))]
        [else ""]))
    

;; ~~~ Notes ~~~

;; Save a collection of ◊note tags to the DB, and return the HTML of the complete
;; “Further Notes” section at the end
;;
(define (crystalize-notes! pagenode parent-title note-txprs)
  (define (crystalizer note-tx)
    (crystalize-note! note-tx (symbol->string pagenode) parent-title))
  
  (cond [((length note-txprs) . > . 0)
         (define notes-html (map crystalizer note-txprs))
         (html$-notes-section notes-html)]
        [else ""]))

;; Save an individual note to the DB and return the HTML of the complete note as
;; it should appear on an individual article page
;;
(define (crystalize-note! note-tx pagenode parent-title-plain)
  (define-values (_ attrs elems) (txexpr->values note-tx))
  (define disposition-attr (maybe-attr 'disposition attrs))
  (define note-date (maybe-attr 'date attrs))
  
  ;; Check required attributes
  (unless (non-empty-string? note-date)
    (raise-arguments-error 'note "required attr missing: date" "attrs" attrs))
  (unless (or (string=? "" disposition-attr)
              (and ((length (string-split disposition-attr)) . >= . 2)))
    (raise-arguments-error 'note
                           "must be in format \"[symbol] [past-tense-verb]\""
                           "disposition attr"
                           disposition-attr))
  
  ;; Parse out remaining columns
  (define author (maybe-attr 'author attrs default-authorname))
  (define note-id (build-note-id note-tx))
  (define title-html-flow (html$-note-title pagenode parent-title-plain))
  (define author-url (maybe-attr 'author-url attrs))
  (define-values (disp-mark disp-verb) (disposition-values disposition-attr))
  (define content-html (html$-note-contents disp-mark disp-verb (get-elements note-tx)))
  (define listing-full-html
    (html$-note-listing-full pagenode note-id title-html-flow note-date content-html author author-url))

  (define note-record
    (list pagenode
          note-id
          title-html-flow
          author
          author-url
          note-date
          disposition-attr
          content-html
          (symbol->string (series-pagenode))
          listing-full-html
          "" ; listing_excerpt_html: Not used for now
          "")) ; listing_short_html: Not used for now
  
  ;; save to db
  (define save-note-query
    (format (string-append "INSERT OR REPLACE INTO `notes` (`rowid`, ~a) "
                           "VALUES ((SELECT `rowid` FROM `notes` WHERE `pagenode` = ?1"
                           " AND `note_id` = ?2), ~a)")
            (list->sql-fields table_notes-fields)
            (list->sql-parameters table_notes-fields)))
  (apply query! save-note-query note-record)
  
  ;; return html$ of note
  (html$-note-in-article note-id note-date content-html author author-url))

(define (article-plain-title pagenode)
  (query-value (sqltools:dbc) "SELECT `title_plain` FROM `articles` WHERE `pagenode` = ?1" (symbol->string pagenode)))

;; ~~~ Keyword Index Entries ~~~

;; (private) Convert an entry key into a list of at most two elements,
;; a main entry and a sub-entry.
;;   "entry" → '("entry" "")
;;   "entry!sub" → '("entry" "sub")
;;   "entry!sub!why?!? '("entry" "sub")
(define (split-entry str)
  (define splits (string-split str "!"))
  (list (car splits)
        (cadr (append splits (list "")))))

;; Save any index entries in doc to the SQLite cache.
;; Sub-entries are specified by "!" in the index key
(define (crystalize-index-entries! pagenode doc)
  (define (index-entry? tx)
    (and (txexpr? tx)
         (string=? "index-link" (attr-ref tx 'class "")) ; see definition of html-index
         (attr-ref tx 'data-index-entry #f)))
  (define-values (_ entries) (splitf-txexpr doc index-entry?))

  ; Naive idempotence: delete and re-insert all index entries every time doc is rendered.
  (query! "DELETE FROM `keywordindex` WHERE `pagenode` = ?1" (symbol->string pagenode))
  
  (unless (null? entries)
    (define entry-rows
      (for/list ([entry-tx (in-list entries)])
        (define entry-parts (split-entry (attr-ref entry-tx 'data-index-entry)))
        (list (first entry-parts)
              (second entry-parts)
              (symbol->string pagenode)
              (attr-ref entry-tx 'id))))
    (query! (make-insert-rows-query "keywordindex" table_keywordindex-fields entry-rows))))

;; ~~~ Series ~~~

;; Preloads the SQLite cache with info about each series.
;; I may not actually need this but I’m leaving it for now.
(define (preheat-series!)
  (query! "DELETE FROM `series`")
  (define series-values
    (for/list ([series-pagenode (in-list (cdr (series-pagetree)))])
      (define series-metas (get-metas series-pagenode))
      (list (symbol->string series-pagenode)
            (hash-ref series-metas 'title)
            (hash-ref series-metas 'published)
            (hash-ref series-metas 'noun-plural "")
            (hash-ref series-metas 'noun-singular ""))))
  (define sql$-insert (make-insert-rows-query 'series table_series-fields series-values))
  (displayln sql$-insert)
  (query! sql$-insert))

;; Save the current article to the `series` table of the SQLite cache
;; Should be called from a template for series pages
(define (crystalize-series!)
  (define series-row
    (list (path->string (here-output-path))
          (hash-ref (current-metas) 'title)
          (hash-ref (current-metas) 'published "")
          (hash-ref (current-metas) 'noun-plural "")
          (hash-ref (current-metas) 'noun-singular "")))
  (apply query! (make-insert/replace-query 'series table_series-fields) series-row))