◊(Local Yarn Code "crystalize.rkt at [4b2c827f]")

File crystalize.rkt artifact 7c40c653 part of check-in 4b2c827f


#lang racket/base

; SPDX-License-Identifier: BlueOak-1.0.0
; This file is licensed under the Blue Oak Model License 1.0.0.

(require deta
         db/base
         threading
         racket/match
         racket/string
         txexpr
         pollen/template
         pollen/decode
         (except-in pollen/core select) ; avoid conflict with deta
         )

(require "dust.rkt" "cache.rkt" "snippets-html.rkt")

(provide main
         cache-index-entries-only!)

(define current-title       (make-parameter #f))
(define current-excerpt     (make-parameter #f))
(define current-notes       (make-parameter '()))
(define current-disposition (make-parameter ""))
(define current-disp-id     (make-parameter ""))

;; Rebuild the SQLite cache with all article entities
(define (main)
  (define pages (cdr (articles-pagetree)))
  (define (getter-thunk)
    (define parent (thread-receive))
    (define article (thread-receive))
    (thread-send parent (all-entities article (get-doc article))))
  
  (for ([p (in-list pages)])
    (let ([t (thread getter-thunk)])
      (thread-send t (current-thread))
      (thread-send t p)))

  (define entities
    (time
      (for/vector ([i (in-range (length pages))])
                  (thread-receive))))

   (time (call-with-transaction
    (cache-conn)
    (lambda ()
      (init-cache-db! #:reset? #t)
      (for ([x (in-vector entities)])
        (for ([e (in-list x)])
          (insert-one! (cache-conn) e)))))))

(define (filter-special-tags tx)
  (match (get-tag tx)
    ['title (current-title tx) ""]
    ['excerpt (current-excerpt tx) ""]
    ['excerpt* (current-excerpt tx) `(@ ,@(get-elements tx))] ; splice contents back in
    ['note
     (define note-id (build-note-id tx))
     (cond [(attrs-have-key? tx 'disposition)
            (current-disp-id note-id)
            (current-disposition (attr-ref tx 'disposition))])
     (current-notes (cons (attr-set tx 'note-id note-id) (current-notes))) ""]
    [_ tx]))

;; From an article, return a list of cache:article, cache:note and cache:index-entry entities
(define (all-entities pagenode doc)
  ;; Reset parameters in case we’re doing multiple articles
  (current-title #f)
  (current-excerpt #f)
  (current-notes '())
  (current-disposition "")
  (current-disp-id "")
  (current-metas (get-metas pagenode))
  
  (define body-txpr (decode doc #:txexpr-proc filter-special-tags))
  (current-notes (reverse (current-notes)))
  (let* ([pubdate (select-from-metas 'published (current-metas))]
         [doc-html    (->html body-txpr #:splice? #t)]
         [title-specified? (if (current-title) #t #f)]
         [title-val   (or (current-title) (check-for-poem-title doc))]
         [title-tx    (make-article-title pagenode
                                          title-val
                                          body-txpr
                                          (current-disposition)
                                          (current-disp-id))]
         [title-html  (->html title-tx #:splice? #t)]
         [title-plain (tx-strs title-tx)]
         [header      (html$-article-open pagenode title-specified? title-tx pubdate)]
         [series-node (current-series-pagenode)]
         [footertext  (make-article-footertext pagenode
                                               series-node
                                               (current-disposition)
                                               (current-disp-id)
                                               (length (current-notes)))]
         [footer (html$-article-close footertext)]
         [listing-short (html$-article-listing-short pagenode pubdate title-html)]
         [listing-full (string-append header doc-html footer)]
         [listing-excerpt (match (current-excerpt)
                            [#f listing-full]
                            [(var e) (string-append header (html$-article-excerpt pagenode e) footer)])]
         [notes (extract-notes pagenode title-plain (current-notes))]
         [index-entries (extract-index-entries pagenode doc)])
    (append
     (cons (make-cache:article
            #:page pagenode
            #:title-plain title-plain
            #:title-html-flow title-html
            #:title-specified? title-specified?
            #:published pubdate
            #:updated (maybe-meta 'updated)
            #:author (maybe-meta 'author default-authorname)
            #:conceal (maybe-meta 'conceal)
            #:series-page series-node
            #:noun-singular (maybe-meta 'noun (current-series-noun))
            #:note-count (length (current-notes))
            #:content-html doc-html
            #:disposition (current-disposition)
            #:disp-html-anchor (current-disp-id)
            #:listing-full-html listing-full
            #:listing-excerpt-html listing-excerpt
            #:listing-short-html listing-short)
           notes)
     index-entries)))

(define (check-for-poem-title doc-txpr)
  (match (car (get-elements doc-txpr))
    [(txexpr 'div
             (list (list 'class "poem"))
             (list* (txexpr 'p
                            (list (list 'class "verse-heading"))
                            heading-elems)
                    _))
     `(title (span [[class "smallcaps"]] "‘" ,@heading-elems "’"))]
    [_ '()]))

;; Return a title txexpr for the current article, constructing a default if no title text was specified.
(define (make-article-title pagenode supplied-title body-tx disposition disp-note-id)
  (define title-elems
    (cond [(null? supplied-title) (list (default-title (get-elements body-tx)))]
          [else (get-elements supplied-title)]))
  
  (define disposition-part
    (cond [(non-empty-string? disposition)
           (define-values (mark _) (disposition-values disposition))
           `(a [[class "disposition-mark"] 
                [href ,(format "~a~a#~a" web-root pagenode disp-note-id)]] 
               ,mark)]
          [else ""]))
  ;; Returns a txexpr, the tag will be discarded by the template/snippets
  `(title ,@title-elems ,disposition-part))

;; Convert a bunch of information about an article into some nice English and links.
(define (make-article-footertext pagenode series disposition disp-note-id note-count)
  (define series-part
    (match (current-series-title)
      [(? non-empty-string? s-title)
       (format "<span class=\"series-part\">This is ~a, part of <a href=\"/~a\">‘~a’</a>.</span>"
               (current-series-noun)
               series
               s-title)]
      [_ ""]))
  (define disp-part
    (cond [(non-empty-string? disposition)
           (define-values (mark verb) (disposition-values disposition))
           (format "Now considered <a href=\"/~a#~a\">~a</a>."
                   pagenode
                   disp-note-id
                   verb)]
          [else ""]))
  (define notes-part
    (cond [(note-count . > . 1)
           (format "There are <a href=\"/~a#furthernotes\">~a notes</a> appended."
                   pagenode
                   note-count)]
          [(and (note-count . > . 0) (string=? disposition ""))
           (format "There is <a href=\"/~a#furthernotes\">a note</a> appended."
                   pagenode)]
          [else ""]))
  
  (cond [(ormap non-empty-string? (list series-part disp-part notes-part))
         (string-join (list series-part disp-part notes-part))]
        [else ""]))

;; ~~~ Notes ~~~

(define (extract-notes pagenode parent-title note-txprs)
  (for/list ([n (in-list note-txprs)])
    (make-note n pagenode parent-title)))

;; Create cache:note entity
(define (make-note note-tx pagenode parent-title-plain)
  (define-values (_ attrs elems) (txexpr->values note-tx))
  (define disposition-attr (maybe-attr 'disposition attrs))
  (define note-date (maybe-attr 'date attrs))
  
  ;; Check required attributes
  (unless (non-empty-string? note-date)
    (raise-arguments-error 'note "required attr missing: date" "attrs" attrs))
  (unless (or (string=? "" disposition-attr)
              (>= (length (string-split disposition-attr)) 2))
    (raise-arguments-error 'note
                           "must be in format \"[symbol] [past-tense-verb]\""
                           "disposition attr"
                           disposition-attr))
  (define-values (disp-mark disp-verb) (disposition-values disposition-attr))
  (let* ([note-id (build-note-id note-tx)]
         [title-tx (make-note-title pagenode parent-title-plain)]
         [title-html (->html title-tx #:splice? #t)]
         [author (maybe-attr 'author attrs default-authorname)]
         [author-url (maybe-attr 'author-url attrs)]
         [note-srcline (maybe-attr 'srcline attrs)]
         [content-html (html$-note-contents disp-mark disp-verb elems)]
         [listing-full (html$-note-listing-full pagenode
                                                note-id
                                                title-html
                                                note-date
                                                note-srcline
                                                content-html
                                                author
                                                author-url)])
    (make-cache:note
     #:page pagenode
     #:html-anchor note-id
     #:title-html-flow title-html
     #:title-plain (tx-strs title-tx)
     #:published note-date
     #:author author
     #:author-url author-url
     #:disposition disposition-attr
     #:series-page (current-series-pagenode)
     #:conceal (or (maybe-attr 'conceal attrs #f) (maybe-meta 'conceal))
     #:content-html content-html
     #:listing-full-html listing-full
     #:listing-excerpt-html listing-full
     #:listing-short-html "")))

(define (make-note-title pagenode parent-title-plain)
  `(note-title "Re: " (a [[class "cross-reference"]
                          [href ,(format "~a~a" web-root pagenode)]]
                         ,parent-title-plain)))

;; ~~~ Keyword Index Entries ~~~

;; (private) Convert an entry key into a list of at most two elements,
;; a main entry and a sub-entry.
;;   "entry" → '("entry" "")
;;   "entry!sub" → '("entry" "sub")
;;   "entry!sub!why?!? '("entry" "sub")
(define (split-entry str)
  (define splits (string-split str "!"))
  (list (car splits)
        (cadr (append splits (list "")))))

(define (index-entry-txpr? tx)
  (and (txexpr? tx)
       (string=? "index-link" (attr-ref tx 'class "")) ; see definition of html-index
       (attr-ref tx 'data-index-entry #f)))

(define (txexpr->index-entry tx pagenode)
  (match (split-entry (attr-ref tx 'data-index-entry))
    [(list main sub)
     (make-cache:index-entry
      #:entry main
      #:subentry sub
      #:page pagenode
      #:html-anchor (attr-ref tx 'id))]))

;; Get index entries out of metas
(define (current-metas-keyword-entries pagenode)
  (for/list ([kw (in-list (string-split (maybe-meta 'keywords "") #px";\\s*"))])
    (match (split-entry kw)
      [(list main sub)
       (make-cache:index-entry
        #:entry main
        #:subentry sub
        #:page pagenode
        #:html-anchor "")])))

;; Build a list of cache:index-entry entities for all entries in the docs or current metas
;; Sub-entries are specified by "!" in the index key
(define (extract-index-entries pagenode doc)
  (define-values (_ entry-txs) (splitf-txexpr doc index-entry-txpr?))
  (append (for/list ([etx (in-list entry-txs)]) (txexpr->index-entry etx pagenode))
          (current-metas-keyword-entries pagenode)))

(define (cache-index-entries-only! title pagenode doc)
  (void
   (thread
    (lambda ()
      (call-with-transaction
       (cache-conn)
       (lambda ()
         (delete-index-entries! pagenode)
         (delete-article! pagenode)
         (apply insert! (cache-conn)
                (make-cache:article
                 #:title-plain title
                 #:conceal "blog,feed"
                 #:page pagenode)
                (extract-index-entries pagenode doc))))))))