◊(Local Yarn Code "keyword-index.rkt at [233fdc64]")

File keyword-index.rkt artifact 50258609 part of check-in 233fdc64


#lang pollen/mode racket/base

; SPDX-License-Identifier: BlueOak-1.0.0
; This file is licensed under the Blue Oak Model License 1.0.0.

;; Builds an HTML page containing the keyword index for all ◊index entries in
;; the articles, by pulling them out of the SQLite cache DB.

(require racket/match
         racket/list
         racket/file
         racket/string
         db/base
         net/uri-codec
         pollen/template)

(require "crystalize.rkt"
         "dust.rkt"
         "snippets-html.rkt")

(provide main)

;; Terminology (because these things get confusing fast)
;;
;; Index HEADING:    A word or phrase that appears in the index
;; Index SUBHEADING: A Heading that appears under another Heading
;; Index LINK:       The connection between a particular Article and a Heading.
;;                   In the Index, represented by a URL+fragment back into the Article.
;;                   In the Article, represented by a URL+fragment to the Heading in the Index.
;; Index ENTRY:      A Heading, a unique ID, a list of Links, and a list of Subheading-entries
;; Index RECORD:     A simple vector representing a Link the way it is fetched from the SQLite
;;                   cache: #("heading" "subheading" NUMBER "/absolute-path.html#id_fragment" "Article Title")

(struct entry (heading      ; String
               id           ; String
               links        ; List
               subentries) #:transparent) ; List of entry structs

;; Get the first letter of an entry’s heading
(define (first-letter e)
  (char-upcase (first (string->list (entry-heading e)))))

;; Format the ID for back-linking to a particular entry on the keyword index page
(define (format-entry-id head subhead)
  (define maybe-sub
    (cond [(non-empty-string? subhead) (string-append "!" subhead)]
          [else ""]))
  (uri-encode (string-downcase (string-append head maybe-sub))))

;; Convert a Record into a txexpr representing a link to the original article
(define (make-link rec)
  (match-define (vector _ _ num link-url article-title) rec)
  `(a [[href ,link-url]
       [title ,article-title]]
      ,(number->string num)))

;; See if a Record belongs under an entry's heading or under an entry's subheading.
;; For subheadings, only checks the first one! This is because we rely on the records
;; being pre-alphabetized as they are fetched from the SQLite cache, and being
;; processed in order.
(define (test-match e record)
  (match-define (vector record-head record-subhead _ _ _) record)
  (define heading-match?
    (string-ci=? (entry-heading e) record-head))
  (define subheading-match?
    (and heading-match?
         (not (null? (entry-subentries e)))
         (non-empty-string? record-subhead)
         (string-ci=? (entry-heading (first (entry-subentries e))) record-subhead)))
  
  (cond [(and heading-match? subheading-match?)   'existing-subhead]
        [(and heading-match?
              (non-empty-string? record-subhead)) 'new-subhead]
        [heading-match?                           'heading]
        [else                                     'not-a-match]))

;; Add a record’s link to the front of an entry’s list of links
(define (add-entry-link e rec)
  (define existing-links (entry-links e))
  (struct-copy entry e [links (cons (make-link rec) existing-links)]))

;; Add a record’s link to the first subheading given in an entry.
(define (add-to-first-subentry e record)
  (match-define (entry _ _ _ (cons last-sub rest-subs)) e)
  (define updated-sub (add-entry-link last-sub record))
  (struct-copy entry e [subentries (cons updated-sub rest-subs)]))

;; Add a record's link to a new subheading in an entry
(define (add-new-subentry e record)
  (match-define (vector head subhead _ _ _) record)
  (define subhead-id (format-entry-id head subhead))
  (define new-sub (entry subhead subhead-id (list (make-link record)) '()))
  (struct-copy entry e [subentries (list new-sub)]))

;; Make a new entry from a record, placing its link in either the main entry
;; or in a new sub-entry.
(define (new-entry record)
  (match-define (vector head subhead _ _ _) record)
  (define e (entry head (format-entry-id head "") '() '()))
  (cond [(non-empty-string? subhead) (add-new-subentry e record)]
        [else (add-entry-link e record)]))

;; Get the index entries from the SQLite cache, return them as a list of vectors (Records!)
(define (fetch-entries)
  (define q
    ◊string-append{
 SELECT entry, subentry, a.rowid, "◊web-root" || k.page || "#" || html_anchor AS href, title_plain
 FROM index_entries k INNER JOIN articles a
 ON a.page = k.page
 ORDER BY entry COLLATE NOCASE ASC, subentry COLLATE NOCASE ASC;})
  (query-rows cache-conn q))

;; Convert a list of vectors from the cache DB into a list of the form:
;; ((FIRST-LETTER (entries ...)) ...)
;; The method relies on the records being pre-sorted by the SQL query.
(define (group-entries records)
  (define collated
    (for/fold ([entries (list (new-entry (first records)))]
               #:result (reverse entries))
              ([row (in-list (rest records))])
      (match-define (cons last-entry rest-entries) entries)
      (case (test-match last-entry row)
        [(existing-subhead) (cons (add-to-first-subentry last-entry row) rest-entries)]
        [(new-subhead)      (cons (add-new-subentry last-entry row) rest-entries)]
        [(heading)          (cons (add-entry-link last-entry row) rest-entries)]
        [else               (cons (new-entry row) entries)])))
  (for/list ([letter-group (in-list (group-by first-letter collated))])
    (list (first-letter (first letter-group)) letter-group)))

;; Convert an entry into a list item, recursively adding subentries
(define (entry->txexpr e)
  (match-define (entry head id links subentries) e)
  (define sub-txs
    (cond [(null? subentries) ""]
          [else `(ul ,@(map entry->txexpr subentries))]))
  `(li [[id ,id]] ,head nbsp ,@(add-between links ", ") ,sub-txs))

;; Return the complete HTML for the keyword index. Each letter group begins with a heading for the
;; letter, followed by a definition list for its entries.
(define (html$-index grouped-entries)
  (define groups
    (for/list ([letter-group (in-list grouped-entries)])
      (match-define (list letter-char entries) letter-group)
      `(section (h2 ,(list->string (list letter-char)))
                (ul ,@(map entry->txexpr entries)))))
  (apply string-append (map ->html groups)))

;; Build the complete HTML page
(define (html$-keywordindex-page the-index)
  ◊string-append{
 <!DOCTYPE html>
 <html lang="en">
 ◊html$-page-head{The Local Yarn: Keyword Index}
 ◊html$-page-body-open[]

 <div id="keywordindex">
 ◊the-index
 </div>
 ◊html$-page-body-close[]
 </html>})

(define (main)
  (displayln "Writing keyword-index.html…")
  (display-to-file (html$-keywordindex-page (html$-index (group-entries (fetch-entries))))
                   "keyword-index.html"
                   #:mode 'text
                   #:exists 'replace))