#lang racket/base
; SPDX-License-Identifier: BlueOak-1.0.0
; This file is licensed under the Blue Oak Model License 1.0.0.
(require pollen/core
pollen/pagetree
pollen/setup
pollen/file
net/uri-codec
file/sha1
gregor
txexpr
racket/list
racket/system
racket/string)
;; Provides common helper functions used throughout the project
(provide maybe-meta ; Select from (current-metas) or default value ("") if not available
maybe-attr ; Return an attribute’s value or a default ("") if not available
here-output-path
here-id
series-noun ; Retrieve noun-singular from current 'series meta, or ""
series-title ; Retrieve title of series in current 'series meta, or ""
series-pagenode
invalidate-series
make-tag-predicate
tx-strs
ymd->english
ymd->dateformat
default-authorname
default-title
articles-folder
series-folder
articles-pagetree
series-pagetree
first-words
build-note-id
notes->last-disposition-values
disposition-values
)
(define default-authorname "Joel Dueck")
(define series-folder "series")
(define articles-folder "articles")
(define (default-title body-txprs)
(format "“~a…”" (first-words body-txprs 5)))
(define (maybe-meta m [missing ""])
(cond [(current-metas) (or (select-from-metas m (current-metas)) missing)]
[else missing]))
;; Return the current output path, relative to (current-project-root)
;; Similar to the variable 'here' which is only accessible in Pollen templates,
;; except this is an actual path, not a string.
(define (here-output-path)
(cond [(current-metas)
(define-values (_ rel-path-parts)
(drop-common-prefix (explode-path (current-project-root))
(explode-path (string->path (select-from-metas 'here-path (current-metas))))))
(->output-path (apply build-path rel-path-parts))]
[else (error "No metas are available")]))
;; Checks current-metas for a 'series meta and returns the pagenode of that series,
;; or '|| if no series is specified.
(define (series-pagenode)
(define maybe-series (or (select-from-metas 'series (current-metas)) ""))
(cond
[(non-empty-string? maybe-series)
(->pagenode (format "~a/~a.html" series-folder maybe-series))]
[else '||]))
(define (series-noun)
(define series-pnode (series-pagenode))
(case series-pnode
['|| ""] ; no series specified
[else (or (select-from-metas 'noun-singular series-pnode) "")]))
(define (series-title)
(define series-pnode (series-pagenode))
(case series-pnode
['|| ""] ; no series specified
[else (or (select-from-metas 'title series-pnode) "")]))
;; Generates a short ID for the current article
(define (here-id [suffix #f])
(define here-hash
(substring (bytes->hex-string (sha1-bytes (path->bytes (here-output-path)))) 0 8))
(cond [(list? suffix) (apply string-append here-hash suffix)]
[(string? suffix) (string-append here-hash suffix)]
[else here-hash]))
;; “Touches” the last-modified date on the current article’s series, if there is one
(define (invalidate-series)
(define series-name (maybe-meta 'series #f))
(when series-name
(define series-file (build-path (current-project-root)
series-folder
(format "~a.poly.pm" series-name)))
(when (file-exists? series-file)
(case (system-type 'os)
[(windows) (system (format "type nul >> ~a" series-file))]
[else (system (format "touch ~a" series-file))]))))
;; ~~~ Project-wide Pagetrees ~~~
(define (include-in-pagetree folder extension)
(define (matching-file? f)
(string-suffix? f extension))
(define (file->output-pagenode f)
(string->symbol (format "~a/~a" folder (string-replace f extension ".html"))))
(define folder-path (build-path (current-project-root) folder))
(define file-strs (map path->string (directory-list folder-path)))
(map file->output-pagenode (filter matching-file? file-strs)))
(define (articles-pagetree)
`(root ,@(include-in-pagetree articles-folder ".poly.pm")))
(define (series-pagetree)
`(root ,@(include-in-pagetree series-folder ".poly.pm")))
;; ~~~ Convenience functions for tagged x-expressions ~~~
(define (maybe-attr name attrs [missing ""])
(define result (assoc name attrs))
(cond
[(pair? result) (cadr result)]
[else missing]))
;; Returns a function will test if a txexpr's tag matches the given symbol.
(define (make-tag-predicate tagsym)
(lambda (tx) (and (txexpr? tx) (equal? tagsym (get-tag tx)))))
(define (tx-strs xpr)
(cond
[(txexpr? xpr) (apply string-append (map tx-strs (get-elements xpr)))]
[(string? xpr) xpr]
[else ""]))
(module+ test
(require rackunit)
(define test-metas (hash 'name "Fiver" 'size "Small"))
(define test-attrs '([name "Hazel"] [rank "Chief"]))
(parameterize ([current-metas test-metas])
(check-equal? (maybe-meta 'name) "Fiver") ; present meta
(check-equal? (maybe-meta 'age) "") ; missing meta
(check-equal? (maybe-meta 'age 2) 2)) ; alternate default value
(check-equal? (maybe-attr 'rank test-attrs) "Chief")
(check-equal? (maybe-attr 'dingus test-attrs) "")
(check-equal? (maybe-attr 'dingus test-attrs "zippy") "zippy"))
;; Return the first N words out of a list of txexprs. This function will unpack the strings out of
;; the elements of one txexpr at a time until it finds the requested number of words. It aims to be
;; both reliable and fast for any size of list you pass it, and smart about the punctuation it
;; allows through.
(define (first-words txprs words-needed)
(define punc-allowed-in-word '(#\- #\' #\% #\$ #\‘ #\’ #\# #\& #\/ #\. #\!))
(define (word-boundary? c) (or (char-whitespace? c) (equal? c #\null) (eof-object? c)))
(define (word-char? c) (or (char-alphabetic? c) (char-numeric? c)))
(define in (open-input-string (tx-strs (first txprs))))
(define out (open-output-string))
(define words-found
(let loop ([words-found 0] [last-c #\null] [last-c-in-word? #f])
(define c (read-char in))
(cond [(equal? words-found words-needed) words-found]
[(eof-object? c)
(cond [(positive? words-found) (if last-c-in-word? (+ 1 words-found) words-found)]
[else 0])]
[else
(define-values (write-this-char? new-word-count c-in-word?)
(cond
;; Spaces increment the word count if the previous character was part of,
;; or adjacent to, a word
[(and (char-whitespace? c) last-c-in-word?)
(values (if (equal? words-needed (+ 1 words-found)) #f #t) (+ 1 words-found) #f)]
;; Some punctuation survives if the previous or next char is part of a word
[(member c punc-allowed-in-word)
(cond [(or (word-char? last-c) (word-char? (peek-char in)))
(values #t words-found #t)]
[else (values #f words-found #f)])]
[(word-char? c)
(values #t words-found #t)]
;; If c is a non-whitespace non-allowed character that immediately follows a word,
;; do not write it out but count it as being part of the word.
[(and (not (word-char? c)) (not (char-whitespace? c)) last-c-in-word?)
(values #f words-found #t)]
[else (values #f words-found #f)]))
(cond [write-this-char? (write-char c out)])
(loop new-word-count c c-in-word?)])))
(define words (get-output-string out))
(cond [(equal? words-found words-needed) words]
[(equal? '() (rest txprs)) words]
[else (string-append words " " (first-words (rest txprs) (- words-needed words-found)))]))
(module+ test
(require rackunit)
(define txs-decimals
'((p "Four score and 7.8 years ago — our fathers brought forth on this continent etc etc")))
(define txs-punc+split-elems
'((p "“Stop!” she called.") (p "(She was never one to be silent.)")))
(define txs-dashes
'((p [[class "newthought"]] (span [[class "smallcaps"]] "One - and") " only one.")
(p "That was all she would allow.")))
(define txs-parens-commas
'((p "She counted (" (em "one, two") "— silently, eyes unblinking")))
(define txs-short
'((span "Not much here!")))
(check-equal? (first-words txs-decimals 5) "Four score and 7.8 years")
(check-equal? (first-words txs-punc+split-elems 5) "Stop! she called. She was")
(check-equal? (first-words txs-dashes 5) "One and only one. That")
(check-equal? (first-words txs-dashes 4) "One and only one.")
(check-equal? (first-words txs-parens-commas 5) "She counted one two silently")
(check-equal? (first-words txs-short 5) "Not much here!"))
;; Convert, e.g., "* thoroughly recanted" into (values "*" "thoroughly recanted")
(define (disposition-values str)
(cond [(string=? "" str) (values "" "")]
[else (let ([splut (string-split str)])
(values (car splut) (string-join (cdr splut))))]))
;; The format of a note’s ID is “HTML-driven” (used as an anchor link) but is included
;; here since it also serves as a primary key in the DB.
(define (build-note-id txpr)
(string-append (maybe-attr 'date (get-attrs txpr))
"_"
(uri-encode (maybe-attr 'author (get-attrs txpr) default-authorname))))
;; Extract the last disposition (if any), and the ID of the disposing note, out of a list of notes
(define (notes->last-disposition-values txprs)
(define (contains-disposition? tx) (attrs-have-key? tx 'disposition))
(define disp-notes (filter contains-disposition? txprs))
(cond [(not (empty? disp-notes))
(define latest-disposition-note (last disp-notes))
(values (attr-ref latest-disposition-note 'disposition)
(build-note-id latest-disposition-note))]
[else (values "" "")]))
;; ~~~ Convenience functions for YYYY-MM-DD date strings ~~~
;; These functions ignore everything after the first space in the input!
(define (ymd->dateformat ymd-string dateformat)
(~t (iso8601->date (car (string-split ymd-string))) dateformat))
(define (ymd->english ymd-string)
(ymd->dateformat ymd-string "MMMM d, yyyy"))
(module+ test
(check-equal? (ymd->english "2018-08-12") "August 12, 2018")
(check-equal? (ymd->dateformat "2018-08-12" "d MMM YYYY") "12 Aug 2018")
;; How we handle weird input
(check-equal? (ymd->english "2018-08-12 everything after 1st space ignored") "August 12, 2018")
(check-equal? (ymd->english "2018-08 omitting the day") "August 1, 2018")
(check-equal? (ymd->english "2018 omitting month and day") "January 1, 2018")
(check-equal? (ymd->dateformat "2018-08-12" "123") "123")
;; Stuff we just don't handle
(check-exn exn:gregor:parse? (lambda () (ymd->english "2018-xyz"))))