◊(Local Yarn Code "dust.rkt at [612b87e3]")

File dust.rkt artifact 4a46ea1e part of check-in 612b87e3


#lang racket/base

;; Copyright (c) 2018 Joel Dueck.
;;
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; A copy of the License is included with this source code, in the
;; file "LICENSE.txt".
;; You may also obtain a copy of the License at
;;
;;       http://www.apache.org/licenses/LICENSE-2.0
;;
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;
;; Author contact information:
;;   joel@jdueck.net
;;   https://joeldueck.com
;; -------------------------------------------------------------------------

(require pollen/core
         pollen/pagetree
         pollen/setup
         pollen/file
         net/uri-codec
         gregor
         txexpr
         racket/list
         racket/string)

;; Provides common helper functions used throughout the project

(provide maybe-meta     ; Select from (current-metas) or default value ("") if not available
         maybe-attr     ; Return an attribute’s value or a default ("") if not available
         here-output-path
         series-noun    ; Retrieve noun-singular from current 'series meta, or ""
         series-title   ; Retrieve title of series in current 'series meta, or ""
         series-pagenode
         make-tag-predicate
         tx-strs
         ymd->english
         ymd->dateformat
         default-authorname
         default-title
         articles-folder
         series-folder
         articles-pagetree
         series-pagetree
         first-words
         build-note-id
         notes->last-disposition-values
         disposition-values
         )

(define default-authorname "Joel Dueck")
(define series-folder "series")
(define articles-folder "articles")

(define (default-title body-txprs)
  (format "“~a…”" (first-words body-txprs 5)))

(define (maybe-meta m [missing ""])
  (or (select-from-metas m (current-metas)) missing))

;; Return the current output path, relative to (current-project-root)
;; Similar to the variable 'here' which is only accessible in Pollen templates,
;; except this is an actual path, not a string.
(define (here-output-path)
  (cond [(current-metas)
         (define-values (_ rel-path-parts)
           (drop-common-prefix (explode-path (current-project-root))
                               (explode-path (string->path (select-from-metas 'here-path (current-metas))))))
         (->output-path (apply build-path rel-path-parts))]
        [else (error "No metas are available")]))

;; Checks current-metas for a 'series meta and returns the pagenode of that series,
;; or '|| if no series is specified.
(define (series-pagenode)
  (define maybe-series (or (select-from-metas 'series (current-metas)) ""))
  (cond
    [(non-empty-string? maybe-series)
     (->pagenode (format "~a/~a.html" series-folder maybe-series))]
    [else '||]))

(define (series-noun)
  (define series-pnode (series-pagenode)) 
  (case series-pnode
    ['|| ""] ; no series specified
    [else (or (select-from-metas 'noun-singular series-pnode) "")]))

(define (series-title)
  (define series-pnode (series-pagenode)) 
  (case series-pnode
    ['|| ""] ; no series specified
    [else (or (select-from-metas 'title series-pnode) "")]))

;; ~~~ Project-wide Pagetrees ~~~

(define (include-in-pagetree folder extension)
  (define (matching-file? f)
    (string-suffix? f extension))
  (define (file->output-pagenode f)
    (string->symbol (format "~a/~a" folder (string-replace f extension ".html"))))
  (define folder-path (build-path (current-project-root) folder))
  (define file-strs (map path->string (directory-list folder-path)))
  (map file->output-pagenode (filter matching-file? file-strs)))

(define (articles-pagetree)
  `(root ,@(include-in-pagetree articles-folder ".poly.pm")))

(define (series-pagetree)
  `(root ,@(include-in-pagetree series-folder ".poly.pm")))

;; ~~~ Convenience functions for tagged x-expressions ~~~

(define (maybe-attr name attrs [missing ""])
  (define result (assoc name attrs))
  (cond
    [(pair? result) (cadr result)]
    [else missing]))

;; Returns a function will test if a txexpr's tag matches the given symbol.
(define (make-tag-predicate tagsym)
  (lambda (tx) (and (txexpr? tx) (equal? tagsym (get-tag tx)))))

(define (tx-strs xpr)
  (cond
    [(txexpr? xpr) (apply string-append (map tx-strs (get-elements xpr)))]
    [(string? xpr) xpr]
    [else ""]))

(module+ test
  (require rackunit)
  (define test-metas (hash 'name "Fiver" 'size "Small"))
  (define test-attrs '([name "Hazel"] [rank "Chief"]))

  (parameterize ([current-metas test-metas])
    (check-equal? (maybe-meta 'name) "Fiver") ; present meta
    (check-equal? (maybe-meta 'age) "")       ; missing meta
    (check-equal? (maybe-meta 'age 2) 2))      ; alternate default value
  
  (check-equal? (maybe-attr 'rank test-attrs) "Chief")
  (check-equal? (maybe-attr 'dingus test-attrs) "")
  (check-equal? (maybe-attr 'dingus test-attrs "zippy") "zippy"))

;; Return the first N words out of a list of txexprs. This function will unpack the strings out of
;; the elements of one txexpr at a time until it finds the requested number of words. It aims to be
;; both reliable and fast for any size of list you pass it, and smart about the punctuation it
;; allows through.
(define (first-words txprs words-needed)
  (define punc-allowed-in-word '(#\- #\' #\% #\$ #\‘ #\’ #\# #\& #\/ #\. #\!))
  
  (define (word-boundary? c) (or (char-whitespace? c) (equal? c #\null) (eof-object? c)))
  (define (word-char? c) (or (char-alphabetic? c) (char-numeric? c)))
  
  (define in (open-input-string (tx-strs (first txprs))))
  (define out (open-output-string))
  
  (define words-found
    (let loop ([words-found 0] [last-c #\null] [last-c-in-word? #f])
      (define c (read-char in))

      (cond [(equal? words-found words-needed) words-found]
            [(eof-object? c)
             (cond [(positive? words-found) (if last-c-in-word? (+ 1 words-found) words-found)]
                   [else 0])]
            [else
             (define-values (write-this-char? new-word-count c-in-word?)
               (cond
                 ;; Spaces increment the word count if the previous character was part of,
                 ;; or adjacent to, a word
                 [(and (char-whitespace? c) last-c-in-word?)
                  (values (if (equal? words-needed (+ 1 words-found)) #f #t) (+ 1 words-found) #f)]
                 ;; Some punctuation survives if the previous or next char is part of a word
                 [(member c punc-allowed-in-word)
                  (cond [(or (word-char? last-c) (word-char? (peek-char in)))
                         (values #t words-found #t)]
                        [else (values #f words-found #f)])]
                 [(word-char? c)
                  (values #t words-found #t)]
                 ;; If c is a non-whitespace non-allowed character that immediately follows a word,
                 ;; do not write it out but count it as being part of the word.   
                 [(and (not (word-char? c)) (not (char-whitespace? c)) last-c-in-word?)
                  (values #f words-found #t)]
                 [else (values #f words-found #f)]))

             (cond [write-this-char? (write-char c out)])
             (loop new-word-count c c-in-word?)])))
  
  (define words (get-output-string out))
  (cond [(equal? words-found words-needed) words]
        [(equal? '() (rest txprs)) words]
        [else (string-append words " " (first-words (rest txprs) (- words-needed words-found)))]))

(module+ test
  (require rackunit)
  (define txs-decimals
    '((p "Four score and 7.8 years ago — our fathers brought forth on this continent etc etc")))
  (define txs-punc+split-elems
    '((p "“Stop!” she called.") (p "(She was never one to be silent.)")))
  (define txs-dashes
    '((p [[class "newthought"]] (span [[class "smallcaps"]] "One - and") " only one.")
      (p "That was all she would allow.")))
  (define txs-parens-commas
    '((p "She counted (" (em "one, two") "— silently, eyes unblinking")))
  (define txs-short
    '((span "Not much here!")))

  (check-equal? (first-words txs-decimals 5) "Four score and 7.8 years")
  (check-equal? (first-words txs-punc+split-elems 5) "Stop! she called. She was")
  (check-equal? (first-words txs-dashes 5) "One and only one. That")
  (check-equal? (first-words txs-dashes 4) "One and only one.")
  (check-equal? (first-words txs-parens-commas 5) "She counted one two silently")
  (check-equal? (first-words txs-short 5) "Not much here!"))
                
;; Convert, e.g., "* thoroughly recanted" into (values "*" "thoroughly recanted")
(define (disposition-values str)
  (cond [(string=? "" str) (values "" "")]
        [else (let ([splut (string-split str)])
                (values (car splut) (string-join (cdr splut))))]))

;; The format of a note’s ID is “HTML-driven” (used as an anchor link) but is included
;; here since it also serves as a primary key in the DB.
(define (build-note-id txpr)
  (string-append (maybe-attr 'date (get-attrs txpr))
                 "_"
                 (uri-encode (maybe-attr 'author (get-attrs txpr) default-authorname))))

;; Extract the last disposition (if any), and the ID of the disposing note, out of a list of notes
(define (notes->last-disposition-values txprs)
  (define (contains-disposition? tx) (attrs-have-key? tx 'disposition))
  (define disp-notes (filter contains-disposition? txprs))
  (cond [(not (empty? disp-notes))
         (define latest-disposition-note (last disp-notes))
         (values (attr-ref latest-disposition-note 'disposition)
                 (build-note-id latest-disposition-note))]
        [else (values "" "")]))
        
;; ~~~ Convenience functions for YYYY-MM-DD date strings ~~~

;; These functions ignore everything after the first space in the input!
(define (ymd->dateformat ymd-string dateformat)
  (~t (iso8601->date (car (string-split ymd-string))) dateformat))

(define (ymd->english ymd-string)
  (ymd->dateformat ymd-string "MMMM d, yyyy"))

(module+ test
  (check-equal? (ymd->english "2018-08-12") "August 12, 2018")
  (check-equal? (ymd->dateformat "2018-08-12" "d MMM YYYY") "12 Aug 2018")

  ;; How we handle weird input
  (check-equal? (ymd->english "2018-08-12 everything after 1st space ignored") "August 12, 2018")
  (check-equal? (ymd->english "2018-08 omitting the day") "August 1, 2018")
  (check-equal? (ymd->english "2018 omitting month and day") "January 1, 2018")
  (check-equal? (ymd->dateformat "2018-08-12" "123") "123")

  ;; Stuff we just don't handle
  (check-exn exn:gregor:parse? (lambda () (ymd->english "2018-xyz"))))