@@ -34,11 +34,13 @@ (provide maybe-meta ; Select from (current-metas) or default value ("") if not available maybe-attr ; Return an attribute’s value or a default ("") if not available series-noun ; Retrieve noun-singular from current 'series meta, or "" series-title ; Retrieve title of series in current 'series meta, or "" + series-pagenode attr-present? ; Test if an attribute is present + make-tag-predicate tx-strs ymd->english ymd->dateformat default-authorname default-title @@ -54,12 +56,12 @@ (define default-authorname "Joel Dueck") (define series-path "series") (define articles-path "articles") -(define (default-title date) - (format "Entry of ~a" (ymd->dateformat date "d MMM YYYY"))) +(define (default-title body-txprs) + (format "“~a…”" (first-words body-txprs 5))) (define (maybe-meta m [missing ""]) (or (select-from-metas m (current-metas)) missing)) ;; Checks current-metas for a 'series meta and returns the pagenode of that series, @@ -110,23 +112,20 @@ (define result (assoc name attrs)) (cond [(pair? result) (cadr result)] [else missing])) +;; Returns a function will test if a txexpr's tag matches the given symbol. +(define (make-tag-predicate tagsym) + (lambda (tx) (and (txexpr? tx) (equal? tagsym (get-tag tx))))) + (define (tx-strs xpr) (cond [(txexpr? xpr) (apply string-append (map tx-strs (get-elements xpr)))] [(string? xpr) xpr] [else ""])) -(define (first-words str n) - (define trunc - (apply string-append - (add-between (take (string-split str) n) " "))) - ;; Remove trailing punctuation (commas, etc.) - (regexp-replace #px"\\W+$" trunc "")) - (module+ test (require rackunit) (define test-metas (hash 'name "Fiver" 'size "Small")) (define test-attrs '([name "Hazel"] [rank "Chief"])) @@ -138,10 +137,80 @@ (check-equal? (attr-present? 'name test-attrs) #t) (check-equal? (attr-present? 'dingus test-attrs) #f) (check-equal? (maybe-attr 'rank test-attrs) "Chief") (check-equal? (maybe-attr 'dingus test-attrs) "") (check-equal? (maybe-attr 'dingus test-attrs "zippy") "zippy")) + +;; Return the first N words out of a list of txexprs. This function will unpack the strings out of +;; the elements of one txexpr at a time until it finds the requested number of words. It aims to be +;; both reliable and fast for any size of list you pass it, and smart about the punctuation it +;; allows through. +(define (first-words txprs words-needed) + (define punc-allowed-in-word '(#\- #\' #\% #\$ #\‘ #\’ #\# #\& #\/ #\. #\!)) + + (define (word-boundary? c) (or (char-whitespace? c) (equal? c #\null) (eof-object? c))) + (define (word-char? c) (or (char-alphabetic? c) (char-numeric? c))) + + (define in (open-input-string (tx-strs (first txprs)))) + (define out (open-output-string)) + + (define words-found + (let loop ([words-found 0] [last-c #\null] [last-c-in-word? #f]) + (define c (read-char in)) + + (cond [(equal? words-found words-needed) words-found] + [(eof-object? c) + (cond [(positive? words-found) (if last-c-in-word? (+ 1 words-found) words-found)] + [else 0])] + [else + (define-values (write-this-char? new-word-count c-in-word?) + (cond + ;; Spaces increment the word count if the previous character was part of, + ;; or adjacent to, a word + [(and (char-whitespace? c) last-c-in-word?) + (values (if (equal? words-needed (+ 1 words-found)) #f #t) (+ 1 words-found) #f)] + ;; Some punctuation survives if the previous or next char is part of a word + [(member c punc-allowed-in-word) + (cond [(or (word-char? last-c) (word-char? (peek-char in))) + (values #t words-found #t)] + [else (values #f words-found #f)])] + [(word-char? c) + (values #t words-found #t)] + ;; If c is a non-whitespace non-allowed character that immediately follows a word, + ;; do not write it out but count it as being part of the word. + [(and (not (word-char? c)) (not (char-whitespace? c)) last-c-in-word?) + (values #f words-found #t)] + [else (values #f words-found #f)])) + + (cond [write-this-char? (write-char c out)]) + (loop new-word-count c c-in-word?)]))) + + (define words (get-output-string out)) + (cond [(equal? words-found words-needed) words] + [(equal? '() (rest txprs)) words] + [else (string-append words " " (first-words (rest txprs) (- words-needed words-found)))])) + +(module+ test + (require rackunit) + (define txs-decimals + '((p "Four score and 7.8 years ago — our fathers brought forth on this continent etc etc"))) + (define txs-punc+split-elems + '((p "“Stop!” she called.") (p "(She was never one to be silent.)"))) + (define txs-dashes + '((p [[class "newthought"]] (span [[class "smallcaps"]] "One - and") " only one.") + (p "That was all she would allow."))) + (define txs-parens-commas + '((p "She counted (" (em "one, two") "— silently, eyes unblinking"))) + (define txs-short + '((span "Not much here!"))) + + (check-equal? (first-words txs-decimals 5) "Four score and 7.8 years") + (check-equal? (first-words txs-punc+split-elems 5) "Stop! she called. She was") + (check-equal? (first-words txs-dashes 5) "One and only one. That") + (check-equal? (first-words txs-dashes 4) "One and only one.") + (check-equal? (first-words txs-parens-commas 5) "She counted one two silently") + (check-equal? (first-words txs-short 5) "Not much here!")) ;; Convert, e.g., "* thoroughly recanted" into (values "*" "thoroughly recanted") (define (disposition-values str) (cond [(string=? "" str) (values "" "")] [else (let ([splut (string-split str)])