-
Notifications
You must be signed in to change notification settings - Fork 0
/
chicken-doc-html.scm
372 lines (347 loc) · 19.9 KB
/
chicken-doc-html.scm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
(module chicken-doc-html
(chicken-doc-sxml->html
tree->string quote-html
quote-identifier unquote-identifier definition->identifier signature->identifier)
(import scheme chicken)
(use (only sxml-transforms string->goodHTML SRV:send-reply)) ; temp
(use (only uri-generic uri-encode-string)) ; grr
(use matchable)
(use (only data-structures conc ->string string-intersperse string-translate))
(use (only ports with-output-to-string))
(use (only chicken-doc-admin man-filename->path))
(use colorize) ;yeah!
(use regex) (import irregex)
(use (only extras sprintf))
(use (only srfi-13 string-downcase))
(define (sxml-walk doc ss)
(let ((default-handler (cond ((assq '*default* ss) => cdr)
(else
(lambda (t b s) (error 'sxml-walk
"No default binding for" t)))))
(text-handler (cond ((assq '*text* ss) => cdr)
(else #f))))
(let loop ((doc doc))
(cond ((null? doc) '())
((pair? doc)
(let ((tag (car doc))
(body (cdr doc)))
(if (symbol? tag)
(let ((handler-cell (assq tag ss)))
(if handler-cell
((cdr handler-cell) tag body ss)
(default-handler tag body ss)))
(map loop doc))))
(else
(if text-handler
(text-handler '*text* doc ss)
doc))))))
(define (tree->string doc)
(with-output-to-string (lambda () (SRV:send-reply doc))))
(define (quote-html s)
(string->goodHTML s))
;; Like sxpath // *text*. Beware, if your tags have arguments that
;; shouldn't be considered text, they will still be extracted.
(define (text-content doc)
(tree->string
(sxml-walk doc `((*default* . ,(lambda (t b s) (sxml-walk b s)))
(*text* . ,(lambda (t b s) b))))))
;;; URI fragment (id=) handling for sections and definitions
;; Permitted characters in ID attributes in HTML < 5 are only A-Z a-z 0-9 : - _
;; even though URI fragments are much more liberal. For compatibility, we
;; "period-encode" all other chars.
(define +rx:%idfragment-escape+ (irregex "[^-_:A-Za-z0-9]"))
(define +rx:%idfragment-unescape+ (irregex "\\.([0-9a-fA-F][0-9a-fA-F])"))
;; Encode raw identifier text string so it is usable as an HTML 4 ID attribute
;; (and consequently, as a URI fragment).
(define (quote-identifier x) ; Not a good name; should prob. be encode-identifier
(irregex-replace/all
+rx:%idfragment-escape+ x
(lambda (m) (sprintf ".~x"
(char->integer
(string-ref (irregex-match-substring m 0) 0))))))
;; Decode period-encoded URI fragment (or ID attribute value).
;; Note that spaces were period-encoded, not converted to underscore,
;; so the transformation is reversible.
(define (unquote-identifier x)
(irregex-replace/all +rx:%idfragment-unescape+ x
(lambda (m) (string
(integer->char
(string->number (irregex-match-substring m 1)
16))))))
;; WARNING: Currently being used to both generate new ids for headers and
;; to figure out the id for an internal-link target. However the former may
;; distinuish duplicate IDs while the latter should ignore duplicates.
;; FIXME: Duplicate IDs will be generated for duplicate section or
;; definition names. A unique suffix is needed.
(define (section->identifier x)
(string-append "sec:"
(string-translate x #\space #\_)))
(define (definition->identifier x)
(string-append "def:" x))
(define (section->href x) ;; Convert section name to internal fragment href.
(string-append "#" (quote-identifier
(section->identifier x))))
(use (only svnwiki-sxml svnwiki-signature->identifier))
(define signature->identifier svnwiki-signature->identifier)
;;; HTML renderer
(define +rx:wiki-man-page+ (irregex '(: (? "http://wiki.call-cc.org")
(or "/man/4/"
"/manual/")
(submatch (+ any)))))
(define +rx:wiki-egg-page+ (irregex '(: (? "http://wiki.call-cc.org")
(or "/eggref/4/"
"/egg/")
(submatch (+ any)))))
(define (chicken-doc-sxml->html doc
path->href ; for internal links; make parameter?
def->href ; link to definition node
)
(tree->string
(let ((walk sxml-walk)
(drop-tag (lambda (t b s) '()))
(drop-tag-noisily (lambda (t b s) (warning "dropped" (cons t b)) '()))
(quote-text `(*text* . ,(lambda (t b s) (quote-html b)))))
(letrec ((block (lambda (tag)
(let ((open (conc "<" tag ">"))
(close (conc "</" tag ">")))
(lambda (t b s) (list open
(walk b s)
close)))))
(inline (lambda (tag)
(let ((open (conc "<" tag ">"))
(close (conc "</" tag ">")))
(lambda (t b s) (list open
(walk b inline-ss)
close)))))
(inline-ss `(
,quote-text
(*default* . ,drop-tag-noisily) ;; 500 error is annoying
(b . ,(inline "b"))
(i . ,(inline "i"))
(tt . ,(inline "tt"))
(sup . ,(inline "sup"))
(sub . ,(inline "sub"))
(small . ,(inline "small")) ;; questionable
(big . ,(inline "big")) ;; questionable
(img . ,drop-tag)
(link . ,(lambda (t b s)
(let ((link (lambda (href desc) ;; Caller must quote DESC.
(let ((href
;; svnwiki-sxml does not return int-link for
;; call-cc.org links, so we must check that here.
(cond
;; Wiki man page, link to corresponding man page
((string-match +rx:wiki-man-page+ href)
=> (lambda (m)
(cond ((man-filename->path (cadr m))
=> path->href)
(else href))))
;; Wiki egg page, link to node
((string-match +rx:wiki-egg-page+ href)
=> (lambda (m)
(path->href (list (cadr m)))))
(else href))))
`("<a href=\"" ,(quote-html href) "\">" ,desc "</a>")))))
(match b
((href desc)
(link href (walk desc inline-ss)))
((href)
(link href (quote-html href)))))))
(int-link
. ,(lambda (t b s)
(let ((ilink
(lambda (href desc) ;; Caller must quote DESC.
(let ((href
;; Usage of man-filename->path is barely tolerable.
;; Perhaps we should use the id cache.
(cond ((char=? (string-ref href 0)
#\#)
;; Assume #fragments target section names in this doc.
(section->href (substring href 1)))
;; Wiki man page, link to corresponding man page
((string-match +rx:wiki-man-page+ href)
=> (lambda (m)
(cond ((man-filename->path (cadr m))
=> path->href)
(else href))))
;; Wiki egg page, link to node
((string-match +rx:wiki-egg-page+ href)
=> (lambda (m)
(path->href (list (cadr m)))))
;; Unknown absolute path, link to wiki
((char=? (string-ref href 0)
#\/)
(string-append ; ???
"http://wiki.call-cc.org"
href))
;; Relative path, try man page. Wiki links to
;; current directory (/man) but we can't.
((man-filename->path href)
=> path->href)
;; Relative path, assume egg node.
(else
(path->href (list href)) ; !
))))
`("<a href=\"" ,(quote-html href) "\">" ,desc "</a>")))))
(match b
((href desc) (ilink href (walk desc inline-ss)))
((href) (ilink href (quote-html href)))))))))
)
(walk
doc
`(
(p . ,(inline "p"))
(def
. ,(lambda (t b def-ss)
`("<dl class=\"defsig\">"
,(match b
((('sig . sigs) . body)
`(,(map
(lambda (s)
(match s
((type sig . alist)
(let* ((defid (cond ((assq 'id alist) => cadr)
(else (signature->identifier sig type))))
(defid (and defid (->string defid))))
`("<dt class=\"defsig\""
,(if defid
`(" id=\""
,(quote-identifier
(definition->identifier defid))
#\")
'())
#\>
;; Link to underlying node.
,(if defid
`("<a href=" #\"
,(def->href defid)
#\" #\>)
'())
"<span class=\"sig\"><tt>"
,(quote-html sig) "</tt></span>"
,(if defid "</a>" '())
" "
"<span class=\"type\">"
,(quote-html (->string type))
"</span>"
"</dt>\n")))))
sigs)
"<dd class=\"defsig\">"
,(walk body def-ss)
"</dd>\n")))
"</dl>\n")))
(pre . ,(block "pre")) ; may need to quote contents
(ul . ,(lambda (t b ul-ss)
`("<ul>"
,(walk b `((li
. ,(lambda (t b s)
`("<li>"
,(walk b ul-ss)
"</li>\n")))))
"</ul>\n")))
(ol . ,(lambda (t b ol-ss)
`("<ol>"
,(walk b `((li
. ,(lambda (t b s)
`("<li>"
,(walk b ol-ss)
"</li>\n")))))
"</ol>\n")))
(dl . ,(lambda (t b dl-ss)
`("<dl>"
,(walk b `((dt . ,(lambda (t b s)
`("<dt>"
,(walk b inline-ss) ;?
"</dt>\n")))
(dd . ,(lambda (t b s)
`("<dd>"
,(walk b dl-ss)
"</dd>")))))
"</dl>\n")))
(tags . ,drop-tag)
(toc . ,drop-tag)
(section . ,(lambda (t b s)
(match b ((level title . body)
(let ((H (list
"h" (number->string level)))
(id (cond ((section->identifier
(text-content title))
=> quote-identifier)
(else #f))))
(list "<" H
(if id `(" id=\"" ,id "\"") '())
">"
"<a href=\"#" id "\">"
(walk title inline-ss)
"</a>"
"</" H ">"
(walk body s)))))))
(table . ,(lambda (t b table-ss)
`("<table>\n"
,(walk b `((tr . ,(lambda (t b s)
`("<tr>"
,(walk b
(let ((table-ss `((@ . ,drop-tag)
. ,table-ss)))
`((th . ,(lambda (t b s)
`("<th>"
,(walk b table-ss)
"</th>")))
(td . ,(lambda (t b s)
`("<td>"
,(walk b table-ss)
"</td>")))
(@ . ,drop-tag))))
"</tr>\n")))
(@ . ,drop-tag)))
"</table>\n")))
(highlight . ,(lambda (t b s)
(define (coloring-type? t)
(assq t (coloring-type-names)))
(match b ((lang . body)
(let ((lang (and lang (string->symbol
(string-downcase
(->string lang))))))
(if (and lang ;; lang #f not currently possible; reserved for future
(coloring-type? lang))
(list "<pre class=\"highlight\">"
(html-colorize lang
;; html-colorize quotes HTML; don't walk
(tree->string body))
"</pre>")
(list (if lang
(list "<!-- Unknown coloring type "
(quote-html (->string lang)) " -->\n")
'())
"<pre class=\"highlight\">"
(walk body s))))))))
;; convert example contents to `(pre ...) and re-walk it
;; FIXME: The html-parser will erroneously parse html tags
;; inside <expr> tags. Right now we drop them, but we
;; should either not parse them in the first place or
;; convert them back here (less nice). Furthermore the parser
;; may unrecoverably screw up the structure of examples, for
;; example if it contains an <h1> tag; therefore we drop unknown
;; tags to prevent a complete rendering error.
(examples
. ,(lambda (t b ex-ss)
(walk b `((*default* . ,drop-tag-noisily)
(example
. ,(lambda (t b s)
(walk `(pre
,(walk b
`((init . ,(lambda (t b s)
(list b "\n")))
(expr . ,(lambda (t b s)
(walk b `((*default*
. ,drop-tag-noisily)))))
(result . ,(lambda (t b s)
`("\n; Result: " ,b)))
(*default* . ,drop-tag-noisily))))
ex-ss)))))))
(blockquote . ,(block "blockquote"))
(hr . ,(lambda (t b s)
"<hr />"))
,@inline-ss
))))))
)