-
Notifications
You must be signed in to change notification settings - Fork 368
/
wiki.py
782 lines (660 loc) · 22 KB
/
wiki.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
# Copyright 2008, 2012-2019 Jaap Karssenberg <jaap.karssenberg@gmail.com>
'''This module handles parsing and dumping wiki text'''
import re
import logging
logger = logging.getLogger('zim.formats.wiki')
from zim.parser import Rule, fix_line_end, convert_space_to_tab
from zim.parser import Parser as RuleParser
from zim.parsing import url_encode, URL_ENCODE_DATA, \
escape_string, unescape_string, split_escaped_string
from zim.parsing import url_re as old_url_re
from zim.formats import *
from zim.formats.plain import Dumper as TextDumper
old_url_re = old_url_re.p
WIKI_FORMAT_VERSION = 'zim 0.7'
assert WIKI_FORMAT_VERSION != 'zim 0.26' # skip number for historic reasons
# History:
# - 0.7 support backarrow bullet
# - 0.6 new url parsing logic
# - 0.5 support nested formatting
# - 0.4 parser rewrite, specifically changed parsing indented blocks
# - 0.26 oldest format supported in python branch
# Module descriptor used by zim's format plugin framework to register
# this format; keys follow the convention shared by all zim.formats modules
info = {
	'name': 'wiki',					# internal format identifier
	'desc': 'Zim Wiki Format',		# human readable description
	'mimetype': 'text/x-zim-wiki',
	'extension': 'txt',				# file extension for pages in this format
	'native': True,					# this is zim's native page format
	'import': True,					# format can be used for import
	'export': True,					# format can be used for export
	'usebase': True,
}
bullet_pattern = '(?:[\\*\u2022]|\\[[ \\*x><]\\]|\\d+\\.|[a-zA-Z]\\.)[\\ \\t]+'
	# bullets can be '*' or 0x2022 for normal items
	# and '[ ]', '[*]', '[x]', '[>]' or [<]' for checkbox items
	# and '1.', '10.', or 'a.' for numbered items (but not 'aa.')

bullet_line_re = re.compile(r'^(\t*)(%s)(.*\n)$' % bullet_pattern)
	# matches list item: prefix, bullet, text

number_bullet_re = re.compile(r'^(\d+|[a-zA-Z])\.$')
	# matches numbered bullets like "10." or "a."
	# NOTE: raw string — "\d" in a plain string is an invalid escape
	# sequence and raises a DeprecationWarning on modern python


def check_number_bullet(bullet):
	'''If bullet is a numbered bullet this returns the number or letter,
	C{None} otherwise
	'''
	m = number_bullet_re.match(bullet)
	if m:
		return m.group(1)
	else:
		return None
param_re = re.compile('([\w-]+)=("(?:[^"]|"{2})*"|\S*)')
# matches parameter list for objects
# allow name="foo bar" and name=Foo
empty_lines_re = re.compile(r'((?:^[ \t]*\n)+)', re.M | re.U)
# match multiple empty lines
unindented_line_re = re.compile('^\S', re.M)
# match any unindented line
def _remove_indent(text, indent):
return re.sub('(?m)^' + indent, '', text)
# Specify "(?m)" instead of re.M since "flags" keyword is not
# supported in python 2.6
# NOTE: we follow rules of GFM spec, except:
# - we allow any URL scheme
# - we add a file URI match
# For GFM Markdown parser, remove these exceptions
#
# File paths cannot contain '\', '/', ':', '*', '?', '"', '<', '>', '|'
# These are valid URL / path separators: / \ : ? |
# So restrict matching " < > and also '
url_re = re.compile(
	r'\b(?P<url>'
		r'(www\.|https?://|\w+://)'			# autolink & autourl prefix
		r'(?P<domain>([\w\-]+\.)+[\w\-]+)'	# 2 or more domain sections
		r'[^\s<]*'							# any non-space char except "<"
	r')|(?P<email>'
		r'(mailto:)?'
		r'[\w\.\-_+]+@'						# email prefix
		r'([\w\-_]+\.)+[\w\-_]+'			# email domain
	r')|(?P<fileuri>'
		r'file:/+'
		r'[^\s"<>\']+'
	r')', re.U
)
# NOTE: raw strings — the plain strings used before contained invalid
# escape sequences like "\w" and "\." which warn on modern python

url_trailing_punctuation = ('?', '!', '.', ',', ':', '*', '_', '~')
	# trailing punctuation that is stripped from a matched url per GFM


def is_url(text):
	'''Matches url_re and number of closing brackets matches
	See L{https://github.github.com/gfm/#autolinks-extension-}
	@param text: text to match as url
	@returns: C{True} if C{text} is a valid url according to GFM rules
	'''
	url = match_url(text)
	return url == text # No trailing punctuation or ")" excluded


def match_url(text):
	'''Match regex and count number of closing brackets
	See L{https://github.github.com/gfm/#autolinks-extension-}
	@param text: text to match as url
	@returns: the url or None
	'''
	m = url_re.match(text)
	if m:
		url = m.group(0)
		if m.lastgroup == 'email':
			# Do not allow end in "-" or "_", strip trailing "."
			while url:
				if url[-1] == '.':
					url = url[:-1]
				elif url[-1] in ('-', '_'):
					return None
				else:
					break
			return url or None

		# continue processing regular URL or file URI
		if m.lastgroup == 'url':
			domain = m.group('domain').split('.')
			if '_' in domain[-1] or '_' in domain[-2]:
				# Last two domain sections cannot contain "_"
				return None
	else:
		return None

	# Strip trailing punctuation, unbalanced ")" and html entities
	while url:
		if url[-1] in url_trailing_punctuation \
			or (url[-1] == ')' and url.count(')') > url.count('(')):
				url = url[:-1]
		elif url[-1] == ';':
			m = re.search(r'&\w+;$', url) # drop a whole entity like "&amp;"
			if m:
				ref = m.group(0)
				url = url[:-len(ref)]
			else:
				url = url[:-1]
		else:
			return url
	else:
		# loop ran out — everything was stripped
		return None
class WikiParser(object):
	# This parser uses 3 levels of rules. The top level splits up
	# paragraphs, verbatim paragraphs, images and objects.
	# The second level further splits paragraphs in lists and indented
	# blocks. The third level does the inline formatting for all
	# text.

	# Map bullet / checkbox markers to the parse tree constants
	BULLETS = {
		'[ ]': UNCHECKED_BOX,
		'[x]': XCHECKED_BOX,
		'[*]': CHECKED_BOX,
		'[>]': MIGRATED_BOX,
		'[<]': TRANSMIGRATED_BOX,
		'*': BULLET,
	}

	def __init__(self, backward_indented_blocks=False, backward_url_parsing=False):
		'''Constructor
		@param backward_indented_blocks: if C{True} parse indented
		paragraphs as verbatim blocks (for old format versions)
		@param backward_url_parsing: if C{True} use the old url regex
		(for format versions before 0.6)
		'''
		self.backward_indented_blocks = backward_indented_blocks
		self.backward_url_parsing = backward_url_parsing
		self.inline_parser = self._init_inline_parse()
		self.list_and_indent_parser = self._init_intermediate_parser()
		self.block_parser = self._init_block_parser()

	def __call__(self, builder, text):
		'''Parse C{text} and emit events on C{builder}'''
		builder.start(FORMATTEDTEXT)
		self.block_parser(builder, text)
		builder.end(FORMATTEDTEXT)

	def _init_inline_parse(self):
		# Rules for inline formatting, links and tags
		my_url_re = old_url_re if self.backward_url_parsing else url_re

		# "descent" resolves lazily, so it is safe to reference the
		# attribute before it is assigned below
		descent = lambda *a: self.nested_inline_parser_below_link(*a)
		self.nested_inline_parser_below_link = (
			Rule(TAG, r'(?<!\S)@\w+', process=self.parse_tag)
			| Rule(EMPHASIS, r'//(?!/)(.*?)(?<!:)//', descent=descent) # no ':' at the end (ex: 'http://')
			| Rule(STRONG, r'\*\*(?!\*)(.*?)\*\*', descent=descent)
			| Rule(MARK, r'__(?!_)(.*?)__', descent=descent)
			| Rule(SUBSCRIPT, r'_\{(?!~)(.+?)\}', descent=descent)
			| Rule(SUPERSCRIPT, r'\^\{(?!~)(.+?)\}', descent=descent)
			| Rule(STRIKE, r'~~(?!~)(.+?)~~', descent=descent)
			| Rule(VERBATIM, r"''(?!')(.+?)''")
		)

		descent = lambda *a: self.inline_parser(*a)
		return (
			Rule(LINK, my_url_re, process=self.parse_url)
			| Rule(LINK, r'\[\[(?!\[)(.*?\]*)\]\]', process=self.parse_link)
			| Rule(IMAGE, r'\{\{(?!\{)(.*?)\}\}', process=self.parse_image)
			| Rule(TAG, r'(?<!\S)@\w+', process=self.parse_tag)
			| Rule(EMPHASIS, r'//(?!/)(.*?)(?<!:)//', descent=descent) # no ':' at the end (ex: 'http://')
			| Rule(STRONG, r'\*\*(?!\*)(.*?)\*\*', descent=descent)
			| Rule(MARK, r'__(?!_)(.*?)__', descent=descent)
			| Rule(SUBSCRIPT, r'_\{(?!~)(.+?)\}', descent=descent)
			| Rule(SUPERSCRIPT, r'\^\{(?!~)(.+?)\}', descent=descent)
			| Rule(STRIKE, r'~~(?!~)(.+?)~~', descent=descent)
			| Rule(VERBATIM, r"''(?!')(.+?)''")
		)

	def _init_intermediate_parser(self):
		# Intermediate level, breaks up lists and indented blocks
		# TODO: deprecate this by taking lists out of the para
		#       and make a new para for each indented block
		p = RuleParser(
			Rule(
				'X-Bullet-List',
				r'''(
					^ %s .* \n					# Line starting with bullet
					(?:
						^ \t* %s .* \n			# Line with same or more indent and bullet
					)*							# .. repeat
				)''' % (bullet_pattern, bullet_pattern),
				process=self.parse_list
			),
			Rule(
				'X-Indented-Bullet-List',
				r'''(
					^(?P<list_indent>\t+) %s .* \n			# Line with indent and bullet
					(?:
						^(?P=list_indent) \t* %s .* \n		# Line with same or more indent and bullet
					)*											# .. repeat
				)''' % (bullet_pattern, bullet_pattern),
				process=self.parse_list
			),
			Rule(
				'X-Indented-Block',
				r'''(
					^(?P<block_indent>\t+) .* \n			# Line with indent
					(?:
						^(?P=block_indent) (?!\t|%s) .* \n	# Line with _same_ indent, no bullet
					)*											# .. repeat
				)''' % bullet_pattern,
				process=self.parse_indent
			),
		)
		p.process_unmatched = self.inline_parser
		return p

	def _init_block_parser(self):
		# Top level parser, to break up block level items
		p = RuleParser(
			Rule(VERBATIM_BLOCK, r'''
				^(?P<pre_indent>\t*) \'\'\' \s*?	# 3 "'"
				( (?:^.*\n)*? )						# multi-line text
				^(?P=pre_indent) \'\'\' \s*? \n		# another 3 "'" with matching indent
				''',
				process=self.parse_pre
			),
			Rule(OBJECT, r'''
				^(?P<obj_indent>\t*) \{\{\{ \s*? (\S+:.*\n)	# "{{{ object_type: attrib=..."
				( (?:^.*\n)*? )								# multi-line body
				^(?P=obj_indent) \}\}\} \s*? \n				# "}}}" with matching indent
				''',
				process=self.parse_object
			),
			Rule(HEADING,
				r'^( ==+ [\ \t]+ \S.*? ) [\ \t]* =* \n',	# "==== heading ===="
				process=self.parse_heading
			),
			# standard table format
			Rule(TABLE, r'''
				^(\|.*\|) \s*? \n					# starting and ending with |
				^( (?:\| [ \|\-:]+ \| \s*? \n)? )	# column align
				( (?:^\|.*\| \s*? \n)+ )			# multi-lines: starting and ending with |
				''',
				process=self.parse_table
			),
			# line format
			Rule(LINE, r'(?<=\n)-{5,}(?=\n)', process=self.parse_line)	# \n----\n
		)
		p.process_unmatched = self.parse_para
		return p

	def parse_heading(self, builder, text):
		'''Parse heading and determine its level'''
		assert text.startswith('=')
		for i, c in enumerate(text):
			if c != '=':
				break

		level = 7 - min(6, i)
			# == is level 5
			# === is level 4
			# ...
			# ======= is level 1
		text = text[i:].lstrip() + '\n'
		builder.start(HEADING, {'level': level})
		self.inline_parser(builder, text)
		builder.end(HEADING)

	@staticmethod
	def parse_pre(builder, indent, text):
		'''Verbatim block with indenting'''
		if indent:
			text = _remove_indent(text, indent)
			attrib = {'indent': len(indent)}
		else:
			attrib = None

		builder.append(VERBATIM_BLOCK, attrib, text)

	def parse_object(self, builder, indent, header, body):
		'''Custom object'''
		otype, param = header.split(':', 1)
		otype = otype.strip().lower()

		if otype == 'table':
			# Special case to ensure backward compatibility for versions where
			# tables could be stored as objects
			if param.strip() != '':
				# logger.warn() is a deprecated alias, use warning()
				logger.warning('Table object had unexpected parameters: %s', param.strip())
			lines = body.splitlines(True)
			headerrow = lines[0]
			alignstyle = lines[1]
			body = ''.join(lines[2:])
			try:
				return self.parse_table(builder, headerrow, alignstyle, body)
			except Exception:
				# fall through and store as a plain object instead
				logger.exception('Could not parse table object')

		self._parse_object(builder, indent, otype, param, body)

	def _parse_object(self, builder, indent, otype, param, body):
		'''Store a custom object as an OBJECT element with its
		parameters parsed into the attrib dict
		'''
		attrib = {}
		for match in param_re.finditer(param):
			key = match.group(1).lower()
			value = match.group(2)
			if value.startswith('"') and len(value) > 1: # Quoted string
				value = value[1:-1].replace('""', '"')
			attrib[key] = value

		# Defined after parsing head, so these attrib can not be overruled
		# accidentally
		### FIXME FIXME FIXME - need to separate two types of attrib ###
		attrib['type'] = otype
		if indent:
			body = _remove_indent(body, indent)
			attrib['indent'] = len(indent)
		builder.append(OBJECT, attrib, body)

	def check_multi_attribute(self, attrib, key, default, list_length):
		'''
		Correct multi-attributes, so they do fit with column length of table
		:param attrib: key-value store
		:param key: key to select of attribute
		:param default: default value for one list entry
		:param list_length: required length of selected attribute
		:return: attribute-value as list of different options
		'''
		if attrib and key in attrib and attrib[key]:
			values = attrib[key].split(',')
		else:
			values = []

		# pad or truncate to exactly list_length entries
		while len(values) > list_length:
			values.pop()
		while len(values) < list_length:
			values.append(default)
		return ','.join(values)

	def parse_table(self, builder, headerline, alignstyle, body):
		'''Parse a table: header row, alignment row and body rows'''
		headerrow = split_escaped_string(headerline.strip().strip('|'), '|')
		rows = [
			split_escaped_string(line.strip().strip('|'), '|')
			for line in body.split('\n')[:-1]
		]
		n_cols = max(len(headerrow), max(len(bodyrow) for bodyrow in rows))

		# alignment row like "|:---|---:|" determines column alignment
		aligns = []
		for celltext in alignstyle.strip().strip('|').split('|'):
			celltext = celltext.strip()
			if celltext.startswith(':') and celltext.endswith(':'):
				alignment = 'center'
			elif celltext.startswith(':'):
				alignment = 'left'
			elif celltext.endswith(':'):
				alignment = 'right'
			else:
				alignment = 'normal'
			aligns.append(alignment)
		while len(aligns) < n_cols:
			aligns.append('normal')

		# collect wrap settings from first table row
		headers = []
		wraps = []
		for celltext in headerrow:
			if celltext.rstrip().endswith('<'):
				celltext = celltext.rstrip().rstrip('<')
				wraps.append(1)
			else:
				wraps.append(0)
			headers.append(celltext)
		while len(headers) < n_cols:
			headers.append('')
			wraps.append(0)

		attrib = {'aligns': ','.join(aligns), 'wraps': ','.join(map(str, wraps))}
		builder.start(TABLE, attrib)

		builder.start(HEADROW)
		for celltext in headers:
			celltext = unescape_string(celltext.strip()) or ' ' # must contain at least one character
			builder.append(HEADDATA, {}, celltext)
		builder.end(HEADROW)

		for bodyrow in rows:
			while len(bodyrow) < n_cols:
				bodyrow.append('')
			builder.start(TABLEROW)
			for celltext in bodyrow:
				builder.start(TABLEDATA)
				celltext = unescape_string(celltext.strip()) or ' ' # must contain at least one character
				self.inline_parser(builder, celltext)
				builder.end(TABLEDATA)
			builder.end(TABLEROW)

		builder.end(TABLE)

	def parse_para(self, builder, text):
		'''Split a text into paragraphs and empty lines'''
		if text.isspace():
			builder.text(text)
		else:
			for block in empty_lines_re.split(text):
				if not block: # empty string due to split
					pass
				elif block.isspace():
					builder.text(block)
				elif self.backward_indented_blocks \
				and not unindented_line_re.search(block):
					# Before zim 0.29 all indented paragraphs were
					# verbatim.
					builder.append(VERBATIM_BLOCK, None, block)
				else:
					block = convert_space_to_tab(block)
					builder.start(PARAGRAPH)
					self.list_and_indent_parser(builder, block)
					builder.end(PARAGRAPH)

	def parse_list(self, builder, text, indent=None):
		'''Parse lists into items and recurse to get inline formatting
		per list item
		'''
		if indent:
			text = _remove_indent(text, indent)
			attrib = {'indent': len(indent)}
		else:
			attrib = None

		lines = text.splitlines(True)
		self.parse_list_lines(builder, lines, 0, attrib)

	def parse_list_lines(self, builder, lines, level, attrib=None):
		'''Parse lines of a list at a given nesting C{level},
		recursing for deeper indented sub-lists. Consumes items
		from C{lines} as they are processed.
		'''
		listtype = None
		first = True
		while lines:
			line = lines[0]
			m = bullet_line_re.match(line)
			assert m, 'Line does not match a list item: >>%s<<' % line
			prefix, bullet, text = m.groups()
			bullet = bullet.rstrip()

			if first:
				# determine list type from the first bullet seen
				number = check_number_bullet(bullet)
				if number:
					listtype = NUMBEREDLIST
					if not attrib:
						attrib = {}
					attrib['start'] = number
				else:
					listtype = BULLETLIST
				builder.start(listtype, attrib)
				first = False

			mylevel = len(prefix)
			if mylevel > level:
				self.parse_list_lines(builder, lines, level + 1) # recurs
			elif mylevel < level:
				builder.end(listtype)
				return
			else:
				if listtype == NUMBEREDLIST:
					if bullet in self.BULLETS:
						# close list, switch to bullet list at same level
						builder.end(listtype)
						return self.parse_list_lines(builder, lines, level) # recurs
					else:
						attrib = None
				else: # BULLETLIST
					if bullet in self.BULLETS:
						attrib = {'bullet': self.BULLETS[bullet]}
					elif number_bullet_re.match(bullet):
						# close list, switch to numbered list at same level
						builder.end(listtype)
						return self.parse_list_lines(builder, lines, level) # recurs
					else:
						attrib = {'bullet': BULLET}
				builder.start(LISTITEM, attrib)
				self.inline_parser(builder, text)
				builder.end(LISTITEM)
				lines.pop(0)

		builder.end(listtype)

	def parse_indent(self, builder, text, indent):
		'''Parse indented blocks and turn them into 'div' elements'''
		text = _remove_indent(text, indent)
		builder.start(BLOCK, {'indent': len(indent)})
		self.inline_parser(builder, text)
		builder.end(BLOCK)

	def parse_link(self, builder, text):
		'''Parse a "[[href|text]]" link'''
		text = text.strip('|') # old bug producing "[[|link]]", or "[[link|]]" or "[[||]]"
		if not text or text.isspace():
			return

		href = None
		if '|' in text:
			href, text = text.split('|', 1)
			text = text.strip('|') # stuff like "[[foo||bar]]"

		if text.endswith(']'):
			#穷 greedy match of "]*" may have eaten into following text,
			# give back unbalanced "]" chars to the parser
			delta = text.count(']') - text.count('[')
			if delta > 0:
				self.inline_parser.backup_parser_offset(delta)
				text = text[:-delta]

		if href is None:
			builder.append(LINK, {'href': text}, text)
		else:
			builder.start(LINK, {'href': href})
			self.nested_inline_parser_below_link(builder, text)
			builder.end(LINK)

	@staticmethod
	def parse_image(builder, text):
		'''Parse a "{{src|alt}}" image'''
		if '|' in text:
			url, text = text.split('|', 1)
		else:
			url, text = text, None

		attrib = ParserClass.parse_image_url(url)
		if text:
			attrib['alt'] = text

		if attrib.get('type'):
			# Backward compatibility of image generators < zim 0.70
			attrib['type'] = 'image+' + attrib['type']
			builder.append(OBJECT, attrib)
		else:
			builder.append(IMAGE, attrib)

	def parse_url(self, builder, *a):
		'''Process a url match; for the new style parsing the match is
		re-checked with L{match_url()} and excess chars are given back
		to the parser
		'''
		text = a[0]
		if self.backward_url_parsing:
			builder.append(LINK, {'href': text}, text)
		else:
			url = match_url(text)
			if url is None:
				self.inline_parser.backup_parser_offset(len(text) - 1)
			elif url != text:
				self.inline_parser.backup_parser_offset(len(text) - len(url))
			builder.append(LINK, {'href': url}, url)

	@staticmethod
	def parse_tag(builder, text):
		'''Parse a "@tag" occurrence'''
		builder.append(TAG, {'name': text[1:]}, text)

	@staticmethod
	def parse_line(builder, text):
		'''Parse a horizontal line ("-----")'''
		builder.append(LINE)
wikiparser = WikiParser() #: singleton instance
# NOTE: this singleton handles the current format version only;
# backward compatible parser instances are constructed on demand

# FIXME FIXME we are redefining Parser here !
class Parser(ParserClass):
	'''Parser for wiki formatted text; dispatches to a L{WikiParser}
	instance matching the format version of the input
	'''

	def __init__(self, version=WIKI_FORMAT_VERSION):
		self.version = version

	def parse(self, input, partial=False, file_input=False):
		'''Parse wiki text into a parse tree
		@param input: wiki text as a string or an iterable of lines
		@param partial: if C{True} the input is a fragment and line
		ends are not normalized
		@param file_input: if C{True} the input starts with meta
		headers, which are parsed and copied to the tree
		@returns: a parse tree
		'''
		if not isinstance(input, str):
			input = ''.join(input)

		if not partial:
			input = fix_line_end(input)

		meta, version = None, False
		if file_input:
			input, meta = parse_header_lines(input)
			version = meta.get('Wiki-Format')

		# Support backward compatibility - see history notes WIKI_FORMAT_VERSION
		effective_version = version or self.version
		if effective_version in ('zim 0.6', 'zim 0.7'):
			parser_impl = wikiparser # current format - re-use singleton
		elif effective_version in ('zim 0.4', 'zim 0.5'):
			parser_impl = WikiParser(backward_url_parsing=True)
		else:
			parser_impl = WikiParser(backward_indented_blocks=True, backward_url_parsing=True)

		builder = ParseTreeBuilder(partial=partial)
		parser_impl(builder, input)
		tree = builder.get_parsetree()

		if meta is not None:
			# Copy headers to the tree, skipping those only interesting
			# for the parser itself. "Modification-Date" is dropped as
			# well because it causes conflicts when merging branches
			# with version control; the mtime from the filesystem is
			# used instead, so a header here would never be updated.
			for key, value in list(meta.items()):
				if key not in ('Content-Type', 'Wiki-Format', 'Modification-Date'):
					tree.meta[key] = value

		return tree
class Dumper(TextDumper):
	'''Dumper that serializes a parse tree back to wiki formatted text'''

	# Reverse mapping of WikiParser.BULLETS
	BULLETS = {
		UNCHECKED_BOX: '[ ]',
		XCHECKED_BOX: '[x]',
		CHECKED_BOX: '[*]',
		MIGRATED_BOX: '[>]',
		TRANSMIGRATED_BOX: '[<]',
		BULLET: '*',
	}

	# (start, end) markup pairs for inline formats
	TAGS = {
		EMPHASIS: ('//', '//'),
		STRONG: ('**', '**'),
		MARK: ('__', '__'),
		STRIKE: ('~~', '~~'),
		VERBATIM: ("''", "''"),
		TAG: ('', ''), # No additional annotation (apart from the visible @)
		SUBSCRIPT: ('_{', '}'),
		SUPERSCRIPT: ('^{', '}'),
	}

	def dump(self, tree, file_output=False):
		'''Serialize C{tree} to a list of text fragments
		@param file_output: if C{True} meta headers are prepended
		'''
		# If file_output=True we add meta headers to the output
		# would be nicer to handle this via a template, but works for now
		if file_output:
			header = (
				('Content-Type', 'text/x-zim-wiki'),
				('Wiki-Format', WIKI_FORMAT_VERSION),
			)
			return [dump_header_lines(header, getattr(tree, 'meta', {})), '\n'] \
						+ TextDumper.dump(self, tree)
		else:
			return TextDumper.dump(self, tree)

	def dump_pre(self, tag, attrib, strings):
		# Indent and wrap with "'''" lines
		strings.insert(0, "'''\n")
		strings.append("'''\n")
		strings = self.dump_indent(tag, attrib, strings)
		return strings

	def dump_h(self, tag, attrib, strings):
		# Wrap line with number of "=="
		# level 5 dumps as "==", level 1 dumps as "======"
		level = int(attrib['level'])
		if level < 1:
			level = 1
		elif level > 5:
			level = 5
		tag = '=' * (7 - level)
		strings.insert(0, tag + ' ')
		strings.append(' ' + tag)
		return strings

	def dump_link(self, tag, attrib, strings=None):
		# Dumps as bare url, "[[href]]" or "[[href|text]]"
		assert 'href' in attrib, \
			'BUG: link misses href: %s "%s"' % (attrib, strings)
		href = attrib['href']
		if not strings or href == ''.join(strings):
			if is_url(href):
				return (href,) # no markup needed
			else:
				return ('[[', href, ']]')
		else:
			return ('[[', href, '|') + tuple(strings) + (']]',)

	def dump_img(self, tag, attrib, strings=None):
		# Dumps as "{{src?key=value&...|alt}}"; extra attribs are
		# url-encoded into a query string appended to src
		src = attrib['src'] or ''
		alt = attrib.get('alt')
		opts = []
		items = sorted(attrib.items()) # sorted to make output predictable
		for k, v in items:
			if k in ('src', 'alt') or k.startswith('_'):
				continue
			elif v: # skip None, "" and 0
				data = url_encode(str(v), mode=URL_ENCODE_DATA)
				opts.append('%s=%s' % (k, data))
		if opts:
			src += '?%s' % '&'.join(opts)

		if alt:
			return ('{{', src, '|', alt, '}}')
		else:
			return('{{', src, '}}')

		# TODO use text for caption (with full recursion)

	def dump_object_fallback(self, tag, attrib, strings=None):
		# Dumps as a "{{{type: key="value" ...\n body }}}" block
		assert "type" in attrib, "Undefined type of object"

		opts = []
		for key, value in sorted(list(attrib.items())):
			# TODO: sorted to make order predictable for testing - prefer use of OrderedDict
			if key in ('type', 'indent') or value is None:
				continue
			# double quotes are escaped by doubling them
			opts.append(' %s="%s"' % (key, str(value).replace('"', '""')))

		if not strings:
			strings = []
		return ['{{{', attrib['type'], ':'] + opts + ['\n'] + strings + ['}}}\n']

		# TODO put content in attrib, use text for caption (with full recursion)
		# See img

	def dump_table(self, tag, attrib, strings):
		# "strings" is a list of rows, each a list of cell texts;
		# the first row is the header
		rows = strings
		n = len(rows[0])
		assert all(len(r) == n for r in rows), rows

		aligns, wraps = TableParser.get_options(attrib)
		maxwidths = TableParser.width2dim(rows)
		lines = [
			TableParser.headline(rows[0], maxwidths, aligns, wraps) + '\n',
			TableParser.headsep(maxwidths, aligns, x='|', y='-') + '\n'
		] + [
			TableParser.rowline(row, maxwidths, aligns) + '\n' for row in rows[1:]
		]
		return lines

	def dump_td(self, tag, attrib, strings):
		# "|" inside a cell must be escaped to not break the row
		text = ''.join(strings) if strings else ''
		return [escape_string(text, '|')]

	dump_th = dump_td