/
wmliterator.py
492 lines (451 loc) · 19.1 KB
/
wmliterator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
"""
wmliterator.py -- Python routines for navigating a Battle For Wesnoth WML tree
Author: Sapient (Patrick Parker), 2007
Purpose:
The WmlIterator class can be used to analyze and search the structure of WML
files non-invasively (i.e. preserving existing line structure), and its main
use is to determine when a transformation of deprecated content needs to take
place. (I wrote it was because wmllint was trying to do a lot of things with
regular expressions which really required a more algorithmic approach. Also,
wmllint was often inconsistent with correct handling of comments and values
inside strings.)
Limitations:
The WmlIterator does not attempt to expand macros, only to recognize them as
another level of nesting. Also, the handling of multiple assignment syntax
is somewhat limited (for similar reasons). Adding seamless support for these
would be ideal, but it presents a design challenge since the iteration is
supposed to be non-invasive. Thus, the current behavior is considered good
enough for now.
"""
import sys, re, copy
keyPattern = re.compile('(\w+)(,\s?\w+)*\s*=')
keySplit = re.compile(r'[=,\s]')
tagPattern = re.compile(r'(^|(?<![\w|}]))(\[/?\+?[a-z _]+\])')
macroOpenPattern = re.compile(r'(\{[^\s\}\{]*)')
macroClosePattern = re.compile(r'\}')
closeMacroType = 'end of macro'
silenceErrors = {}
def wmlfind(element, wmlItor):
"""Find a simple element from traversing a WML iterator"""
for itor in wmlItor.copy():
if element == itor.element:
return itor
return None
def wmlfindin(element, scopeElement, wmlItor):
"""Find an element inside a particular type of scope element"""
for itor in wmlItor.copy():
if element == itor.element:
if itor.scopes:
if scopeElement == itor.scopes[-1].element:
return itor
elif not scopeElement:
# allow searching in the empty scope
return itor
return None
def isDirective(elem):
"Identify things that shouldn't be indented."
if isinstance(elem, WmlIterator):
elem = elem.element
return elem.startswith(("#ifdef", "#ifndef", "#ifhave", "#ifnhave", "#ifver", "#ifnver", "#else", "#endif", "#define", "#enddef", "#undef"))
def isCloser(elem):
"Are we looking at a closing tag?"
if isinstance(elem, WmlIterator):
elem = elem.element
return type(elem) == type("") and elem.startswith("[/")
def isMacroCloser(elem):
"Are we looking at a macro closer?"
if isinstance(elem, WmlIterator):
elem = elem.element
return type(elem) == type("") and elem == closeMacroType
def isOpener(elem):
"Are we looking at an opening tag?"
if isinstance(elem, WmlIterator):
elem = elem.element
return type(elem) == type("") and elem.startswith("[") and not isCloser(elem)
def isExtender(elem):
"Are we looking at an extender tag?"
if isinstance(elem, WmlIterator):
elem = elem.element
return type(elem) == type("") and elem.startswith("[+")
def isMacroOpener(elem):
"Are we looking at a macro opener?"
if isinstance(elem, WmlIterator):
elem = elem.element
return type(elem) == type("") and elem.startswith("{")
def isAttribute(elem):
"Are we looking at an attribute (or attribute tuple)?"
if isinstance(elem, WmlIterator):
elem = elem.element
if type(elem) == type(()):
elem = elem[0]
return type(elem) == type("") and elem.endswith("=")
class WmlIterator(object):
"""Return an iterable WML navigation object.
Initialize with a list of lines or a file; if the the line list is
empty and the filename is specified, lines will be read from the file.
Note: if changes are made to lines while iterating, this may produce
unexpected results. In such case, seek() to the line number of a
scope behind where changes were made.
Important Attributes:
lines - this is an internal list of all the physical lines
scopes - this is an internal list of all open scopes (as iterators)
note: when retrieving an iterator from this list, always
use a copy to perform seek() or next(), and not the original
element - the wml tag, key, or macro name for this logical line
(in complex cases, this may be a tuple of elements...
see parseElements for list of possible values)
text - the exact text of this logical line, as it appears in the
original source, and ending with a newline
note: the logical line also includes multi-line quoted strings
span - the number of physical lines in this logical line:
always 1, unless text contains a multi-line quoted string
lineno - a zero-based line index marking where this text begins
"""
def __init__(self, lines=None, filename=None, begin=-1):
"Initialize a new WmlIterator."
self.fname = filename
if lines is None:
lines = []
if filename:
try:
ifp = open(self.fname)
lines = ifp.readlines()
ifp.close()
except Exception:
self.printError('error opening file')
self.lines = lines
self.reset()
self.seek(begin)
def parseQuotes(self, lines):
"""Return the line or multiline text if a quote spans multiple lines"""
text = lines[self.lineno]
span = 1
begincomment = text.find('#')
if begincomment < 0:
begincomment = None
beginquote = text[:begincomment].find('<<')
while beginquote >= 0:
endquote = -1
beginofend = beginquote+2
while endquote < 0:
endquote = text.find('>>', beginofend)
if endquote < 0:
if self.lineno + span >= len(lines):
self.printError('reached EOF due to unterminated string at line', self.lineno+1)
return text, span
beginofend = len(text)
text += lines[self.lineno + span]
span += 1
begincomment = text.find('#', endquote+2)
if begincomment < 0:
begincomment = None
beginquote = text[:begincomment].find('<<', endquote+2)
beginquote = text[:begincomment].find('"')
while beginquote >= 0:
endquote = -1
beginofend = beginquote+1
while endquote < 0:
endquote = text.find('"', beginofend)
if endquote < 0:
if self.lineno + span >= len(lines):
self.printError('reached EOF due to unterminated string at line', self.lineno+1)
return text, span
beginofend = len(text)
text += lines[self.lineno + span]
span += 1
begincomment = text.find('#', endquote+1)
if begincomment < 0:
begincomment = None
beginquote = text[:begincomment].find('"', endquote+1)
return text, span
def closeScope(self, scopes, closerElement):
"""Close the most recently opened scope. Return false if not enough scopes.
note: directives close all the way back to the last open directive
non-directives cannot close a directive and will no-op in that case."""
try:
if isDirective(closerElement):
while not isDirective(scopes.pop()):
pass
elif (closerElement==closeMacroType):
elem = ''
while not elem.startswith('{'):
closed = scopes.pop()
elem = closed
if isinstance(closed, WmlIterator):
elem = closed.element
if isDirective(elem):
self.printScopeError(closerElement)
scopes.append(closed) # to reduce additional errors (hopefully)
return True
elif not isDirective(scopes[-1]):
closed = scopes.pop()
elem = closed
if isinstance(closed, WmlIterator):
elem = closed.element
if (elem.startswith('{') and closerElement != closeMacroType):
scopes.append(closed)
elif (isOpener(elem) and closerElement != '[/'+elem[1:]
and '+'+closerElement != elem[1]+'[/'+elem[2:]):
self.printError('reached', closerElement, 'at line', self.lineno+1, 'before closing scope', elem)
scopes.append(closed) # to reduce additional errors (hopefully)
return True
except IndexError:
return False
def parseElements(self, text):
"""Remove any closed scopes, return a list of element names
and list of new unclosed scopes
Element Types:
tags: one of "[tag_name]" or "[/tag_name]"
[tag_name] - opens a scope
[/tag_name] - closes a scope
keys: either "key=" or ("key1=", "key2=") for multi-assignment
key= - does not affect the scope
key1,key2= - multi-assignment returns multiple elements
directives: one of "#ifdef", "#ifndef", "#ifhave", "#ifnhave", "#ifver", "#ifnver", "#else", "#endif", "#define", "#enddef"
#ifdef - opens a scope
#ifndef - opens a scope
#ifhave - opens a scope
#ifnhave - opens a scope
#ifver - opens a scope
#ifnver - opens a scope
#else - closes a scope, also opens a new scope
#endif - closes a scope
#define - opens a scope
#enddef - closes a scope
macro calls: "{MACRO_NAME}"
{MACRO_NAME - opens a scope
} - closes a scope
"""
elements = [] #(elementType, sortPos, scopeDelta)
# first remove any lua strings
beginquote = text.find('<<')
while beginquote >= 0:
endquote = text.find('>>', beginquote+2)
if endquote < 0:
text = text[:beginquote]
beginquote = -1 #terminate loop
else:
text = text[:beginquote] + text[endquote+2:]
beginquote = text.find('<<')
# remove any quoted strings
beginquote = text.find('"')
while beginquote >= 0:
endquote = text.find('"', beginquote+1)
if endquote < 0:
text = text[:beginquote]
beginquote = -1 #terminate loop
else:
text = text[:beginquote] + text[endquote+1:]
beginquote = text.find('"')
# next remove any comments
text = text.lstrip()
commentSearch = 1
if text.startswith('#ifdef'):
return (['#ifdef'],)*2
elif text.startswith('#ifndef'):
return (['#ifndef'],)*2
elif text.startswith('#ifhave'):
return (['#ifhave'],)*2
elif text.startswith('#ifnhave'):
return (['#ifnhave'],)*2
elif text.startswith('#ifver'):
return (['#ifver'],)*2
elif text.startswith('#ifnver'):
return (['#ifnver'],)*2
elif text.startswith('#else'):
if not self.closeScope(self.scopes, '#else'):
self.printScopeError('#else')
return (['#else'],)*2
elif text.startswith('#endif'):
if not self.closeScope(self.scopes, '#endif'):
self.printScopeError('#endif')
return ['#endif'], []
elif text.startswith('#define'):
return (['#define'],)*2
elif text.find('#enddef') >= 0:
elements.append(('#enddef', text.find('#enddef'), -1))
elif text.startswith('#po:') or text.startswith('# po:'):
elements.append(("#po", 0, 0))
else:
commentSearch = 0
begincomment = text.find('#', commentSearch)
if begincomment >= 0:
text = text[:begincomment]
#now find elements in a loop
for m in tagPattern.finditer(text):
delta = 1
if isCloser(m.group(2)):
delta = -1
elements.append((m.group(2), m.start(), delta))
for m in keyPattern.finditer(text):
for i, k in enumerate(keySplit.split(m.group(0))):
if k:
elements.append((k+'=', m.start()+i, 0))
for m in macroOpenPattern.finditer(text):
elements.append((m.group(1), m.start(), 1))
for m in macroClosePattern.finditer(text):
elements.append((closeMacroType, m.start(), -1))
#sort by start position
elements.sort(key=lambda x:x[1])
resultElements = []
openedScopes = []
for elem, sortPos, scopeDelta in elements:
while scopeDelta < 0:
if not(self.closeScope(openedScopes, elem)\
or self.closeScope(self.scopes, elem)):
self.printScopeError(elem)
scopeDelta += 1
while scopeDelta > 0:
openedScopes.append(elem)
scopeDelta -= 1
resultElements.append(elem)
return resultElements, openedScopes
def printScopeError(self, elementType):
"""Print out warning if a scope was unable to close"""
self.printError('attempt to close empty scope at', elementType, 'line', self.lineno+1)
def __iter__(self):
"""The magic iterator method"""
return self
def __cmp__(self, other):
"""Compare two iterators"""
return cmp((self.fname, self.lineno, self.element),
(other.fname, other.lineno, other.element))
def reset(self):
"""Reset any line tracking information to defaults"""
self.lineno = -1
self.scopes = []
self.nextScopes = []
self.text = ""
self.span = 1
self.element = ""
return self
def seek(self, lineno, clearEnd=True):
"""Move the iterator to a specific line number"""
if clearEnd:
self.endScope = None
if lineno < self.lineno:
for scope in reversed(self.scopes):
# if moving backwards, try to re-use a scope iterator
if scope.lineno <= lineno:
# copy the scope iterator's state to self
self.__dict__ = dict(scope.__dict__)
self.scopes = scope.scopes[:]
self.nextScopes = scope.nextScopes[:]
break
else:
# moving backwards past all scopes forces a reset
self.reset()
while self.lineno + self.span - 1 < lineno:
self.next()
return self
def ancestors(self):
"""Return a list of tags enclosing this location, outermost first."""
return tuple([x.element for x in self.scopes])
def hasNext(self):
"""Some loops may wish to check this method instead of calling next()
and handling StopIteration... note: inaccurate for ScopeIterators"""
return len(self.lines) > self.lineno + self.span
def copy(self):
"""Return a copy of this iterator"""
itor = copy.copy(self)
itor.scopes = self.scopes[:]
itor.nextScopes = self.nextScopes[:]
return itor
def __str__(self):
"""Return a pretty string representation"""
if self.lineno == -1:
return 'beginning of file'
loc = ' at line ' + str(self.lineno+1)
if self.element:
return str(self.element) + loc
if self.text.strip():
return 'text' + loc
return 'whitespace' + loc
def __repr__(self):
"""Return a very basic string representation"""
return 'WmlIterator<' + repr(self.element) +', line %d>'%(self.lineno+1)
def next(self):
"""Move the iterator to the next line number
note: May raise StopIteration"""
if not self.hasNext():
if self.scopes:
self.printError("reached EOF with open scopes", self.scopes)
raise StopIteration
self.lineno = self.lineno + self.span
self.text, self.span = self.parseQuotes(self.lines)
self.scopes.extend(self.nextScopes)
self.element, nextScopes = self.parseElements(self.text)
self.nextScopes = []
for elem in nextScopes:
# remember scopes by storing a copy of the iterator
copyItor = self.copy()
copyItor.element = elem
self.nextScopes.append(copyItor)
copyItor.nextScopes.append(copyItor)
if(len(self.element) == 1):
# currently we only wish to handle simple single assignment syntax
self.element = self.element[0]
if self.endScope is not None and not self.scopes.count(self.endScope):
raise StopIteration
return self
def isOpener(self):
return isOpener(self)
def isCloser(self):
return isCloser(self)
def isExtender(self):
return isExtender(self)
def isMacroOpener(self):
return isMacroOpener(self)
def isMacroCloser(self):
return isMacroCloser(self)
def isAttribute(self):
return isAttribute(self)
def iterScope(self):
"""Return an iterator for the current scope"""
if not self.scopes:
return WmlIterator(self.lines, self.fname)
scopeItor = self.scopes[-1].copy()
scopeItor.endScope = self.scopes[-1]
return scopeItor
def printError(nav, *misc):
"""Print error associated with a given file; avoid printing duplicates"""
if nav.fname:
silenceValue = ' '.join(map(str, misc))
if nav.fname not in silenceErrors:
print >>sys.stderr, nav.fname
silenceErrors[nav.fname] = set()
elif silenceValue in silenceErrors[nav.fname]:
return # do not print a duplicate error for this file
silenceErrors[nav.fname].add(silenceValue)
print >>sys.stderr, 'wmliterator:',
for item in misc:
print >>sys.stderr, item,
print >>sys.stderr #terminate line
if __name__ == '__main__':
"""Perform a test run on a file or directory"""
import os, glob
didSomething = False
flist = sys.argv[1:]
if not flist:
print 'Current directory is', os.getcwd()
flist = glob.glob(os.path.join(os.getcwd(), raw_input('Which file(s) would you like to test?\n')))
while flist:
fname = flist.pop()
if os.path.isdir(fname):
flist += glob.glob(fname + os.path.sep + '*')
continue
if not os.path.isfile(fname) or os.path.splitext(fname)[1] != '.cfg':
continue
print 'Reading', fname+'...'
didSomething = True
f = open(fname)
itor = WmlIterator(f.readlines())
for i in itor:
pass
f.close()
print itor.lineno + itor.span, 'lines read.'
if not didSomething:
print 'That is not a valid .cfg file'
if os.name == 'nt' and os.path.splitext(__file__)[0].endswith('wmliterator') and not sys.argv[1:]:
os.system('pause')
# wmliterator.py ends here