/
wmlgrammar.py
159 lines (130 loc) · 5.02 KB
/
wmlgrammar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
"""
wmlgrammar -- parses a given schema into a more usable form
"""
import collections
import re
# Frequency codes returned by parse_frequency() and stored in the `freq`
# attribute of Element / Attribute objects.  Numbered 1..4 in this order.
REQUIRED, OPTIONAL, REPEATED, FORBIDDEN = range(1, 5)
class Grammar(object):
    """Lookup tables for datatypes, elements and categories built from a schema.

    Attributes:
        datatypes: maps a datatype name to a compiled regular expression.
        elements: maps an element name to its Node.
        categories: maps a category to the list of Nodes that declare it.
    """

    def __init__(self, schema):
        """Build the tables from the first "schema" child of *schema*.

        Text entries directly under the schema node define datatypes (parsed
        by parse_match) and are added to, or override, the built-in ones.
        Every sub-node becomes a Node keyed by its name.

        Raises:
            KeyError: if a node names a parent that is not defined.
        """
        schema = schema.get_first("schema")
        # Raw strings so that the regex escapes are not read as (invalid)
        # string escapes.
        self.datatypes = {
            "boolean": re.compile(r"^(yes|no|true|false|on|off)$"),
            # "character" : re.compile("^.$"),
            "float": re.compile(r"^(\+|-)?[0-9]+(\.[0-9]*)?$"),
            "integer": re.compile(r"^(\+|-)?[0-9]+$"),
            "string": re.compile(r".*"),
            "tstring": re.compile(r".*"),
        }
        self.elements = {}
        self.categories = collections.defaultdict(list)
        for datatype in schema.get_all_text():
            self.datatypes[datatype.name] = parse_match(datatype.data)
        for element in schema.get_all_subs():
            node = Node(element, self.datatypes)
            self.elements[node.name] = node
        # Inheritance.  Parents are resolved before their children so that a
        # chain such as a:b, b:c gives `a` the members of both `b` and `c`
        # regardless of dictionary iteration order.
        for element in list(self.elements.values()):
            self._resolve_parent(element, frozenset())
        # categories
        for element in self.elements.values():
            if element.category:
                self.categories[element.category].append(element)

    def _resolve_parent(self, element, visiting):
        """Make *element* inherit from its parent, resolving the parent first.

        *visiting* holds the names of the nodes currently being resolved
        further up the call chain; it stops the recursion when the schema
        contains an inheritance cycle.
        """
        if not element.parent:
            return
        parent = self.elements[element.parent]
        visiting = visiting.union([element.name])
        if parent.name not in visiting:
            self._resolve_parent(parent, visiting)
        element.inherit(parent)

    def get_element(self, name):
        """Return the Node registered as *name*; raises KeyError if unknown."""
        return self.elements[name]

    def get_datatype(self, name):
        """Return the compiled pattern for datatype *name*; raises KeyError if unknown."""
        return self.datatypes[name]

    def get_category(self, name):
        """Return the list of Nodes in category *name*, or [] if there is none.

        Uses .get() so that asking for an unknown category does not create an
        entry in the defaultdict.
        """
        return self.categories.get(name, [])
class Node(object):
    """Grammar description of one tag: its allowed sub-elements and attributes.

    Attributes:
        name: tag name; for a schema name of the form "child:parent" this is
            the part before the first colon.
        parent: the part after the first colon, or None.  Reset to None by
            inherit().
        elements: set of Element objects.
        ext_elements: list reserved for [element] sub-nodes (never filled).
        attributes: set of Attribute objects.
        description, category: values read from a [description] sub-node,
            stored exactly as returned by get_text().
    """

    def __init__(self, schema, datatypes):
        """Read the node definition from *schema*.

        Text entries whose key starts with '_' become Elements, all other
        text entries become Attributes checked against *datatypes*.

        Raises:
            Exception: if a sub-node other than [element] or [description]
                is encountered.
        """
        self.name = schema.name
        self.elements = set()
        self.ext_elements = []  # Ugh, do we really want to do this?
        self.attributes = set()
        self.parent = None
        self.description = None
        self.category = None
        for item in schema.get_all_text():
            # startswith() rather than name[0] so an empty key cannot raise
            # IndexError.
            if item.name.startswith('_'):
                self.elements.add(Element(item))
            else:
                self.attributes.add(Attribute(item, datatypes))
        for item in schema.get_all_subs():
            if item.name == "element":
                # Call form of print: valid on both Python 2 and Python 3.
                print("[element] found in schema, not parsing yet")
                # self.ext_elements...
            elif item.name == "description":
                self.description = item.get_text("text")
                self.category = item.get_text("category")
            else:
                raise Exception("Unknown element [%s] encountered in grammar for [%s]" % (item.name, self.name))
        if ':' in self.name:
            self.name, self.parent = self.name.split(':', 1)

    def inherit(self, other):
        """Add the elements and attributes of *other* to this node.

        *other* must be the node named by self.parent; afterwards
        self.parent is cleared.
        """
        assert self.parent == other.name
        self.elements.update(other.elements)
        self.attributes.update(other.attributes)
        self.parent = None

    def get_attributes(self):
        """Return the set of Attribute objects."""
        return self.attributes

    def get_elements(self):
        """Return the set of Element objects."""
        return self.elements
class Element(object):
    """A sub-element allowed inside a node, identified by its name.

    Equality, hashing and ordering are all based on `name`, so a set holds
    at most one Element per name.  Objects that are not Elements never
    compare equal to one.
    """

    def __init__(self, schema):
        """Build the element from a text entry.

        schema.name is the key with its first character dropped;
        schema.data has the form "<frequency> <subname>".

        Raises:
            ValueError: if schema.data contains no space.
            Exception: if the frequency word is unknown (from parse_frequency).
        """
        parts = schema.data.split(" ", 1)
        if len(parts) != 2:
            raise ValueError("Malformed element definition %s=\"%s\"" % (schema.name, schema.data))
        first, second = parts
        self.name = schema.name[1:]
        self.freq = parse_frequency(first)
        self.subname = second

    def match(self, name):
        """Return True if *name* equals this element's name."""
        return self.name == name

    def __hash__(self):
        return hash(self.name)

    # Rich comparisons replace the former __cmp__, which Python 3 ignores and
    # which reported unrelated objects as equal (it returned False, i.e. 0).
    def __eq__(self, other):
        if not isinstance(other, Element):
            return NotImplemented
        return self.name == other.name

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __lt__(self, other):
        if not isinstance(other, Element):
            return NotImplemented
        return self.name < other.name
class ExtElement(Element):
    """Element whose tag name is tested against a regular expression.

    NOTE(review): the constructor never sets `name`, which the inherited
    hashing/comparison methods read -- confirm before storing instances in
    a set.
    """

    def __init__(self, schema):
        """Read the "match", "freq" and "name" entries of *schema*."""
        match_spec = schema.get_text("match").data
        self.re = parse_match(match_spec)
        freq_spec = schema.get_text("freq").data
        self.freq = parse_frequency(freq_spec)
        self.subname = schema.get_text("name").data

    def match(self, name):
        """Return True if the pattern matches at the start of *name*."""
        return True if self.re.match(name) else False
class Attribute(object):
    """An attribute (key) allowed inside a node, with a datatype for its value.

    Equality, hashing and ordering are all based on `name`, so a set holds
    at most one Attribute per name.  Objects that are not Attributes never
    compare equal to one.
    """

    def __init__(self, schema, datatypes):
        """Build the attribute from a text entry.

        schema.data has the form "<frequency> <datatype>"; the datatype must
        be a key of *datatypes*, whose value is kept as the validation
        pattern.

        Raises:
            ValueError: if schema.data contains no space.
            Exception: if the datatype or the frequency word is unknown.
        """
        parts = schema.data.split(" ", 1)
        if len(parts) != 2:
            raise ValueError("Malformed attribute definition %s=\"%s\"" % (schema.name, schema.data))
        first, second = parts
        if second not in datatypes:
            raise Exception("Unknown datatype '%s'" % second)
        self.name = schema.name
        self.freq = parse_frequency(first)
        self.type = second
        self.re = datatypes[second]

    def match(self, name):
        """Return True if *name* equals this attribute's name."""
        return self.name == name

    def validate(self, value):
        """Return True if the datatype pattern matches *value*."""
        return bool(self.re.match(value))

    def __hash__(self):
        return hash(self.name)

    # Rich comparisons replace the former __cmp__, which Python 3 ignores and
    # which reported unrelated objects as equal (it returned False, i.e. 0).
    def __eq__(self, other):
        if not isinstance(other, Attribute):
            return NotImplemented
        return self.name == other.name

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __lt__(self, other):
        if not isinstance(other, Attribute):
            return NotImplemented
        return self.name < other.name
def parse_frequency(string):
    """Translate a frequency keyword into its module-level constant.

    Accepts "required", "optional", "repeated" and "forbidden"; any other
    value raises Exception.
    """
    keywords = (
        ("required", REQUIRED),
        ("optional", OPTIONAL),
        ("repeated", REPEATED),
        ("forbidden", FORBIDDEN),
    )
    for keyword, constant in keywords:
        if string == keyword:
            return constant
    raise Exception("Unknown frequency '%s'" % string)
def parse_match(string):
    """Compile a datatype definition into a regular expression.

    *string* has the form "<matchtype> <matchtext>":
      - "re <pattern>" compiles <pattern> as given;
      - "enum a,b,c" compiles a pattern matching exactly one of the
        comma-separated values (the values are inserted unescaped).

    Raises:
        ValueError: if *string* contains no space.
        Exception: if <matchtype> is neither "re" nor "enum".
    """
    parts = string.split(" ", 1)
    if len(parts) != 2:
        raise ValueError("Malformed datatype definition \"%s\"" % string)
    matchtype, matchtext = parts
    if matchtype == "re":
        return re.compile(matchtext)
    if matchtype == "enum":
        return re.compile("^(" + matchtext.replace(',', '|') + ")$")
    # Only the definition text is available here, so report that; the
    # previous message formatted attributes of the builtin `type` and failed
    # with AttributeError before the intended error could be raised.
    raise Exception("Unknown datatype encountered in \"%s\": '%s'" % (string, matchtype))
# vim: tabstop=4: shiftwidth=4: expandtab: softtabstop=4: autoindent: