|
1 """An implementation of the Zephyr Abstract Syntax Definition Language. |
|
2 |
|
3 See http://asdl.sourceforge.net/ and |
|
4 http://www.cs.princeton.edu/~danwang/Papers/dsl97/dsl97-abstract.html. |
|
5 |
|
6 Only supports top level module decl, not view. I'm guessing that view |
|
7 is intended to support the browser and I'm not interested in the |
|
8 browser. |
|
9 |
|
10 Changes for Python: Add support for module versions |
|
11 """ |
|
12 |
|
13 #__metaclass__ = type |
|
14 |
|
15 import os |
|
16 import traceback |
|
17 |
|
18 import spark |
|
19 |
|
class Token:
    """A lexical token: its kind plus the line it was found on.

    ``type`` is the token's kind (for punctuation, the matched text
    itself) and ``lineno`` is the 1-based source line.
    """

    # spark seems to dispatch in the parser based on a token's
    # type attribute
    def __init__(self, type, lineno):
        self.type, self.lineno = type, lineno

    def __str__(self):
        """Return the token's type string."""
        return self.type

    def __repr__(self):
        """Return the same text as ``str()`` (subclass overrides included)."""
        return str(self)
|
32 |
|
class Id(Token):
    """An identifier token; ``value`` holds the identifier text."""

    def __init__(self, value, lineno):
        # The kind is fixed at 'Id'; the shared fields are filled in by
        # the base-class initialiser.
        Token.__init__(self, 'Id', lineno)
        self.value = value

    def __str__(self):
        """Return the identifier text rather than the token kind."""
        return self.value
|
41 |
|
class String(Token):
    """A string-literal token; ``value`` holds the text as scanned."""

    def __init__(self, value, lineno):
        # The kind is fixed at 'String'; the shared fields are filled in
        # by the base-class initialiser.
        Token.__init__(self, 'String', lineno)
        self.value = value
|
47 |
|
class ASDLSyntaxError(Exception):
    """Raised when ASDL input cannot be parsed.

    Derives from ``Exception`` so that it can be raised and caught on
    every Python version (a plain class cannot be raised on Python 3).

    Attributes:
        lineno: 1-based line number the error refers to.
        token:  the offending token, or None when ``msg`` is given.
        msg:    explicit error text, or None to report ``token``.
    """

    def __init__(self, lineno, token=None, msg=None):
        Exception.__init__(self, lineno, token, msg)
        self.lineno = lineno
        self.token = token
        self.msg = msg

    def __str__(self):
        """Return ``msg`` (or a generic message naming the token) plus the line."""
        if self.msg is None:
            return "Error at '%s', line %d" % (self.token, self.lineno)
        else:
            return "%s, line %d" % (self.msg, self.lineno)
|
60 |
|
class ASDLScanner(spark.GenericScanner, object):
    """Tokenizer for ASDL source text, built on spark.GenericScanner.

    NOTE: the string literal that opens each ``t_*`` method is a regular
    expression, not documentation -- it appears to be how the spark
    scanner learns which text the method handles -- so those strings
    must not be edited or replaced with prose.
    """

    def tokenize(self, input):
        """Scan *input* and return the list of tokens collected.

        Resets the token list and the line counter, so one scanner
        instance can be reused for several inputs.
        """
        self.rv = []
        self.lineno = 1
        super(ASDLScanner, self).tokenize(input)
        return self.rv

    def t_id(self, s):
        r"[\w\.]+"
        # XXX doesn't distinguish upper vs. lower, which is
        # significant for ASDL.
        self.rv.append(Id(s, self.lineno))

    def t_string(self, s):
        r'"[^"]*"'
        # The surrounding double quotes are kept in the token value.
        self.rv.append(String(s, self.lineno))

    def t_xxx(self, s): # not sure what this production means
        r"<="
        self.rv.append(Token(s, self.lineno))

    def t_punctuation(self, s):
        r"[\{\}\*\=\|\(\)\,\?\:]"
        # The punctuation character itself becomes the token type.
        self.rv.append(Token(s, self.lineno))

    def t_comment(self, s):
        r"\-\-[^\n]*"
        # Comments run from "--" to end of line and produce no token.
        pass

    def t_newline(self, s):
        r"\n"
        # No token is emitted; only the line counter advances.
        self.lineno += 1

    def t_whitespace(self, s):
        r"[ \t]+"
        pass

    def t_default(self, s):
        r" . +"
        # Call-style raise and repr() replace the Python-2-only
        # "raise E, msg" and backtick forms; the message is unchanged.
        raise ValueError("unmatched input: %s" % repr(s))
|
102 |
|
class ASDLParser(spark.GenericParser, object):
    """Parser for ASDL module definitions, built on spark.GenericParser.

    NOTE: the string literal that opens each ``p_*`` method is a grammar
    production, not documentation, so those strings must not be edited
    or replaced with prose.  Each ``p_*`` method receives the values
    matched by the right-hand side of its production as one sequence
    (``args``) and returns the value for the left-hand side.
    """

    def __init__(self):
        # "module" is the start symbol of the grammar.
        super(ASDLParser, self).__init__("module")

    def typestring(self, tok):
        """Return the token's type attribute."""
        return tok.type

    def error(self, tok):
        """Raise ASDLSyntaxError for the token and its line."""
        raise ASDLSyntaxError(tok.lineno, tok)

    def p_module_0(self, args):
        " module ::= Id Id version { } "
        module, name, version, _0, _1 = args
        if module.value != "module":
            raise ASDLSyntaxError(module.lineno,
                                  msg="expected 'module', found %s" % module)
        # An empty body is an empty definition list; passing None here
        # made Module.__init__ fail when it iterated the definitions.
        return Module(name, [], version)

    def p_module(self, args):
        " module ::= Id Id version { definitions } "
        module, name, version, _0, definitions, _1 = args
        if module.value != "module":
            raise ASDLSyntaxError(module.lineno,
                                  msg="expected 'module', found %s" % module)
        return Module(name, definitions, version)

    def p_version(self, args):
        "version ::= Id String"
        version, V = args
        if version.value != "version":
            # The format string used to end in a bare "%", which raised
            # ValueError instead of reporting the syntax error.
            raise ASDLSyntaxError(version.lineno,
                                  msg="expected 'version', found %s" % version)
        return V

    def p_definition_0(self, args):
        " definitions ::= definition "
        (definition,) = args
        return definition

    def p_definition_1(self, args):
        " definitions ::= definition definitions "
        # Both operands are lists (p_definition returns a one-element
        # list), so concatenation keeps source order.
        first, rest = args
        return first + rest

    def p_definition(self, args):
        " definition ::= Id = type "
        id, _, type = args
        return [Type(id, type)]

    def p_type_0(self, args):
        " type ::= product "
        (product,) = args
        return product

    def p_type_1(self, args):
        " type ::= sum "
        (sum,) = args
        return Sum(sum)

    def p_type_2(self, args):
        " type ::= sum Id ( fields ) "
        sum, id, _0, attributes, _1 = args
        if id.value != "attributes":
            raise ASDLSyntaxError(id.lineno,
                                  msg="expected attributes, found %s" % id)
        if attributes:
            # The fields productions build the list back to front.
            attributes.reverse()
        return Sum(sum, attributes)

    def p_product(self, args):
        " product ::= ( fields ) "
        _0, fields, _1 = args
        # XXX can't I just construct things in the right order?
        fields.reverse()
        return Product(fields)

    def p_sum_0(self, args):
        " sum ::= constructor "
        (constructor,) = args
        return [constructor]

    def p_sum_1(self, args):
        " sum ::= constructor | sum "
        constructor, _, sum = args
        return [constructor] + sum

    # NOTE(review): this duplicates p_sum_1 (same production, same
    # action).  It is kept because how spark treats a duplicated rule is
    # not visible from this file -- confirm before removing.
    def p_sum_2(self, args):
        " sum ::= constructor | sum "
        constructor, _, sum = args
        return [constructor] + sum

    def p_constructor_0(self, args):
        " constructor ::= Id "
        (id,) = args
        return Constructor(id)

    def p_constructor_1(self, args):
        " constructor ::= Id ( fields ) "
        id, _0, fields, _1 = args
        # XXX can't I just construct things in the right order?
        fields.reverse()
        return Constructor(id, fields)

    def p_fields_0(self, args):
        " fields ::= field "
        (field,) = args
        return [field]

    def p_fields_1(self, args):
        " fields ::= field , fields "
        # Appending the leading field after the rest yields a reversed
        # list; the callers that consume "fields" reverse it again.
        field, _, fields = args
        return fields + [field]

    def p_field_0(self, args):
        " field ::= Id "
        (type,) = args
        return Field(type)

    def p_field_1(self, args):
        " field ::= Id Id "
        type, name = args
        return Field(type, name)

    def p_field_2(self, args):
        " field ::= Id * Id "
        type, _, name = args
        return Field(type, name, seq=1)

    def p_field_3(self, args):
        " field ::= Id ? Id "
        type, _, name = args
        return Field(type, name, opt=1)

    def p_field_4(self, args):
        " field ::= Id * "
        type, _ = args
        return Field(type, seq=1)

    def p_field_5(self, args):
        " field ::= Id ? "
        type, _ = args
        return Field(type, opt=1)
|
222 |
|
# Type names that may be used in fields without being defined in the
# module; check() does not report these as undefined.
builtin_types = ("identifier", "string", "int", "bool", "object")
|
224 |
|
225 # below is a collection of classes to capture the AST of an AST :-) |
|
226 # not sure if any of the methods are useful yet, but I'm adding them |
|
227 # piecemeal as they seem helpful |
|
228 |
|
class AST:
    """Marker base class shared by all ASDL syntax-tree node classes."""
|
231 |
|
class Module(AST):
    """Root node: a named, versioned collection of type definitions.

    Attributes:
        name:    the module name.
        dfns:    list of definition nodes (always a list, possibly empty).
        version: the module version.
        types:   dict mapping each definition's ``name.value`` to its
                 ``value``.
    """

    def __init__(self, name, dfns, version):
        self.name = name
        # A caller may pass None for a module without definitions;
        # treat that as an empty list so the loop below (and any later
        # len()/iteration over self.dfns) does not fail.
        if dfns is None:
            dfns = []
        self.dfns = dfns
        self.version = version
        self.types = {} # maps type name to value (from dfns)
        for dfn in dfns:
            self.types[dfn.name.value] = dfn.value

    def __repr__(self):
        return "Module(%s, %s)" % (self.name, self.dfns)
|
243 |
|
class Type(AST):
    """A single definition: pairs a ``name`` with the ``value`` it names."""

    def __init__(self, name, value):
        self.name, self.value = name, value

    def __repr__(self):
        shown = (self.name, self.value)
        return "Type(%s, %s)" % shown
|
251 |
|
class Constructor(AST):
    """A constructor node: a ``name`` plus a list of ``fields``."""

    def __init__(self, name, fields=None):
        self.name = name
        # A missing (or empty) field list becomes a fresh empty list.
        if not fields:
            fields = []
        self.fields = fields

    def __repr__(self):
        shown = (self.name, self.fields)
        return "Constructor(%s, %s)" % shown
|
259 |
|
class Field(AST):
    """A field node: a ``type``, an optional ``name``, and two flags.

    ``seq`` and ``opt`` are stored exactly as given (0 by default).
    """

    def __init__(self, type, name=None, seq=0, opt=0):
        self.type, self.name = type, name
        self.seq, self.opt = seq, opt

    def __repr__(self):
        # seq takes precedence: when both flags are set only seq is shown.
        if self.seq:
            suffix = ", seq=1"
        elif self.opt:
            suffix = ", opt=1"
        else:
            suffix = ""
        if self.name is not None:
            return "Field(%s, %s%s)" % (self.type, self.name, suffix)
        return "Field(%s%s)" % (self.type, suffix)
|
278 |
|
class Sum(AST):
    """A sum type: a list of alternatives plus optional attributes.

    Attributes:
        types:      the list of alternatives.
        attributes: list of attribute fields; always a list, empty when
                    none were given.
    """

    def __init__(self, types, attributes=None):
        self.types = types
        self.attributes = attributes or []

    def __repr__(self):
        # self.attributes is never None (see __init__), so the short
        # form has to test for an empty list; the former "is None" test
        # made this branch unreachable.
        if not self.attributes:
            return "Sum(%s)" % self.types
        else:
            return "Sum(%s, %s)" % (self.types, self.attributes)
|
289 |
|
class Product(AST):
    """A product type: holds the list of ``fields`` it was built with."""

    def __init__(self, fields):
        self.fields = fields

    def __repr__(self):
        return "Product(%s)" % (self.fields,)
|
296 |
|
class VisitorBase(object):
    """Base class for visitors that dispatch on the node's class name.

    ``visit(node)`` calls the method named ``"visit" + <class name>``.
    With a true ``skip``, node classes without such a method are
    silently ignored; otherwise the lookup raises AttributeError.
    """

    def __init__(self, skip=0):
        self.cache = {}    # node class -> bound visit method (or None)
        self.skip = skip

    def visit(self, object, *args):
        """Dispatch *object* to its visit method, passing *args* along.

        Any exception raised by the visit method is reported on stdout
        with a traceback and then terminates the process with status 1.
        """
        meth = self._dispatch(object)
        if meth is None:
            return
        try:
            meth(object, *args)
        except Exception as err:
            print("Error visiting %s" % repr(object))
            print(err)
            traceback.print_exc()
            # XXX hack
            # os._exit() skips normal interpreter cleanup, so flush the
            # visitor's output file (if it has one) by hand first.
            if hasattr(self, 'file'):
                self.file.flush()
            os._exit(1)

    def _dispatch(self, object):
        """Return the visit method for *object*'s class, or None if skipped."""
        assert isinstance(object, AST), repr(object)
        klass = object.__class__
        meth = self.cache.get(klass)
        if meth is None:
            methname = "visit" + klass.__name__
            if self.skip:
                meth = getattr(self, methname, None)
            else:
                meth = getattr(self, methname)
            self.cache[klass] = meth
        return meth
|
330 |
|
class Check(VisitorBase):
    """Visitor that collects consistency information about a module.

    Attributes:
        cons:   constructor name -> name of the type that defines it.
        errors: number of problems found so far.
        types:  field type name -> list of names that use it.
    """

    def __init__(self):
        # skip=1: node classes without a visit method are ignored.
        super(Check, self).__init__(skip=1)
        self.cons = {}
        self.errors = 0
        self.types = {}

    def visitModule(self, mod):
        for dfn in mod.dfns:
            self.visit(dfn)

    def visitType(self, type):
        # The type's name is handed down so nested nodes can report it.
        self.visit(type.value, str(type.name))

    def visitSum(self, sum, name):
        for t in sum.types:
            self.visit(t, name)

    def visitConstructor(self, cons, name):
        """Record the constructor; report and count a redefinition."""
        key = str(cons.name)
        conflict = self.cons.get(key)
        if conflict is None:
            self.cons[key] = name
        else:
            print("Redefinition of constructor %s" % key)
            print("Defined in %s and %s" % (conflict, name))
            self.errors += 1
        for f in cons.fields:
            self.visit(f, key)

    def visitField(self, field, name):
        """Record that *name* uses the field's type."""
        key = str(field.type)
        l = self.types.setdefault(key, [])
        l.append(name)

    def visitProduct(self, prod, name):
        for f in prod.fields:
            self.visit(f, name)
|
370 |
|
def check(mod):
    """Run the Check visitor over *mod* and report undefined types.

    A field type is undefined when it is neither defined in the module
    nor listed in ``builtin_types``.  Problems are printed to stdout.

    Returns a true value when no errors were found, false otherwise.
    """
    v = Check()
    v.visit(mod)

    for t in v.types:
        # "in" replaces dict.has_key(), which no longer exists on Python 3.
        if t not in mod.types and t not in builtin_types:
            v.errors += 1
            uses = ", ".join(v.types[t])
            print("Undefined type %s, used in %s" % (t, uses))

    return not v.errors
|
382 |
|
def parse(file):
    """Parse the ASDL file at path *file*.

    Returns whatever the parser produces for the token stream.  When an
    ASDLSyntaxError is raised, the error and the offending source line
    are printed to stdout and None is returned instead.
    """
    scanner = ASDLScanner()
    parser = ASDLParser()

    # Close the file deterministically rather than leaving the handle
    # to the garbage collector.
    f = open(file)
    try:
        buf = f.read()
    finally:
        f.close()

    tokens = scanner.tokenize(buf)
    try:
        return parser.parse(tokens)
    except ASDLSyntaxError as err:
        print(err)
        lines = buf.split("\n")
        print(lines[err.lineno - 1]) # lines starts at 0, files at 1
    return None
|
395 |
|
# Command-line driver: parse and check every file named on the command
# line, or every tests/*.asdl file when no arguments are given.
if __name__ == "__main__":
    import glob
    import sys

    if len(sys.argv) > 1:
        files = sys.argv[1:]
    else:
        testdir = "tests"
        files = glob.glob(testdir + "/*.asdl")

    for file in files:
        print(file)
        mod = parse(file)
        if mod is None:
            # parse() has already printed the syntax error; there is no
            # module to inspect, so move on to the next file.
            continue
        print("module %s" % mod.name)
        print("%d definitions" % len(mod.dfns))
        if not check(mod):
            print("Check failed")
        else:
            for dfn in mod.dfns:
                # Definition nodes have .name and .value but no .type
                # attribute, so print the node itself.
                print(repr(dfn))