|
1 """Parse a Python module and describe its classes and methods. |
|
2 |
|
3 Parse enough of a Python file to recognize imports and class and |
|
4 method definitions, and to find out the superclasses of a class. |
|
5 |
|
6 The interface consists of a single function: |
|
7 readmodule_ex(module [, path]) |
|
8 where module is the name of a Python module, and path is an optional |
|
9 list of directories where the module is to be searched. If present, |
|
10 path is prepended to the system search path sys.path. The return |
|
11 value is a dictionary. The keys of the dictionary are the names of |
|
12 the classes and top-level functions defined in the module (including |
|
13 names brought in via the from XXX import YYY construct). The values are |
|
14 instances of Class and Function defined here. One special key/value pair |
|
15 is present for packages: the key '__path__' has a list as its value |
|
16 which contains the package search path. |
|
17 |
|
18 A class is described by the class Class in this module. Instances |
|
19 of this class have the following instance variables: |
|
20 module -- the module name |
|
21 name -- the name of the class |
|
22 super -- a list of super classes (Class instances) |
|
23 methods -- a dictionary of methods |
|
24 file -- the file in which the class was defined |
|
25 lineno -- the line in the file on which the class statement occurred |
|
26 The dictionary of methods uses the method names as keys and the line |
|
27 numbers on which the method was defined as values. |
|
28 If the name of a super class is not recognized, the corresponding |
|
29 entry in the list of super classes is not a class instance but a |
|
30 string giving the name of the super class. Since import statements |
|
31 are recognized and imported modules are scanned as well, this |
|
32 shouldn't happen often. |
|
33 |
|
34 A function is described by the class Function in this module. |
|
35 Instances of this class have the following instance variables: |
|
36 module -- the module name |
|
37 name -- the name of the function |
|
38 file -- the file in which the function was defined |
|
39 lineno -- the line in the file on which the def statement occurred |
|
40 """ |
|
41 |
|
42 import sys |
|
43 import imp |
|
44 import tokenize |
|
45 from token import NAME, DEDENT, OP |
|
46 from operator import itemgetter |
|
47 |
|
# Public names of this module.
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Cache of modules we've seen, keyed by full dotted module name; each
# value is the contents dictionary built for that module.
_modules = {}
|
51 |
|
52 # each Python class is represented by an instance of this class |
|
class Class:
    '''Description of one Python class found while scanning a module.

    Instance variables:
        module  -- name of the module the class was found in
        name    -- name of the class
        super   -- list of superclasses; an entry is a Class instance
                   when the base was recognized, otherwise the base's
                   name as a string
        methods -- dictionary mapping method name to the line number
                   of its def statement
        file    -- file in which the class was defined
        lineno  -- line on which the class statement occurred
    '''

    def __init__(self, module, name, super, file, lineno):
        # A missing base list is normalized to a fresh empty list so
        # that instances never share one.
        self.super = [] if super is None else super
        self.module, self.name = module, name
        self.file, self.lineno = file, lineno
        self.methods = {}

    def _addmethod(self, name, lineno):
        # Record (or overwrite) the line number at which NAME is defined.
        self.methods[name] = lineno
|
67 |
|
class Function:
    '''Description of one top-level Python function.

    Instance variables:
        module -- name of the module the function was found in
        name   -- name of the function
        file   -- file in which the function was defined
        lineno -- line on which the def statement occurred
    '''

    def __init__(self, module, name, file, lineno):
        self.module, self.name = module, name
        self.file, self.lineno = file, lineno
|
75 |
|
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Scan MODULE the same way readmodule_ex() does, but return a new
    dictionary holding only the entries whose value is a Class
    instance (functions and the '__path__' entry are left out).
    '''
    contents = _readmodule(module, path or [])
    return dict((name, obj) for name, obj in contents.items()
                if isinstance(obj, Class))
|
87 |
|
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary describing it.

    Search for MODULE in PATH and sys.path, read and parse the
    module, and return the dictionary built for it: one entry for
    each class and top-level function found, plus a '__path__' entry
    when the module is a package.
    '''
    # A missing or empty PATH means "search sys.path only".
    search_path = path if path else []
    return _readmodule(module, search_path)
|
96 |
|
def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return a dictionary mapping names to Class and Function objects
    (plus a '__path__' entry for a package).  The dictionary is stored
    in the _modules cache before the source is scanned, so a module
    that is reached again while it is still being scanned gets the
    partially filled dictionary instead of being scanned again.

    Raise ImportError if the module cannot be located, or if a dotted
    name goes through something that is not a package.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dictionary for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        # A parent that is a plain module has no search path to look
        # in; report that as an import failure, not a KeyError.
        if '__path__' not in parent:
            raise ImportError('No package named %s' % package)
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage is not None:
        f, fname, (_s, _m, ty) = imp.find_module(module, path)
    else:
        f, fname, (_s, _m, ty) = imp.find_module(module, path + sys.path)
    if ty == imp.PKG_DIRECTORY:
        tree['__path__'] = [fname]
        path = [fname] + path
        f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
    # Register before scanning so recursive imports find this entry.
    _modules[fullmodule] = tree
    if ty != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        return tree

    stack = []  # stack of (class, indent) pairs

    try:
        g = tokenize.generate_tokens(f.readline)
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                lineno, thisindent = start
                # close nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, meth_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function
                    tree[meth_name] = Function(fullmodule, meth_name,
                                               fname, lineno)
                stack.append((None, thisindent))  # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Syntax error
                # parse what follows the class name
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    bases = []  # List of superclasses
                    # there's a list of superclasses
                    level = 1
                    base_tokens = []  # Tokens making up current superclass
                    while True:
                        tokentype, token, start = next(g)[0:3]
                        if token in (')', ',') and level == 1:
                            n = "".join(base_tokens)
                            if n in tree:
                                # we know this super class
                                n = tree[n]
                            else:
                                c = n.split('.')
                                if len(c) > 1:
                                    # super class is of the form
                                    # module.class: look in module for
                                    # class
                                    m = c[-2]
                                    c = c[-1]
                                    if m in _modules:
                                        d = _modules[m]
                                        if c in d:
                                            n = d[c]
                            bases.append(n)
                            base_tokens = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # only use NAME and OP (== dot) tokens for type name
                        elif tokentype in (NAME, OP) and level == 1:
                            base_tokens.append(token)
                        # expressions in the base list are not supported
                    inherit = bases
                cur_class = Class(fullmodule, class_name, inherit,
                                  fname, lineno)
                if not stack:
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                modules = _getnamelist(g)
                for mod, _mod2 in modules:
                    try:
                        # Recursively read the imported module
                        if inpackage is None:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.  (KeyboardInterrupt
                        # and SystemExit are deliberately not swallowed.)
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    d = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for n, n2 in names:
                    if n in d:
                        tree[n2 or n] = d[n]
                    elif n == '*':
                        # don't add names that start with _
                        for n in d:
                            if n[0] != '_':
                                tree[n] = d[n]
    except StopIteration:
        # The token stream ended in the middle of a construct; keep
        # whatever was collected so far.
        pass
    finally:
        # Close the source file even when tokenizing raises.
        f.close()

    return tree
|
276 |
|
def _getnamelist(g):
    '''Helper to get a comma-separated list of dotted names plus 'as'
    clauses.

    G is the token generator.  Return a list of pairs (name, name2)
    where name2 is the 'as' name, or None if there is no 'as' clause.
    Parsing stops at the first item that does not start with a name.
    StopIteration from an exhausted generator is not caught here.
    '''
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # Skip ahead to the next comma or to a token containing a newline.
        while token != "," and "\n" not in token:
            # builtin next() works on Python 2.6+ and 3; g.next() is 2-only
            token = next(g)[1]
        if token != ",":
            break
    return names
|
296 |
|
297 def _getname(g): |
|
298 # Helper to get a dotted name, return a pair (name, token) where |
|
299 # name is the dotted name, or None if there was no dotted name, |
|
300 # and token is the next input token. |
|
301 parts = [] |
|
302 tokentype, token = g.next()[0:2] |
|
303 if tokentype != NAME and token != '*': |
|
304 return (None, token) |
|
305 parts.append(token) |
|
306 while True: |
|
307 tokentype, token = g.next()[0:2] |
|
308 if token != '.': |
|
309 break |
|
310 tokentype, token = g.next()[0:2] |
|
311 if tokentype != NAME: |
|
312 break |
|
313 parts.append(token) |
|
314 return (".".join(parts), token) |
|
315 |
|
def _main():
    '''Main program for testing.

    sys.argv[1] names what to scan: either an existing file path (its
    directory becomes the search path and a trailing ".py" is dropped
    from the base name) or a module name looked up on sys.path.  The
    classes, their methods, and the top-level functions are printed
    in order of line number.
    '''
    import os
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    # Entries without a lineno attribute (such as the '__path__' list)
    # sort first.  A key function replaces the Python-2-only cmp sort;
    # both sorts are stable.
    objs = sorted(tree.values(), key=lambda obj: getattr(obj, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            # A single pre-formatted string prints the same text under
            # both the Python 2 print statement and the Python 3 function.
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print(" def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))


if __name__ == "__main__":
    _main()