# Module 'panelparser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.
|
from warnings import warnpy3k
warnpy3k("the panelparser module has been removed in Python 3.0", stacklevel=2)
del warnpy3k
|


whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'
|


# Tokenize a string.
# Return a list of tokens (strings).
#
|
def tokenize_string(s):
    tokens = []
    while s:
        c = s[:1]
        if c in whitespace:
            s = s[1:]
        elif c == ';':
            s = ''
        elif c == '"':
            n = len(s)
            i = 1
            while i < n:
                c = s[i]
                i = i+1
                if c == '"': break
                if c == '\\': i = i+1
            tokens.append(s[:i])
            s = s[i:]
        elif c in operators:
            tokens.append(c)
            s = s[1:]
        else:
            n = len(s)
            i = 1
            while i < n:
                if s[i] in separators: break
                i = i+1
            tokens.append(s[:i])
            s = s[i:]
    return tokens
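
# For illustration (a sketch, not part of the original module), what
# tokenize_string() produces at the interactive prompt:
#
#     >>> tokenize_string('(hip (hop hur-ray))')
#     ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#     >>> tokenize_string('(label "hi there") ; trailing comment')
#     ['(', 'label', '"hi there"', ')']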
|


# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
|
def tokenize_file(fp):
    tokens = []
    while 1:
        line = fp.readline()
        if not line: break
        tokens = tokens + tokenize_string(line)
    return tokens
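
# A usage sketch (not in the original module): tokenize_file() simply
# concatenates the tokens of every line, so expressions may span lines:
#
#     >>> from StringIO import StringIO
#     >>> tokenize_file(StringIO('(hip\n (hop hur-ray))\n'))
#     ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']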
|


# Exception raised by parse_expr().
#
syntax_error = 'syntax error'
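
# Since this is a plain string, callers catch it with the classic
# string-exception form (a sketch; string exceptions only work on
# Python versions before 2.6):
#
#     try:
#         expr, tokens = parse_expr(tokens)
#     except syntax_error, detail:
#         print 'bad S-expression:', detail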
|


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the S-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
|
def parse_expr(tokens):
    if (not tokens) or tokens[0] != '(':
        raise syntax_error, 'expected "("'
    tokens = tokens[1:]
    expr = []
    while 1:
        if not tokens:
            raise syntax_error, 'missing ")"'
        if tokens[0] == ')':
            return expr, tokens[1:]
        elif tokens[0] == '(':
            subexpr, tokens = parse_expr(tokens)
            expr.append(subexpr)
        else:
            expr.append(tokens[0])
            tokens = tokens[1:]
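
# For illustration (a sketch, not part of the original module):
# parse_expr() consumes exactly one parenthesized expression and
# hands back the remaining tokens:
#
#     >>> parse_expr(['(', 'hip', '(', 'hop', 'hur-ray', ')', ')', 'tail'])
#     (['hip', ['hop', 'hur-ray']], ['tail'])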
|


# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
|
def parse_file(fp):
    tokens = tokenize_file(fp)
    exprlist = []
    while tokens:
        expr, tokens = parse_expr(tokens)
        exprlist.append(expr)
    return exprlist
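
# A usage sketch (not in the original module) with two top-level
# expressions in one file-like object:
#
#     >>> from StringIO import StringIO
#     >>> parse_file(StringIO('(a b) (c (d))\n'))
#     [['a', 'b'], ['c', ['d']]]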
|


# EXAMPLE:
#
# The input
#   '(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#   ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#   ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#   [['hip', ['hop', 'hur-ray']]]
|


# TOKENIZING:
#
# Comments start with a semicolon (;) and continue till the end of the line.
#
# Tokens are separated by whitespace, except the following characters
# always form a separate token (outside strings):
#   ( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as the escape character in strings.
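
# For instance (a sketch, not part of the original module), one line
# exercising all three rules at once:
#
#     >>> tokenize_string('\'(a "b c") ; ignored')
#     ["'", '(', 'a', '"b c"', ')']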