symbian-qemu-0.9.1-12/python-win32-2.6.1/lib/lib2to3/pgen2/conv.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
       
     2 # Licensed to PSF under a Contributor Agreement.
       
     3 
       
     4 """Convert graminit.[ch] spit out by pgen to Python code.
       
     5 
       
     6 Pgen is the Python parser generator.  It is useful to quickly create a
       
     7 parser from a grammar file in Python's grammar notation.  But I don't
       
     8 want my parsers to be written in C (yet), so I'm translating the
       
     9 parsing tables to Python data structures and writing a Python parse
       
    10 engine.
       
    11 
       
    12 Note that the token numbers are constants determined by the standard
       
    13 Python tokenizer.  The standard token module defines these numbers and
       
    14 their names (the names are not used much).  The token numbers are
       
    15 hardcoded into the Python tokenizer and into pgen.  A Python
       
    16 implementation of the Python tokenizer is also available, in the
       
    17 standard tokenize module.
       
    18 
       
    19 On the other hand, symbol numbers (representing the grammar's
       
    20 non-terminals) are assigned by pgen based on the actual grammar
       
    21 input.
       
    22 
       
    23 Note: this module is pretty much obsolete; the pgen module generates
       
    24 equivalent grammar tables directly from the Grammar.txt input file
       
    25 without having to invoke the Python pgen C program.
       
    26 
       
    27 """
       
    28 
       
    29 # Python imports
       
    30 import re
       
    31 
       
    32 # Local imports
       
    33 from pgen2 import grammar, token
       
    34 
       
    35 
       
    36 class Converter(grammar.Grammar):
       
    37     """Grammar subclass that reads classic pgen output files.
       
    38 
       
    39     The run() method reads the tables as produced by the pgen parser
       
    40     generator, typically contained in two C files, graminit.h and
       
    41     graminit.c.  The other methods are for internal use only.
       
    42 
       
    43     See the base class for more documentation.
       
    44 
       
    45     """
       
    46 
       
    47     def run(self, graminit_h, graminit_c):
       
    48         """Load the grammar tables from the text files written by pgen."""
       
    49         self.parse_graminit_h(graminit_h)
       
    50         self.parse_graminit_c(graminit_c)
       
    51         self.finish_off()
       
    52 
       
    53     def parse_graminit_h(self, filename):
       
    54         """Parse the .h file writen by pgen.  (Internal)
       
    55 
       
    56         This file is a sequence of #define statements defining the
       
    57         nonterminals of the grammar as numbers.  We build two tables
       
    58         mapping the numbers to names and back.
       
    59 
       
    60         """
       
    61         try:
       
    62             f = open(filename)
       
    63         except IOError, err:
       
    64             print "Can't open %s: %s" % (filename, err)
       
    65             return False
       
    66         self.symbol2number = {}
       
    67         self.number2symbol = {}
       
    68         lineno = 0
       
    69         for line in f:
       
    70             lineno += 1
       
    71             mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
       
    72             if not mo and line.strip():
       
    73                 print "%s(%s): can't parse %s" % (filename, lineno,
       
    74                                                   line.strip())
       
    75             else:
       
    76                 symbol, number = mo.groups()
       
    77                 number = int(number)
       
    78                 assert symbol not in self.symbol2number
       
    79                 assert number not in self.number2symbol
       
    80                 self.symbol2number[symbol] = number
       
    81                 self.number2symbol[number] = symbol
       
    82         return True
       
    83 
       
    84     def parse_graminit_c(self, filename):
       
    85         """Parse the .c file writen by pgen.  (Internal)
       
    86 
       
    87         The file looks as follows.  The first two lines are always this:
       
    88 
       
    89         #include "pgenheaders.h"
       
    90         #include "grammar.h"
       
    91 
       
    92         After that come four blocks:
       
    93 
       
    94         1) one or more state definitions
       
    95         2) a table defining dfas
       
    96         3) a table defining labels
       
    97         4) a struct defining the grammar
       
    98 
       
    99         A state definition has the following form:
       
   100         - one or more arc arrays, each of the form:
       
   101           static arc arcs_<n>_<m>[<k>] = {
       
   102                   {<i>, <j>},
       
   103                   ...
       
   104           };
       
   105         - followed by a state array, of the form:
       
   106           static state states_<s>[<t>] = {
       
   107                   {<k>, arcs_<n>_<m>},
       
   108                   ...
       
   109           };
       
   110 
       
   111         """
       
   112         try:
       
   113             f = open(filename)
       
   114         except IOError, err:
       
   115             print "Can't open %s: %s" % (filename, err)
       
   116             return False
       
   117         # The code below essentially uses f's iterator-ness!
       
   118         lineno = 0
       
   119 
       
   120         # Expect the two #include lines
       
   121         lineno, line = lineno+1, f.next()
       
   122         assert line == '#include "pgenheaders.h"\n', (lineno, line)
       
   123         lineno, line = lineno+1, f.next()
       
   124         assert line == '#include "grammar.h"\n', (lineno, line)
       
   125 
       
   126         # Parse the state definitions
       
   127         lineno, line = lineno+1, f.next()
       
   128         allarcs = {}
       
   129         states = []
       
   130         while line.startswith("static arc "):
       
   131             while line.startswith("static arc "):
       
   132                 mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
       
   133                               line)
       
   134                 assert mo, (lineno, line)
       
   135                 n, m, k = map(int, mo.groups())
       
   136                 arcs = []
       
   137                 for _ in range(k):
       
   138                     lineno, line = lineno+1, f.next()
       
   139                     mo = re.match(r"\s+{(\d+), (\d+)},$", line)
       
   140                     assert mo, (lineno, line)
       
   141                     i, j = map(int, mo.groups())
       
   142                     arcs.append((i, j))
       
   143                 lineno, line = lineno+1, f.next()
       
   144                 assert line == "};\n", (lineno, line)
       
   145                 allarcs[(n, m)] = arcs
       
   146                 lineno, line = lineno+1, f.next()
       
   147             mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
       
   148             assert mo, (lineno, line)
       
   149             s, t = map(int, mo.groups())
       
   150             assert s == len(states), (lineno, line)
       
   151             state = []
       
   152             for _ in range(t):
       
   153                 lineno, line = lineno+1, f.next()
       
   154                 mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
       
   155                 assert mo, (lineno, line)
       
   156                 k, n, m = map(int, mo.groups())
       
   157                 arcs = allarcs[n, m]
       
   158                 assert k == len(arcs), (lineno, line)
       
   159                 state.append(arcs)
       
   160             states.append(state)
       
   161             lineno, line = lineno+1, f.next()
       
   162             assert line == "};\n", (lineno, line)
       
   163             lineno, line = lineno+1, f.next()
       
   164         self.states = states
       
   165 
       
   166         # Parse the dfas
       
   167         dfas = {}
       
   168         mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
       
   169         assert mo, (lineno, line)
       
   170         ndfas = int(mo.group(1))
       
   171         for i in range(ndfas):
       
   172             lineno, line = lineno+1, f.next()
       
   173             mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
       
   174                           line)
       
   175             assert mo, (lineno, line)
       
   176             symbol = mo.group(2)
       
   177             number, x, y, z = map(int, mo.group(1, 3, 4, 5))
       
   178             assert self.symbol2number[symbol] == number, (lineno, line)
       
   179             assert self.number2symbol[number] == symbol, (lineno, line)
       
   180             assert x == 0, (lineno, line)
       
   181             state = states[z]
       
   182             assert y == len(state), (lineno, line)
       
   183             lineno, line = lineno+1, f.next()
       
   184             mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
       
   185             assert mo, (lineno, line)
       
   186             first = {}
       
   187             rawbitset = eval(mo.group(1))
       
   188             for i, c in enumerate(rawbitset):
       
   189                 byte = ord(c)
       
   190                 for j in range(8):
       
   191                     if byte & (1<<j):
       
   192                         first[i*8 + j] = 1
       
   193             dfas[number] = (state, first)
       
   194         lineno, line = lineno+1, f.next()
       
   195         assert line == "};\n", (lineno, line)
       
   196         self.dfas = dfas
       
   197 
       
   198         # Parse the labels
       
   199         labels = []
       
   200         lineno, line = lineno+1, f.next()
       
   201         mo = re.match(r"static label labels\[(\d+)\] = {$", line)
       
   202         assert mo, (lineno, line)
       
   203         nlabels = int(mo.group(1))
       
   204         for i in range(nlabels):
       
   205             lineno, line = lineno+1, f.next()
       
   206             mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
       
   207             assert mo, (lineno, line)
       
   208             x, y = mo.groups()
       
   209             x = int(x)
       
   210             if y == "0":
       
   211                 y = None
       
   212             else:
       
   213                 y = eval(y)
       
   214             labels.append((x, y))
       
   215         lineno, line = lineno+1, f.next()
       
   216         assert line == "};\n", (lineno, line)
       
   217         self.labels = labels
       
   218 
       
   219         # Parse the grammar struct
       
   220         lineno, line = lineno+1, f.next()
       
   221         assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
       
   222         lineno, line = lineno+1, f.next()
       
   223         mo = re.match(r"\s+(\d+),$", line)
       
   224         assert mo, (lineno, line)
       
   225         ndfas = int(mo.group(1))
       
   226         assert ndfas == len(self.dfas)
       
   227         lineno, line = lineno+1, f.next()
       
   228         assert line == "\tdfas,\n", (lineno, line)
       
   229         lineno, line = lineno+1, f.next()
       
   230         mo = re.match(r"\s+{(\d+), labels},$", line)
       
   231         assert mo, (lineno, line)
       
   232         nlabels = int(mo.group(1))
       
   233         assert nlabels == len(self.labels), (lineno, line)
       
   234         lineno, line = lineno+1, f.next()
       
   235         mo = re.match(r"\s+(\d+)$", line)
       
   236         assert mo, (lineno, line)
       
   237         start = int(mo.group(1))
       
   238         assert start in self.number2symbol, (lineno, line)
       
   239         self.start = start
       
   240         lineno, line = lineno+1, f.next()
       
   241         assert line == "};\n", (lineno, line)
       
   242         try:
       
   243             lineno, line = lineno+1, f.next()
       
   244         except StopIteration:
       
   245             pass
       
   246         else:
       
   247             assert 0, (lineno, line)
       
   248 
       
   249     def finish_off(self):
       
   250         """Create additional useful structures.  (Internal)."""
       
   251         self.keywords = {} # map from keyword strings to arc labels
       
   252         self.tokens = {}   # map from numeric token values to arc labels
       
   253         for ilabel, (type, value) in enumerate(self.labels):
       
   254             if type == token.NAME and value is not None:
       
   255                 self.keywords[value] = ilabel
       
   256             elif value is None:
       
   257                 self.tokens[type] = ilabel