symbian-qemu-0.9.1-12/python-2.6.1/Lib/lib2to3/pgen2/grammar.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
       
     2 # Licensed to PSF under a Contributor Agreement.
       
     3 
       
     4 """This module defines the data structures used to represent a grammar.
       
     5 
       
     6 These are a bit arcane because they are derived from the data
       
     7 structures used by Python's 'pgen' parser generator.
       
     8 
       
     9 There's also a table here mapping operators to their names in the
       
    10 token module; the Python tokenize module reports all operators as the
       
    11 fallback token code OP, but the parser needs the actual token code.
       
    12 
       
    13 """
       
    14 
       
    15 # Python imports
       
    16 import pickle
       
    17 
       
    18 # Local imports
       
    19 from . import token, tokenize
       
    20 
       
    21 
       
    22 class Grammar(object):
       
    23     """Pgen parsing tables tables conversion class.
       
    24 
       
    25     Once initialized, this class supplies the grammar tables for the
       
    26     parsing engine implemented by parse.py.  The parsing engine
       
    27     accesses the instance variables directly.  The class here does not
       
    28     provide initialization of the tables; several subclasses exist to
       
    29     do this (see the conv and pgen modules).
       
    30 
       
    31     The load() method reads the tables from a pickle file, which is
       
    32     much faster than the other ways offered by subclasses.  The pickle
       
    33     file is written by calling dump() (after loading the grammar
       
    34     tables using a subclass).  The report() method prints a readable
       
    35     representation of the tables to stdout, for debugging.
       
    36 
       
    37     The instance variables are as follows:
       
    38 
       
    39     symbol2number -- a dict mapping symbol names to numbers.  Symbol
       
    40                      numbers are always 256 or higher, to distinguish
       
    41                      them from token numbers, which are between 0 and
       
    42                      255 (inclusive).
       
    43 
       
    44     number2symbol -- a dict mapping numbers to symbol names;
       
    45                      these two are each other's inverse.
       
    46 
       
    47     states        -- a list of DFAs, where each DFA is a list of
       
    48                      states, each state is is a list of arcs, and each
       
    49                      arc is a (i, j) pair where i is a label and j is
       
    50                      a state number.  The DFA number is the index into
       
    51                      this list.  (This name is slightly confusing.)
       
    52                      Final states are represented by a special arc of
       
    53                      the form (0, j) where j is its own state number.
       
    54 
       
    55     dfas          -- a dict mapping symbol numbers to (DFA, first)
       
    56                      pairs, where DFA is an item from the states list
       
    57                      above, and first is a set of tokens that can
       
    58                      begin this grammar rule (represented by a dict
       
    59                      whose values are always 1).
       
    60 
       
    61     labels        -- a list of (x, y) pairs where x is either a token
       
    62                      number or a symbol number, and y is either None
       
    63                      or a string; the strings are keywords.  The label
       
    64                      number is the index in this list; label numbers
       
    65                      are used to mark state transitions (arcs) in the
       
    66                      DFAs.
       
    67 
       
    68     start         -- the number of the grammar's start symbol.
       
    69 
       
    70     keywords      -- a dict mapping keyword strings to arc labels.
       
    71 
       
    72     tokens        -- a dict mapping token numbers to arc labels.
       
    73 
       
    74     """
       
    75 
       
    76     def __init__(self):
       
    77         self.symbol2number = {}
       
    78         self.number2symbol = {}
       
    79         self.states = []
       
    80         self.dfas = {}
       
    81         self.labels = [(0, "EMPTY")]
       
    82         self.keywords = {}
       
    83         self.tokens = {}
       
    84         self.symbol2label = {}
       
    85         self.start = 256
       
    86 
       
    87     def dump(self, filename):
       
    88         """Dump the grammar tables to a pickle file."""
       
    89         f = open(filename, "wb")
       
    90         pickle.dump(self.__dict__, f, 2)
       
    91         f.close()
       
    92 
       
    93     def load(self, filename):
       
    94         """Load the grammar tables from a pickle file."""
       
    95         f = open(filename, "rb")
       
    96         d = pickle.load(f)
       
    97         f.close()
       
    98         self.__dict__.update(d)
       
    99 
       
   100     def report(self):
       
   101         """Dump the grammar tables to standard output, for debugging."""
       
   102         from pprint import pprint
       
   103         print "s2n"
       
   104         pprint(self.symbol2number)
       
   105         print "n2s"
       
   106         pprint(self.number2symbol)
       
   107         print "states"
       
   108         pprint(self.states)
       
   109         print "dfas"
       
   110         pprint(self.dfas)
       
   111         print "labels"
       
   112         pprint(self.labels)
       
   113         print "start", self.start
       
   114 
       
   115 
       
   116 # Map from operator to number (since tokenize doesn't do this)
       
   117 
       
   118 opmap_raw = """
       
   119 ( LPAR
       
   120 ) RPAR
       
   121 [ LSQB
       
   122 ] RSQB
       
   123 : COLON
       
   124 , COMMA
       
   125 ; SEMI
       
   126 + PLUS
       
   127 - MINUS
       
   128 * STAR
       
   129 / SLASH
       
   130 | VBAR
       
   131 & AMPER
       
   132 < LESS
       
   133 > GREATER
       
   134 = EQUAL
       
   135 . DOT
       
   136 % PERCENT
       
   137 ` BACKQUOTE
       
   138 { LBRACE
       
   139 } RBRACE
       
   140 @ AT
       
   141 == EQEQUAL
       
   142 != NOTEQUAL
       
   143 <> NOTEQUAL
       
   144 <= LESSEQUAL
       
   145 >= GREATEREQUAL
       
   146 ~ TILDE
       
   147 ^ CIRCUMFLEX
       
   148 << LEFTSHIFT
       
   149 >> RIGHTSHIFT
       
   150 ** DOUBLESTAR
       
   151 += PLUSEQUAL
       
   152 -= MINEQUAL
       
   153 *= STAREQUAL
       
   154 /= SLASHEQUAL
       
   155 %= PERCENTEQUAL
       
   156 &= AMPEREQUAL
       
   157 |= VBAREQUAL
       
   158 ^= CIRCUMFLEXEQUAL
       
   159 <<= LEFTSHIFTEQUAL
       
   160 >>= RIGHTSHIFTEQUAL
       
   161 **= DOUBLESTAREQUAL
       
   162 // DOUBLESLASH
       
   163 //= DOUBLESLASHEQUAL
       
   164 -> RARROW
       
   165 """
       
   166 
       
   167 opmap = {}
       
   168 for line in opmap_raw.splitlines():
       
   169     if line:
       
   170         op, name = line.split()
       
   171         opmap[op] = getattr(token, name)