symbian-qemu-0.9.1-12/python-2.6.1/Lib/test/test_tokenize.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 doctests = """
       
     2 Tests for the tokenize module.
       
     3 
       
     4     >>> import glob, random, sys
       
     5 
       
     6 The tests can be really simple. Given a small fragment of source
       
     7 code, print out a table with tokens. The ENDMARKER is omitted for
       
     8 brevity.
       
     9 
       
    10     >>> dump_tokens("1 + 1")
       
    11     NUMBER     '1'           (1, 0) (1, 1)
       
    12     OP         '+'           (1, 2) (1, 3)
       
    13     NUMBER     '1'           (1, 4) (1, 5)
       
    14 
       
    15     >>> dump_tokens("if False:\\n"
       
    16     ...             "    # NL\\n"
       
    17     ...             "    True = False # NEWLINE\\n")
       
    18     NAME       'if'          (1, 0) (1, 2)
       
    19     NAME       'False'       (1, 3) (1, 8)
       
    20     OP         ':'           (1, 8) (1, 9)
       
    21     NEWLINE    '\\n'          (1, 9) (1, 10)
       
    22     COMMENT    '# NL'        (2, 4) (2, 8)
       
    23     NL         '\\n'          (2, 8) (2, 9)
       
    24     INDENT     '    '        (3, 0) (3, 4)
       
    25     NAME       'True'        (3, 4) (3, 8)
       
    26     OP         '='           (3, 9) (3, 10)
       
    27     NAME       'False'       (3, 11) (3, 16)
       
    28     COMMENT    '# NEWLINE'   (3, 17) (3, 26)
       
    29     NEWLINE    '\\n'          (3, 26) (3, 27)
       
    30     DEDENT     ''            (4, 0) (4, 0)
       
    31 
       
    32     >>> indent_error_file = \"""
       
    33     ... def k(x):
       
    34     ...     x += 2
       
    35     ...   x += 5
       
    36     ... \"""
       
    37 
       
    38     >>> for tok in generate_tokens(StringIO(indent_error_file).readline): pass
       
    39     Traceback (most recent call last):
       
    40         ...
       
    41     IndentationError: unindent does not match any outer indentation level
       
    42 
       
    43 Test roundtrip for `untokenize`. `f` is an open file or a string. The source
       
    44 code in f is tokenized, converted back to source code via tokenize.untokenize(),
       
    45 and tokenized again from the latter. The test fails if the second tokenization
       
    46 doesn't match the first.
       
    47 
       
    48     >>> def roundtrip(f):
       
    49     ...     if isinstance(f, str): f = StringIO(f)
       
    50     ...     token_list = list(generate_tokens(f.readline))
       
    51     ...     f.close()
       
    52     ...     tokens1 = [tok[:2] for tok in token_list]
       
    53     ...     new_text = untokenize(tokens1)
       
    54     ...     readline = iter(new_text.splitlines(1)).next
       
    55     ...     tokens2 = [tok[:2] for tok in generate_tokens(readline)]
       
    56     ...     return tokens1 == tokens2
       
    57     ...
       
    58 
       
    59 There are some standard formatting practices that are easy to get right.
       
    60 
       
    61     >>> roundtrip("if x == 1:\\n"
       
    62     ...           "    print x\\n")
       
    63     True
       
    64 
       
    65     >>> roundtrip("# This is a comment\\n# This also")
       
    66     True
       
    67 
       
    68 Some people use different formatting conventions, which makes
       
    69 untokenize a little trickier. Note that this test involves trailing
       
    70 whitespace after the colon. Note that we use hex escapes to make the
       
    71 two trailing blanks apparent in the expected output.
       
    72 
       
    73     >>> roundtrip("if x == 1 : \\n"
       
    74     ...           "  print x\\n")
       
    75     True
       
    76 
       
    77     >>> f = test_support.findfile("tokenize_tests" + os.extsep + "txt")
       
    78     >>> roundtrip(open(f))
       
    79     True
       
    80 
       
    81     >>> roundtrip("if x == 1:\\n"
       
    82     ...           "    # A comment by itself.\\n"
       
    83     ...           "    print x # Comment here, too.\\n"
       
    84     ...           "    # Another comment.\\n"
       
    85     ...           "after_if = True\\n")
       
    86     True
       
    87 
       
    88     >>> roundtrip("if (x # The comments need to go in the right place\\n"
       
    89     ...           "    == 1):\\n"
       
    90     ...           "    print 'x==1'\\n")
       
    91     True
       
    92 
       
    93     >>> roundtrip("class Test: # A comment here\\n"
       
    94     ...           "  # A comment with weird indent\\n"
       
    95     ...           "  after_com = 5\\n"
       
    96     ...           "  def x(m): return m*5 # a one liner\\n"
       
    97     ...           "  def y(m): # A whitespace after the colon\\n"
       
    98     ...           "     return y*4 # 3-space indent\\n")
       
    99     True
       
   100 
       
   101 Some error-handling code
       
   102 
       
   103     >>> roundtrip("try: import somemodule\\n"
       
   104     ...           "except ImportError: # comment\\n"
       
   105     ...           "    print 'Can not import' # comment2\\n"
       
   106     ...           "else:   print 'Loaded'\\n")
       
   107     True
       
   108 
       
   109 Balancing continuation
       
   110 
       
   111     >>> roundtrip("a = (3,4, \\n"
       
   112     ...           "5,6)\\n"
       
   113     ...           "y = [3, 4,\\n"
       
   114     ...           "5]\\n"
       
   115     ...           "z = {'a': 5,\\n"
       
   116     ...           "'b':15, 'c':True}\\n"
       
   117     ...           "x = len(y) + 5 - a[\\n"
       
   118     ...           "3] - a[2]\\n"
       
   119     ...           "+ len(z) - z[\\n"
       
   120     ...           "'b']\\n")
       
   121     True
       
   122 
       
   123 Ordinary integers and binary operators
       
   124 
       
   125     >>> dump_tokens("0xff <= 255")
       
   126     NUMBER     '0xff'        (1, 0) (1, 4)
       
   127     OP         '<='          (1, 5) (1, 7)
       
   128     NUMBER     '255'         (1, 8) (1, 11)
       
   129     >>> dump_tokens("0b10 <= 255")
       
   130     NUMBER     '0b10'        (1, 0) (1, 4)
       
   131     OP         '<='          (1, 5) (1, 7)
       
   132     NUMBER     '255'         (1, 8) (1, 11)
       
   133     >>> dump_tokens("0o123 <= 0123")
       
   134     NUMBER     '0o123'       (1, 0) (1, 5)
       
   135     OP         '<='          (1, 6) (1, 8)
       
   136     NUMBER     '0123'        (1, 9) (1, 13)
       
   137     >>> dump_tokens("01234567 > ~0x15")
       
   138     NUMBER     '01234567'    (1, 0) (1, 8)
       
   139     OP         '>'           (1, 9) (1, 10)
       
   140     OP         '~'           (1, 11) (1, 12)
       
   141     NUMBER     '0x15'        (1, 12) (1, 16)
       
   142     >>> dump_tokens("2134568 != 01231515")
       
   143     NUMBER     '2134568'     (1, 0) (1, 7)
       
   144     OP         '!='          (1, 8) (1, 10)
       
   145     NUMBER     '01231515'    (1, 11) (1, 19)
       
   146     >>> dump_tokens("(-124561-1) & 0200000000")
       
   147     OP         '('           (1, 0) (1, 1)
       
   148     OP         '-'           (1, 1) (1, 2)
       
   149     NUMBER     '124561'      (1, 2) (1, 8)
       
   150     OP         '-'           (1, 8) (1, 9)
       
   151     NUMBER     '1'           (1, 9) (1, 10)
       
   152     OP         ')'           (1, 10) (1, 11)
       
   153     OP         '&'           (1, 12) (1, 13)
       
   154     NUMBER     '0200000000'  (1, 14) (1, 24)
       
   155     >>> dump_tokens("0xdeadbeef != -1")
       
   156     NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
       
   157     OP         '!='          (1, 11) (1, 13)
       
   158     OP         '-'           (1, 14) (1, 15)
       
   159     NUMBER     '1'           (1, 15) (1, 16)
       
   160     >>> dump_tokens("0xdeadc0de & 012345")
       
   161     NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
       
   162     OP         '&'           (1, 11) (1, 12)
       
   163     NUMBER     '012345'      (1, 13) (1, 19)
       
   164     >>> dump_tokens("0xFF & 0x15 | 1234")
       
   165     NUMBER     '0xFF'        (1, 0) (1, 4)
       
   166     OP         '&'           (1, 5) (1, 6)
       
   167     NUMBER     '0x15'        (1, 7) (1, 11)
       
   168     OP         '|'           (1, 12) (1, 13)
       
   169     NUMBER     '1234'        (1, 14) (1, 18)
       
   170 
       
   171 Long integers
       
   172 
       
   173     >>> dump_tokens("x = 0L")
       
   174     NAME       'x'           (1, 0) (1, 1)
       
   175     OP         '='           (1, 2) (1, 3)
       
   176     NUMBER     '0L'          (1, 4) (1, 6)
       
   177     >>> dump_tokens("x = 0xfffffffffff")
       
   178     NAME       'x'           (1, 0) (1, 1)
       
   179     OP         '='           (1, 2) (1, 3)
       
   180     NUMBER     '0xffffffffff (1, 4) (1, 17)
       
   181     >>> dump_tokens("x = 123141242151251616110l")
       
   182     NAME       'x'           (1, 0) (1, 1)
       
   183     OP         '='           (1, 2) (1, 3)
       
   184     NUMBER     '123141242151 (1, 4) (1, 26)
       
   185     >>> dump_tokens("x = -15921590215012591L")
       
   186     NAME       'x'           (1, 0) (1, 1)
       
   187     OP         '='           (1, 2) (1, 3)
       
   188     OP         '-'           (1, 4) (1, 5)
       
   189     NUMBER     '159215902150 (1, 5) (1, 23)
       
   190 
       
   191 Floating point numbers
       
   192 
       
   193     >>> dump_tokens("x = 3.14159")
       
   194     NAME       'x'           (1, 0) (1, 1)
       
   195     OP         '='           (1, 2) (1, 3)
       
   196     NUMBER     '3.14159'     (1, 4) (1, 11)
       
   197     >>> dump_tokens("x = 314159.")
       
   198     NAME       'x'           (1, 0) (1, 1)
       
   199     OP         '='           (1, 2) (1, 3)
       
   200     NUMBER     '314159.'     (1, 4) (1, 11)
       
   201     >>> dump_tokens("x = .314159")
       
   202     NAME       'x'           (1, 0) (1, 1)
       
   203     OP         '='           (1, 2) (1, 3)
       
   204     NUMBER     '.314159'     (1, 4) (1, 11)
       
   205     >>> dump_tokens("x = 3e14159")
       
   206     NAME       'x'           (1, 0) (1, 1)
       
   207     OP         '='           (1, 2) (1, 3)
       
   208     NUMBER     '3e14159'     (1, 4) (1, 11)
       
   209     >>> dump_tokens("x = 3E123")
       
   210     NAME       'x'           (1, 0) (1, 1)
       
   211     OP         '='           (1, 2) (1, 3)
       
   212     NUMBER     '3E123'       (1, 4) (1, 9)
       
   213     >>> dump_tokens("x+y = 3e-1230")
       
   214     NAME       'x'           (1, 0) (1, 1)
       
   215     OP         '+'           (1, 1) (1, 2)
       
   216     NAME       'y'           (1, 2) (1, 3)
       
   217     OP         '='           (1, 4) (1, 5)
       
   218     NUMBER     '3e-1230'     (1, 6) (1, 13)
       
   219     >>> dump_tokens("x = 3.14e159")
       
   220     NAME       'x'           (1, 0) (1, 1)
       
   221     OP         '='           (1, 2) (1, 3)
       
   222     NUMBER     '3.14e159'    (1, 4) (1, 12)
       
   223 
       
   224 String literals
       
   225 
       
   226     >>> dump_tokens("x = ''; y = \\\"\\\"")
       
   227     NAME       'x'           (1, 0) (1, 1)
       
   228     OP         '='           (1, 2) (1, 3)
       
   229     STRING     "''"          (1, 4) (1, 6)
       
   230     OP         ';'           (1, 6) (1, 7)
       
   231     NAME       'y'           (1, 8) (1, 9)
       
   232     OP         '='           (1, 10) (1, 11)
       
   233     STRING     '""'          (1, 12) (1, 14)
       
   234     >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
       
   235     NAME       'x'           (1, 0) (1, 1)
       
   236     OP         '='           (1, 2) (1, 3)
       
   237     STRING     '\\'"\\''       (1, 4) (1, 7)
       
   238     OP         ';'           (1, 7) (1, 8)
       
   239     NAME       'y'           (1, 9) (1, 10)
       
   240     OP         '='           (1, 11) (1, 12)
       
   241     STRING     '"\\'"'        (1, 13) (1, 16)
       
   242     >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
       
   243     NAME       'x'           (1, 0) (1, 1)
       
   244     OP         '='           (1, 2) (1, 3)
       
   245     STRING     '"doesn\\'t "' (1, 4) (1, 14)
       
   246     NAME       'shrink'      (1, 14) (1, 20)
       
   247     STRING     '", does it"' (1, 20) (1, 31)
       
   248     >>> dump_tokens("x = u'abc' + U'ABC'")
       
   249     NAME       'x'           (1, 0) (1, 1)
       
   250     OP         '='           (1, 2) (1, 3)
       
   251     STRING     "u'abc'"      (1, 4) (1, 10)
       
   252     OP         '+'           (1, 11) (1, 12)
       
   253     STRING     "U'ABC'"      (1, 13) (1, 19)
       
   254     >>> dump_tokens('y = u"ABC" + U"ABC"')
       
   255     NAME       'y'           (1, 0) (1, 1)
       
   256     OP         '='           (1, 2) (1, 3)
       
   257     STRING     'u"ABC"'      (1, 4) (1, 10)
       
   258     OP         '+'           (1, 11) (1, 12)
       
   259     STRING     'U"ABC"'      (1, 13) (1, 19)
       
   260     >>> dump_tokens("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'")
       
   261     NAME       'x'           (1, 0) (1, 1)
       
   262     OP         '='           (1, 2) (1, 3)
       
   263     STRING     "ur'abc'"     (1, 4) (1, 11)
       
   264     OP         '+'           (1, 12) (1, 13)
       
   265     STRING     "Ur'ABC'"     (1, 14) (1, 21)
       
   266     OP         '+'           (1, 22) (1, 23)
       
   267     STRING     "uR'ABC'"     (1, 24) (1, 31)
       
   268     OP         '+'           (1, 32) (1, 33)
       
   269     STRING     "UR'ABC'"     (1, 34) (1, 41)
       
   270     >>> dump_tokens('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"')
       
   271     NAME       'y'           (1, 0) (1, 1)
       
   272     OP         '='           (1, 2) (1, 3)
       
   273     STRING     'ur"abc"'     (1, 4) (1, 11)
       
   274     OP         '+'           (1, 12) (1, 13)
       
   275     STRING     'Ur"ABC"'     (1, 14) (1, 21)
       
   276     OP         '+'           (1, 22) (1, 23)
       
   277     STRING     'uR"ABC"'     (1, 24) (1, 31)
       
   278     OP         '+'           (1, 32) (1, 33)
       
   279     STRING     'UR"ABC"'     (1, 34) (1, 41)
       
   280 
       
   281 Operators
       
   282 
       
   283     >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
       
   284     NAME       'def'         (1, 0) (1, 3)
       
   285     NAME       'd22'         (1, 4) (1, 7)
       
   286     OP         '('           (1, 7) (1, 8)
       
   287     NAME       'a'           (1, 8) (1, 9)
       
   288     OP         ','           (1, 9) (1, 10)
       
   289     NAME       'b'           (1, 11) (1, 12)
       
   290     OP         ','           (1, 12) (1, 13)
       
   291     NAME       'c'           (1, 14) (1, 15)
       
   292     OP         '='           (1, 15) (1, 16)
       
   293     NUMBER     '2'           (1, 16) (1, 17)
       
   294     OP         ','           (1, 17) (1, 18)
       
   295     NAME       'd'           (1, 19) (1, 20)
       
   296     OP         '='           (1, 20) (1, 21)
       
   297     NUMBER     '2'           (1, 21) (1, 22)
       
   298     OP         ','           (1, 22) (1, 23)
       
   299     OP         '*'           (1, 24) (1, 25)
       
   300     NAME       'k'           (1, 25) (1, 26)
       
   301     OP         ')'           (1, 26) (1, 27)
       
   302     OP         ':'           (1, 27) (1, 28)
       
   303     NAME       'pass'        (1, 29) (1, 33)
       
   304     >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
       
   305     NAME       'def'         (1, 0) (1, 3)
       
   306     NAME       'd01v_'       (1, 4) (1, 9)
       
   307     OP         '('           (1, 9) (1, 10)
       
   308     NAME       'a'           (1, 10) (1, 11)
       
   309     OP         '='           (1, 11) (1, 12)
       
   310     NUMBER     '1'           (1, 12) (1, 13)
       
   311     OP         ','           (1, 13) (1, 14)
       
   312     OP         '*'           (1, 15) (1, 16)
       
   313     NAME       'k'           (1, 16) (1, 17)
       
   314     OP         ','           (1, 17) (1, 18)
       
   315     OP         '**'          (1, 19) (1, 21)
       
   316     NAME       'w'           (1, 21) (1, 22)
       
   317     OP         ')'           (1, 22) (1, 23)
       
   318     OP         ':'           (1, 23) (1, 24)
       
   319     NAME       'pass'        (1, 25) (1, 29)
       
   320 
       
   321 Comparison
       
   322 
       
   323     >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
       
   324     ...             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
       
   325     NAME       'if'          (1, 0) (1, 2)
       
   326     NUMBER     '1'           (1, 3) (1, 4)
       
   327     OP         '<'           (1, 5) (1, 6)
       
   328     NUMBER     '1'           (1, 7) (1, 8)
       
   329     OP         '>'           (1, 9) (1, 10)
       
   330     NUMBER     '1'           (1, 11) (1, 12)
       
   331     OP         '=='          (1, 13) (1, 15)
       
   332     NUMBER     '1'           (1, 16) (1, 17)
       
   333     OP         '>='          (1, 18) (1, 20)
       
   334     NUMBER     '5'           (1, 21) (1, 22)
       
   335     OP         '<='          (1, 23) (1, 25)
       
   336     NUMBER     '0x15'        (1, 26) (1, 30)
       
   337     OP         '<='          (1, 31) (1, 33)
       
   338     NUMBER     '0x12'        (1, 34) (1, 38)
       
   339     OP         '!='          (1, 39) (1, 41)
       
   340     NUMBER     '1'           (1, 42) (1, 43)
       
   341     NAME       'and'         (1, 44) (1, 47)
       
   342     NUMBER     '5'           (1, 48) (1, 49)
       
   343     NAME       'in'          (1, 50) (1, 52)
       
   344     NUMBER     '1'           (1, 53) (1, 54)
       
   345     NAME       'not'         (1, 55) (1, 58)
       
   346     NAME       'in'          (1, 59) (1, 61)
       
   347     NUMBER     '1'           (1, 62) (1, 63)
       
   348     NAME       'is'          (1, 64) (1, 66)
       
   349     NUMBER     '1'           (1, 67) (1, 68)
       
   350     NAME       'or'          (1, 69) (1, 71)
       
   351     NUMBER     '5'           (1, 72) (1, 73)
       
   352     NAME       'is'          (1, 74) (1, 76)
       
   353     NAME       'not'         (1, 77) (1, 80)
       
   354     NUMBER     '1'           (1, 81) (1, 82)
       
   355     OP         ':'           (1, 82) (1, 83)
       
   356     NAME       'pass'        (1, 84) (1, 88)
       
   357 
       
   358 Shift
       
   359 
       
   360     >>> dump_tokens("x = 1 << 1 >> 5")
       
   361     NAME       'x'           (1, 0) (1, 1)
       
   362     OP         '='           (1, 2) (1, 3)
       
   363     NUMBER     '1'           (1, 4) (1, 5)
       
   364     OP         '<<'          (1, 6) (1, 8)
       
   365     NUMBER     '1'           (1, 9) (1, 10)
       
   366     OP         '>>'          (1, 11) (1, 13)
       
   367     NUMBER     '5'           (1, 14) (1, 15)
       
   368 
       
   369 Additive
       
   370 
       
   371     >>> dump_tokens("x = 1 - y + 15 - 01 + 0x124 + z + a[5]")
       
   372     NAME       'x'           (1, 0) (1, 1)
       
   373     OP         '='           (1, 2) (1, 3)
       
   374     NUMBER     '1'           (1, 4) (1, 5)
       
   375     OP         '-'           (1, 6) (1, 7)
       
   376     NAME       'y'           (1, 8) (1, 9)
       
   377     OP         '+'           (1, 10) (1, 11)
       
   378     NUMBER     '15'          (1, 12) (1, 14)
       
   379     OP         '-'           (1, 15) (1, 16)
       
   380     NUMBER     '01'          (1, 17) (1, 19)
       
   381     OP         '+'           (1, 20) (1, 21)
       
   382     NUMBER     '0x124'       (1, 22) (1, 27)
       
   383     OP         '+'           (1, 28) (1, 29)
       
   384     NAME       'z'           (1, 30) (1, 31)
       
   385     OP         '+'           (1, 32) (1, 33)
       
   386     NAME       'a'           (1, 34) (1, 35)
       
   387     OP         '['           (1, 35) (1, 36)
       
   388     NUMBER     '5'           (1, 36) (1, 37)
       
   389     OP         ']'           (1, 37) (1, 38)
       
   390 
       
   391 Multiplicative
       
   392 
       
   393     >>> dump_tokens("x = 1//1*1/5*12%0x12")
       
   394     NAME       'x'           (1, 0) (1, 1)
       
   395     OP         '='           (1, 2) (1, 3)
       
   396     NUMBER     '1'           (1, 4) (1, 5)
       
   397     OP         '//'          (1, 5) (1, 7)
       
   398     NUMBER     '1'           (1, 7) (1, 8)
       
   399     OP         '*'           (1, 8) (1, 9)
       
   400     NUMBER     '1'           (1, 9) (1, 10)
       
   401     OP         '/'           (1, 10) (1, 11)
       
   402     NUMBER     '5'           (1, 11) (1, 12)
       
   403     OP         '*'           (1, 12) (1, 13)
       
   404     NUMBER     '12'          (1, 13) (1, 15)
       
   405     OP         '%'           (1, 15) (1, 16)
       
   406     NUMBER     '0x12'        (1, 16) (1, 20)
       
   407 
       
   408 Unary
       
   409 
       
   410     >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
       
   411     OP         '~'           (1, 0) (1, 1)
       
   412     NUMBER     '1'           (1, 1) (1, 2)
       
   413     OP         '^'           (1, 3) (1, 4)
       
   414     NUMBER     '1'           (1, 5) (1, 6)
       
   415     OP         '&'           (1, 7) (1, 8)
       
   416     NUMBER     '1'           (1, 9) (1, 10)
       
   417     OP         '|'           (1, 11) (1, 12)
       
   418     NUMBER     '1'           (1, 12) (1, 13)
       
   419     OP         '^'           (1, 14) (1, 15)
       
   420     OP         '-'           (1, 16) (1, 17)
       
   421     NUMBER     '1'           (1, 17) (1, 18)
       
   422     >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
       
   423     OP         '-'           (1, 0) (1, 1)
       
   424     NUMBER     '1'           (1, 1) (1, 2)
       
   425     OP         '*'           (1, 2) (1, 3)
       
   426     NUMBER     '1'           (1, 3) (1, 4)
       
   427     OP         '/'           (1, 4) (1, 5)
       
   428     NUMBER     '1'           (1, 5) (1, 6)
       
   429     OP         '+'           (1, 6) (1, 7)
       
   430     NUMBER     '1'           (1, 7) (1, 8)
       
   431     OP         '*'           (1, 8) (1, 9)
       
   432     NUMBER     '1'           (1, 9) (1, 10)
       
   433     OP         '//'          (1, 10) (1, 12)
       
   434     NUMBER     '1'           (1, 12) (1, 13)
       
   435     OP         '-'           (1, 14) (1, 15)
       
   436     OP         '-'           (1, 16) (1, 17)
       
   437     OP         '-'           (1, 17) (1, 18)
       
   438     OP         '-'           (1, 18) (1, 19)
       
   439     NUMBER     '1'           (1, 19) (1, 20)
       
   440     OP         '**'          (1, 20) (1, 22)
       
   441     NUMBER     '1'           (1, 22) (1, 23)
       
   442 
       
   443 Selector
       
   444 
       
   445     >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
       
   446     NAME       'import'      (1, 0) (1, 6)
       
   447     NAME       'sys'         (1, 7) (1, 10)
       
   448     OP         ','           (1, 10) (1, 11)
       
   449     NAME       'time'        (1, 12) (1, 16)
       
   450     NEWLINE    '\\n'          (1, 16) (1, 17)
       
   451     NAME       'x'           (2, 0) (2, 1)
       
   452     OP         '='           (2, 2) (2, 3)
       
   453     NAME       'sys'         (2, 4) (2, 7)
       
   454     OP         '.'           (2, 7) (2, 8)
       
   455     NAME       'modules'     (2, 8) (2, 15)
       
   456     OP         '['           (2, 15) (2, 16)
       
   457     STRING     "'time'"      (2, 16) (2, 22)
       
   458     OP         ']'           (2, 22) (2, 23)
       
   459     OP         '.'           (2, 23) (2, 24)
       
   460     NAME       'time'        (2, 24) (2, 28)
       
   461     OP         '('           (2, 28) (2, 29)
       
   462     OP         ')'           (2, 29) (2, 30)
       
   463 
       
   464 Methods
       
   465 
       
   466     >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
       
   467     OP         '@'           (1, 0) (1, 1)
       
   468     NAME       'staticmethod (1, 1) (1, 13)
       
   469     NEWLINE    '\\n'          (1, 13) (1, 14)
       
   470     NAME       'def'         (2, 0) (2, 3)
       
   471     NAME       'foo'         (2, 4) (2, 7)
       
   472     OP         '('           (2, 7) (2, 8)
       
   473     NAME       'x'           (2, 8) (2, 9)
       
   474     OP         ','           (2, 9) (2, 10)
       
   475     NAME       'y'           (2, 10) (2, 11)
       
   476     OP         ')'           (2, 11) (2, 12)
       
   477     OP         ':'           (2, 12) (2, 13)
       
   478     NAME       'pass'        (2, 14) (2, 18)
       
   479 
       
   480 Backslash means line continuation, except for comments
       
   481 
       
   482     >>> roundtrip("x=1+\\\\n"
       
   483     ...           "1\\n"
       
   484     ...           "# This is a comment\\\\n"
       
   485     ...           "# This also\\n")
       
   486     True
       
   487     >>> roundtrip("# Comment \\\\nx = 0")
       
   488     True
       
   489 
       
   490 Two string literals on the same line
       
   491 
       
   492     >>> roundtrip("'' ''")
       
   493     True
       
   494 
       
   495 Test roundtrip on random python modules.
       
   496 Pass the '-ucompiler' option to process the full directory.
       
   497 
       
   498     >>>
       
   499     >>> tempdir = os.path.dirname(f) or os.curdir
       
   500     >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
       
   501 
       
   502     >>> if not test_support.is_resource_enabled("compiler"):
       
   503     ...     testfiles = random.sample(testfiles, 10)
       
   504     ...
       
   505     >>> for testfile in testfiles:
       
   506     ...     if not roundtrip(open(testfile)):
       
   507     ...         print "Roundtrip failed for file %s" % testfile
       
   508     ...         break
       
   509     ... else: True
       
   510     True
       
   511 """
       
   512 
       
   513 
       
   514 from test import test_support
       
   515 from tokenize import (tokenize, untokenize, generate_tokens, NUMBER, NAME, OP,
       
   516                      STRING, ENDMARKER, tok_name)
       
   517 from StringIO import StringIO
       
   518 import os
       
   519 
       
   520 def dump_tokens(s):
       
   521     """Print out the tokens in s in a table format.
       
   522 
       
   523     The ENDMARKER is omitted.
       
   524     """
       
   525     f = StringIO(s)
       
   526     for type, token, start, end, line in generate_tokens(f.readline):
       
   527         if type == ENDMARKER:
       
   528             break
       
   529         type = tok_name[type]
       
   530         print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
       
   531 
       
   532 # This is an example from the docs, set up as a doctest.
       
   533 def decistmt(s):
       
   534     """Substitute Decimals for floats in a string of statements.
       
   535 
       
   536     >>> from decimal import Decimal
       
   537     >>> s = 'print +21.3e-5*-.1234/81.7'
       
   538     >>> decistmt(s)
       
   539     "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"
       
   540 
       
   541     The format of the exponent is inherited from the platform C library.
       
   542     Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
       
   543     we're only showing 12 digits, and the 13th isn't close to 5, the
       
   544     rest of the output should be platform-independent.
       
   545 
       
   546     >>> exec(s) #doctest: +ELLIPSIS
       
   547     -3.21716034272e-0...7
       
   548 
       
   549     Output from calculations with Decimal should be identical across all
       
   550     platforms.
       
   551 
       
   552     >>> exec(decistmt(s))
       
   553     -3.217160342717258261933904529E-7
       
   554     """
       
   555 
       
   556     result = []
       
   557     g = generate_tokens(StringIO(s).readline)   # tokenize the string
       
   558     for toknum, tokval, _, _, _  in g:
       
   559         if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
       
   560             result.extend([
       
   561                 (NAME, 'Decimal'),
       
   562                 (OP, '('),
       
   563                 (STRING, repr(tokval)),
       
   564                 (OP, ')')
       
   565             ])
       
   566         else:
       
   567             result.append((toknum, tokval))
       
   568     return untokenize(result)
       
   569 
       
   570 
       
   571 __test__ = {"doctests" : doctests, 'decistmt': decistmt}
       
   572 
       
   573 
       
   574 def test_main():
       
   575     from test import test_tokenize
       
   576     test_support.run_doctest(test_tokenize, True)
       
   577 
       
   578 if __name__ == "__main__":
       
   579     test_main()