JavaScriptCore/create_regex_tables
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 # Copyright (C) 2010 Apple Inc. All rights reserved.
       
     2 # 
       
     3 # Redistribution and use in source and binary forms, with or without
       
     4 # modification, are permitted provided that the following conditions
       
     5 # are met:
       
     6 # 1. Redistributions of source code must retain the above copyright
       
     7 #    notice, this list of conditions and the following disclaimer.
       
     8 # 2. Redistributions in binary form must reproduce the above copyright
       
     9 #    notice, this list of conditions and the following disclaimer in the
       
    10 #    documentation and/or other materials provided with the distribution.
       
    11 # 
       
    12 # THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
       
    13 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    14 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    15 # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
       
    16 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
       
    17 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
       
    18 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
       
    19 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
       
    20 # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    21 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    22 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
       
    23 
       
    24 types = {
       
    25     "wordchar": { "UseTable" : True, "data": ['_', ('0','9'), ('A', 'Z'), ('a','z')]},
       
    26     "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0xffff)]},
       
    27     "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]},
       
    28     "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a)]},
       
    29     "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xffff)]},
       
    30     "digits": { "UseTable" : False, "data": [('0', '9')]},
       
    31     "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)] }
       
    32 }
       
    33 entriesPerLine = 50
       
    34 arrays = "";
       
    35 functions = "";
       
    36 
       
    37 for name, classes in types.items():
       
    38     ranges = [];
       
    39     size = 0;
       
    40     for _class in classes["data"]:
       
    41         if type(_class) == str:
       
    42             ranges.append((ord(_class), ord(_class)))
       
    43         elif type(_class) == int:
       
    44             ranges.append((_class, _class))
       
    45         else:
       
    46             (min, max) = _class;
       
    47             if type(min) == str:
       
    48                 min = ord(min)
       
    49             if type(max) == str:
       
    50                 max = ord(max)
       
    51             if max > 0x7f and min <= 0x7f:
       
    52                 ranges.append((min, 0x7f))
       
    53                 min = 0x80
       
    54             ranges.append((min,max))
       
    55     ranges.sort();
       
    56     
       
    57     if classes["UseTable"] and (not "Inverse" in classes):
       
    58         array = ("static const char _%sData[65536] = {\n" % name);
       
    59         i = 0
       
    60         for (min,max) in ranges:
       
    61             while i < min:
       
    62                 i = i + 1
       
    63                 array += ('0,')
       
    64                 if (i % entriesPerLine == 0) and (i != 0):
       
    65                     array += ('\n')
       
    66             while i <= max:
       
    67                 i = i + 1
       
    68                 if (i == 65536):
       
    69                     array += ("1")
       
    70                 else:
       
    71                     array += ('1,')
       
    72                 if (i % entriesPerLine == 0) and (i != 0):
       
    73                     array += ('\n')
       
    74         while i < 0xffff:
       
    75             array += ("0,")
       
    76             i = i + 1;
       
    77             if (i % entriesPerLine == 0) and (i != 0):
       
    78                 array += ('\n')
       
    79         if i == 0xffff:
       
    80             array += ("0")
       
    81         array += ("\n};\n\n");
       
    82         arrays += array
       
    83     
       
    84     # Generate createFunction:
       
    85     function = "";
       
    86     function += ("CharacterClass* %sCreate()\n" % name)
       
    87     function += ("{\n")
       
    88     if classes["UseTable"]:
       
    89         if "Inverse" in classes:
       
    90             function += ("    CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % (classes["Inverse"]))
       
    91         else:
       
    92             function += ("    CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % (name))
       
    93     else:
       
    94         function += ("    CharacterClass* characterClass = new CharacterClass(0);\n")
       
    95     for (min, max) in ranges:
       
    96         if (min == max):
       
    97             if (min > 127):
       
    98                 function += ("    characterClass->m_matchesUnicode.append(0x%04x);\n" % min)
       
    99             else:
       
   100                 function += ("    characterClass->m_matches.append(0x%02x);\n" % min)
       
   101             continue
       
   102         if (min > 127) or (max > 127):
       
   103             function += ("    characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (min, max))
       
   104         else:
       
   105             function += ("    characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (min, max))
       
   106     function += ("    return characterClass;\n")
       
   107     function += ("}\n\n")
       
   108     functions += function
       
   109 
       
   110 print(arrays)
       
   111 print(functions)
       
   112