# Copyright (C) 2010 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Character classes to generate.  Each entry maps a class name to a dict with:
#   "UseTable": when True the generated create function passes a lookup table
#               to CharacterClassTable::create; otherwise it passes 0.
#   "Inverse":  (optional) name of another class whose table is handed to
#               CharacterClassTable::create with its second argument `true`;
#               no table is emitted for a class that has this key.
#   "data":     the members, each a one-character string, an integer code
#               point, or an inclusive (first, last) pair of either.
types = {
    "wordchar": { "UseTable" : True, "data": ['_', ('0','9'), ('A', 'Z'), ('a','z')]},
    "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0xffff)]},
    "newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]},
    "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a)]},
    "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xffff)]},
    "digits": { "UseTable" : False, "data": [('0', '9')]},
    "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)] }
}

# Number of table entries written per line of generated C++.
entriesPerLine = 50

# Number of entries in every generated lookup table (code points 0..0xffff).
_TABLE_SIZE = 0x10000

# Highest code point routed to the non-Unicode m_matches / m_ranges lists.
_ASCII_MAX = 0x7f


def _code_point(value):
    """Return *value* as an integer code point (accepts a 1-char string or an int)."""
    if isinstance(value, str):
        return ord(value)
    return value


def _normalize_ranges(data):
    """Convert a class's "data" list into a sorted list of (first, last) int pairs.

    Single characters and integers become one-element ranges.  A pair that
    straddles the 0x7f / 0x80 boundary is split in two so that every resulting
    range lies entirely at or below 0x7f, or entirely above it.
    """
    ranges = []
    for member in data:
        if isinstance(member, (str, int)):
            point = _code_point(member)
            ranges.append((point, point))
            continue
        first, last = member
        first = _code_point(first)
        last = _code_point(last)
        if first <= _ASCII_MAX < last:
            ranges.append((first, _ASCII_MAX))
            first = _ASCII_MAX + 1
        ranges.append((first, last))
    ranges.sort()
    return ranges


def _build_table(name, ranges):
    """Return the C++ source of the `_<name>Data` lookup table.

    The table has one entry per code point: 1 if the code point falls inside
    any of *ranges*, else 0.  Entries are comma separated, `entriesPerLine`
    to a line, with no comma after the final entry.

    Raises ValueError if a range reaches outside the table.
    """
    flags = ["0"] * _TABLE_SIZE
    for first, last in ranges:
        if first < 0 or last >= _TABLE_SIZE:
            # Fail loudly instead of emitting a table whose size no longer
            # matches its declaration.
            raise ValueError("range (0x%x, 0x%x) in class %r does not fit in a %d-entry table"
                             % (first, last, name, _TABLE_SIZE))
        for point in range(first, last + 1):
            flags[point] = "1"
    rows = [",".join(flags[start:start + entriesPerLine])
            for start in range(0, _TABLE_SIZE, entriesPerLine)]
    # Joining rows with ",\n" leaves every line but the last ending in a comma.
    return "static const char _%sData[%d] = {\n%s\n};\n\n" % (name, _TABLE_SIZE, ",\n".join(rows))


def _build_create_function(name, classes, ranges):
    """Return the C++ source of the `<name>Create()` function for one class."""
    # NOTE(review): the emitted C++ body lines start with a single space, as
    # found in this file; confirm against upstream whether a wider indent was
    # intended before changing these literals.
    lines = ["CharacterClass* %sCreate()\n" % name, "{\n"]
    if not classes["UseTable"]:
        lines.append(" CharacterClass* characterClass = new CharacterClass(0);\n")
    elif "Inverse" in classes:
        lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % (classes["Inverse"]))
    else:
        lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % (name))
    for first, last in ranges:
        if first == last:
            if first > 127:
                lines.append(" characterClass->m_matchesUnicode.append(0x%04x);\n" % first)
            else:
                lines.append(" characterClass->m_matches.append(0x%02x);\n" % first)
        elif first > 127 or last > 127:
            lines.append(" characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (first, last))
        else:
            lines.append(" characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (first, last))
    lines.append(" return characterClass;\n")
    lines.append("}\n\n")
    return "".join(lines)


def _generate(class_types):
    """Return (tables, create_functions) as two strings of generated C++.

    Classes are emitted in the iteration order of *class_types*.
    """
    tables = []
    creators = []
    for name, classes in class_types.items():
        ranges = _normalize_ranges(classes["data"])
        # A class with "Inverse" reuses the other class's table, so it gets none of its own.
        if classes["UseTable"] and "Inverse" not in classes:
            tables.append(_build_table(name, ranges))
        creators.append(_build_create_function(name, classes, ranges))
    return "".join(tables), "".join(creators)


arrays, functions = _generate(types)

print(arrays)
print(functions)
|
112 |