|
1 #!/usr/bin/perl -w |
|
2 # |
|
3 # This is JavaScriptCore's variant of the PCRE library. While this library |
|
4 # started out as a copy of PCRE, many of the features of PCRE have been |
|
5 # removed. This library now supports only the regular expression features |
|
6 # required by the JavaScript language specification, and has only the functions |
|
7 # needed by JavaScriptCore and the rest of WebKit. |
|
8 # |
|
9 # Originally written by Philip Hazel |
|
10 # Copyright (c) 1997-2006 University of Cambridge |
|
11 # Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
|
12 # |
|
13 # ----------------------------------------------------------------------------- |
|
14 # Redistribution and use in source and binary forms, with or without |
|
15 # modification, are permitted provided that the following conditions are met: |
|
16 # |
|
17 # * Redistributions of source code must retain the above copyright notice, |
|
18 # this list of conditions and the following disclaimer. |
|
19 # |
|
20 # * Redistributions in binary form must reproduce the above copyright |
|
21 # notice, this list of conditions and the following disclaimer in the |
|
22 # documentation and/or other materials provided with the distribution. |
|
23 # |
|
24 # * Neither the name of the University of Cambridge nor the names of its |
|
25 # contributors may be used to endorse or promote products derived from |
|
26 # this software without specific prior written permission. |
|
27 # |
|
28 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
29 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
30 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
31 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
|
32 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
33 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
34 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
35 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
36 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
37 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
38 # POSSIBILITY OF SUCH DAMAGE. |
|
39 # ----------------------------------------------------------------------------- |
|
40 |
|
41 # This is a freestanding support program to generate a file containing |
|
42 # character tables. The tables are built according to the default C |
|
43 # locale. |
|
44 |
|
45 use strict; |
|
46 |
|
47 use File::Basename; |
|
48 use File::Spec; |
|
49 use File::Temp qw(tempfile); |
|
50 use Getopt::Long; |
|
51 |
|
52 sub readHeaderValues(); |
|
53 |
|
54 my %pcre_internal; |
|
55 |
|
# Command line: [--preprocessor=program] output-file
#
# $preprocessor is the command readHeaderValues() runs over pcre_internal.h
# (default "cpp"); $outputFile is the path the generated tables are written to.
my $usage = "Usage: " . basename($0) . " [--preprocessor=program] output-file\n";

if (scalar(@ARGV) < 1) {
    print STDERR $usage;
    exit 1;
}

my $outputFile;
my $preprocessor;

# GetOptions() returns false when it meets an unknown option or an option
# whose value is missing (it has already warned on STDERR by then). Stop with
# the usage text rather than continuing with a half-parsed command line.
if (!GetOptions('preprocessor=s' => \$preprocessor)) {
    print STDERR $usage;
    exit 1;
}
if (not $preprocessor) {
    $preprocessor = "cpp";
}

# GetOptions() removes the options it recognised from @ARGV, so the first
# element left over is the output file.
$outputFile = $ARGV[0];
die('Must specify output file.') unless defined($outputFile);

# Populate %pcre_internal before any table is generated.
readHeaderValues();
|
72 |
|
# Create the output file and write the banner comment followed by the opening
# of the jsc_pcre_default_tables array. The error message names the path so a
# failing build step can be diagnosed from its output alone.
open(OUT, ">", $outputFile) or die "Cannot open '$outputFile' for writing: $!";
binmode(OUT);

# Constant text is written with print; printf is kept only where a format
# directive is actually needed.
print OUT
    "/*************************************************\n" .
    "* Perl-Compatible Regular Expressions *\n" .
    "*************************************************/\n\n" .
    "/* This file is automatically written by the dftables auxiliary \n" .
    "program. If you edit it by hand, you might like to edit the Makefile to \n" .
    "prevent its ever being regenerated.\n\n";

# The array size comes from the tables_length value read from pcre_internal.h.
printf(OUT
    "This file contains the default tables for characters with codes less than\n" .
    "128 (ASCII characters). These tables are used when no external tables are\n" .
    "passed to PCRE. */\n\n" .
    "const unsigned char jsc_pcre_default_tables[%d] = {\n\n" .
    "/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length});
|
89 |
|
# The lower casing table must be the first table in the array.
die "lcc_offset != 0" unless $pcre_internal{lcc_offset} == 0;

# Emit the 128 entries as 16 rows of 8 values. Every row but the last ends
# with ", " before its newline; the final value is followed by ",\n\n".
{
    my @rows;
    foreach my $rowStart (map { $_ * 8 } 0 .. 15) {
        my @cells = map { sprintf("0x%02X", ord(lc(chr($_)))) } $rowStart .. $rowStart + 7;
        push @rows, join(", ", @cells);
    }
    print OUT " ", join(", \n ", @rows), ",\n\n";
}
|
105 |
|
print OUT "/* This table is a case flipping table. */\n\n";

# The case flipping table must directly follow the 128-entry lower casing table.
die "fcc_offset != 128" unless $pcre_internal{fcc_offset} == 128;

# Lower-case characters map to their upper-case form; everything else maps to
# its lower-case form (which leaves non-letters unchanged).
{
    my @cells;
    foreach my $code (0 .. 127) {
        my $char = chr($code);
        my $flipped = ($char =~ /[[:lower:]]/) ? uc($char) : lc($char);
        push @cells, sprintf("0x%02X", ord($flipped));
    }

    # Lay the values out eight per row, in the same format as the table above.
    my @rows;
    while (my @row = splice(@cells, 0, 8)) {
        push @rows, join(", ", @row);
    }
    print OUT " ", join(", \n ", @rows), ",\n\n";
}
|
124 |
|
print OUT
    "/* This table contains bit maps for various character classes.\n" .
    "Each map is 32 bytes long and the bits run from the least\n" .
    "significant end of each byte. The classes are: space, digit, word. */\n\n";

# The bit maps must directly follow the 128-entry case flipping table.
die "cbits_offset != fcc_offset + 128"
    unless $pcre_internal{cbits_offset} == $pcre_internal{fcc_offset} + 128;

# One byte per eight character codes: code N lives in byte N >> 3 of its map,
# at bit N & 7. The cbit_* values give the starting byte of each map.
my @cbit_table = (0) x $pcre_internal{cbit_length};

foreach my $code (ord('0') .. ord('9')) {
    $cbit_table[$pcre_internal{cbit_digit} + ($code >> 3)] |= 1 << ($code & 7);
}

# Word characters are the alphanumerics plus underscore.
foreach my $code (ord('_'), grep { chr($_) =~ /[[:alnum:]]/ } 0 .. 127) {
    $cbit_table[$pcre_internal{cbit_word} + ($code >> 3)] |= 1 << ($code & 7);
}

foreach my $code (grep { chr($_) =~ /[[:space:]]/ } 0 .. 127) {
    $cbit_table[$pcre_internal{cbit_space} + ($code >> 3)] |= 1 << ($code & 7);
}

# Print eight bytes per row, with an extra blank line after every 32 bytes so
# that each map forms its own paragraph.
{
    my @bytes = map { sprintf("0x%02X", $_) } @cbit_table[0 .. $pcre_internal{cbit_length} - 1];
    my $text = "";
    foreach my $index (0 .. $#bytes) {
        if ($index > 0) {
            if ($index & 7) {
                $text .= ", ";
            } elsif ($index & 31) {
                $text .= ", \n ";
            } else {
                $text .= ", \n\n ";
            }
        }
        $text .= $bytes[$index];
    }
    print OUT " ", $text, ",\n\n";
}
|
163 |
|
printf(OUT
    "/* This table identifies various classes of character by individual bits:\n" .
    " 0x%02x white space character\n" .
    " 0x%02x hexadecimal digit\n" .
    " 0x%02x alphanumeric or '_'\n*/\n\n",
    $pcre_internal{ctype_space}, $pcre_internal{ctype_xdigit}, $pcre_internal{ctype_word});

# The ctypes table must directly follow the class bit maps.
die "ctypes_offset != cbits_offset + cbit_length"
    unless $pcre_internal{ctypes_offset} == $pcre_internal{cbits_offset} + $pcre_internal{cbit_length};

# One row per eight character codes. Each row ends with a comment naming the
# first and last code it covers: the character itself when printable, the
# decimal code otherwise. The very last value closes the array with "};".
foreach my $rowStart (map { $_ * 8 } 0 .. 15) {
    my $rowEnd = $rowStart + 7;

    my $cells = "";
    foreach my $code ($rowStart .. $rowEnd) {
        my $char = chr($code);
        my $flags = 0;
        $flags += $pcre_internal{ctype_space} if $char =~ /[[:space:]]/;
        $flags += $pcre_internal{ctype_xdigit} if $char =~ /[[:xdigit:]]/;
        $flags += $pcre_internal{ctype_word} if $char =~ /[[:alnum:]_]/;
        $cells .= sprintf("0x%02X", $flags) . ($code == 127 ? "};" : ", ");
    }

    my $firstLabel = chr($rowStart) =~ /[[:print:]]/
        ? sprintf(" %c -", $rowStart)
        : sprintf("%3d-", $rowStart);
    my $lastLabel = chr($rowEnd) =~ m/[[:print:]]/
        ? sprintf(" %c ", $rowEnd)
        : sprintf("%3d", $rowEnd);

    print OUT " ", $cells, " /* ", $firstLabel, $lastLabel, " */\n";
}

# The ctypes table has 128 entries and is the last table in the array.
die "tables_length != ctypes_offset + 128"
    unless $pcre_internal{tables_length} == $pcre_internal{ctypes_offset} + 128;
|
217 |
|
print OUT "\n\n/* End of chartables.c */\n";

# Output is buffered, so a failed write (for example a full disk) may only be
# reported when the handle is closed. Treat that as fatal so a truncated table
# file is never mistaken for a successful run.
close(OUT) or die "Cannot finish writing '$outputFile': $!";

exit 0;
|
223 |
|
# Fills %pcre_internal with the table layout constants from pcre_internal.h.
#
# The header (expected next to this script) is copied into a temporary file,
# preceded by "#define DFTABLES" and followed by one Perl assignment per
# constant, e.g.  $pcre_internal{"cbit_digit"} = cbit_digit;
# Running $preprocessor over that file replaces each constant name with its
# value, and the resulting text is evaluated as Perl.
#
# Takes no arguments and returns nothing. Dies if the header cannot be read,
# the temporary file cannot be written, the preprocessor produces no output,
# the output does not evaluate, or any constant is left undefined.
sub readHeaderValues()
{
    my @variables = qw(
        cbit_digit
        cbit_length
        cbit_space
        cbit_word
        cbits_offset
        ctype_space
        ctype_word
        ctype_xdigit
        ctypes_offset
        fcc_offset
        lcc_offset
        tables_length
    );

    # Slurp mode: the header and the preprocessor output are each read whole.
    local $/ = undef;

    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");

    # UNLINK => 1 makes File::Temp remove the file at program exit, so it is
    # cleaned up on every die below and not only on the success path.
    my ($fh, $tempFile) = tempfile(
        basename($0) . "-XXXXXXXX",
        DIR => File::Spec->tmpdir(),
        SUFFIX => ".in",
        UNLINK => 1,
    );

    open(my $header, "<", $headerPath) or die "Cannot open '$headerPath': $!";
    my $headerText = <$header>;
    $headerText = "" unless defined($headerText);
    close($header);

    my $assignments = join("", map { "\$pcre_internal{\"$_\"} = $_;\n" } @variables);

    print {$fh} "#define DFTABLES\n\n", $headerText, "\n\n", $assignments
        or die "Cannot write to '$tempFile': $!";
    close($fh) or die "Cannot write to '$tempFile': $!";

    # The command is still handed to the shell as one string, so a
    # preprocessor given with arguments keeps working; the explicit "-|" mode
    # only stops the string itself from being parsed for an open mode.
    open(my $cpp, "-|", "$preprocessor \"$tempFile\"")
        or die "Cannot run preprocessor '$preprocessor': $!";
    my $content = <$cpp>;
    if (!close($cpp)) {
        # For a piped open, $! is 0 when the only problem was a non-zero exit
        # status. Report it but carry on: the output is validated below.
        warn($! ? "Error reading from preprocessor '$preprocessor': $!\n"
                : "Preprocessor '$preprocessor' exited with status " . ($? >> 8) . "\n");
    }

    die "Preprocessor '$preprocessor' produced no output for '$headerPath'\n"
        unless defined($content) && length($content);

    # NOTE: this executes the preprocessor output as Perl code. That is how the
    # values are obtained, so the header and the preprocessor must be trusted.
    eval $content;
    die "$@" if $@;

    my @missing = grep { !defined($pcre_internal{$_}) } @variables;
    die "No value found in '$headerPath' for: @missing\n" if @missing;

    unlink $tempFile;
    return;
}