JavaScriptCore/pcre/dftables
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 #!/usr/bin/perl -w
       
     2 #
       
     3 # This is JavaScriptCore's variant of the PCRE library. While this library
       
     4 # started out as a copy of PCRE, many of the features of PCRE have been
       
     5 # removed. This library now supports only the regular expression features
       
     6 # required by the JavaScript language specification, and has only the functions
       
     7 # needed by JavaScriptCore and the rest of WebKit.
       
     8 # 
       
     9 #                  Originally written by Philip Hazel
       
    10 #            Copyright (c) 1997-2006 University of Cambridge
       
    11 #  Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc.  All rights reserved.
       
    12 # 
       
    13 # -----------------------------------------------------------------------------
       
    14 # Redistribution and use in source and binary forms, with or without
       
    15 # modification, are permitted provided that the following conditions are met:
       
    16 # 
       
    17 #     * Redistributions of source code must retain the above copyright notice,
       
    18 #       this list of conditions and the following disclaimer.
       
    19 # 
       
    20 #     * Redistributions in binary form must reproduce the above copyright
       
    21 #       notice, this list of conditions and the following disclaimer in the
       
    22 #       documentation and/or other materials provided with the distribution.
       
    23 # 
       
    24 #     * Neither the name of the University of Cambridge nor the names of its
       
    25 #       contributors may be used to endorse or promote products derived from
       
    26 #       this software without specific prior written permission.
       
    27 # 
       
    28 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
       
    29 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    30 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    31 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
       
    32 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    33 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    34 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    35 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    36 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    37 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    38 # POSSIBILITY OF SUCH DAMAGE.
       
    39 # -----------------------------------------------------------------------------
       
    40 
       
    41 # This is a freestanding support program to generate a file containing
       
    42 # character tables. The tables are built according to the default C
       
    43 # locale.
       
    44 
       
    45 use strict;
       
    46 
       
    47 use File::Basename;
       
    48 use File::Spec;
       
    49 use File::Temp qw(tempfile);
       
    50 use Getopt::Long;
       
    51 
       
    52 sub readHeaderValues();
       
    53 
       
    54 my %pcre_internal;
       
    55 
       
    56 if (scalar(@ARGV) < 1) {
       
    57     print STDERR "Usage: ", basename($0), " [--preprocessor=program] output-file\n";
       
    58     exit 1;
       
    59 }
       
    60 
       
    61 my $outputFile;
       
    62 my $preprocessor;
       
    63 GetOptions('preprocessor=s' => \$preprocessor);
       
    64 if (not $preprocessor) {
       
    65     $preprocessor = "cpp";
       
    66 }
       
    67 
       
    68 $outputFile = $ARGV[0];
       
    69 die('Must specify output file.') unless defined($outputFile);
       
    70 
       
    71 readHeaderValues();
       
    72 
       
    73 open(OUT, ">", $outputFile) or die "$!";
       
    74 binmode(OUT);
       
    75 
       
    76 printf(OUT
       
    77     "/*************************************************\n" .
       
    78     "*      Perl-Compatible Regular Expressions       *\n" .
       
    79     "*************************************************/\n\n" .
       
    80     "/* This file is automatically written by the dftables auxiliary \n" .
       
    81     "program. If you edit it by hand, you might like to edit the Makefile to \n" .
       
    82     "prevent its ever being regenerated.\n\n");
       
    83 printf(OUT
       
    84     "This file contains the default tables for characters with codes less than\n" .
       
    85     "128 (ASCII characters). These tables are used when no external tables are\n" .
       
    86     "passed to PCRE. */\n\n" .
       
    87     "const unsigned char jsc_pcre_default_tables[%d] = {\n\n" .
       
    88     "/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length});
       
    89 
       
    90 if ($pcre_internal{lcc_offset} != 0) {
       
    91     die "lcc_offset != 0";
       
    92 }
       
    93 
       
    94 printf(OUT "  ");
       
    95 for (my $i = 0; $i < 128; $i++) {
       
    96     if (($i & 7) == 0 && $i != 0) {
       
    97         printf(OUT "\n  ");
       
    98     }
       
    99     printf(OUT "0x%02X", ord(lc(chr($i))));
       
   100     if ($i != 127) {
       
   101         printf(OUT ", ");
       
   102     }
       
   103 }
       
   104 printf(OUT ",\n\n");
       
   105 
       
   106 printf(OUT "/* This table is a case flipping table. */\n\n");
       
   107 
       
   108 if ($pcre_internal{fcc_offset} != 128) {
       
   109   die "fcc_offset != 128";
       
   110 }
       
   111 
       
   112 printf(OUT "  ");
       
   113 for (my $i = 0; $i < 128; $i++) {
       
   114     if (($i & 7) == 0 && $i != 0) {
       
   115         printf(OUT "\n  ");
       
   116     }
       
   117     my $c = chr($i);
       
   118     printf(OUT "0x%02X", $c =~ /[[:lower:]]/ ? ord(uc($c)) : ord(lc($c)));
       
   119     if ($i != 127) {
       
   120         printf(OUT ", ");
       
   121     }
       
   122 }
       
   123 printf(OUT ",\n\n");
       
   124 
       
   125 printf(OUT
       
   126     "/* This table contains bit maps for various character classes.\n" .
       
   127     "Each map is 32 bytes long and the bits run from the least\n" .
       
   128     "significant end of each byte. The classes are: space, digit, word. */\n\n");
       
   129 
       
   130 if ($pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128) {
       
   131     die "cbits_offset != fcc_offset + 128";
       
   132 }
       
   133 
       
   134 my @cbit_table = (0) x $pcre_internal{cbit_length};
       
   135 for (my $i = ord('0'); $i <= ord('9'); $i++) {
       
   136     $cbit_table[$pcre_internal{cbit_digit} + $i / 8] |= 1 << ($i & 7);
       
   137 }
       
   138 $cbit_table[$pcre_internal{cbit_word} + ord('_') / 8] |= 1 << (ord('_') & 7);
       
   139 for (my $i = 0; $i < 128; $i++) {
       
   140     my $c = chr($i);
       
   141     if ($c =~ /[[:alnum:]]/) {
       
   142         $cbit_table[$pcre_internal{cbit_word} + $i / 8] |= 1 << ($i & 7);
       
   143     }
       
   144     if ($c =~ /[[:space:]]/) {
       
   145         $cbit_table[$pcre_internal{cbit_space} + $i / 8] |= 1 << ($i & 7);
       
   146     }
       
   147 }
       
   148 
       
   149 printf(OUT "  ");
       
   150 for (my $i = 0; $i < $pcre_internal{cbit_length}; $i++) {
       
   151     if (($i & 7) == 0 && $i != 0) {
       
   152         if (($i & 31) == 0) {
       
   153             printf(OUT "\n");
       
   154         }
       
   155         printf(OUT "\n  ");
       
   156     }
       
   157     printf(OUT "0x%02X", $cbit_table[$i]);
       
   158     if ($i != $pcre_internal{cbit_length} - 1) {
       
   159         printf(OUT ", ");
       
   160     }
       
   161 }
       
   162 printf(OUT ",\n\n");
       
   163 
       
   164 printf(OUT
       
   165     "/* This table identifies various classes of character by individual bits:\n" .
       
   166     "  0x%02x   white space character\n" .
       
   167     "  0x%02x   hexadecimal digit\n" .
       
   168     "  0x%02x   alphanumeric or '_'\n*/\n\n",
       
   169     $pcre_internal{ctype_space}, $pcre_internal{ctype_xdigit}, $pcre_internal{ctype_word});
       
   170 
       
   171 if ($pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length}) {
       
   172     die "ctypes_offset != cbits_offset + cbit_length";
       
   173 }
       
   174 
       
   175 printf(OUT "  ");
       
   176 for (my $i = 0; $i < 128; $i++) {
       
   177     my $x = 0;
       
   178     my $c = chr($i);
       
   179     if ($c =~ /[[:space:]]/) {
       
   180         $x += $pcre_internal{ctype_space};
       
   181     }
       
   182     if ($c =~ /[[:xdigit:]]/) {
       
   183         $x += $pcre_internal{ctype_xdigit};
       
   184     }
       
   185     if ($c =~ /[[:alnum:]_]/) {
       
   186         $x += $pcre_internal{ctype_word};
       
   187     }
       
   188     printf(OUT "0x%02X", $x);
       
   189     if ($i != 127) {
       
   190         printf(OUT ", ");
       
   191     } else {
       
   192         printf(OUT "};");
       
   193     }
       
   194     if (($i & 7) == 7) {
       
   195         printf(OUT " /* ");
       
   196         my $d = chr($i - 7);
       
   197         if ($d =~ /[[:print:]]/) {
       
   198             printf(OUT " %c -", $i - 7);
       
   199         } else {
       
   200             printf(OUT "%3d-", $i - 7);
       
   201         }
       
   202         if ($c =~ m/[[:print:]]/) {
       
   203             printf(OUT " %c ", $i);
       
   204         } else {
       
   205             printf(OUT "%3d", $i);
       
   206         }
       
   207         printf(OUT " */\n");
       
   208         if ($i != 127) {
       
   209             printf(OUT "  ");
       
   210         }
       
   211     }
       
   212 }
       
   213 
       
   214 if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) {
       
   215     die "tables_length != ctypes_offset + 128";
       
   216 }
       
   217 
       
   218 printf(OUT "\n\n/* End of chartables.c */\n");
       
   219 
       
   220 close(OUT);
       
   221 
       
   222 exit 0;
       
   223 
       
   224 sub readHeaderValues()
       
   225 {
       
   226     my @variables = qw(
       
   227         cbit_digit
       
   228         cbit_length
       
   229         cbit_space
       
   230         cbit_word
       
   231         cbits_offset
       
   232         ctype_space
       
   233         ctype_word
       
   234         ctype_xdigit
       
   235         ctypes_offset
       
   236         fcc_offset
       
   237         lcc_offset
       
   238         tables_length
       
   239     );
       
   240 
       
   241     local $/ = undef;
       
   242 
       
   243     my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");
       
   244  
       
   245     my ($fh, $tempFile) = tempfile(
       
   246         basename($0) . "-XXXXXXXX",
       
   247         DIR => File::Spec->tmpdir(),
       
   248         SUFFIX => ".in",
       
   249         UNLINK => 0,
       
   250     );
       
   251 
       
   252     print $fh "#define DFTABLES\n\n";
       
   253 
       
   254     open(HEADER, "<", $headerPath) or die "$!";
       
   255     print $fh <HEADER>;
       
   256     close(HEADER);
       
   257 
       
   258     print $fh "\n\n";
       
   259 
       
   260     for my $v (@variables) {
       
   261         print $fh "\$pcre_internal{\"$v\"} = $v;\n";
       
   262     }
       
   263 
       
   264     close($fh);
       
   265 
       
   266     open(CPP, "$preprocessor \"$tempFile\" |") or die "$!";
       
   267     my $content = <CPP>;
       
   268     close(CPP);
       
   269     
       
   270     eval $content;
       
   271     die "$@" if $@;
       
   272     unlink $tempFile;
       
   273 }