|
1 /* This is JavaScriptCore's variant of the PCRE library. While this library |
|
2 started out as a copy of PCRE, many of the features of PCRE have been |
|
3 removed. This library now supports only the regular expression features |
|
4 required by the JavaScript language specification, and has only the functions |
|
5 needed by JavaScriptCore and the rest of WebKit. |
|
6 |
|
7 Originally written by Philip Hazel |
|
8 Copyright (c) 1997-2006 University of Cambridge |
|
9 Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
|
10 |
|
11 ----------------------------------------------------------------------------- |
|
12 Redistribution and use in source and binary forms, with or without |
|
13 modification, are permitted provided that the following conditions are met: |
|
14 |
|
15 * Redistributions of source code must retain the above copyright notice, |
|
16 this list of conditions and the following disclaimer. |
|
17 |
|
18 * Redistributions in binary form must reproduce the above copyright |
|
19 notice, this list of conditions and the following disclaimer in the |
|
20 documentation and/or other materials provided with the distribution. |
|
21 |
|
22 * Neither the name of the University of Cambridge nor the names of its |
|
23 contributors may be used to endorse or promote products derived from |
|
24 this software without specific prior written permission. |
|
25 |
|
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
|
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
36 POSSIBILITY OF SUCH DAMAGE. |
|
37 ----------------------------------------------------------------------------- |
|
38 */ |
|
39 |
|
40 /* This module contains an internal function that is used to match an extended |
|
41 class (one that contains characters whose values are > 255). */ |
|
42 |
|
43 #include "config.h" |
|
44 #include "pcre_internal.h" |
|
45 |
|
46 /************************************************* |
|
47 * Match character against an XCLASS * |
|
48 *************************************************/ |
|
49 |
|
50 /* This function is called to match a character against an extended class that |
|
51 might contain values > 255. |
|
52 |
|
53 Arguments: |
|
54 c the character |
|
55 data points to the flag byte of the XCLASS data |
|
56 |
|
57 Returns: true if character matches, else false |
|
58 */ |
|
59 |
|
60 /* Get the next UTF-8 character, advancing the pointer. This is called when we |
|
61 know we are in UTF-8 mode. */ |
|
62 |
|
63 static inline void getUTF8CharAndAdvancePointer(int& c, const unsigned char*& subjectPtr) |
|
64 { |
|
65 c = *subjectPtr++; |
|
66 if ((c & 0xc0) == 0xc0) { |
|
67 int gcaa = jsc_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
|
68 int gcss = 6 * gcaa; |
|
69 c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss; |
|
70 while (gcaa-- > 0) { |
|
71 gcss -= 6; |
|
72 c |= (*subjectPtr++ & 0x3f) << gcss; |
|
73 } |
|
74 } |
|
75 } |
|
76 |
|
77 bool jsc_pcre_xclass(int c, const unsigned char* data) |
|
78 { |
|
79 bool negated = (*data & XCL_NOT); |
|
80 |
|
81 /* Character values < 256 are matched against a bitmap, if one is present. If |
|
82 not, we still carry on, because there may be ranges that start below 256 in the |
|
83 additional data. */ |
|
84 |
|
85 if (c < 256) { |
|
86 if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0) |
|
87 return !negated; /* char found */ |
|
88 } |
|
89 |
|
90 /* First skip the bit map if present. Then match against the list of Unicode |
|
91 properties or large chars or ranges that end with a large char. We won't ever |
|
92 encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */ |
|
93 |
|
94 if ((*data++ & XCL_MAP) != 0) |
|
95 data += 32; |
|
96 |
|
97 int t; |
|
98 while ((t = *data++) != XCL_END) { |
|
99 if (t == XCL_SINGLE) { |
|
100 int x; |
|
101 getUTF8CharAndAdvancePointer(x, data); |
|
102 if (c == x) |
|
103 return !negated; |
|
104 } |
|
105 else if (t == XCL_RANGE) { |
|
106 int x, y; |
|
107 getUTF8CharAndAdvancePointer(x, data); |
|
108 getUTF8CharAndAdvancePointer(y, data); |
|
109 if (c >= x && c <= y) |
|
110 return !negated; |
|
111 } |
|
112 } |
|
113 |
|
114 return negated; /* char did not match */ |
|
115 } |