|
1 /* |
|
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org) |
|
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. |
|
4 * Copyright (C) 2009 Torch Mobile, Inc. |
|
5 * |
|
6 * This library is free software; you can redistribute it and/or |
|
7 * modify it under the terms of the GNU Lesser General Public |
|
8 * License as published by the Free Software Foundation; either |
|
9 * version 2 of the License, or (at your option) any later version. |
|
10 * |
|
11 * This library is distributed in the hope that it will be useful, |
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 * Lesser General Public License for more details. |
|
15 * |
|
16 * You should have received a copy of the GNU Lesser General Public |
|
17 * License along with this library; if not, write to the Free Software |
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
19 * |
|
20 */ |
|
21 |
|
22 #include "config.h" |
|
23 #include "RegExp.h" |
|
24 #include "Lexer.h" |
|
25 #include <stdio.h> |
|
26 #include <stdlib.h> |
|
27 #include <string.h> |
|
28 #include <wtf/Assertions.h> |
|
29 #include <wtf/OwnArrayPtr.h> |
|
30 |
|
31 |
|
32 #if ENABLE(YARR) |
|
33 |
|
34 #include "yarr/RegexCompiler.h" |
|
35 #if ENABLE(YARR_JIT) |
|
36 #include "yarr/RegexJIT.h" |
|
37 #else |
|
38 #include "yarr/RegexInterpreter.h" |
|
39 #endif |
|
40 |
|
41 #else |
|
42 |
|
43 #include <pcre/pcre.h> |
|
44 |
|
45 #endif |
|
46 |
|
47 namespace JSC { |
|
48 |
|
49 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags) |
|
50 : m_pattern(pattern) |
|
51 , m_flagBits(0) |
|
52 , m_constructionError(0) |
|
53 , m_numSubpatterns(0) |
|
54 , m_lastMatchStart(-1) |
|
55 { |
|
56 // NOTE: The global flag is handled on a case-by-case basis by functions like |
|
57 // String::match and RegExpObject::match. |
|
58 if (!flags.isNull()) { |
|
59 if (flags.find('g') != UString::NotFound) |
|
60 m_flagBits |= Global; |
|
61 if (flags.find('i') != UString::NotFound) |
|
62 m_flagBits |= IgnoreCase; |
|
63 if (flags.find('m') != UString::NotFound) |
|
64 m_flagBits |= Multiline; |
|
65 } |
|
66 compile(globalData); |
|
67 } |
|
68 |
|
69 #if !ENABLE(YARR) |
|
70 RegExp::~RegExp() |
|
71 { |
|
72 jsRegExpFree(m_regExp); |
|
73 } |
|
74 #endif |
|
75 |
|
76 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags) |
|
77 { |
|
78 return adoptRef(new RegExp(globalData, pattern, flags)); |
|
79 } |
|
80 |
|
81 #if ENABLE(YARR) |
|
82 |
|
83 void RegExp::compile(JSGlobalData* globalData) |
|
84 { |
|
85 #if ENABLE(YARR_JIT) |
|
86 Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()); |
|
87 #else |
|
88 UNUSED_PARAM(globalData); |
|
89 m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline())); |
|
90 #endif |
|
91 } |
|
92 |
|
93 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) |
|
94 { |
|
95 if (startOffset < 0) |
|
96 startOffset = 0; |
|
97 if (ovector) |
|
98 ovector->resize(0); |
|
99 |
|
100 if (static_cast<unsigned>(startOffset) > s.size() || s.isNull()) { |
|
101 m_lastMatchString = UString(); |
|
102 m_lastMatchStart = -1; |
|
103 m_lastOVector.shrink(0); |
|
104 return -1; |
|
105 } |
|
106 |
|
107 // Perform check to see if this match call is the same as the last match invocation |
|
108 // and if it is return the prior result. |
|
109 if ((startOffset == m_lastMatchStart) && (s.rep() == m_lastMatchString.rep())) { |
|
110 if (ovector) |
|
111 *ovector = m_lastOVector; |
|
112 |
|
113 if (m_lastOVector.isEmpty()) |
|
114 return -1; |
|
115 |
|
116 return m_lastOVector.at(0); |
|
117 } |
|
118 |
|
119 #if ENABLE(YARR_JIT) |
|
120 if (!!m_regExpJITCode) { |
|
121 #else |
|
122 if (m_regExpBytecode) { |
|
123 #endif |
|
124 int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre. |
|
125 int* offsetVector; |
|
126 Vector<int, 32> nonReturnedOvector; |
|
127 if (ovector) { |
|
128 ovector->resize(offsetVectorSize); |
|
129 offsetVector = ovector->data(); |
|
130 } else { |
|
131 nonReturnedOvector.resize(offsetVectorSize); |
|
132 offsetVector = nonReturnedOvector.data(); |
|
133 } |
|
134 |
|
135 ASSERT(offsetVector); |
|
136 for (int j = 0; j < offsetVectorSize; ++j) |
|
137 offsetVector[j] = -1; |
|
138 |
|
139 #if ENABLE(YARR_JIT) |
|
140 int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize); |
|
141 #else |
|
142 int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector); |
|
143 #endif |
|
144 |
|
145 if (result < 0) { |
|
146 #ifndef NDEBUG |
|
147 // TODO: define up a symbol, rather than magic -1 |
|
148 if (result != -1) |
|
149 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result); |
|
150 #endif |
|
151 if (ovector) |
|
152 ovector->clear(); |
|
153 } |
|
154 |
|
155 m_lastMatchString = s; |
|
156 m_lastMatchStart = startOffset; |
|
157 |
|
158 if (ovector) |
|
159 m_lastOVector = *ovector; |
|
160 else |
|
161 m_lastOVector = nonReturnedOvector; |
|
162 |
|
163 return result; |
|
164 } |
|
165 |
|
166 m_lastMatchString = UString(); |
|
167 m_lastMatchStart = -1; |
|
168 m_lastOVector.shrink(0); |
|
169 |
|
170 return -1; |
|
171 } |
|
172 |
|
173 #else |
|
174 |
|
175 void RegExp::compile(JSGlobalData*) |
|
176 { |
|
177 m_regExp = 0; |
|
178 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase; |
|
179 JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine; |
|
180 m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError); |
|
181 } |
|
182 |
|
183 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector) |
|
184 { |
|
185 if (startOffset < 0) |
|
186 startOffset = 0; |
|
187 if (ovector) |
|
188 ovector->clear(); |
|
189 |
|
190 if (static_cast<unsigned>(startOffset) > s.size() || s.isNull()) |
|
191 return -1; |
|
192 |
|
193 if (m_regExp) { |
|
194 // Set up the offset vector for the result. |
|
195 // First 2/3 used for result, the last third used by PCRE. |
|
196 int* offsetVector; |
|
197 int offsetVectorSize; |
|
198 int fixedSizeOffsetVector[3]; |
|
199 if (!ovector) { |
|
200 offsetVectorSize = 3; |
|
201 offsetVector = fixedSizeOffsetVector; |
|
202 } else { |
|
203 offsetVectorSize = (m_numSubpatterns + 1) * 3; |
|
204 ovector->resize(offsetVectorSize); |
|
205 offsetVector = ovector->data(); |
|
206 } |
|
207 |
|
208 int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize); |
|
209 |
|
210 if (numMatches < 0) { |
|
211 #ifndef NDEBUG |
|
212 if (numMatches != JSRegExpErrorNoMatch) |
|
213 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches); |
|
214 #endif |
|
215 if (ovector) |
|
216 ovector->clear(); |
|
217 return -1; |
|
218 } |
|
219 |
|
220 return offsetVector[0]; |
|
221 } |
|
222 |
|
223 return -1; |
|
224 } |
|
225 |
|
226 #endif |
|
227 |
|
228 } // namespace JSC |