|
1 /* |
|
2 * Copyright (C) 2009 Apple Inc. All rights reserved. |
|
3 * |
|
4 * Redistribution and use in source and binary forms, with or without |
|
5 * modification, are permitted provided that the following conditions |
|
6 * are met: |
|
7 * 1. Redistributions of source code must retain the above copyright |
|
8 * notice, this list of conditions and the following disclaimer. |
|
9 * 2. Redistributions in binary form must reproduce the above copyright |
|
10 * notice, this list of conditions and the following disclaimer in the |
|
11 * documentation and/or other materials provided with the distribution. |
|
12 * |
|
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
|
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
|
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
24 */ |
|
25 |
|
26 #include "config.h" |
|
27 #include "RegexInterpreter.h" |
|
28 |
|
29 #include "RegexCompiler.h" |
|
30 #include "RegexPattern.h" |
|
31 |
|
32 #ifndef NDEBUG |
|
33 #include <stdio.h> |
|
34 #endif |
|
35 |
|
36 #if ENABLE(YARR) |
|
37 |
|
38 using namespace WTF; |
|
39 |
|
40 namespace JSC { namespace Yarr { |
|
41 |
|
42 class Interpreter { |
|
43 public: |
|
44 struct ParenthesesDisjunctionContext; |
|
45 |
|
46 struct BackTrackInfoPatternCharacter { |
|
47 uintptr_t matchAmount; |
|
48 }; |
|
49 struct BackTrackInfoCharacterClass { |
|
50 uintptr_t matchAmount; |
|
51 }; |
|
52 struct BackTrackInfoBackReference { |
|
53 uintptr_t begin; // Not really needed for greedy quantifiers. |
|
54 uintptr_t matchAmount; // Not really needed for fixed quantifiers. |
|
55 }; |
|
56 struct BackTrackInfoAlternative { |
|
57 uintptr_t offset; |
|
58 }; |
|
59 struct BackTrackInfoParentheticalAssertion { |
|
60 uintptr_t begin; |
|
61 }; |
|
62 struct BackTrackInfoParenthesesOnce { |
|
63 uintptr_t inParentheses; |
|
64 }; |
|
65 struct BackTrackInfoParentheses { |
|
66 uintptr_t matchAmount; |
|
67 ParenthesesDisjunctionContext* lastContext; |
|
68 uintptr_t prevBegin; |
|
69 uintptr_t prevEnd; |
|
70 }; |
|
71 |
|
72 static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context) |
|
73 { |
|
74 context->next = backTrack->lastContext; |
|
75 backTrack->lastContext = context; |
|
76 ++backTrack->matchAmount; |
|
77 } |
|
78 |
|
79 static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack) |
|
80 { |
|
81 ASSERT(backTrack->matchAmount); |
|
82 ASSERT(backTrack->lastContext); |
|
83 backTrack->lastContext = backTrack->lastContext->next; |
|
84 --backTrack->matchAmount; |
|
85 } |
|
86 |
|
87 struct DisjunctionContext |
|
88 { |
|
89 DisjunctionContext() |
|
90 : term(0) |
|
91 { |
|
92 } |
|
93 |
|
94 void* operator new(size_t, void* where) |
|
95 { |
|
96 return where; |
|
97 } |
|
98 |
|
99 int term; |
|
100 unsigned matchBegin; |
|
101 unsigned matchEnd; |
|
102 uintptr_t frame[1]; |
|
103 }; |
|
104 |
|
105 DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction) |
|
106 { |
|
107 return new(malloc(sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) DisjunctionContext(); |
|
108 } |
|
109 |
|
110 void freeDisjunctionContext(DisjunctionContext* context) |
|
111 { |
|
112 free(context); |
|
113 } |
|
114 |
|
115 struct ParenthesesDisjunctionContext |
|
116 { |
|
117 ParenthesesDisjunctionContext(int* output, ByteTerm& term) |
|
118 : next(0) |
|
119 { |
|
120 unsigned firstSubpatternId = term.atom.subpatternId; |
|
121 unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns; |
|
122 |
|
123 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) { |
|
124 subpatternBackup[i] = output[(firstSubpatternId << 1) + i]; |
|
125 output[(firstSubpatternId << 1) + i] = -1; |
|
126 } |
|
127 |
|
128 new(getDisjunctionContext(term)) DisjunctionContext(); |
|
129 } |
|
130 |
|
131 void* operator new(size_t, void* where) |
|
132 { |
|
133 return where; |
|
134 } |
|
135 |
|
136 void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns) |
|
137 { |
|
138 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) |
|
139 output[(firstSubpatternId << 1) + i] = subpatternBackup[i]; |
|
140 } |
|
141 |
|
142 DisjunctionContext* getDisjunctionContext(ByteTerm& term) |
|
143 { |
|
144 return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1])); |
|
145 } |
|
146 |
|
147 ParenthesesDisjunctionContext* next; |
|
148 int subpatternBackup[1]; |
|
149 }; |
|
150 |
|
151 ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term) |
|
152 { |
|
153 return new(malloc(sizeof(ParenthesesDisjunctionContext) + (((term.atom.parenthesesDisjunction->m_numSubpatterns << 1) - 1) * sizeof(int)) + sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) ParenthesesDisjunctionContext(output, term); |
|
154 } |
|
155 |
|
156 void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context) |
|
157 { |
|
158 free(context); |
|
159 } |
|
160 |
|
161 class InputStream { |
|
162 public: |
|
163 InputStream(const UChar* input, unsigned start, unsigned length) |
|
164 : input(input) |
|
165 , pos(start) |
|
166 , length(length) |
|
167 { |
|
168 } |
|
169 |
|
170 void next() |
|
171 { |
|
172 ++pos; |
|
173 } |
|
174 |
|
175 void rewind(unsigned amount) |
|
176 { |
|
177 ASSERT(pos >= amount); |
|
178 pos -= amount; |
|
179 } |
|
180 |
|
181 int read() |
|
182 { |
|
183 ASSERT(pos < length); |
|
184 if (pos < length) |
|
185 return input[pos]; |
|
186 return -1; |
|
187 } |
|
188 |
|
189 int readChecked(int position) |
|
190 { |
|
191 ASSERT(position < 0); |
|
192 ASSERT((unsigned)-position <= pos); |
|
193 unsigned p = pos + position; |
|
194 ASSERT(p < length); |
|
195 return input[p]; |
|
196 } |
|
197 |
|
198 int reread(unsigned from) |
|
199 { |
|
200 ASSERT(from < length); |
|
201 return input[from]; |
|
202 } |
|
203 |
|
204 int prev() |
|
205 { |
|
206 ASSERT(!(pos > length)); |
|
207 if (pos && length) |
|
208 return input[pos - 1]; |
|
209 return -1; |
|
210 } |
|
211 |
|
212 unsigned getPos() |
|
213 { |
|
214 return pos; |
|
215 } |
|
216 |
|
217 void setPos(unsigned p) |
|
218 { |
|
219 pos = p; |
|
220 } |
|
221 |
|
222 bool atStart() |
|
223 { |
|
224 return pos == 0; |
|
225 } |
|
226 |
|
227 bool atEnd() |
|
228 { |
|
229 return pos == length; |
|
230 } |
|
231 |
|
232 bool checkInput(int count) |
|
233 { |
|
234 if ((pos + count) <= length) { |
|
235 pos += count; |
|
236 return true; |
|
237 } else |
|
238 return false; |
|
239 } |
|
240 |
|
241 void uncheckInput(int count) |
|
242 { |
|
243 pos -= count; |
|
244 } |
|
245 |
|
246 bool atStart(int position) |
|
247 { |
|
248 return (pos + position) == 0; |
|
249 } |
|
250 |
|
251 bool atEnd(int position) |
|
252 { |
|
253 return (pos + position) == length; |
|
254 } |
|
255 |
|
256 private: |
|
257 const UChar* input; |
|
258 unsigned pos; |
|
259 unsigned length; |
|
260 }; |
|
261 |
|
262 bool testCharacterClass(CharacterClass* characterClass, int ch) |
|
263 { |
|
264 if (ch & 0xFF80) { |
|
265 for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i) |
|
266 if (ch == characterClass->m_matchesUnicode[i]) |
|
267 return true; |
|
268 for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i) |
|
269 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end)) |
|
270 return true; |
|
271 } else { |
|
272 for (unsigned i = 0; i < characterClass->m_matches.size(); ++i) |
|
273 if (ch == characterClass->m_matches[i]) |
|
274 return true; |
|
275 for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i) |
|
276 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end)) |
|
277 return true; |
|
278 } |
|
279 |
|
280 return false; |
|
281 } |
|
282 |
|
283 bool checkCharacter(int testChar, int inputPosition) |
|
284 { |
|
285 return testChar == input.readChecked(inputPosition); |
|
286 } |
|
287 |
|
288 bool checkCasedCharacter(int loChar, int hiChar, int inputPosition) |
|
289 { |
|
290 int ch = input.readChecked(inputPosition); |
|
291 return (loChar == ch) || (hiChar == ch); |
|
292 } |
|
293 |
|
294 bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition) |
|
295 { |
|
296 bool match = testCharacterClass(characterClass, input.readChecked(inputPosition)); |
|
297 return invert ? !match : match; |
|
298 } |
|
299 |
|
300 bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset) |
|
301 { |
|
302 int matchSize = matchEnd - matchBegin; |
|
303 |
|
304 if (!input.checkInput(matchSize)) |
|
305 return false; |
|
306 |
|
307 for (int i = 0; i < matchSize; ++i) { |
|
308 if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) { |
|
309 input.uncheckInput(matchSize); |
|
310 return false; |
|
311 } |
|
312 } |
|
313 |
|
314 return true; |
|
315 } |
|
316 |
|
317 bool matchAssertionBOL(ByteTerm& term) |
|
318 { |
|
319 return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1))); |
|
320 } |
|
321 |
|
322 bool matchAssertionEOL(ByteTerm& term) |
|
323 { |
|
324 if (term.inputPosition) |
|
325 return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition))); |
|
326 else |
|
327 return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read())); |
|
328 } |
|
329 |
|
330 bool matchAssertionWordBoundary(ByteTerm& term) |
|
331 { |
|
332 bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1)); |
|
333 bool readIsWordchar; |
|
334 if (term.inputPosition) |
|
335 readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition)); |
|
336 else |
|
337 readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read()); |
|
338 |
|
339 bool wordBoundary = prevIsWordchar != readIsWordchar; |
|
340 return term.invert() ? !wordBoundary : wordBoundary; |
|
341 } |
|
342 |
|
343 bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context) |
|
344 { |
|
345 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
|
346 |
|
347 switch (term.atom.quantityType) { |
|
348 case QuantifierFixedCount: |
|
349 break; |
|
350 |
|
351 case QuantifierGreedy: |
|
352 if (backTrack->matchAmount) { |
|
353 --backTrack->matchAmount; |
|
354 input.uncheckInput(1); |
|
355 return true; |
|
356 } |
|
357 break; |
|
358 |
|
359 case QuantifierNonGreedy: |
|
360 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
|
361 ++backTrack->matchAmount; |
|
362 if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1)) |
|
363 return true; |
|
364 } |
|
365 input.uncheckInput(backTrack->matchAmount); |
|
366 break; |
|
367 } |
|
368 |
|
369 return false; |
|
370 } |
|
371 |
|
372 bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context) |
|
373 { |
|
374 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
|
375 |
|
376 switch (term.atom.quantityType) { |
|
377 case QuantifierFixedCount: |
|
378 break; |
|
379 |
|
380 case QuantifierGreedy: |
|
381 if (backTrack->matchAmount) { |
|
382 --backTrack->matchAmount; |
|
383 input.uncheckInput(1); |
|
384 return true; |
|
385 } |
|
386 break; |
|
387 |
|
388 case QuantifierNonGreedy: |
|
389 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
|
390 ++backTrack->matchAmount; |
|
391 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1)) |
|
392 return true; |
|
393 } |
|
394 input.uncheckInput(backTrack->matchAmount); |
|
395 break; |
|
396 } |
|
397 |
|
398 return false; |
|
399 } |
|
400 |
|
401 bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context) |
|
402 { |
|
403 ASSERT(term.type == ByteTerm::TypeCharacterClass); |
|
404 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
|
405 |
|
406 switch (term.atom.quantityType) { |
|
407 case QuantifierFixedCount: { |
|
408 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { |
|
409 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount)) |
|
410 return false; |
|
411 } |
|
412 return true; |
|
413 } |
|
414 |
|
415 case QuantifierGreedy: { |
|
416 unsigned matchAmount = 0; |
|
417 while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
|
418 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) { |
|
419 input.uncheckInput(1); |
|
420 break; |
|
421 } |
|
422 ++matchAmount; |
|
423 } |
|
424 backTrack->matchAmount = matchAmount; |
|
425 |
|
426 return true; |
|
427 } |
|
428 |
|
429 case QuantifierNonGreedy: |
|
430 backTrack->matchAmount = 0; |
|
431 return true; |
|
432 } |
|
433 |
|
434 ASSERT_NOT_REACHED(); |
|
435 return false; |
|
436 } |
|
437 |
|
438 bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context) |
|
439 { |
|
440 ASSERT(term.type == ByteTerm::TypeCharacterClass); |
|
441 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
|
442 |
|
443 switch (term.atom.quantityType) { |
|
444 case QuantifierFixedCount: |
|
445 break; |
|
446 |
|
447 case QuantifierGreedy: |
|
448 if (backTrack->matchAmount) { |
|
449 --backTrack->matchAmount; |
|
450 input.uncheckInput(1); |
|
451 return true; |
|
452 } |
|
453 break; |
|
454 |
|
455 case QuantifierNonGreedy: |
|
456 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
|
457 ++backTrack->matchAmount; |
|
458 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) |
|
459 return true; |
|
460 } |
|
461 input.uncheckInput(backTrack->matchAmount); |
|
462 break; |
|
463 } |
|
464 |
|
465 return false; |
|
466 } |
|
467 |
|
468 bool matchBackReference(ByteTerm& term, DisjunctionContext* context) |
|
469 { |
|
470 ASSERT(term.type == ByteTerm::TypeBackReference); |
|
471 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation); |
|
472 |
|
473 int matchBegin = output[(term.atom.subpatternId << 1)]; |
|
474 int matchEnd = output[(term.atom.subpatternId << 1) + 1]; |
|
475 ASSERT((matchBegin == -1) == (matchEnd == -1)); |
|
476 ASSERT(matchBegin <= matchEnd); |
|
477 |
|
478 if (matchBegin == matchEnd) |
|
479 return true; |
|
480 |
|
481 switch (term.atom.quantityType) { |
|
482 case QuantifierFixedCount: { |
|
483 backTrack->begin = input.getPos(); |
|
484 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { |
|
485 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { |
|
486 input.setPos(backTrack->begin); |
|
487 return false; |
|
488 } |
|
489 } |
|
490 return true; |
|
491 } |
|
492 |
|
493 case QuantifierGreedy: { |
|
494 unsigned matchAmount = 0; |
|
495 while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) |
|
496 ++matchAmount; |
|
497 backTrack->matchAmount = matchAmount; |
|
498 return true; |
|
499 } |
|
500 |
|
501 case QuantifierNonGreedy: |
|
502 backTrack->begin = input.getPos(); |
|
503 backTrack->matchAmount = 0; |
|
504 return true; |
|
505 } |
|
506 |
|
507 ASSERT_NOT_REACHED(); |
|
508 return false; |
|
509 } |
|
510 |
|
511 bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context) |
|
512 { |
|
513 ASSERT(term.type == ByteTerm::TypeBackReference); |
|
514 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation); |
|
515 |
|
516 int matchBegin = output[(term.atom.subpatternId << 1)]; |
|
517 int matchEnd = output[(term.atom.subpatternId << 1) + 1]; |
|
518 ASSERT((matchBegin == -1) == (matchEnd == -1)); |
|
519 ASSERT(matchBegin <= matchEnd); |
|
520 |
|
521 if (matchBegin == matchEnd) |
|
522 return false; |
|
523 |
|
524 switch (term.atom.quantityType) { |
|
525 case QuantifierFixedCount: |
|
526 // for quantityCount == 1, could rewind. |
|
527 input.setPos(backTrack->begin); |
|
528 break; |
|
529 |
|
530 case QuantifierGreedy: |
|
531 if (backTrack->matchAmount) { |
|
532 --backTrack->matchAmount; |
|
533 input.rewind(matchEnd - matchBegin); |
|
534 return true; |
|
535 } |
|
536 break; |
|
537 |
|
538 case QuantifierNonGreedy: |
|
539 if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { |
|
540 ++backTrack->matchAmount; |
|
541 return true; |
|
542 } else |
|
543 input.setPos(backTrack->begin); |
|
544 break; |
|
545 } |
|
546 |
|
547 return false; |
|
548 } |
|
549 |
|
550 void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context) |
|
551 { |
|
552 if (term.capture()) { |
|
553 unsigned subpatternId = term.atom.subpatternId; |
|
554 output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition; |
|
555 output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition; |
|
556 } |
|
557 } |
|
558 void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context) |
|
559 { |
|
560 unsigned firstSubpatternId = term.atom.subpatternId; |
|
561 unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns; |
|
562 context->restoreOutput(output, firstSubpatternId, count); |
|
563 } |
|
564 void resetAssertionMatches(ByteTerm& term) |
|
565 { |
|
566 unsigned firstSubpatternId = term.atom.subpatternId; |
|
567 unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns; |
|
568 for (unsigned i = 0; i < (count << 1); ++i) |
|
569 output[(firstSubpatternId << 1) + i] = -1; |
|
570 } |
|
571 bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack) |
|
572 { |
|
573 while (backTrack->matchAmount) { |
|
574 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
575 |
|
576 if (matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true)) |
|
577 return true; |
|
578 |
|
579 resetMatches(term, context); |
|
580 popParenthesesDisjunctionContext(backTrack); |
|
581 freeParenthesesDisjunctionContext(context); |
|
582 } |
|
583 |
|
584 return false; |
|
585 } |
|
586 |
|
587 bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) |
|
588 { |
|
589 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
|
590 ASSERT(term.atom.quantityCount == 1); |
|
591 |
|
592 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
|
593 |
|
594 switch (term.atom.quantityType) { |
|
595 case QuantifierGreedy: { |
|
596 // set this speculatively; if we get to the parens end this will be true. |
|
597 backTrack->inParentheses = 1; |
|
598 break; |
|
599 } |
|
600 case QuantifierNonGreedy: { |
|
601 backTrack->inParentheses = 0; |
|
602 context->term += term.atom.parenthesesWidth; |
|
603 return true; |
|
604 } |
|
605 case QuantifierFixedCount: |
|
606 break; |
|
607 } |
|
608 |
|
609 if (term.capture()) { |
|
610 unsigned subpatternId = term.atom.subpatternId; |
|
611 output[(subpatternId << 1)] = input.getPos() + term.inputPosition; |
|
612 } |
|
613 |
|
614 return true; |
|
615 } |
|
616 |
|
617 bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext*) |
|
618 { |
|
619 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); |
|
620 ASSERT(term.atom.quantityCount == 1); |
|
621 |
|
622 if (term.capture()) { |
|
623 unsigned subpatternId = term.atom.subpatternId; |
|
624 output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition; |
|
625 } |
|
626 return true; |
|
627 } |
|
628 |
|
629 bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) |
|
630 { |
|
631 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
|
632 ASSERT(term.atom.quantityCount == 1); |
|
633 |
|
634 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
|
635 |
|
636 if (term.capture()) { |
|
637 unsigned subpatternId = term.atom.subpatternId; |
|
638 output[(subpatternId << 1)] = -1; |
|
639 output[(subpatternId << 1) + 1] = -1; |
|
640 } |
|
641 |
|
642 switch (term.atom.quantityType) { |
|
643 case QuantifierGreedy: |
|
644 // if we backtrack to this point, there is another chance - try matching nothing. |
|
645 ASSERT(backTrack->inParentheses); |
|
646 backTrack->inParentheses = 0; |
|
647 context->term += term.atom.parenthesesWidth; |
|
648 return true; |
|
649 case QuantifierNonGreedy: |
|
650 ASSERT(backTrack->inParentheses); |
|
651 case QuantifierFixedCount: |
|
652 break; |
|
653 } |
|
654 |
|
655 return false; |
|
656 } |
|
657 |
|
658 bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) |
|
659 { |
|
660 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); |
|
661 ASSERT(term.atom.quantityCount == 1); |
|
662 |
|
663 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
|
664 |
|
665 switch (term.atom.quantityType) { |
|
666 case QuantifierGreedy: |
|
667 if (!backTrack->inParentheses) { |
|
668 context->term -= term.atom.parenthesesWidth; |
|
669 return false; |
|
670 } |
|
671 case QuantifierNonGreedy: |
|
672 if (!backTrack->inParentheses) { |
|
673 // now try to match the parens; set this speculatively. |
|
674 backTrack->inParentheses = 1; |
|
675 if (term.capture()) { |
|
676 unsigned subpatternId = term.atom.subpatternId; |
|
677 output[subpatternId << 1] = input.getPos() + term.inputPosition; |
|
678 } |
|
679 context->term -= term.atom.parenthesesWidth; |
|
680 return true; |
|
681 } |
|
682 case QuantifierFixedCount: |
|
683 break; |
|
684 } |
|
685 |
|
686 return false; |
|
687 } |
|
688 |
|
689 bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) |
|
690 { |
|
691 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); |
|
692 ASSERT(term.atom.quantityCount == 1); |
|
693 |
|
694 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); |
|
695 |
|
696 backTrack->begin = input.getPos(); |
|
697 return true; |
|
698 } |
|
699 |
|
700 bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) |
|
701 { |
|
702 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); |
|
703 ASSERT(term.atom.quantityCount == 1); |
|
704 |
|
705 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); |
|
706 |
|
707 input.setPos(backTrack->begin); |
|
708 |
|
709 // We've reached the end of the parens; if they are inverted, this is failure. |
|
710 if (term.invert()) { |
|
711 context->term -= term.atom.parenthesesWidth; |
|
712 return false; |
|
713 } |
|
714 |
|
715 return true; |
|
716 } |
|
717 |
|
718 bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) |
|
719 { |
|
720 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); |
|
721 ASSERT(term.atom.quantityCount == 1); |
|
722 |
|
723 // We've failed to match parens; if they are inverted, this is win! |
|
724 if (term.invert()) { |
|
725 context->term += term.atom.parenthesesWidth; |
|
726 return true; |
|
727 } |
|
728 |
|
729 return false; |
|
730 } |
|
731 |
|
732 bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) |
|
733 { |
|
734 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); |
|
735 ASSERT(term.atom.quantityCount == 1); |
|
736 |
|
737 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); |
|
738 |
|
739 input.setPos(backTrack->begin); |
|
740 |
|
741 context->term -= term.atom.parenthesesWidth; |
|
742 return false; |
|
743 } |
|
744 |
|
745 bool matchParentheses(ByteTerm& term, DisjunctionContext* context) |
|
746 { |
|
747 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); |
|
748 |
|
749 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); |
|
750 |
|
751 unsigned subpatternId = term.atom.subpatternId; |
|
752 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; |
|
753 |
|
754 backTrack->prevBegin = output[(subpatternId << 1)]; |
|
755 backTrack->prevEnd = output[(subpatternId << 1) + 1]; |
|
756 |
|
757 backTrack->matchAmount = 0; |
|
758 backTrack->lastContext = 0; |
|
759 |
|
760 switch (term.atom.quantityType) { |
|
761 case QuantifierFixedCount: { |
|
762 // While we haven't yet reached our fixed limit, |
|
763 while (backTrack->matchAmount < term.atom.quantityCount) { |
|
764 // Try to do a match, and it it succeeds, add it to the list. |
|
765 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
|
766 if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term))) |
|
767 appendParenthesesDisjunctionContext(backTrack, context); |
|
768 else { |
|
769 // The match failed; try to find an alternate point to carry on from. |
|
770 resetMatches(term, context); |
|
771 freeParenthesesDisjunctionContext(context); |
|
772 if (!parenthesesDoBacktrack(term, backTrack)) |
|
773 return false; |
|
774 } |
|
775 } |
|
776 |
|
777 ASSERT(backTrack->matchAmount == term.atom.quantityCount); |
|
778 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
779 recordParenthesesMatch(term, context); |
|
780 return true; |
|
781 } |
|
782 |
|
783 case QuantifierGreedy: { |
|
784 while (backTrack->matchAmount < term.atom.quantityCount) { |
|
785 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
|
786 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) |
|
787 appendParenthesesDisjunctionContext(backTrack, context); |
|
788 else { |
|
789 resetMatches(term, context); |
|
790 freeParenthesesDisjunctionContext(context); |
|
791 break; |
|
792 } |
|
793 } |
|
794 |
|
795 if (backTrack->matchAmount) { |
|
796 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
797 recordParenthesesMatch(term, context); |
|
798 } |
|
799 return true; |
|
800 } |
|
801 |
|
802 case QuantifierNonGreedy: |
|
803 return true; |
|
804 } |
|
805 |
|
806 ASSERT_NOT_REACHED(); |
|
807 return false; |
|
808 } |
|
809 |
|
810 // Rules for backtracking differ depending on whether this is greedy or non-greedy. |
|
811 // |
|
812 // Greedy matches never should try just adding more - you should already have done |
|
813 // the 'more' cases. Always backtrack, at least a leetle bit. However cases where |
|
814 // you backtrack an item off the list needs checking, since we'll never have matched |
|
815 // the one less case. Tracking forwards, still add as much as possible. |
|
816 // |
|
817 // Non-greedy, we've already done the one less case, so don't match on popping. |
|
818 // We haven't done the one more case, so always try to add that. |
|
819 // |
|
820 bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context) |
|
821 { |
|
822 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); |
|
823 |
|
824 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); |
|
825 |
|
826 if (term.capture()) { |
|
827 unsigned subpatternId = term.atom.subpatternId; |
|
828 output[(subpatternId << 1)] = backTrack->prevBegin; |
|
829 output[(subpatternId << 1) + 1] = backTrack->prevEnd; |
|
830 } |
|
831 |
|
832 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; |
|
833 |
|
834 switch (term.atom.quantityType) { |
|
835 case QuantifierFixedCount: { |
|
836 ASSERT(backTrack->matchAmount == term.atom.quantityCount); |
|
837 |
|
838 ParenthesesDisjunctionContext* context = 0; |
|
839 |
|
840 if (!parenthesesDoBacktrack(term, backTrack)) |
|
841 return false; |
|
842 |
|
843 // While we haven't yet reached our fixed limit, |
|
844 while (backTrack->matchAmount < term.atom.quantityCount) { |
|
845 // Try to do a match, and it it succeeds, add it to the list. |
|
846 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
|
847 if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term))) |
|
848 appendParenthesesDisjunctionContext(backTrack, context); |
|
849 else { |
|
850 // The match failed; try to find an alternate point to carry on from. |
|
851 resetMatches(term, context); |
|
852 freeParenthesesDisjunctionContext(context); |
|
853 if (!parenthesesDoBacktrack(term, backTrack)) |
|
854 return false; |
|
855 } |
|
856 } |
|
857 |
|
858 ASSERT(backTrack->matchAmount == term.atom.quantityCount); |
|
859 context = backTrack->lastContext; |
|
860 recordParenthesesMatch(term, context); |
|
861 return true; |
|
862 } |
|
863 |
|
864 case QuantifierGreedy: { |
|
865 if (!backTrack->matchAmount) |
|
866 return false; |
|
867 |
|
868 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
869 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) { |
|
870 while (backTrack->matchAmount < term.atom.quantityCount) { |
|
871 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
|
872 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) |
|
873 appendParenthesesDisjunctionContext(backTrack, context); |
|
874 else { |
|
875 resetMatches(term, context); |
|
876 freeParenthesesDisjunctionContext(context); |
|
877 break; |
|
878 } |
|
879 } |
|
880 } else { |
|
881 resetMatches(term, context); |
|
882 popParenthesesDisjunctionContext(backTrack); |
|
883 freeParenthesesDisjunctionContext(context); |
|
884 } |
|
885 |
|
886 if (backTrack->matchAmount) { |
|
887 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
888 recordParenthesesMatch(term, context); |
|
889 } |
|
890 return true; |
|
891 } |
|
892 |
|
893 case QuantifierNonGreedy: { |
|
894 // If we've not reached the limit, try to add one more match. |
|
895 if (backTrack->matchAmount < term.atom.quantityCount) { |
|
896 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
|
897 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) { |
|
898 appendParenthesesDisjunctionContext(backTrack, context); |
|
899 recordParenthesesMatch(term, context); |
|
900 return true; |
|
901 } else { |
|
902 resetMatches(term, context); |
|
903 freeParenthesesDisjunctionContext(context); |
|
904 } |
|
905 } |
|
906 |
|
907 // Nope - okay backtrack looking for an alternative. |
|
908 while (backTrack->matchAmount) { |
|
909 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
910 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) { |
|
911 // successful backtrack! we're back in the game! |
|
912 if (backTrack->matchAmount) { |
|
913 context = backTrack->lastContext; |
|
914 recordParenthesesMatch(term, context); |
|
915 } |
|
916 return true; |
|
917 } |
|
918 |
|
919 // pop a match off the stack |
|
920 resetMatches(term, context); |
|
921 popParenthesesDisjunctionContext(backTrack); |
|
922 freeParenthesesDisjunctionContext(context); |
|
923 } |
|
924 |
|
925 return false; |
|
926 } |
|
927 } |
|
928 |
|
929 ASSERT_NOT_REACHED(); |
|
930 return false; |
|
931 } |
|
932 |
|
933 #define MATCH_NEXT() { ++context->term; goto matchAgain; } |
|
934 #define BACKTRACK() { --context->term; goto backtrack; } |
|
935 #define currentTerm() (disjunction->terms[context->term]) |
|
936 bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) |
|
937 { |
|
938 if (btrack) |
|
939 BACKTRACK(); |
|
940 |
|
941 context->matchBegin = input.getPos(); |
|
942 context->term = 0; |
|
943 |
|
944 matchAgain: |
|
945 ASSERT(context->term < static_cast<int>(disjunction->terms.size())); |
|
946 |
|
947 switch (currentTerm().type) { |
|
948 case ByteTerm::TypeSubpatternBegin: |
|
949 MATCH_NEXT(); |
|
950 case ByteTerm::TypeSubpatternEnd: |
|
951 context->matchEnd = input.getPos(); |
|
952 return true; |
|
953 |
|
954 case ByteTerm::TypeBodyAlternativeBegin: |
|
955 MATCH_NEXT(); |
|
956 case ByteTerm::TypeBodyAlternativeDisjunction: |
|
957 case ByteTerm::TypeBodyAlternativeEnd: |
|
958 context->matchEnd = input.getPos(); |
|
959 return true; |
|
960 |
|
961 case ByteTerm::TypeAlternativeBegin: |
|
962 MATCH_NEXT(); |
|
963 case ByteTerm::TypeAlternativeDisjunction: |
|
964 case ByteTerm::TypeAlternativeEnd: { |
|
965 int offset = currentTerm().alternative.end; |
|
966 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation); |
|
967 backTrack->offset = offset; |
|
968 context->term += offset; |
|
969 MATCH_NEXT(); |
|
970 } |
|
971 |
|
972 case ByteTerm::TypeAssertionBOL: |
|
973 if (matchAssertionBOL(currentTerm())) |
|
974 MATCH_NEXT(); |
|
975 BACKTRACK(); |
|
976 case ByteTerm::TypeAssertionEOL: |
|
977 if (matchAssertionEOL(currentTerm())) |
|
978 MATCH_NEXT(); |
|
979 BACKTRACK(); |
|
980 case ByteTerm::TypeAssertionWordBoundary: |
|
981 if (matchAssertionWordBoundary(currentTerm())) |
|
982 MATCH_NEXT(); |
|
983 BACKTRACK(); |
|
984 |
|
985 case ByteTerm::TypePatternCharacterOnce: |
|
986 case ByteTerm::TypePatternCharacterFixed: { |
|
987 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { |
|
988 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount)) |
|
989 BACKTRACK(); |
|
990 } |
|
991 MATCH_NEXT(); |
|
992 } |
|
993 case ByteTerm::TypePatternCharacterGreedy: { |
|
994 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
|
995 unsigned matchAmount = 0; |
|
996 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { |
|
997 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) { |
|
998 input.uncheckInput(1); |
|
999 break; |
|
1000 } |
|
1001 ++matchAmount; |
|
1002 } |
|
1003 backTrack->matchAmount = matchAmount; |
|
1004 |
|
1005 MATCH_NEXT(); |
|
1006 } |
|
1007 case ByteTerm::TypePatternCharacterNonGreedy: { |
|
1008 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
|
1009 backTrack->matchAmount = 0; |
|
1010 MATCH_NEXT(); |
|
1011 } |
|
1012 |
|
1013 case ByteTerm::TypePatternCasedCharacterOnce: |
|
1014 case ByteTerm::TypePatternCasedCharacterFixed: { |
|
1015 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { |
|
1016 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount)) |
|
1017 BACKTRACK(); |
|
1018 } |
|
1019 MATCH_NEXT(); |
|
1020 } |
|
1021 case ByteTerm::TypePatternCasedCharacterGreedy: { |
|
1022 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
|
1023 unsigned matchAmount = 0; |
|
1024 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { |
|
1025 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) { |
|
1026 input.uncheckInput(1); |
|
1027 break; |
|
1028 } |
|
1029 ++matchAmount; |
|
1030 } |
|
1031 backTrack->matchAmount = matchAmount; |
|
1032 |
|
1033 MATCH_NEXT(); |
|
1034 } |
|
1035 case ByteTerm::TypePatternCasedCharacterNonGreedy: { |
|
1036 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
|
1037 backTrack->matchAmount = 0; |
|
1038 MATCH_NEXT(); |
|
1039 } |
|
1040 |
|
1041 case ByteTerm::TypeCharacterClass: |
|
1042 if (matchCharacterClass(currentTerm(), context)) |
|
1043 MATCH_NEXT(); |
|
1044 BACKTRACK(); |
|
1045 case ByteTerm::TypeBackReference: |
|
1046 if (matchBackReference(currentTerm(), context)) |
|
1047 MATCH_NEXT(); |
|
1048 BACKTRACK(); |
|
1049 case ByteTerm::TypeParenthesesSubpattern: |
|
1050 if (matchParentheses(currentTerm(), context)) |
|
1051 MATCH_NEXT(); |
|
1052 BACKTRACK(); |
|
1053 case ByteTerm::TypeParenthesesSubpatternOnceBegin: |
|
1054 if (matchParenthesesOnceBegin(currentTerm(), context)) |
|
1055 MATCH_NEXT(); |
|
1056 BACKTRACK(); |
|
1057 case ByteTerm::TypeParenthesesSubpatternOnceEnd: |
|
1058 if (matchParenthesesOnceEnd(currentTerm(), context)) |
|
1059 MATCH_NEXT(); |
|
1060 BACKTRACK(); |
|
1061 case ByteTerm::TypeParentheticalAssertionBegin: |
|
1062 if (matchParentheticalAssertionBegin(currentTerm(), context)) |
|
1063 MATCH_NEXT(); |
|
1064 BACKTRACK(); |
|
1065 case ByteTerm::TypeParentheticalAssertionEnd: |
|
1066 if (matchParentheticalAssertionEnd(currentTerm(), context)) |
|
1067 MATCH_NEXT(); |
|
1068 BACKTRACK(); |
|
1069 |
|
1070 case ByteTerm::TypeCheckInput: |
|
1071 if (input.checkInput(currentTerm().checkInputCount)) |
|
1072 MATCH_NEXT(); |
|
1073 BACKTRACK(); |
|
1074 } |
|
1075 |
|
1076 // We should never fall-through to here. |
|
1077 ASSERT_NOT_REACHED(); |
|
1078 |
|
1079 backtrack: |
|
1080 ASSERT(context->term < static_cast<int>(disjunction->terms.size())); |
|
1081 |
|
1082 switch (currentTerm().type) { |
|
1083 case ByteTerm::TypeSubpatternBegin: |
|
1084 return false; |
|
1085 case ByteTerm::TypeSubpatternEnd: |
|
1086 ASSERT_NOT_REACHED(); |
|
1087 |
|
1088 case ByteTerm::TypeBodyAlternativeBegin: |
|
1089 case ByteTerm::TypeBodyAlternativeDisjunction: { |
|
1090 int offset = currentTerm().alternative.next; |
|
1091 context->term += offset; |
|
1092 if (offset > 0) |
|
1093 MATCH_NEXT(); |
|
1094 |
|
1095 if (input.atEnd()) |
|
1096 return false; |
|
1097 |
|
1098 input.next(); |
|
1099 context->matchBegin = input.getPos(); |
|
1100 MATCH_NEXT(); |
|
1101 } |
|
1102 case ByteTerm::TypeBodyAlternativeEnd: |
|
1103 ASSERT_NOT_REACHED(); |
|
1104 |
|
1105 case ByteTerm::TypeAlternativeBegin: |
|
1106 case ByteTerm::TypeAlternativeDisjunction: { |
|
1107 int offset = currentTerm().alternative.next; |
|
1108 context->term += offset; |
|
1109 if (offset > 0) |
|
1110 MATCH_NEXT(); |
|
1111 BACKTRACK(); |
|
1112 } |
|
1113 case ByteTerm::TypeAlternativeEnd: { |
|
1114 // We should never backtrack back into an alternative of the main body of the regex. |
|
1115 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation); |
|
1116 unsigned offset = backTrack->offset; |
|
1117 context->term -= offset; |
|
1118 BACKTRACK(); |
|
1119 } |
|
1120 |
|
1121 case ByteTerm::TypeAssertionBOL: |
|
1122 case ByteTerm::TypeAssertionEOL: |
|
1123 case ByteTerm::TypeAssertionWordBoundary: |
|
1124 BACKTRACK(); |
|
1125 |
|
1126 case ByteTerm::TypePatternCharacterOnce: |
|
1127 case ByteTerm::TypePatternCharacterFixed: |
|
1128 case ByteTerm::TypePatternCharacterGreedy: |
|
1129 case ByteTerm::TypePatternCharacterNonGreedy: |
|
1130 if (backtrackPatternCharacter(currentTerm(), context)) |
|
1131 MATCH_NEXT(); |
|
1132 BACKTRACK(); |
|
1133 case ByteTerm::TypePatternCasedCharacterOnce: |
|
1134 case ByteTerm::TypePatternCasedCharacterFixed: |
|
1135 case ByteTerm::TypePatternCasedCharacterGreedy: |
|
1136 case ByteTerm::TypePatternCasedCharacterNonGreedy: |
|
1137 if (backtrackPatternCasedCharacter(currentTerm(), context)) |
|
1138 MATCH_NEXT(); |
|
1139 BACKTRACK(); |
|
1140 case ByteTerm::TypeCharacterClass: |
|
1141 if (backtrackCharacterClass(currentTerm(), context)) |
|
1142 MATCH_NEXT(); |
|
1143 BACKTRACK(); |
|
1144 case ByteTerm::TypeBackReference: |
|
1145 if (backtrackBackReference(currentTerm(), context)) |
|
1146 MATCH_NEXT(); |
|
1147 BACKTRACK(); |
|
1148 case ByteTerm::TypeParenthesesSubpattern: |
|
1149 if (backtrackParentheses(currentTerm(), context)) |
|
1150 MATCH_NEXT(); |
|
1151 BACKTRACK(); |
|
1152 case ByteTerm::TypeParenthesesSubpatternOnceBegin: |
|
1153 if (backtrackParenthesesOnceBegin(currentTerm(), context)) |
|
1154 MATCH_NEXT(); |
|
1155 BACKTRACK(); |
|
1156 case ByteTerm::TypeParenthesesSubpatternOnceEnd: |
|
1157 if (backtrackParenthesesOnceEnd(currentTerm(), context)) |
|
1158 MATCH_NEXT(); |
|
1159 BACKTRACK(); |
|
1160 case ByteTerm::TypeParentheticalAssertionBegin: |
|
1161 if (backtrackParentheticalAssertionBegin(currentTerm(), context)) |
|
1162 MATCH_NEXT(); |
|
1163 BACKTRACK(); |
|
1164 case ByteTerm::TypeParentheticalAssertionEnd: |
|
1165 if (backtrackParentheticalAssertionEnd(currentTerm(), context)) |
|
1166 MATCH_NEXT(); |
|
1167 BACKTRACK(); |
|
1168 |
|
1169 case ByteTerm::TypeCheckInput: |
|
1170 input.uncheckInput(currentTerm().checkInputCount); |
|
1171 BACKTRACK(); |
|
1172 } |
|
1173 |
|
1174 ASSERT_NOT_REACHED(); |
|
1175 return false; |
|
1176 } |
|
1177 |
|
1178 bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) |
|
1179 { |
|
1180 if (matchDisjunction(disjunction, context, btrack)) { |
|
1181 while (context->matchBegin == context->matchEnd) { |
|
1182 if (!matchDisjunction(disjunction, context, true)) |
|
1183 return false; |
|
1184 } |
|
1185 return true; |
|
1186 } |
|
1187 |
|
1188 return false; |
|
1189 } |
|
1190 |
|
1191 int interpret() |
|
1192 { |
|
1193 for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i) |
|
1194 output[i] = -1; |
|
1195 |
|
1196 DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get()); |
|
1197 |
|
1198 if (matchDisjunction(pattern->m_body.get(), context)) { |
|
1199 output[0] = context->matchBegin; |
|
1200 output[1] = context->matchEnd; |
|
1201 } |
|
1202 |
|
1203 freeDisjunctionContext(context); |
|
1204 |
|
1205 return output[0]; |
|
1206 } |
|
1207 |
|
1208 Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length) |
|
1209 : pattern(pattern) |
|
1210 , output(output) |
|
1211 , input(inputChar, start, length) |
|
1212 { |
|
1213 } |
|
1214 |
|
1215 private: |
|
1216 BytecodePattern *pattern; |
|
1217 int* output; |
|
1218 InputStream input; |
|
1219 }; |
|
1220 |
|
1221 |
|
1222 |
|
1223 class ByteCompiler { |
|
1224 struct ParenthesesStackEntry { |
|
1225 unsigned beginTerm; |
|
1226 unsigned savedAlternativeIndex; |
|
1227 ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/) |
|
1228 : beginTerm(beginTerm) |
|
1229 , savedAlternativeIndex(savedAlternativeIndex) |
|
1230 { |
|
1231 } |
|
1232 }; |
|
1233 |
|
1234 public: |
|
1235 ByteCompiler(RegexPattern& pattern) |
|
1236 : m_pattern(pattern) |
|
1237 { |
|
1238 m_currentAlternativeIndex = 0; |
|
1239 } |
|
1240 |
|
1241 PassOwnPtr<BytecodePattern> compile() |
|
1242 { |
|
1243 regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize); |
|
1244 emitDisjunction(m_pattern.m_body); |
|
1245 regexEnd(); |
|
1246 |
|
1247 return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern)); |
|
1248 } |
|
1249 |
|
1250 void checkInput(unsigned count) |
|
1251 { |
|
1252 m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count)); |
|
1253 } |
|
1254 |
|
1255 void assertionBOL(int inputPosition) |
|
1256 { |
|
1257 m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition)); |
|
1258 } |
|
1259 |
|
1260 void assertionEOL(int inputPosition) |
|
1261 { |
|
1262 m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition)); |
|
1263 } |
|
1264 |
|
1265 void assertionWordBoundary(bool invert, int inputPosition) |
|
1266 { |
|
1267 m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition)); |
|
1268 } |
|
1269 |
|
1270 void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) |
|
1271 { |
|
1272 if (m_pattern.m_ignoreCase) { |
|
1273 UChar lo = Unicode::toLower(ch); |
|
1274 UChar hi = Unicode::toUpper(ch); |
|
1275 |
|
1276 if (lo != hi) { |
|
1277 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType)); |
|
1278 return; |
|
1279 } |
|
1280 } |
|
1281 |
|
1282 m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType)); |
|
1283 } |
|
1284 |
|
1285 void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) |
|
1286 { |
|
1287 m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition)); |
|
1288 |
|
1289 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount; |
|
1290 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; |
|
1291 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
|
1292 } |
|
1293 |
|
1294 void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) |
|
1295 { |
|
1296 ASSERT(subpatternId); |
|
1297 |
|
1298 m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition)); |
|
1299 |
|
1300 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount; |
|
1301 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; |
|
1302 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
|
1303 } |
|
1304 |
|
1305 void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) |
|
1306 { |
|
1307 int beginTerm = m_bodyDisjunction->terms.size(); |
|
1308 |
|
1309 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition)); |
|
1310 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
|
1311 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); |
|
1312 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; |
|
1313 |
|
1314 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); |
|
1315 m_currentAlternativeIndex = beginTerm + 1; |
|
1316 } |
|
1317 |
|
1318 void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation) |
|
1319 { |
|
1320 int beginTerm = m_bodyDisjunction->terms.size(); |
|
1321 |
|
1322 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0)); |
|
1323 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
|
1324 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); |
|
1325 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; |
|
1326 |
|
1327 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); |
|
1328 m_currentAlternativeIndex = beginTerm + 1; |
|
1329 } |
|
1330 |
|
1331 unsigned popParenthesesStack() |
|
1332 { |
|
1333 ASSERT(m_parenthesesStack.size()); |
|
1334 int stackEnd = m_parenthesesStack.size() - 1; |
|
1335 unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm; |
|
1336 m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex; |
|
1337 m_parenthesesStack.shrink(stackEnd); |
|
1338 |
|
1339 ASSERT(beginTerm < m_bodyDisjunction->terms.size()); |
|
1340 ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size()); |
|
1341 |
|
1342 return beginTerm; |
|
1343 } |
|
1344 |
|
1345 #ifndef NDEBUG |
|
1346 void dumpDisjunction(ByteDisjunction* disjunction) |
|
1347 { |
|
1348 printf("ByteDisjunction(%p):\n\t", disjunction); |
|
1349 for (unsigned i = 0; i < disjunction->terms.size(); ++i) |
|
1350 printf("{ %d } ", disjunction->terms[i].type); |
|
1351 printf("\n"); |
|
1352 } |
|
1353 #endif |
|
1354 |
|
1355 void closeAlternative(int beginTerm) |
|
1356 { |
|
1357 int origBeginTerm = beginTerm; |
|
1358 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin); |
|
1359 int endIndex = m_bodyDisjunction->terms.size(); |
|
1360 |
|
1361 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; |
|
1362 |
|
1363 if (!m_bodyDisjunction->terms[beginTerm].alternative.next) |
|
1364 m_bodyDisjunction->terms.remove(beginTerm); |
|
1365 else { |
|
1366 while (m_bodyDisjunction->terms[beginTerm].alternative.next) { |
|
1367 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; |
|
1368 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction); |
|
1369 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; |
|
1370 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; |
|
1371 } |
|
1372 |
|
1373 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; |
|
1374 |
|
1375 m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd()); |
|
1376 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; |
|
1377 } |
|
1378 } |
|
1379 |
|
1380 void closeBodyAlternative() |
|
1381 { |
|
1382 int beginTerm = 0; |
|
1383 int origBeginTerm = 0; |
|
1384 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin); |
|
1385 int endIndex = m_bodyDisjunction->terms.size(); |
|
1386 |
|
1387 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; |
|
1388 |
|
1389 while (m_bodyDisjunction->terms[beginTerm].alternative.next) { |
|
1390 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; |
|
1391 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction); |
|
1392 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; |
|
1393 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; |
|
1394 } |
|
1395 |
|
1396 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; |
|
1397 |
|
1398 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd()); |
|
1399 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; |
|
1400 } |
|
1401 |
|
1402 void atomParenthesesEnd(bool doInline, unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0) |
|
1403 { |
|
1404 unsigned beginTerm = popParenthesesStack(); |
|
1405 closeAlternative(beginTerm + 1); |
|
1406 unsigned endTerm = m_bodyDisjunction->terms.size(); |
|
1407 |
|
1408 bool isAssertion = m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin; |
|
1409 bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture; |
|
1410 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; |
|
1411 |
|
1412 m_bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition)); |
|
1413 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; |
|
1414 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; |
|
1415 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; |
|
1416 |
|
1417 if (doInline) { |
|
1418 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount; |
|
1419 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; |
|
1420 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount; |
|
1421 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; |
|
1422 } else { |
|
1423 ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm]; |
|
1424 ASSERT(parenthesesBegin.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
|
1425 |
|
1426 bool invertOrCapture = parenthesesBegin.invertOrCapture; |
|
1427 unsigned subpatternId = parenthesesBegin.atom.subpatternId; |
|
1428 |
|
1429 unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; |
|
1430 ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize); |
|
1431 |
|
1432 parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin()); |
|
1433 for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses) |
|
1434 parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]); |
|
1435 parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd()); |
|
1436 |
|
1437 m_bodyDisjunction->terms.shrink(beginTerm); |
|
1438 |
|
1439 m_allParenthesesInfo.append(parenthesesDisjunction); |
|
1440 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition)); |
|
1441 |
|
1442 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount; |
|
1443 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; |
|
1444 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; |
|
1445 } |
|
1446 } |
|
1447 |
|
1448 void regexBegin(unsigned numSubpatterns, unsigned callFrameSize) |
|
1449 { |
|
1450 m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize)); |
|
1451 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin()); |
|
1452 m_bodyDisjunction->terms[0].frameLocation = 0; |
|
1453 m_currentAlternativeIndex = 0; |
|
1454 } |
|
1455 |
|
1456 void regexEnd() |
|
1457 { |
|
1458 closeBodyAlternative(); |
|
1459 } |
|
1460 |
|
1461 void alternativeBodyDisjunction() |
|
1462 { |
|
1463 int newAlternativeIndex = m_bodyDisjunction->terms.size(); |
|
1464 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; |
|
1465 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction()); |
|
1466 |
|
1467 m_currentAlternativeIndex = newAlternativeIndex; |
|
1468 } |
|
1469 |
|
1470 void alternativeDisjunction() |
|
1471 { |
|
1472 int newAlternativeIndex = m_bodyDisjunction->terms.size(); |
|
1473 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; |
|
1474 m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction()); |
|
1475 |
|
1476 m_currentAlternativeIndex = newAlternativeIndex; |
|
1477 } |
|
1478 |
|
1479 void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0) |
|
1480 { |
|
1481 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { |
|
1482 unsigned currentCountAlreadyChecked = inputCountAlreadyChecked; |
|
1483 |
|
1484 if (alt) { |
|
1485 if (disjunction == m_pattern.m_body) |
|
1486 alternativeBodyDisjunction(); |
|
1487 else |
|
1488 alternativeDisjunction(); |
|
1489 } |
|
1490 |
|
1491 PatternAlternative* alternative = disjunction->m_alternatives[alt]; |
|
1492 unsigned minimumSize = alternative->m_minimumSize; |
|
1493 |
|
1494 ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked); |
|
1495 unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked; |
|
1496 if (countToCheck) |
|
1497 checkInput(countToCheck); |
|
1498 currentCountAlreadyChecked += countToCheck; |
|
1499 |
|
1500 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { |
|
1501 PatternTerm& term = alternative->m_terms[i]; |
|
1502 |
|
1503 switch (term.type) { |
|
1504 case PatternTerm::TypeAssertionBOL: |
|
1505 assertionBOL(term.inputPosition - currentCountAlreadyChecked); |
|
1506 break; |
|
1507 |
|
1508 case PatternTerm::TypeAssertionEOL: |
|
1509 assertionEOL(term.inputPosition - currentCountAlreadyChecked); |
|
1510 break; |
|
1511 |
|
1512 case PatternTerm::TypeAssertionWordBoundary: |
|
1513 assertionWordBoundary(term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked); |
|
1514 break; |
|
1515 |
|
1516 case PatternTerm::TypePatternCharacter: |
|
1517 atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType); |
|
1518 break; |
|
1519 |
|
1520 case PatternTerm::TypeCharacterClass: |
|
1521 atomCharacterClass(term.characterClass, term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType); |
|
1522 break; |
|
1523 |
|
1524 case PatternTerm::TypeBackReference: |
|
1525 atomBackReference(term.subpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType); |
|
1526 break; |
|
1527 |
|
1528 case PatternTerm::TypeForwardReference: |
|
1529 break; |
|
1530 |
|
1531 case PatternTerm::TypeParenthesesSubpattern: { |
|
1532 unsigned disjunctionAlreadyCheckedCount = 0; |
|
1533 if ((term.quantityCount == 1) && !term.parentheses.isCopy) { |
|
1534 if (term.quantityType == QuantifierFixedCount) { |
|
1535 disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize; |
|
1536 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; |
|
1537 atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation); |
|
1538 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, term.parentheses.disjunction->m_minimumSize); |
|
1539 atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); |
|
1540 } else { |
|
1541 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; |
|
1542 atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce); |
|
1543 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); |
|
1544 atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); |
|
1545 } |
|
1546 } else { |
|
1547 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; |
|
1548 atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0); |
|
1549 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); |
|
1550 atomParenthesesEnd(false, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); |
|
1551 } |
|
1552 break; |
|
1553 } |
|
1554 |
|
1555 case PatternTerm::TypeParentheticalAssertion: { |
|
1556 unsigned alternativeFrameLocation = term.frameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion; |
|
1557 |
|
1558 atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation); |
|
1559 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); |
|
1560 atomParenthesesEnd(true, term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType); |
|
1561 break; |
|
1562 } |
|
1563 } |
|
1564 } |
|
1565 } |
|
1566 } |
|
1567 |
|
1568 private: |
|
1569 RegexPattern& m_pattern; |
|
1570 OwnPtr<ByteDisjunction> m_bodyDisjunction; |
|
1571 unsigned m_currentAlternativeIndex; |
|
1572 Vector<ParenthesesStackEntry> m_parenthesesStack; |
|
1573 Vector<ByteDisjunction*> m_allParenthesesInfo; |
|
1574 }; |
|
1575 |
|
1576 |
|
1577 PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline) |
|
1578 { |
|
1579 RegexPattern pattern(ignoreCase, multiline); |
|
1580 |
|
1581 if ((error = compileRegex(patternString, pattern))) |
|
1582 return PassOwnPtr<BytecodePattern>(); |
|
1583 |
|
1584 numSubpatterns = pattern.m_numSubpatterns; |
|
1585 |
|
1586 return ByteCompiler(pattern).compile(); |
|
1587 } |
|
1588 |
|
1589 int interpretRegex(BytecodePattern* regex, const UChar* input, unsigned start, unsigned length, int* output) |
|
1590 { |
|
1591 return Interpreter(regex, output, input, start, length).interpret(); |
|
1592 } |
|
1593 |
|
1594 |
|
1595 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (RegexStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoPatternCharacter); |
|
1596 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (RegexStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoCharacterClass); |
|
1597 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference); |
|
1598 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative); |
|
1599 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion); |
|
1600 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParenthesesOnce); |
|
1601 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses); |
|
1602 |
|
1603 |
|
1604 } } |
|
1605 |
|
1606 #endif |