JavaScriptCore/yarr/RegexInterpreter.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2009 Apple Inc. All rights reserved.
       
     3  *
       
     4  * Redistribution and use in source and binary forms, with or without
       
     5  * modification, are permitted provided that the following conditions
       
     6  * are met:
       
     7  * 1. Redistributions of source code must retain the above copyright
       
     8  *    notice, this list of conditions and the following disclaimer.
       
     9  * 2. Redistributions in binary form must reproduce the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer in the
       
    11  *    documentation and/or other materials provided with the distribution.
       
    12  *
       
    13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
       
    14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
       
    17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
       
    18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
       
    19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
       
    20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
       
    21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    24  */
       
    25 
       
    26 #include "config.h"
       
    27 #include "RegexInterpreter.h"
       
    28 
       
    29 #include "RegexCompiler.h"
       
    30 #include "RegexPattern.h"
       
    31 
       
    32 #ifndef NDEBUG
       
    33 #include <stdio.h>
       
    34 #endif
       
    35 
       
    36 #if ENABLE(YARR)
       
    37 
       
    38 using namespace WTF;
       
    39 
       
    40 namespace JSC { namespace Yarr {
       
    41 
       
    42 class Interpreter {
       
    43 public:
       
    44     struct ParenthesesDisjunctionContext;
       
    45 
       
    46     struct BackTrackInfoPatternCharacter {
       
    47         uintptr_t matchAmount;
       
    48     };
       
    49     struct BackTrackInfoCharacterClass {
       
    50         uintptr_t matchAmount;
       
    51     };
       
    52     struct BackTrackInfoBackReference {
       
    53         uintptr_t begin; // Not really needed for greedy quantifiers.
       
    54         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
       
    55     };
       
    56     struct BackTrackInfoAlternative {
       
    57         uintptr_t offset;
       
    58     };
       
    59     struct BackTrackInfoParentheticalAssertion {
       
    60         uintptr_t begin;
       
    61     };
       
    62     struct BackTrackInfoParenthesesOnce {
       
    63         uintptr_t inParentheses;
       
    64     };
       
    65     struct BackTrackInfoParentheses {
       
    66         uintptr_t matchAmount;
       
    67         ParenthesesDisjunctionContext* lastContext;
       
    68         uintptr_t prevBegin;
       
    69         uintptr_t prevEnd;
       
    70     };
       
    71 
       
    72     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
       
    73     {
       
    74         context->next = backTrack->lastContext;
       
    75         backTrack->lastContext = context;
       
    76         ++backTrack->matchAmount;
       
    77     }
       
    78 
       
    79     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
       
    80     {
       
    81         ASSERT(backTrack->matchAmount);
       
    82         ASSERT(backTrack->lastContext);
       
    83         backTrack->lastContext = backTrack->lastContext->next;
       
    84         --backTrack->matchAmount;
       
    85     }
       
    86 
       
    87     struct DisjunctionContext
       
    88     {
       
    89         DisjunctionContext()
       
    90             : term(0)
       
    91         {
       
    92         }
       
    93 
       
    94         void* operator new(size_t, void* where)
       
    95         {
       
    96             return where;
       
    97         }
       
    98 
       
    99         int term;
       
   100         unsigned matchBegin;
       
   101         unsigned matchEnd;
       
   102         uintptr_t frame[1];
       
   103     };
       
   104 
       
   105     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
       
   106     {
       
   107         return new(malloc(sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) DisjunctionContext();
       
   108     }
       
   109 
       
   110     void freeDisjunctionContext(DisjunctionContext* context)
       
   111     {
       
   112         free(context);
       
   113     }
       
   114 
       
   115     struct ParenthesesDisjunctionContext
       
   116     {
       
   117         ParenthesesDisjunctionContext(int* output, ByteTerm& term)
       
   118             : next(0)
       
   119         {
       
   120             unsigned firstSubpatternId = term.atom.subpatternId;
       
   121             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
       
   122 
       
   123             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
       
   124                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
       
   125                 output[(firstSubpatternId << 1) + i] = -1;
       
   126             }
       
   127 
       
   128             new(getDisjunctionContext(term)) DisjunctionContext();
       
   129         }
       
   130 
       
   131         void* operator new(size_t, void* where)
       
   132         {
       
   133             return where;
       
   134         }
       
   135 
       
   136         void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
       
   137         {
       
   138             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
       
   139                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
       
   140         }
       
   141 
       
   142         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
       
   143         {
       
   144             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
       
   145         }
       
   146 
       
   147         ParenthesesDisjunctionContext* next;
       
   148         int subpatternBackup[1];
       
   149     };
       
   150 
       
   151     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
       
   152     {
       
   153         return new(malloc(sizeof(ParenthesesDisjunctionContext) + (((term.atom.parenthesesDisjunction->m_numSubpatterns << 1) - 1) * sizeof(int)) + sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) ParenthesesDisjunctionContext(output, term);
       
   154     }
       
   155 
       
   156     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
       
   157     {
       
   158         free(context);
       
   159     }
       
   160 
       
   161     class InputStream {
       
   162     public:
       
   163         InputStream(const UChar* input, unsigned start, unsigned length)
       
   164             : input(input)
       
   165             , pos(start)
       
   166             , length(length)
       
   167         {
       
   168         }
       
   169 
       
   170         void next()
       
   171         {
       
   172             ++pos;
       
   173         }
       
   174 
       
   175         void rewind(unsigned amount)
       
   176         {
       
   177             ASSERT(pos >= amount);
       
   178             pos -= amount;
       
   179         }
       
   180 
       
   181         int read()
       
   182         {
       
   183             ASSERT(pos < length);
       
   184             if (pos < length)
       
   185                 return input[pos];
       
   186             return -1;
       
   187         }
       
   188 
       
   189         int readChecked(int position)
       
   190         {
       
   191             ASSERT(position < 0);
       
   192             ASSERT((unsigned)-position <= pos);
       
   193             unsigned p = pos + position;
       
   194             ASSERT(p < length);
       
   195             return input[p];
       
   196         }
       
   197 
       
   198         int reread(unsigned from)
       
   199         {
       
   200             ASSERT(from < length);
       
   201             return input[from];
       
   202         }
       
   203 
       
   204         int prev()
       
   205         {
       
   206             ASSERT(!(pos > length));
       
   207             if (pos && length)
       
   208                 return input[pos - 1];
       
   209             return -1;
       
   210         }
       
   211 
       
   212         unsigned getPos()
       
   213         {
       
   214             return pos;
       
   215         }
       
   216 
       
   217         void setPos(unsigned p)
       
   218         {
       
   219             pos = p;
       
   220         }
       
   221 
       
   222         bool atStart()
       
   223         {
       
   224             return pos == 0;
       
   225         }
       
   226 
       
   227         bool atEnd()
       
   228         {
       
   229             return pos == length;
       
   230         }
       
   231 
       
   232         bool checkInput(int count)
       
   233         {
       
   234             if ((pos + count) <= length) {
       
   235                 pos += count;
       
   236                 return true;
       
   237             } else
       
   238                 return false;
       
   239         }
       
   240 
       
   241         void uncheckInput(int count)
       
   242         {
       
   243             pos -= count;
       
   244         }
       
   245 
       
   246         bool atStart(int position)
       
   247         {
       
   248             return (pos + position) == 0;
       
   249         }
       
   250 
       
   251         bool atEnd(int position)
       
   252         {
       
   253             return (pos + position) == length;
       
   254         }
       
   255 
       
   256     private:
       
   257         const UChar* input;
       
   258         unsigned pos;
       
   259         unsigned length;
       
   260     };
       
   261 
       
   262     bool testCharacterClass(CharacterClass* characterClass, int ch)
       
   263     {
       
   264         if (ch & 0xFF80) {
       
   265             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
       
   266                 if (ch == characterClass->m_matchesUnicode[i])
       
   267                     return true;
       
   268             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
       
   269                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
       
   270                     return true;
       
   271         } else {
       
   272             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
       
   273                 if (ch == characterClass->m_matches[i])
       
   274                     return true;
       
   275             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
       
   276                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
       
   277                     return true;
       
   278         }
       
   279 
       
   280         return false;
       
   281     }
       
   282 
       
   283     bool checkCharacter(int testChar, int inputPosition)
       
   284     {
       
   285         return testChar == input.readChecked(inputPosition);
       
   286     }
       
   287 
       
   288     bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
       
   289     {
       
   290         int ch = input.readChecked(inputPosition);
       
   291         return (loChar == ch) || (hiChar == ch);
       
   292     }
       
   293 
       
   294     bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
       
   295     {
       
   296         bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
       
   297         return invert ? !match : match;
       
   298     }
       
   299 
       
   300     bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
       
   301     {
       
   302         int matchSize = matchEnd - matchBegin;
       
   303 
       
   304         if (!input.checkInput(matchSize))
       
   305             return false;
       
   306 
       
   307         for (int i = 0; i < matchSize; ++i) {
       
   308             if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
       
   309                 input.uncheckInput(matchSize);
       
   310                 return false;
       
   311             }
       
   312         }
       
   313 
       
   314         return true;
       
   315     }
       
   316 
       
   317     bool matchAssertionBOL(ByteTerm& term)
       
   318     {
       
   319         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
       
   320     }
       
   321 
       
   322     bool matchAssertionEOL(ByteTerm& term)
       
   323     {
       
   324         if (term.inputPosition)
       
   325             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
       
   326         else
       
   327             return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
       
   328     }
       
   329 
       
   330     bool matchAssertionWordBoundary(ByteTerm& term)
       
   331     {
       
   332         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
       
   333         bool readIsWordchar;
       
   334         if (term.inputPosition)
       
   335             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
       
   336         else
       
   337             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
       
   338 
       
   339         bool wordBoundary = prevIsWordchar != readIsWordchar;
       
   340         return term.invert() ? !wordBoundary : wordBoundary;
       
   341     }
       
   342 
       
   343     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
       
   344     {
       
   345         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
       
   346 
       
   347         switch (term.atom.quantityType) {
       
   348         case QuantifierFixedCount:
       
   349             break;
       
   350 
       
   351         case QuantifierGreedy:
       
   352             if (backTrack->matchAmount) {
       
   353                 --backTrack->matchAmount;
       
   354                 input.uncheckInput(1);
       
   355                 return true;
       
   356             }
       
   357             break;
       
   358 
       
   359         case QuantifierNonGreedy:
       
   360             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
       
   361                 ++backTrack->matchAmount;
       
   362                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
       
   363                     return true;
       
   364             }
       
   365             input.uncheckInput(backTrack->matchAmount);
       
   366             break;
       
   367         }
       
   368 
       
   369         return false;
       
   370     }
       
   371 
       
   372     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
       
   373     {
       
   374         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
       
   375 
       
   376         switch (term.atom.quantityType) {
       
   377         case QuantifierFixedCount:
       
   378             break;
       
   379 
       
   380         case QuantifierGreedy:
       
   381             if (backTrack->matchAmount) {
       
   382                 --backTrack->matchAmount;
       
   383                 input.uncheckInput(1);
       
   384                 return true;
       
   385             }
       
   386             break;
       
   387 
       
   388         case QuantifierNonGreedy:
       
   389             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
       
   390                 ++backTrack->matchAmount;
       
   391                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
       
   392                     return true;
       
   393             }
       
   394             input.uncheckInput(backTrack->matchAmount);
       
   395             break;
       
   396         }
       
   397 
       
   398         return false;
       
   399     }
       
   400 
       
   401     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
       
   402     {
       
   403         ASSERT(term.type == ByteTerm::TypeCharacterClass);
       
   404         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
       
   405 
       
   406         switch (term.atom.quantityType) {
       
   407         case QuantifierFixedCount: {
       
   408             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
       
   409                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
       
   410                     return false;
       
   411             }
       
   412             return true;
       
   413         }
       
   414 
       
   415         case QuantifierGreedy: {
       
   416             unsigned matchAmount = 0;
       
   417             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
       
   418                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
       
   419                     input.uncheckInput(1);
       
   420                     break;
       
   421                 }
       
   422                 ++matchAmount;
       
   423             }
       
   424             backTrack->matchAmount = matchAmount;
       
   425 
       
   426             return true;
       
   427         }
       
   428 
       
   429         case QuantifierNonGreedy:
       
   430             backTrack->matchAmount = 0;
       
   431             return true;
       
   432         }
       
   433 
       
   434         ASSERT_NOT_REACHED();
       
   435         return false;
       
   436     }
       
   437 
       
   438     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
       
   439     {
       
   440         ASSERT(term.type == ByteTerm::TypeCharacterClass);
       
   441         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
       
   442 
       
   443         switch (term.atom.quantityType) {
       
   444         case QuantifierFixedCount:
       
   445             break;
       
   446 
       
   447         case QuantifierGreedy:
       
   448             if (backTrack->matchAmount) {
       
   449                 --backTrack->matchAmount;
       
   450                 input.uncheckInput(1);
       
   451                 return true;
       
   452             }
       
   453             break;
       
   454 
       
   455         case QuantifierNonGreedy:
       
   456             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
       
   457                 ++backTrack->matchAmount;
       
   458                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
       
   459                     return true;
       
   460             }
       
   461             input.uncheckInput(backTrack->matchAmount);
       
   462             break;
       
   463         }
       
   464 
       
   465         return false;
       
   466     }
       
   467 
       
   468     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
       
   469     {
       
   470         ASSERT(term.type == ByteTerm::TypeBackReference);
       
   471         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
       
   472 
       
   473         int matchBegin = output[(term.atom.subpatternId << 1)];
       
   474         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
       
   475         ASSERT((matchBegin == -1) == (matchEnd == -1));
       
   476         ASSERT(matchBegin <= matchEnd);
       
   477 
       
   478         if (matchBegin == matchEnd)
       
   479             return true;
       
   480 
       
   481         switch (term.atom.quantityType) {
       
   482         case QuantifierFixedCount: {
       
   483             backTrack->begin = input.getPos();
       
   484             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
       
   485                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
       
   486                     input.setPos(backTrack->begin);
       
   487                     return false;
       
   488                 }
       
   489             }
       
   490             return true;
       
   491         }
       
   492 
       
   493         case QuantifierGreedy: {
       
   494             unsigned matchAmount = 0;
       
   495             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
       
   496                 ++matchAmount;
       
   497             backTrack->matchAmount = matchAmount;
       
   498             return true;
       
   499         }
       
   500 
       
   501         case QuantifierNonGreedy:
       
   502             backTrack->begin = input.getPos();
       
   503             backTrack->matchAmount = 0;
       
   504             return true;
       
   505         }
       
   506 
       
   507         ASSERT_NOT_REACHED();
       
   508         return false;
       
   509     }
       
   510 
       
   511     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
       
   512     {
       
   513         ASSERT(term.type == ByteTerm::TypeBackReference);
       
   514         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
       
   515 
       
   516         int matchBegin = output[(term.atom.subpatternId << 1)];
       
   517         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
       
   518         ASSERT((matchBegin == -1) == (matchEnd == -1));
       
   519         ASSERT(matchBegin <= matchEnd);
       
   520 
       
   521         if (matchBegin == matchEnd)
       
   522             return false;
       
   523 
       
   524         switch (term.atom.quantityType) {
       
   525         case QuantifierFixedCount:
       
   526             // for quantityCount == 1, could rewind.
       
   527             input.setPos(backTrack->begin);
       
   528             break;
       
   529 
       
   530         case QuantifierGreedy:
       
   531             if (backTrack->matchAmount) {
       
   532                 --backTrack->matchAmount;
       
   533                 input.rewind(matchEnd - matchBegin);
       
   534                 return true;
       
   535             }
       
   536             break;
       
   537 
       
   538         case QuantifierNonGreedy:
       
   539             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
       
   540                 ++backTrack->matchAmount;
       
   541                 return true;
       
   542             } else
       
   543                 input.setPos(backTrack->begin);
       
   544             break;
       
   545         }
       
   546 
       
   547         return false;
       
   548     }
       
   549 
       
   550     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
       
   551     {
       
   552         if (term.capture()) {
       
   553             unsigned subpatternId = term.atom.subpatternId;
       
   554             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
       
   555             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
       
   556         }
       
   557     }
       
   558     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
       
   559     {
       
   560         unsigned firstSubpatternId = term.atom.subpatternId;
       
   561         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
       
   562         context->restoreOutput(output, firstSubpatternId, count);
       
   563     }
       
   564     void resetAssertionMatches(ByteTerm& term)
       
   565     {
       
   566         unsigned firstSubpatternId = term.atom.subpatternId;
       
   567         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
       
   568         for (unsigned i = 0; i < (count << 1); ++i)
       
   569             output[(firstSubpatternId << 1) + i] = -1;
       
   570     }
       
   571     bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
       
   572     {
       
   573         while (backTrack->matchAmount) {
       
   574             ParenthesesDisjunctionContext* context = backTrack->lastContext;
       
   575 
       
   576             if (matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true))
       
   577                 return true;
       
   578 
       
   579             resetMatches(term, context);
       
   580             popParenthesesDisjunctionContext(backTrack);
       
   581             freeParenthesesDisjunctionContext(context);
       
   582         }
       
   583 
       
   584         return false;
       
   585     }
       
   586 
       
   587     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
       
   588     {
       
   589         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
       
   590         ASSERT(term.atom.quantityCount == 1);
       
   591 
       
   592         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
       
   593 
       
   594         switch (term.atom.quantityType) {
       
   595         case QuantifierGreedy: {
       
   596             // set this speculatively; if we get to the parens end this will be true.
       
   597             backTrack->inParentheses = 1;
       
   598             break;
       
   599         }
       
   600         case QuantifierNonGreedy: {
       
   601             backTrack->inParentheses = 0;
       
   602             context->term += term.atom.parenthesesWidth;
       
   603             return true;
       
   604         }
       
   605         case QuantifierFixedCount:
       
   606             break;
       
   607         }
       
   608 
       
   609         if (term.capture()) {
       
   610             unsigned subpatternId = term.atom.subpatternId;
       
   611             output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
       
   612         }
       
   613 
       
   614         return true;
       
   615     }
       
   616 
       
   617     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext*)
       
   618     {
       
   619         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
       
   620         ASSERT(term.atom.quantityCount == 1);
       
   621 
       
   622         if (term.capture()) {
       
   623             unsigned subpatternId = term.atom.subpatternId;
       
   624             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
       
   625         }
       
   626         return true;
       
   627     }
       
   628 
       
   629     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
       
   630     {
       
   631         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
       
   632         ASSERT(term.atom.quantityCount == 1);
       
   633 
       
   634         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
       
   635 
       
   636         if (term.capture()) {
       
   637             unsigned subpatternId = term.atom.subpatternId;
       
   638             output[(subpatternId << 1)] = -1;
       
   639             output[(subpatternId << 1) + 1] = -1;
       
   640         }
       
   641 
       
   642         switch (term.atom.quantityType) {
       
   643         case QuantifierGreedy:
       
   644             // if we backtrack to this point, there is another chance - try matching nothing.
       
   645             ASSERT(backTrack->inParentheses);
       
   646             backTrack->inParentheses = 0;
       
   647             context->term += term.atom.parenthesesWidth;
       
   648             return true;
       
   649         case QuantifierNonGreedy:
       
   650             ASSERT(backTrack->inParentheses);
       
   651         case QuantifierFixedCount:
       
   652             break;
       
   653         }
       
   654 
       
   655         return false;
       
   656     }
       
   657 
       
   658     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
       
   659     {
       
   660         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
       
   661         ASSERT(term.atom.quantityCount == 1);
       
   662 
       
   663         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
       
   664 
       
   665         switch (term.atom.quantityType) {
       
   666         case QuantifierGreedy:
       
   667             if (!backTrack->inParentheses) {
       
   668                 context->term -= term.atom.parenthesesWidth;
       
   669                 return false;
       
   670             }
       
   671         case QuantifierNonGreedy:
       
   672             if (!backTrack->inParentheses) {
       
   673                 // now try to match the parens; set this speculatively.
       
   674                 backTrack->inParentheses = 1;
       
   675                 if (term.capture()) {
       
   676                     unsigned subpatternId = term.atom.subpatternId;
       
   677                     output[subpatternId << 1] = input.getPos() + term.inputPosition;
       
   678                 }
       
   679                 context->term -= term.atom.parenthesesWidth;
       
   680                 return true;
       
   681             }
       
   682         case QuantifierFixedCount:
       
   683             break;
       
   684         }
       
   685 
       
   686         return false;
       
   687     }
       
   688 
       
   689     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
       
   690     {
       
   691         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
       
   692         ASSERT(term.atom.quantityCount == 1);
       
   693 
       
   694         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
       
   695 
       
   696         backTrack->begin = input.getPos();
       
   697         return true;
       
   698     }
       
   699 
       
   700     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
       
   701     {
       
   702         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
       
   703         ASSERT(term.atom.quantityCount == 1);
       
   704 
       
   705         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
       
   706 
       
   707         input.setPos(backTrack->begin);
       
   708 
       
   709         // We've reached the end of the parens; if they are inverted, this is failure.
       
   710         if (term.invert()) {
       
   711             context->term -= term.atom.parenthesesWidth;
       
   712             return false;
       
   713         }
       
   714 
       
   715         return true;
       
   716     }
       
   717 
       
   718     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
       
   719     {
       
   720         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
       
   721         ASSERT(term.atom.quantityCount == 1);
       
   722 
       
   723         // We've failed to match parens; if they are inverted, this is win!
       
   724         if (term.invert()) {
       
   725             context->term += term.atom.parenthesesWidth;
       
   726             return true;
       
   727         }
       
   728 
       
   729         return false;
       
   730     }
       
   731 
       
   732     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
       
   733     {
       
   734         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
       
   735         ASSERT(term.atom.quantityCount == 1);
       
   736 
       
   737         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
       
   738 
       
   739         input.setPos(backTrack->begin);
       
   740 
       
   741         context->term -= term.atom.parenthesesWidth;
       
   742         return false;
       
   743     }
       
   744 
       
   745     bool matchParentheses(ByteTerm& term, DisjunctionContext* context)
       
   746     {
       
   747         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
       
   748 
       
   749         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
       
   750 
       
   751         unsigned subpatternId = term.atom.subpatternId;
       
   752         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
       
   753 
       
   754         backTrack->prevBegin = output[(subpatternId << 1)];
       
   755         backTrack->prevEnd = output[(subpatternId << 1) + 1];
       
   756 
       
   757         backTrack->matchAmount = 0;
       
   758         backTrack->lastContext = 0;
       
   759 
       
   760         switch (term.atom.quantityType) {
       
   761         case QuantifierFixedCount: {
       
   762             // While we haven't yet reached our fixed limit,
       
   763             while (backTrack->matchAmount < term.atom.quantityCount) {
       
   764                 // Try to do a match, and it it succeeds, add it to the list.
       
   765                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
       
   766                 if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
       
   767                     appendParenthesesDisjunctionContext(backTrack, context);
       
   768                 else {
       
   769                     // The match failed; try to find an alternate point to carry on from.
       
   770                     resetMatches(term, context);
       
   771                     freeParenthesesDisjunctionContext(context);
       
   772                     if (!parenthesesDoBacktrack(term, backTrack))
       
   773                         return false;
       
   774                 }
       
   775             }
       
   776 
       
   777             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
       
   778             ParenthesesDisjunctionContext* context = backTrack->lastContext;
       
   779             recordParenthesesMatch(term, context);
       
   780             return true;
       
   781         }
       
   782 
       
   783         case QuantifierGreedy: {
       
   784             while (backTrack->matchAmount < term.atom.quantityCount) {
       
   785                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
       
   786                 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
       
   787                     appendParenthesesDisjunctionContext(backTrack, context);
       
   788                 else {
       
   789                     resetMatches(term, context);
       
   790                     freeParenthesesDisjunctionContext(context);
       
   791                     break;
       
   792                 }
       
   793             }
       
   794 
       
   795             if (backTrack->matchAmount) {
       
   796                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
       
   797                 recordParenthesesMatch(term, context);
       
   798             }
       
   799             return true;
       
   800         }
       
   801 
       
   802         case QuantifierNonGreedy:
       
   803             return true;
       
   804         }
       
   805 
       
   806         ASSERT_NOT_REACHED();
       
   807         return false;
       
   808     }
       
   809 
       
   810     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
       
   811     //
       
   812     // Greedy matches never should try just adding more - you should already have done
       
   813     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
       
   814     // you backtrack an item off the list needs checking, since we'll never have matched
       
   815     // the one less case.  Tracking forwards, still add as much as possible.
       
   816     //
       
   817     // Non-greedy, we've already done the one less case, so don't match on popping.
       
   818     // We haven't done the one more case, so always try to add that.
       
   819     //
       
   820     bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
       
   821     {
       
   822         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
       
   823 
       
   824         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
       
   825 
       
   826         if (term.capture()) {
       
   827             unsigned subpatternId = term.atom.subpatternId;
       
   828             output[(subpatternId << 1)] = backTrack->prevBegin;
       
   829             output[(subpatternId << 1) + 1] = backTrack->prevEnd;
       
   830         }
       
   831 
       
   832         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
       
   833 
       
   834         switch (term.atom.quantityType) {
       
   835         case QuantifierFixedCount: {
       
   836             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
       
   837 
       
   838             ParenthesesDisjunctionContext* context = 0;
       
   839 
       
   840             if (!parenthesesDoBacktrack(term, backTrack))
       
   841                 return false;
       
   842 
       
   843             // While we haven't yet reached our fixed limit,
       
   844             while (backTrack->matchAmount < term.atom.quantityCount) {
       
   845                 // Try to do a match, and it it succeeds, add it to the list.
       
   846                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
       
   847                 if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
       
   848                     appendParenthesesDisjunctionContext(backTrack, context);
       
   849                 else {
       
   850                     // The match failed; try to find an alternate point to carry on from.
       
   851                     resetMatches(term, context);
       
   852                     freeParenthesesDisjunctionContext(context);
       
   853                     if (!parenthesesDoBacktrack(term, backTrack))
       
   854                         return false;
       
   855                 }
       
   856             }
       
   857 
       
   858             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
       
   859             context = backTrack->lastContext;
       
   860             recordParenthesesMatch(term, context);
       
   861             return true;
       
   862         }
       
   863 
       
   864         case QuantifierGreedy: {
       
   865             if (!backTrack->matchAmount)
       
   866                 return false;
       
   867 
       
   868             ParenthesesDisjunctionContext* context = backTrack->lastContext;
       
   869             if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
       
   870                 while (backTrack->matchAmount < term.atom.quantityCount) {
       
   871                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
       
   872                     if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
       
   873                         appendParenthesesDisjunctionContext(backTrack, context);
       
   874                     else {
       
   875                         resetMatches(term, context);
       
   876                         freeParenthesesDisjunctionContext(context);
       
   877                         break;
       
   878                     }
       
   879                 }
       
   880             } else {
       
   881                 resetMatches(term, context);
       
   882                 popParenthesesDisjunctionContext(backTrack);
       
   883                 freeParenthesesDisjunctionContext(context);
       
   884             }
       
   885 
       
   886             if (backTrack->matchAmount) {
       
   887                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
       
   888                 recordParenthesesMatch(term, context);
       
   889             }
       
   890             return true;
       
   891         }
       
   892 
       
   893         case QuantifierNonGreedy: {
       
   894             // If we've not reached the limit, try to add one more match.
       
   895             if (backTrack->matchAmount < term.atom.quantityCount) {
       
   896                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
       
   897                 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) {
       
   898                     appendParenthesesDisjunctionContext(backTrack, context);
       
   899                     recordParenthesesMatch(term, context);
       
   900                     return true;
       
   901                 } else {
       
   902                     resetMatches(term, context);
       
   903                     freeParenthesesDisjunctionContext(context);
       
   904                 }
       
   905             }
       
   906 
       
   907             // Nope - okay backtrack looking for an alternative.
       
   908             while (backTrack->matchAmount) {
       
   909                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
       
   910                 if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
       
   911                     // successful backtrack! we're back in the game!
       
   912                     if (backTrack->matchAmount) {
       
   913                         context = backTrack->lastContext;
       
   914                         recordParenthesesMatch(term, context);
       
   915                     }
       
   916                     return true;
       
   917                 }
       
   918 
       
   919                 // pop a match off the stack
       
   920                 resetMatches(term, context);
       
   921                 popParenthesesDisjunctionContext(backTrack);
       
   922                 freeParenthesesDisjunctionContext(context);
       
   923             }
       
   924 
       
   925             return false;
       
   926         }
       
   927         }
       
   928 
       
   929         ASSERT_NOT_REACHED();
       
   930         return false;
       
   931     }
       
   932 
       
   933 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
       
   934 #define BACKTRACK() { --context->term; goto backtrack; }
       
   935 #define currentTerm() (disjunction->terms[context->term])
       
   936     bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
       
   937     {
       
   938         if (btrack)
       
   939             BACKTRACK();
       
   940 
       
   941         context->matchBegin = input.getPos();
       
   942         context->term = 0;
       
   943 
       
   944     matchAgain:
       
   945         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
       
   946 
       
   947         switch (currentTerm().type) {
       
   948         case ByteTerm::TypeSubpatternBegin:
       
   949             MATCH_NEXT();
       
   950         case ByteTerm::TypeSubpatternEnd:
       
   951             context->matchEnd = input.getPos();
       
   952             return true;
       
   953 
       
   954         case ByteTerm::TypeBodyAlternativeBegin:
       
   955             MATCH_NEXT();
       
   956         case ByteTerm::TypeBodyAlternativeDisjunction:
       
   957         case ByteTerm::TypeBodyAlternativeEnd:
       
   958             context->matchEnd = input.getPos();
       
   959             return true;
       
   960 
       
   961         case ByteTerm::TypeAlternativeBegin:
       
   962             MATCH_NEXT();
       
   963         case ByteTerm::TypeAlternativeDisjunction:
       
   964         case ByteTerm::TypeAlternativeEnd: {
       
   965             int offset = currentTerm().alternative.end;
       
   966             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
       
   967             backTrack->offset = offset;
       
   968             context->term += offset;
       
   969             MATCH_NEXT();
       
   970         }
       
   971 
       
   972         case ByteTerm::TypeAssertionBOL:
       
   973             if (matchAssertionBOL(currentTerm()))
       
   974                 MATCH_NEXT();
       
   975             BACKTRACK();
       
   976         case ByteTerm::TypeAssertionEOL:
       
   977             if (matchAssertionEOL(currentTerm()))
       
   978                 MATCH_NEXT();
       
   979             BACKTRACK();
       
   980         case ByteTerm::TypeAssertionWordBoundary:
       
   981             if (matchAssertionWordBoundary(currentTerm()))
       
   982                 MATCH_NEXT();
       
   983             BACKTRACK();
       
   984 
       
   985         case ByteTerm::TypePatternCharacterOnce:
       
   986         case ByteTerm::TypePatternCharacterFixed: {
       
   987             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
       
   988                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
       
   989                     BACKTRACK();
       
   990             }
       
   991             MATCH_NEXT();
       
   992         }
       
   993         case ByteTerm::TypePatternCharacterGreedy: {
       
   994             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
       
   995             unsigned matchAmount = 0;
       
   996             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
       
   997                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
       
   998                     input.uncheckInput(1);
       
   999                     break;
       
  1000                 }
       
  1001                 ++matchAmount;
       
  1002             }
       
  1003             backTrack->matchAmount = matchAmount;
       
  1004 
       
  1005             MATCH_NEXT();
       
  1006         }
       
  1007         case ByteTerm::TypePatternCharacterNonGreedy: {
       
  1008             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
       
  1009             backTrack->matchAmount = 0;
       
  1010             MATCH_NEXT();
       
  1011         }
       
  1012 
       
  1013         case ByteTerm::TypePatternCasedCharacterOnce:
       
  1014         case ByteTerm::TypePatternCasedCharacterFixed: {
       
  1015             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
       
  1016                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
       
  1017                     BACKTRACK();
       
  1018             }
       
  1019             MATCH_NEXT();
       
  1020         }
       
  1021         case ByteTerm::TypePatternCasedCharacterGreedy: {
       
  1022             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
       
  1023             unsigned matchAmount = 0;
       
  1024             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
       
  1025                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
       
  1026                     input.uncheckInput(1);
       
  1027                     break;
       
  1028                 }
       
  1029                 ++matchAmount;
       
  1030             }
       
  1031             backTrack->matchAmount = matchAmount;
       
  1032 
       
  1033             MATCH_NEXT();
       
  1034         }
       
  1035         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
       
  1036             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
       
  1037             backTrack->matchAmount = 0;
       
  1038             MATCH_NEXT();
       
  1039         }
       
  1040 
       
  1041         case ByteTerm::TypeCharacterClass:
       
  1042             if (matchCharacterClass(currentTerm(), context))
       
  1043                 MATCH_NEXT();
       
  1044             BACKTRACK();
       
  1045         case ByteTerm::TypeBackReference:
       
  1046             if (matchBackReference(currentTerm(), context))
       
  1047                 MATCH_NEXT();
       
  1048             BACKTRACK();
       
  1049         case ByteTerm::TypeParenthesesSubpattern:
       
  1050             if (matchParentheses(currentTerm(), context))
       
  1051                 MATCH_NEXT();
       
  1052             BACKTRACK();
       
  1053         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
       
  1054             if (matchParenthesesOnceBegin(currentTerm(), context))
       
  1055                 MATCH_NEXT();
       
  1056             BACKTRACK();
       
  1057         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
       
  1058             if (matchParenthesesOnceEnd(currentTerm(), context))
       
  1059                 MATCH_NEXT();
       
  1060             BACKTRACK();
       
  1061         case ByteTerm::TypeParentheticalAssertionBegin:
       
  1062             if (matchParentheticalAssertionBegin(currentTerm(), context))
       
  1063                 MATCH_NEXT();
       
  1064             BACKTRACK();
       
  1065         case ByteTerm::TypeParentheticalAssertionEnd:
       
  1066             if (matchParentheticalAssertionEnd(currentTerm(), context))
       
  1067                 MATCH_NEXT();
       
  1068             BACKTRACK();
       
  1069 
       
  1070         case ByteTerm::TypeCheckInput:
       
  1071             if (input.checkInput(currentTerm().checkInputCount))
       
  1072                 MATCH_NEXT();
       
  1073             BACKTRACK();
       
  1074         }
       
  1075 
       
  1076         // We should never fall-through to here.
       
  1077         ASSERT_NOT_REACHED();
       
  1078 
       
  1079     backtrack:
       
  1080         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
       
  1081 
       
  1082         switch (currentTerm().type) {
       
  1083         case ByteTerm::TypeSubpatternBegin:
       
  1084             return false;
       
  1085         case ByteTerm::TypeSubpatternEnd:
       
  1086             ASSERT_NOT_REACHED();
       
  1087 
       
  1088         case ByteTerm::TypeBodyAlternativeBegin:
       
  1089         case ByteTerm::TypeBodyAlternativeDisjunction: {
       
  1090             int offset = currentTerm().alternative.next;
       
  1091             context->term += offset;
       
  1092             if (offset > 0)
       
  1093                 MATCH_NEXT();
       
  1094 
       
  1095             if (input.atEnd())
       
  1096                 return false;
       
  1097 
       
  1098             input.next();
       
  1099             context->matchBegin = input.getPos();
       
  1100             MATCH_NEXT();
       
  1101         }
       
  1102         case ByteTerm::TypeBodyAlternativeEnd:
       
  1103             ASSERT_NOT_REACHED();
       
  1104 
       
  1105             case ByteTerm::TypeAlternativeBegin:
       
  1106             case ByteTerm::TypeAlternativeDisjunction: {
       
  1107                 int offset = currentTerm().alternative.next;
       
  1108                 context->term += offset;
       
  1109                 if (offset > 0)
       
  1110                     MATCH_NEXT();
       
  1111                 BACKTRACK();
       
  1112             }
       
  1113             case ByteTerm::TypeAlternativeEnd: {
       
  1114                 // We should never backtrack back into an alternative of the main body of the regex.
       
  1115                 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
       
  1116                 unsigned offset = backTrack->offset;
       
  1117                 context->term -= offset;
       
  1118                 BACKTRACK();
       
  1119             }
       
  1120 
       
  1121             case ByteTerm::TypeAssertionBOL:
       
  1122             case ByteTerm::TypeAssertionEOL:
       
  1123             case ByteTerm::TypeAssertionWordBoundary:
       
  1124                 BACKTRACK();
       
  1125 
       
  1126             case ByteTerm::TypePatternCharacterOnce:
       
  1127             case ByteTerm::TypePatternCharacterFixed:
       
  1128             case ByteTerm::TypePatternCharacterGreedy:
       
  1129             case ByteTerm::TypePatternCharacterNonGreedy:
       
  1130                 if (backtrackPatternCharacter(currentTerm(), context))
       
  1131                     MATCH_NEXT();
       
  1132                 BACKTRACK();
       
  1133             case ByteTerm::TypePatternCasedCharacterOnce:
       
  1134             case ByteTerm::TypePatternCasedCharacterFixed:
       
  1135             case ByteTerm::TypePatternCasedCharacterGreedy:
       
  1136             case ByteTerm::TypePatternCasedCharacterNonGreedy:
       
  1137                 if (backtrackPatternCasedCharacter(currentTerm(), context))
       
  1138                     MATCH_NEXT();
       
  1139                 BACKTRACK();
       
  1140             case ByteTerm::TypeCharacterClass:
       
  1141                 if (backtrackCharacterClass(currentTerm(), context))
       
  1142                     MATCH_NEXT();
       
  1143                 BACKTRACK();
       
  1144             case ByteTerm::TypeBackReference:
       
  1145                 if (backtrackBackReference(currentTerm(), context))
       
  1146                     MATCH_NEXT();
       
  1147                 BACKTRACK();
       
  1148             case ByteTerm::TypeParenthesesSubpattern:
       
  1149                 if (backtrackParentheses(currentTerm(), context))
       
  1150                     MATCH_NEXT();
       
  1151                 BACKTRACK();
       
  1152             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
       
  1153                 if (backtrackParenthesesOnceBegin(currentTerm(), context))
       
  1154                     MATCH_NEXT();
       
  1155                 BACKTRACK();
       
  1156             case ByteTerm::TypeParenthesesSubpatternOnceEnd:
       
  1157                 if (backtrackParenthesesOnceEnd(currentTerm(), context))
       
  1158                     MATCH_NEXT();
       
  1159                 BACKTRACK();
       
  1160             case ByteTerm::TypeParentheticalAssertionBegin:
       
  1161                 if (backtrackParentheticalAssertionBegin(currentTerm(), context))
       
  1162                     MATCH_NEXT();
       
  1163                 BACKTRACK();
       
  1164             case ByteTerm::TypeParentheticalAssertionEnd:
       
  1165                 if (backtrackParentheticalAssertionEnd(currentTerm(), context))
       
  1166                     MATCH_NEXT();
       
  1167                 BACKTRACK();
       
  1168 
       
  1169             case ByteTerm::TypeCheckInput:
       
  1170                 input.uncheckInput(currentTerm().checkInputCount);
       
  1171                 BACKTRACK();
       
  1172         }
       
  1173 
       
  1174         ASSERT_NOT_REACHED();
       
  1175         return false;
       
  1176     }
       
  1177 
       
  1178     bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
       
  1179     {
       
  1180         if (matchDisjunction(disjunction, context, btrack)) {
       
  1181             while (context->matchBegin == context->matchEnd) {
       
  1182                 if (!matchDisjunction(disjunction, context, true))
       
  1183                     return false;
       
  1184             }
       
  1185             return true;
       
  1186         }
       
  1187 
       
  1188         return false;
       
  1189     }
       
  1190 
       
  1191     int interpret()
       
  1192     {
       
  1193         for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
       
  1194             output[i] = -1;
       
  1195 
       
  1196         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
       
  1197 
       
  1198         if (matchDisjunction(pattern->m_body.get(), context)) {
       
  1199             output[0] = context->matchBegin;
       
  1200             output[1] = context->matchEnd;
       
  1201         }
       
  1202 
       
  1203         freeDisjunctionContext(context);
       
  1204 
       
  1205         return output[0];
       
  1206     }
       
  1207 
       
  1208     Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
       
  1209         : pattern(pattern)
       
  1210         , output(output)
       
  1211         , input(inputChar, start, length)
       
  1212     {
       
  1213     }
       
  1214 
       
  1215 private:
       
  1216     BytecodePattern *pattern;
       
  1217     int* output;
       
  1218     InputStream input;
       
  1219 };
       
  1220 
       
  1221 
       
  1222 
       
  1223 class ByteCompiler {
       
  1224     struct ParenthesesStackEntry {
       
  1225         unsigned beginTerm;
       
  1226         unsigned savedAlternativeIndex;
       
  1227         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
       
  1228             : beginTerm(beginTerm)
       
  1229             , savedAlternativeIndex(savedAlternativeIndex)
       
  1230         {
       
  1231         }
       
  1232     };
       
  1233 
       
  1234 public:
       
  1235     ByteCompiler(RegexPattern& pattern)
       
  1236         : m_pattern(pattern)
       
  1237     {
       
  1238         m_currentAlternativeIndex = 0;
       
  1239     }
       
  1240 
       
  1241     PassOwnPtr<BytecodePattern> compile()
       
  1242     {
       
  1243         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize);
       
  1244         emitDisjunction(m_pattern.m_body);
       
  1245         regexEnd();
       
  1246 
       
  1247         return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern));
       
  1248     }
       
  1249 
       
  1250     void checkInput(unsigned count)
       
  1251     {
       
  1252         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
       
  1253     }
       
  1254 
       
  1255     void assertionBOL(int inputPosition)
       
  1256     {
       
  1257         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
       
  1258     }
       
  1259 
       
  1260     void assertionEOL(int inputPosition)
       
  1261     {
       
  1262         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
       
  1263     }
       
  1264 
       
  1265     void assertionWordBoundary(bool invert, int inputPosition)
       
  1266     {
       
  1267         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
       
  1268     }
       
  1269 
       
  1270     void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
       
  1271     {
       
  1272         if (m_pattern.m_ignoreCase) {
       
  1273             UChar lo = Unicode::toLower(ch);
       
  1274             UChar hi = Unicode::toUpper(ch);
       
  1275 
       
  1276             if (lo != hi) {
       
  1277                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
       
  1278                 return;
       
  1279             }
       
  1280         }
       
  1281 
       
  1282         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
       
  1283     }
       
  1284 
       
  1285     void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
       
  1286     {
       
  1287         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
       
  1288 
       
  1289         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
       
  1290         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
       
  1291         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
       
  1292     }
       
  1293 
       
  1294     void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
       
  1295     {
       
  1296         ASSERT(subpatternId);
       
  1297 
       
  1298         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
       
  1299 
       
  1300         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
       
  1301         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
       
  1302         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
       
  1303     }
       
  1304 
       
  1305     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
       
  1306     {
       
  1307         int beginTerm = m_bodyDisjunction->terms.size();
       
  1308 
       
  1309         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
       
  1310         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
       
  1311         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
       
  1312         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
       
  1313 
       
  1314         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
       
  1315         m_currentAlternativeIndex = beginTerm + 1;
       
  1316     }
       
  1317 
       
  1318     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
       
  1319     {
       
  1320         int beginTerm = m_bodyDisjunction->terms.size();
       
  1321 
       
  1322         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0));
       
  1323         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
       
  1324         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
       
  1325         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
       
  1326 
       
  1327         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
       
  1328         m_currentAlternativeIndex = beginTerm + 1;
       
  1329     }
       
  1330 
       
  1331     unsigned popParenthesesStack()
       
  1332     {
       
  1333         ASSERT(m_parenthesesStack.size());
       
  1334         int stackEnd = m_parenthesesStack.size() - 1;
       
  1335         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
       
  1336         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
       
  1337         m_parenthesesStack.shrink(stackEnd);
       
  1338 
       
  1339         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
       
  1340         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
       
  1341 
       
  1342         return beginTerm;
       
  1343     }
       
  1344 
       
  1345 #ifndef NDEBUG
       
  1346     void dumpDisjunction(ByteDisjunction* disjunction)
       
  1347     {
       
  1348         printf("ByteDisjunction(%p):\n\t", disjunction);
       
  1349         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
       
  1350             printf("{ %d } ", disjunction->terms[i].type);
       
  1351         printf("\n");
       
  1352     }
       
  1353 #endif
       
  1354 
       
  1355     void closeAlternative(int beginTerm)
       
  1356     {
       
  1357         int origBeginTerm = beginTerm;
       
  1358         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
       
  1359         int endIndex = m_bodyDisjunction->terms.size();
       
  1360 
       
  1361         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
       
  1362 
       
  1363         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
       
  1364             m_bodyDisjunction->terms.remove(beginTerm);
       
  1365         else {
       
  1366             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
       
  1367                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
       
  1368                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
       
  1369                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
       
  1370                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
       
  1371             }
       
  1372 
       
  1373             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
       
  1374 
       
  1375             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
       
  1376             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
       
  1377         }
       
  1378     }
       
  1379 
       
  1380     void closeBodyAlternative()
       
  1381     {
       
  1382         int beginTerm = 0;
       
  1383         int origBeginTerm = 0;
       
  1384         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
       
  1385         int endIndex = m_bodyDisjunction->terms.size();
       
  1386 
       
  1387         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
       
  1388 
       
  1389         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
       
  1390             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
       
  1391             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
       
  1392             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
       
  1393             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
       
  1394         }
       
  1395 
       
  1396         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
       
  1397 
       
  1398         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
       
  1399         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
       
  1400     }
       
  1401 
       
  1402     void atomParenthesesEnd(bool doInline, unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
       
  1403     {
       
  1404         unsigned beginTerm = popParenthesesStack();
       
  1405         closeAlternative(beginTerm + 1);
       
  1406         unsigned endTerm = m_bodyDisjunction->terms.size();
       
  1407 
       
  1408         bool isAssertion = m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin;
       
  1409         bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture;
       
  1410         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
       
  1411 
       
  1412         m_bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
       
  1413         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
       
  1414         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
       
  1415         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
       
  1416 
       
  1417         if (doInline) {
       
  1418             m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
       
  1419             m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
       
  1420             m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
       
  1421             m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
       
  1422         } else {
       
  1423             ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
       
  1424             ASSERT(parenthesesBegin.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
       
  1425 
       
  1426             bool invertOrCapture = parenthesesBegin.invertOrCapture;
       
  1427             unsigned subpatternId = parenthesesBegin.atom.subpatternId;
       
  1428 
       
  1429             unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
       
  1430             ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
       
  1431 
       
  1432             parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
       
  1433             for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
       
  1434                 parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
       
  1435             parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
       
  1436 
       
  1437             m_bodyDisjunction->terms.shrink(beginTerm);
       
  1438 
       
  1439             m_allParenthesesInfo.append(parenthesesDisjunction);
       
  1440             m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition));
       
  1441 
       
  1442             m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
       
  1443             m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
       
  1444             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
       
  1445         }
       
  1446     }
       
  1447 
       
  1448     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize)
       
  1449     {
       
  1450         m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
       
  1451         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin());
       
  1452         m_bodyDisjunction->terms[0].frameLocation = 0;
       
  1453         m_currentAlternativeIndex = 0;
       
  1454     }
       
  1455 
       
  1456     void regexEnd()
       
  1457     {
       
  1458         closeBodyAlternative();
       
  1459     }
       
  1460 
       
  1461     void alternativeBodyDisjunction()
       
  1462     {
       
  1463         int newAlternativeIndex = m_bodyDisjunction->terms.size();
       
  1464         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
       
  1465         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction());
       
  1466 
       
  1467         m_currentAlternativeIndex = newAlternativeIndex;
       
  1468     }
       
  1469 
       
  1470     void alternativeDisjunction()
       
  1471     {
       
  1472         int newAlternativeIndex = m_bodyDisjunction->terms.size();
       
  1473         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
       
  1474         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
       
  1475 
       
  1476         m_currentAlternativeIndex = newAlternativeIndex;
       
  1477     }
       
  1478 
       
  1479     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
       
  1480     {
       
  1481         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
       
  1482             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
       
  1483 
       
  1484             if (alt) {
       
  1485                 if (disjunction == m_pattern.m_body)
       
  1486                     alternativeBodyDisjunction();
       
  1487                 else
       
  1488                     alternativeDisjunction();
       
  1489             }
       
  1490 
       
  1491             PatternAlternative* alternative = disjunction->m_alternatives[alt];
       
  1492             unsigned minimumSize = alternative->m_minimumSize;
       
  1493 
       
  1494             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
       
  1495             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
       
  1496             if (countToCheck)
       
  1497                 checkInput(countToCheck);
       
  1498             currentCountAlreadyChecked += countToCheck;
       
  1499 
       
  1500             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
       
  1501                 PatternTerm& term = alternative->m_terms[i];
       
  1502 
       
  1503                 switch (term.type) {
       
  1504                 case PatternTerm::TypeAssertionBOL:
       
  1505                     assertionBOL(term.inputPosition - currentCountAlreadyChecked);
       
  1506                     break;
       
  1507 
       
  1508                 case PatternTerm::TypeAssertionEOL:
       
  1509                     assertionEOL(term.inputPosition - currentCountAlreadyChecked);
       
  1510                     break;
       
  1511 
       
  1512                 case PatternTerm::TypeAssertionWordBoundary:
       
  1513                     assertionWordBoundary(term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked);
       
  1514                     break;
       
  1515 
       
  1516                 case PatternTerm::TypePatternCharacter:
       
  1517                     atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
       
  1518                     break;
       
  1519 
       
  1520                 case PatternTerm::TypeCharacterClass:
       
  1521                     atomCharacterClass(term.characterClass, term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
       
  1522                     break;
       
  1523 
       
  1524                 case PatternTerm::TypeBackReference:
       
  1525                     atomBackReference(term.subpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
       
  1526                         break;
       
  1527 
       
  1528                 case PatternTerm::TypeForwardReference:
       
  1529                     break;
       
  1530 
       
  1531                 case PatternTerm::TypeParenthesesSubpattern: {
       
  1532                     unsigned disjunctionAlreadyCheckedCount = 0;
       
  1533                     if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
       
  1534                         if (term.quantityType == QuantifierFixedCount) {
       
  1535                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
       
  1536                             unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
       
  1537                             atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation);
       
  1538                             emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, term.parentheses.disjunction->m_minimumSize);
       
  1539                             atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
       
  1540                         } else {
       
  1541                             unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
       
  1542                             atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
       
  1543                             emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
       
  1544                             atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
       
  1545                         }
       
  1546                     } else {
       
  1547                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
       
  1548                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
       
  1549                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
       
  1550                         atomParenthesesEnd(false, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
       
  1551                     }
       
  1552                     break;
       
  1553                 }
       
  1554 
       
  1555                 case PatternTerm::TypeParentheticalAssertion: {
       
  1556                     unsigned alternativeFrameLocation = term.frameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
       
  1557 
       
  1558                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation);
       
  1559                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
       
  1560                     atomParenthesesEnd(true, term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType);
       
  1561                     break;
       
  1562                 }
       
  1563                 }
       
  1564             }
       
  1565         }
       
  1566     }
       
  1567 
       
  1568 private:
       
  1569     RegexPattern& m_pattern;
       
  1570     OwnPtr<ByteDisjunction> m_bodyDisjunction;
       
  1571     unsigned m_currentAlternativeIndex;
       
  1572     Vector<ParenthesesStackEntry> m_parenthesesStack;
       
  1573     Vector<ByteDisjunction*> m_allParenthesesInfo;
       
  1574 };
       
  1575 
       
  1576 
       
  1577 PassOwnPtr<BytecodePattern> byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
       
  1578 {
       
  1579     RegexPattern pattern(ignoreCase, multiline);
       
  1580 
       
  1581     if ((error = compileRegex(patternString, pattern)))
       
  1582         return PassOwnPtr<BytecodePattern>();
       
  1583 
       
  1584     numSubpatterns = pattern.m_numSubpatterns;
       
  1585 
       
  1586     return ByteCompiler(pattern).compile();
       
  1587 }
       
  1588 
       
  1589 int interpretRegex(BytecodePattern* regex, const UChar* input, unsigned start, unsigned length, int* output)
       
  1590 {
       
  1591     return Interpreter(regex, output, input, start, length).interpret();
       
  1592 }
       
  1593 
       
  1594 
       
  1595 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (RegexStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoPatternCharacter);
       
  1596 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (RegexStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoCharacterClass);
       
  1597 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference);
       
  1598 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative);
       
  1599 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion);
       
  1600 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParenthesesOnce);
       
  1601 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses);
       
  1602 
       
  1603 
       
  1604 } }
       
  1605 
       
  1606 #endif