JavaScriptCore/pcre/pcre_exec.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /* This is JavaScriptCore's variant of the PCRE library. While this library
       
     2 started out as a copy of PCRE, many of the features of PCRE have been
       
     3 removed. This library now supports only the regular expression features
       
     4 required by the JavaScript language specification, and has only the functions
       
     5 needed by JavaScriptCore and the rest of WebKit.
       
     6 
       
     7                  Originally written by Philip Hazel
       
     8            Copyright (c) 1997-2006 University of Cambridge
       
     9     Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
       
    10     Copyright (C) 2007 Eric Seidel <eric@webkit.org>
       
    11 
       
    12 -----------------------------------------------------------------------------
       
    13 Redistribution and use in source and binary forms, with or without
       
    14 modification, are permitted provided that the following conditions are met:
       
    15 
       
    16     * Redistributions of source code must retain the above copyright notice,
       
    17       this list of conditions and the following disclaimer.
       
    18 
       
    19     * Redistributions in binary form must reproduce the above copyright
       
    20       notice, this list of conditions and the following disclaimer in the
       
    21       documentation and/or other materials provided with the distribution.
       
    22 
       
    23     * Neither the name of the University of Cambridge nor the names of its
       
    24       contributors may be used to endorse or promote products derived from
       
    25       this software without specific prior written permission.
       
    26 
       
    27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
       
    28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
       
    31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    37 POSSIBILITY OF SUCH DAMAGE.
       
    38 -----------------------------------------------------------------------------
       
    39 */
       
    40 
       
    41 /* This module contains jsRegExpExecute(), the externally visible function
       
    42 that does pattern matching using an NFA algorithm, following the rules from
       
    43 the JavaScript specification. There are also some supporting functions. */
       
    44 
       
    45 #include "config.h"
       
    46 #include "pcre_internal.h"
       
    47 
       
    48 #include <limits.h>
       
    49 #include <wtf/ASCIICType.h>
       
    50 #include <wtf/Vector.h>
       
    51 
       
    52 #if REGEXP_HISTOGRAM
       
    53 #include <wtf/DateMath.h>
       
    54 #include <runtime/UString.h>
       
    55 #endif
       
    56 
       
    57 using namespace WTF;
       
    58 
       
    59 #if COMPILER(GCC)
       
    60 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
       
    61 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
       
    62 #endif
       
    63 
       
    64 /* Avoid warnings on Windows. */
       
    65 #undef min
       
    66 #undef max
       
    67 
       
    68 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
       
    69 typedef int ReturnLocation;
       
    70 #else
       
    71 typedef void* ReturnLocation;
       
    72 #endif
       
    73 
       
    74 #if !REGEXP_HISTOGRAM
       
    75 
       
    76 class HistogramTimeLogger {
       
    77 public:
       
    78     HistogramTimeLogger(const JSRegExp*) { }
       
    79 };
       
    80 
       
    81 #else
       
    82 
       
    83 using namespace JSC;
       
    84 
       
    85 class Histogram {
       
    86 public:
       
    87     ~Histogram();
       
    88     void add(const JSRegExp*, double);
       
    89 
       
    90 private:
       
    91     typedef HashMap<RefPtr<UString::Rep>, double> Map;
       
    92     Map times;
       
    93 };
       
    94 
       
    95 class HistogramTimeLogger {
       
    96 public:
       
    97     HistogramTimeLogger(const JSRegExp*);
       
    98     ~HistogramTimeLogger();
       
    99 
       
   100 private:
       
   101     const JSRegExp* m_re;
       
   102     double m_startTime;
       
   103 };
       
   104 
       
   105 #endif
       
   106 
       
   107 /* Structure for building a chain of data for holding the values of
       
   108 the subject pointer at the start of each bracket, used to detect when
       
   109 an empty string has been matched by a bracket to break infinite loops. */ 
       
   110 struct BracketChainNode {
       
   111     BracketChainNode* previousBracket;
       
   112     const UChar* bracketStart;
       
   113 };
       
   114 
       
   115 struct MatchFrame : FastAllocBase {
       
   116     ReturnLocation returnLocation;
       
   117     struct MatchFrame* previousFrame;
       
   118     
       
   119     /* Function arguments that may change */
       
   120     struct {
       
   121         const UChar* subjectPtr;
       
   122         const unsigned char* instructionPtr;
       
   123         int offsetTop;
       
   124         BracketChainNode* bracketChain;
       
   125     } args;
       
   126     
       
   127     
       
   128     /* PCRE uses "fake" recursion built off of gotos, thus
       
   129      stack-based local variables are not safe to use.  Instead we have to
       
   130      store local variables on the current MatchFrame. */
       
   131     struct {
       
   132         const unsigned char* data;
       
   133         const unsigned char* startOfRepeatingBracket;
       
   134         const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
       
   135         const unsigned char* instructionPtrAtStartOfOnce;
       
   136         
       
   137         int repeatOthercase;
       
   138         
       
   139         int ctype;
       
   140         int fc;
       
   141         int fi;
       
   142         int length;
       
   143         int max;
       
   144         int number;
       
   145         int offset;
       
   146         int saveOffset1;
       
   147         int saveOffset2;
       
   148         int saveOffset3;
       
   149         
       
   150         BracketChainNode bracketChainNode;
       
   151     } locals;
       
   152 };
       
   153 
       
   154 /* Structure for passing "static" information around between the functions
       
   155 doing traditional NFA matching, so that they are thread-safe. */
       
   156 
       
   157 struct MatchData {
       
   158   int*   offsetVector;         /* Offset vector */
       
   159   int    offsetEnd;            /* One past the end */
       
   160   int    offsetMax;            /* The maximum usable for return data */
       
   161   bool   offsetOverflow;       /* Set if too many extractions */
       
   162   const UChar*  startSubject;         /* Start of the subject string */
       
   163   const UChar*  endSubject;           /* End of the subject string */
       
   164   const UChar*  endMatchPtr;         /* Subject position at end match */
       
   165   int    endOffsetTop;        /* Highwater mark at end of match */
       
   166   bool   multiline;
       
   167   bool   ignoreCase;
       
   168 };
       
   169 
       
   170 /* The maximum remaining length of subject we are prepared to search for a
       
   171 reqByte match. */
       
   172 
       
   173 #define REQ_BYTE_MAX 1000
       
   174 
       
   175 /* The below limit restricts the number of "recursive" match calls in order to
       
   176 avoid spending exponential time on complex regular expressions. */
       
   177 
       
   178 static const unsigned matchLimit = 1000000;
       
   179 
       
   180 #ifdef DEBUG
       
   181 /*************************************************
       
   182 *        Debugging function to print chars       *
       
   183 *************************************************/
       
   184 
       
   185 /* Print a sequence of chars in printable format, stopping at the end of the
       
   186 subject if the requested.
       
   187 
       
   188 Arguments:
       
   189   p           points to characters
       
   190   length      number to print
       
   191   isSubject  true if printing from within md.startSubject
       
   192   md          pointer to matching data block, if isSubject is true
       
   193 */
       
   194 
       
   195 static void pchars(const UChar* p, int length, bool isSubject, const MatchData& md)
       
   196 {
       
   197     if (isSubject && length > md.endSubject - p)
       
   198         length = md.endSubject - p;
       
   199     while (length-- > 0) {
       
   200         int c;
       
   201         if (isASCIIPrintable(c = *(p++)))
       
   202             printf("%c", c);
       
   203         else if (c < 256)
       
   204             printf("\\x%02x", c);
       
   205         else
       
   206             printf("\\x{%x}", c);
       
   207     }
       
   208 }
       
   209 #endif
       
   210 
       
   211 /*************************************************
       
   212 *          Match a back-reference                *
       
   213 *************************************************/
       
   214 
       
   215 /* If a back reference hasn't been set, the length that is passed is greater
       
   216 than the number of characters left in the string, so the match fails.
       
   217 
       
   218 Arguments:
       
   219   offset      index into the offset vector
       
   220   subjectPtr        points into the subject
       
   221   length      length to be matched
       
   222   md          points to match data block
       
   223 
       
   224 Returns:      true if matched
       
   225 */
       
   226 
       
   227 static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
       
   228 {
       
   229     const UChar* p = md.startSubject + md.offsetVector[offset];
       
   230     
       
   231 #ifdef DEBUG
       
   232     if (subjectPtr >= md.endSubject)
       
   233         printf("matching subject <null>");
       
   234     else {
       
   235         printf("matching subject ");
       
   236         pchars(subjectPtr, length, true, md);
       
   237     }
       
   238     printf(" against backref ");
       
   239     pchars(p, length, false, md);
       
   240     printf("\n");
       
   241 #endif
       
   242     
       
   243     /* Always fail if not enough characters left */
       
   244     
       
   245     if (length > md.endSubject - subjectPtr)
       
   246         return false;
       
   247     
       
   248     /* Separate the caselesss case for speed */
       
   249     
       
   250     if (md.ignoreCase) {
       
   251         while (length-- > 0) {
       
   252             UChar c = *p++;
       
   253             int othercase = jsc_pcre_ucp_othercase(c);
       
   254             UChar d = *subjectPtr++;
       
   255             if (c != d && othercase != d)
       
   256                 return false;
       
   257         }
       
   258     }
       
   259     else {
       
   260         while (length-- > 0)
       
   261             if (*p++ != *subjectPtr++)
       
   262                 return false;
       
   263     }
       
   264     
       
   265     return true;
       
   266 }
       
   267 
       
   268 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
       
   269 
       
   270 /* Use numbered labels and switch statement at the bottom of the match function. */
       
   271 
       
   272 #define RMATCH_WHERE(num) num
       
   273 #define RRETURN_LABEL RRETURN_SWITCH
       
   274 
       
   275 #else
       
   276 
       
   277 /* Use GCC's computed goto extension. */
       
   278 
       
   279 /* For one test case this is more than 40% faster than the switch statement.
       
   280 We could avoid the use of the num argument entirely by using local labels,
       
   281 but using it for the GCC case as well as the non-GCC case allows us to share
       
   282 a bit more code and notice if we use conflicting numbers.*/
       
   283 
       
   284 #define RMATCH_WHERE(num) &&RRETURN_##num
       
   285 #define RRETURN_LABEL *stack.currentFrame->returnLocation
       
   286 
       
   287 #endif
       
   288 
       
   289 #define RECURSIVE_MATCH_COMMON(num) \
       
   290     goto RECURSE;\
       
   291     RRETURN_##num: \
       
   292     stack.popCurrentFrame();
       
   293 
       
   294 #define RECURSIVE_MATCH(num, ra, rb) \
       
   295     do { \
       
   296         stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
       
   297         RECURSIVE_MATCH_COMMON(num) \
       
   298     } while (0)
       
   299 
       
   300 #define RECURSIVE_MATCH_NEW_GROUP(num, ra, rb) \
       
   301     do { \
       
   302         stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
       
   303         startNewGroup(stack.currentFrame); \
       
   304         RECURSIVE_MATCH_COMMON(num) \
       
   305     } while (0)
       
   306 
       
   307 #define RRETURN goto RRETURN_LABEL
       
   308 
       
   309 #define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
       
   310 
       
   311 /*************************************************
       
   312 *         Match from current position            *
       
   313 *************************************************/
       
   314 
       
   315 /* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
       
   316 in the subject string, while substringStart holds the value of subjectPtr at the start of the
       
   317 last bracketed group - used for breaking infinite loops matching zero-length
       
   318 strings. This function is called recursively in many circumstances. Whenever it
       
   319 returns a negative (error) response, the outer match() call must also return the
       
   320 same response.
       
   321 
       
   322 Arguments:
       
   323    subjectPtr        pointer in subject
       
   324    instructionPtr       position in code
       
   325    offsetTop  current top pointer
       
   326    md          pointer to "static" info for the match
       
   327 
       
   328 Returns:       1 if matched          )  these values are >= 0
       
   329                0 if failed to match  )
       
   330                a negative error value if aborted by an error condition
       
   331                  (e.g. stopped by repeated call or recursion limit)
       
   332 */
       
   333 
       
   334 static const unsigned numFramesOnStack = 16;
       
   335 
       
   336 struct MatchStack {
       
   337     MatchStack()
       
   338         : framesEnd(frames + numFramesOnStack)
       
   339         , currentFrame(frames)
       
   340         , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
       
   341     {
       
   342         ASSERT((sizeof(frames) / sizeof(frames[0])) == numFramesOnStack);
       
   343     }
       
   344     
       
   345     MatchFrame frames[numFramesOnStack];
       
   346     MatchFrame* framesEnd;
       
   347     MatchFrame* currentFrame;
       
   348     unsigned size;
       
   349     
       
   350     inline bool canUseStackBufferForNextFrame()
       
   351     {
       
   352         return size < numFramesOnStack;
       
   353     }
       
   354     
       
   355     inline MatchFrame* allocateNextFrame()
       
   356     {
       
   357         if (canUseStackBufferForNextFrame())
       
   358             return currentFrame + 1;
       
   359         return new MatchFrame;
       
   360     }
       
   361     
       
   362     inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation)
       
   363     {
       
   364         MatchFrame* newframe = allocateNextFrame();
       
   365         newframe->previousFrame = currentFrame;
       
   366 
       
   367         newframe->args.subjectPtr = currentFrame->args.subjectPtr;
       
   368         newframe->args.offsetTop = currentFrame->args.offsetTop;
       
   369         newframe->args.instructionPtr = instructionPtr;
       
   370         newframe->args.bracketChain = bracketChain;
       
   371         newframe->returnLocation = returnLocation;
       
   372         size++;
       
   373 
       
   374         currentFrame = newframe;
       
   375     }
       
   376     
       
   377     inline void popCurrentFrame()
       
   378     {
       
   379         MatchFrame* oldFrame = currentFrame;
       
   380         currentFrame = currentFrame->previousFrame;
       
   381         if (size > numFramesOnStack)
       
   382             delete oldFrame;
       
   383         size--;
       
   384     }
       
   385 
       
   386     void popAllFrames()
       
   387     {
       
   388         while (size)
       
   389             popCurrentFrame();
       
   390     }
       
   391 };
       
   392 
       
   393 static int matchError(int errorCode, MatchStack& stack)
       
   394 {
       
   395     stack.popAllFrames();
       
   396     return errorCode;
       
   397 }
       
   398 
       
   399 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
       
   400  if there are extra bytes. This is called when we know we are in UTF-8 mode. */
       
   401 
       
   402 static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len)
       
   403 {
       
   404     c = *subjectPtr;
       
   405     if ((c & 0xc0) == 0xc0) {
       
   406         int gcaa = jsc_pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
       
   407         int gcss = 6 * gcaa;
       
   408         c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss;
       
   409         for (int gcii = 1; gcii <= gcaa; gcii++) {
       
   410             gcss -= 6;
       
   411             c |= (subjectPtr[gcii] & 0x3f) << gcss;
       
   412         }
       
   413         len += gcaa;
       
   414     }
       
   415 }
       
   416 
       
   417 static inline void startNewGroup(MatchFrame* currentFrame)
       
   418 {
       
   419     /* At the start of a bracketed group, add the current subject pointer to the
       
   420      stack of such pointers, to be re-instated at the end of the group when we hit
       
   421      the closing ket. When match() is called in other circumstances, we don't add to
       
   422      this stack. */
       
   423     
       
   424     currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.bracketChain;
       
   425     currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subjectPtr;
       
   426     currentFrame->args.bracketChain = &currentFrame->locals.bracketChainNode;
       
   427 }
       
   428 
       
   429 // FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusing
       
   430 static inline void repeatInformationFromInstructionOffset(int instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
       
   431 {
       
   432     // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
       
   433     static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
       
   434     static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
       
   435 
       
   436     ASSERT(instructionOffset >= 0);
       
   437     ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
       
   438 
       
   439     minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
       
   440     minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
       
   441     maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
       
   442 }
       
   443 
       
   444 static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md)
       
   445 {
       
   446     bool isMatch = false;
       
   447     int min;
       
   448     bool minimize = false; /* Initialization not really needed, but some compilers think so. */
       
   449     unsigned remainingMatchCount = matchLimit;
       
   450     int othercase; /* Declare here to avoid errors during jumps */
       
   451     
       
   452     MatchStack stack;
       
   453 
       
   454     /* The opcode jump table. */
       
   455 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
       
   456 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
       
   457     static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
       
   458 #undef EMIT_JUMP_TABLE_ENTRY
       
   459 #endif
       
   460     
       
   461     /* One-time setup of the opcode jump table. */
       
   462 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
       
   463     for (int i = 255; !opcodeJumpTable[i]; i--)
       
   464         opcodeJumpTable[i] = &&CAPTURING_BRACKET;
       
   465 #endif
       
   466     
       
   467 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
       
   468     // Shark shows this as a hot line
       
   469     // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
       
   470     stack.currentFrame->returnLocation = &&RETURN;
       
   471 #else
       
   472     stack.currentFrame->returnLocation = 0;
       
   473 #endif
       
   474     stack.currentFrame->args.subjectPtr = subjectPtr;
       
   475     stack.currentFrame->args.instructionPtr = instructionPtr;
       
   476     stack.currentFrame->args.offsetTop = offsetTop;
       
   477     stack.currentFrame->args.bracketChain = 0;
       
   478     startNewGroup(stack.currentFrame);
       
   479     
       
   480     /* This is where control jumps back to to effect "recursion" */
       
   481     
       
   482 RECURSE:
       
   483     if (!--remainingMatchCount)
       
   484         return matchError(JSRegExpErrorHitLimit, stack);
       
   485 
       
   486     /* Now start processing the operations. */
       
   487     
       
   488 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
       
   489     while (true)
       
   490 #endif
       
   491     {
       
   492         
       
   493 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
       
   494 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
       
   495 #define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]
       
   496 #else
       
   497 #define BEGIN_OPCODE(opcode) case OP_##opcode
       
   498 #define NEXT_OPCODE continue
       
   499 #endif
       
   500         
       
   501 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
       
   502         NEXT_OPCODE;
       
   503 #else
       
   504         switch (*stack.currentFrame->args.instructionPtr)
       
   505 #endif
       
   506         {
       
   507             /* Non-capturing bracket: optimized */
       
   508                 
       
   509             BEGIN_OPCODE(BRA):
       
   510             NON_CAPTURING_BRACKET:
       
   511                 DPRINTF(("start bracket 0\n"));
       
   512                 do {
       
   513                     RECURSIVE_MATCH_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
       
   514                     if (isMatch)
       
   515                         RRETURN;
       
   516                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
       
   517                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
       
   518                 DPRINTF(("bracket 0 failed\n"));
       
   519                 RRETURN;
       
   520                 
       
   521             /* Skip over large extraction number data if encountered. */
       
   522                 
       
   523             BEGIN_OPCODE(BRANUMBER):
       
   524                 stack.currentFrame->args.instructionPtr += 3;
       
   525                 NEXT_OPCODE;
       
   526                 
       
   527             /* End of the pattern. */
       
   528                 
       
   529             BEGIN_OPCODE(END):
       
   530                 md.endMatchPtr = stack.currentFrame->args.subjectPtr;          /* Record where we ended */
       
   531                 md.endOffsetTop = stack.currentFrame->args.offsetTop;   /* and how many extracts were taken */
       
   532                 isMatch = true;
       
   533                 RRETURN;
       
   534                 
       
   535             /* Assertion brackets. Check the alternative branches in turn - the
       
   536              matching won't pass the KET for an assertion. If any one branch matches,
       
   537              the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
       
   538              start of each branch to move the current point backwards, so the code at
       
   539              this level is identical to the lookahead case. */
       
   540                 
       
   541             BEGIN_OPCODE(ASSERT):
       
   542                 do {
       
   543                     RECURSIVE_MATCH_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
       
   544                     if (isMatch)
       
   545                         break;
       
   546                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
       
   547                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
       
   548                 if (*stack.currentFrame->args.instructionPtr == OP_KET)
       
   549                     RRETURN_NO_MATCH;
       
   550                 
       
   551                 /* Continue from after the assertion, updating the offsets high water
       
   552                  mark, since extracts may have been taken during the assertion. */
       
   553                 
       
   554                 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
       
   555                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
       
   556                 stack.currentFrame->args.offsetTop = md.endOffsetTop;
       
   557                 NEXT_OPCODE;
       
   558                 
       
   559             /* Negative assertion: all branches must fail to match */
       
   560                 
       
   561             BEGIN_OPCODE(ASSERT_NOT):
       
   562                 do {
       
   563                     RECURSIVE_MATCH_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
       
   564                     if (isMatch)
       
   565                         RRETURN_NO_MATCH;
       
   566                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
       
   567                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
       
   568                 
       
   569                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
       
   570                 NEXT_OPCODE;
       
   571                 
       
   572             /* An alternation is the end of a branch; scan along to find the end of the
       
   573              bracketed group and go to there. */
       
   574                 
       
   575             BEGIN_OPCODE(ALT):
       
   576                 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
       
   577                 NEXT_OPCODE;
       
   578                 
       
   579             /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
       
   580              that it may occur zero times. It may repeat infinitely, or not at all -
       
   581              i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
       
   582              repeat limits are compiled as a number of copies, with the optional ones
       
   583              preceded by BRAZERO or BRAMINZERO. */
       
   584                 
       
   585             BEGIN_OPCODE(BRAZERO): {
       
   586                 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
       
   587                 RECURSIVE_MATCH_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain);
       
   588                 if (isMatch)
       
   589                     RRETURN;
       
   590                 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
       
   591                 stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
       
   592                 NEXT_OPCODE;
       
   593             }
       
   594                 
       
   595             BEGIN_OPCODE(BRAMINZERO): {
       
   596                 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
       
   597                 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
       
   598                 RECURSIVE_MATCH_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
       
   599                 if (isMatch)
       
   600                     RRETURN;
       
   601                 stack.currentFrame->args.instructionPtr++;
       
   602                 NEXT_OPCODE;
       
   603             }
       
   604                 
       
   605             /* End of a group, repeated or non-repeating. If we are at the end of
       
   606              an assertion "group", stop matching and return 1, but record the
       
   607              current high water mark for use by positive assertions. Do this also
       
   608              for the "once" (not-backup up) groups. */
       
   609                 
       
   610             BEGIN_OPCODE(KET):
       
   611             BEGIN_OPCODE(KETRMIN):
       
   612             BEGIN_OPCODE(KETRMAX):
       
   613                 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instructionPtr + 1);
       
   614                 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.bracketChain->bracketStart;
       
   615 
       
   616                 /* Back up the stack of bracket start pointers. */
       
   617 
       
   618                 stack.currentFrame->args.bracketChain = stack.currentFrame->args.bracketChain->previousBracket;
       
   619 
       
   620                 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT) {
       
   621                     md.endOffsetTop = stack.currentFrame->args.offsetTop;
       
   622                     isMatch = true;
       
   623                     RRETURN;
       
   624                 }
       
   625                 
       
   626                 /* In all other cases except a conditional group we have to check the
       
   627                  group number back at the start and if necessary complete handling an
       
   628                  extraction by setting the offsets and bumping the high water mark. */
       
   629                 
       
   630                 stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
       
   631                 
       
   632                 /* For extended extraction brackets (large number), we have to fish out
       
   633                  the number from a dummy opcode at the start. */
       
   634                 
       
   635                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
       
   636                     stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE);
       
   637                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
       
   638                 
       
   639 #ifdef DEBUG
       
   640                 printf("end bracket %d", stack.currentFrame->locals.number);
       
   641                 printf("\n");
       
   642 #endif
       
   643                 
       
   644                 /* Test for a numbered group. This includes groups called as a result
       
   645                  of recursion. Note that whole-pattern recursion is coded as a recurse
       
   646                  into group 0, so it won't be picked up here. Instead, we catch it when
       
   647                  the OP_END is reached. */
       
   648                 
       
   649                 if (stack.currentFrame->locals.number > 0) {
       
   650                     if (stack.currentFrame->locals.offset >= md.offsetMax)
       
   651                         md.offsetOverflow = true;
       
   652                     else {
       
   653                         md.offsetVector[stack.currentFrame->locals.offset] =
       
   654                         md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
       
   655                         md.offsetVector[stack.currentFrame->locals.offset+1] = stack.currentFrame->args.subjectPtr - md.startSubject;
       
   656                         if (stack.currentFrame->args.offsetTop <= stack.currentFrame->locals.offset)
       
   657                             stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2;
       
   658                     }
       
   659                 }
       
   660                 
       
   661                 /* For a non-repeating ket, just continue at this level. This also
       
   662                  happens for a repeating ket if no characters were matched in the group.
       
   663                  This is the forcible breaking of infinite loops as implemented in Perl
       
   664                  5.005. If there is an options reset, it will get obeyed in the normal
       
   665                  course of events. */
       
   666                 
       
   667                 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
       
   668                     stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
       
   669                     NEXT_OPCODE;
       
   670                 }
       
   671                 
       
   672                 /* The repeating kets try the rest of the pattern or restart from the
       
   673                  preceding bracket, in the appropriate order. */
       
   674                 
       
   675                 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
       
   676                     RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
       
   677                     if (isMatch)
       
   678                         RRETURN;
       
   679                     RECURSIVE_MATCH_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
       
   680                     if (isMatch)
       
   681                         RRETURN;
       
   682                 } else { /* OP_KETRMAX */
       
   683                     RECURSIVE_MATCH_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
       
   684                     if (isMatch)
       
   685                         RRETURN;
       
   686                     RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
       
   687                     if (isMatch)
       
   688                         RRETURN;
       
   689                 }
       
   690                 RRETURN;
       
   691                 
       
   692             /* Start of subject. */
       
   693 
       
   694             BEGIN_OPCODE(CIRC):
       
   695                 if (stack.currentFrame->args.subjectPtr != md.startSubject)
       
   696                     RRETURN_NO_MATCH;
       
   697                 stack.currentFrame->args.instructionPtr++;
       
   698                 NEXT_OPCODE;
       
   699 
       
   700             /* After internal newline if multiline. */
       
   701 
       
   702             BEGIN_OPCODE(BOL):
       
   703                 if (stack.currentFrame->args.subjectPtr != md.startSubject && !isNewline(stack.currentFrame->args.subjectPtr[-1]))
       
   704                     RRETURN_NO_MATCH;
       
   705                 stack.currentFrame->args.instructionPtr++;
       
   706                 NEXT_OPCODE;
       
   707 
       
   708             /* End of subject. */
       
   709 
       
   710             BEGIN_OPCODE(DOLL):
       
   711                 if (stack.currentFrame->args.subjectPtr < md.endSubject)
       
   712                     RRETURN_NO_MATCH;
       
   713                 stack.currentFrame->args.instructionPtr++;
       
   714                 NEXT_OPCODE;
       
   715 
       
   716             /* Before internal newline if multiline. */
       
   717 
       
   718             BEGIN_OPCODE(EOL):
       
   719                 if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNewline(*stack.currentFrame->args.subjectPtr))
       
   720                     RRETURN_NO_MATCH;
       
   721                 stack.currentFrame->args.instructionPtr++;
       
   722                 NEXT_OPCODE;
       
   723                 
       
   724             /* Word boundary assertions */
       
   725                 
       
   726             BEGIN_OPCODE(NOT_WORD_BOUNDARY):
       
   727             BEGIN_OPCODE(WORD_BOUNDARY): {
       
   728                 bool currentCharIsWordChar = false;
       
   729                 bool previousCharIsWordChar = false;
       
   730                 
       
   731                 if (stack.currentFrame->args.subjectPtr > md.startSubject)
       
   732                     previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
       
   733                 if (stack.currentFrame->args.subjectPtr < md.endSubject)
       
   734                     currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
       
   735                 
       
   736                 /* Now see if the situation is what we want */
       
   737                 bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
       
   738                 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
       
   739                     RRETURN_NO_MATCH;
       
   740                 NEXT_OPCODE;
       
   741             }
       
   742                 
       
   743             /* Match a single character type; inline for speed */
       
   744                 
       
   745             BEGIN_OPCODE(NOT_NEWLINE):
       
   746                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
   747                     RRETURN_NO_MATCH;
       
   748                 if (isNewline(*stack.currentFrame->args.subjectPtr++))
       
   749                     RRETURN_NO_MATCH;
       
   750                 stack.currentFrame->args.instructionPtr++;
       
   751                 NEXT_OPCODE;
       
   752 
       
   753             BEGIN_OPCODE(NOT_DIGIT):
       
   754                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
   755                     RRETURN_NO_MATCH;
       
   756                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
       
   757                     RRETURN_NO_MATCH;
       
   758                 stack.currentFrame->args.instructionPtr++;
       
   759                 NEXT_OPCODE;
       
   760 
       
   761             BEGIN_OPCODE(DIGIT):
       
   762                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
   763                     RRETURN_NO_MATCH;
       
   764                 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
       
   765                     RRETURN_NO_MATCH;
       
   766                 stack.currentFrame->args.instructionPtr++;
       
   767                 NEXT_OPCODE;
       
   768 
       
   769             BEGIN_OPCODE(NOT_WHITESPACE):
       
   770                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
   771                     RRETURN_NO_MATCH;
       
   772                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
       
   773                     RRETURN_NO_MATCH;
       
   774                 stack.currentFrame->args.instructionPtr++;
       
   775                 NEXT_OPCODE;
       
   776 
       
   777             BEGIN_OPCODE(WHITESPACE):
       
   778                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
   779                     RRETURN_NO_MATCH;
       
   780                 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
       
   781                     RRETURN_NO_MATCH;
       
   782                 stack.currentFrame->args.instructionPtr++;
       
   783                 NEXT_OPCODE;
       
   784                 
       
   785             BEGIN_OPCODE(NOT_WORDCHAR):
       
   786                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
   787                     RRETURN_NO_MATCH;
       
   788                 if (isWordChar(*stack.currentFrame->args.subjectPtr++))
       
   789                     RRETURN_NO_MATCH;
       
   790                 stack.currentFrame->args.instructionPtr++;
       
   791                 NEXT_OPCODE;
       
   792                 
       
   793             BEGIN_OPCODE(WORDCHAR):
       
   794                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
   795                     RRETURN_NO_MATCH;
       
   796                 if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
       
   797                     RRETURN_NO_MATCH;
       
   798                 stack.currentFrame->args.instructionPtr++;
       
   799                 NEXT_OPCODE;
       
   800                 
       
   801             /* Match a back reference, possibly repeatedly. Look past the end of the
       
   802              item to see if there is repeat information following. The code is similar
       
   803              to that for character classes, but repeated for efficiency. Then obey
       
   804              similar code to character type repeats - written out again for speed.
       
   805              However, if the referenced string is the empty string, always treat
       
   806              it as matched, any number of times (otherwise there could be infinite
       
   807              loops). */
       
   808                 
       
   809             BEGIN_OPCODE(REF):
       
   810                 stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1;               /* Doubled ref number */
       
   811                 stack.currentFrame->args.instructionPtr += 3;                                 /* Advance past item */
       
   812                 
       
   813                 /* If the reference is unset, set the length to be longer than the amount
       
   814                  of subject left; this ensures that every attempt at a match fails. We
       
   815                  can't just fail here, because of the possibility of quantifiers with zero
       
   816                  minima. */
       
   817                 
       
   818                 if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
       
   819                     stack.currentFrame->locals.length = 0;
       
   820                 else
       
   821                     stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset];
       
   822                 
       
   823                 /* Set up for repetition, or handle the non-repeated case */
       
   824                 
       
   825                 switch (*stack.currentFrame->args.instructionPtr) {
       
   826                     case OP_CRSTAR:
       
   827                     case OP_CRMINSTAR:
       
   828                     case OP_CRPLUS:
       
   829                     case OP_CRMINPLUS:
       
   830                     case OP_CRQUERY:
       
   831                     case OP_CRMINQUERY:
       
   832                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
       
   833                         break;
       
   834                         
       
   835                     case OP_CRRANGE:
       
   836                     case OP_CRMINRANGE:
       
   837                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
       
   838                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
       
   839                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
       
   840                         if (stack.currentFrame->locals.max == 0)
       
   841                             stack.currentFrame->locals.max = INT_MAX;
       
   842                         stack.currentFrame->args.instructionPtr += 5;
       
   843                         break;
       
   844                     
       
   845                     default:               /* No repeat follows */
       
   846                         if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
       
   847                             RRETURN_NO_MATCH;
       
   848                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
       
   849                         NEXT_OPCODE;
       
   850                 }
       
   851                 
       
   852                 /* If the length of the reference is zero, just continue with the
       
   853                  main loop. */
       
   854                 
       
   855                 if (stack.currentFrame->locals.length == 0)
       
   856                     NEXT_OPCODE;
       
   857                 
       
   858                 /* First, ensure the minimum number of matches are present. */
       
   859                 
       
   860                 for (int i = 1; i <= min; i++) {
       
   861                     if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
       
   862                         RRETURN_NO_MATCH;
       
   863                     stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
       
   864                 }
       
   865                 
       
   866                 /* If min = max, continue at the same level without recursion.
       
   867                  They are not both allowed to be zero. */
       
   868                 
       
   869                 if (min == stack.currentFrame->locals.max)
       
   870                     NEXT_OPCODE;
       
   871                 
       
   872                 /* If minimizing, keep trying and advancing the pointer */
       
   873                 
       
   874                 if (minimize) {
       
   875                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
       
   876                         RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
   877                         if (isMatch)
       
   878                             RRETURN;
       
   879                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
       
   880                             RRETURN;
       
   881                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
       
   882                     }
       
   883                     /* Control never reaches here */
       
   884                 }
       
   885                 
       
   886                 /* If maximizing, find the longest string and work backwards */
       
   887                 
       
   888                 else {
       
   889                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
       
   890                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
   891                         if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
       
   892                             break;
       
   893                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
       
   894                     }
       
   895                     while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
       
   896                         RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
   897                         if (isMatch)
       
   898                             RRETURN;
       
   899                         stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
       
   900                     }
       
   901                     RRETURN_NO_MATCH;
       
   902                 }
       
   903                 /* Control never reaches here */
       
   904                 
       
   905             /* Match a bit-mapped character class, possibly repeatedly. This op code is
       
   906              used when all the characters in the class have values in the range 0-255,
       
   907              and either the matching is caseful, or the characters are in the range
       
   908              0-127 when UTF-8 processing is enabled. The only difference between
       
   909              OP_CLASS and OP_NCLASS occurs when a data character outside the range is
       
   910              encountered.
       
   911              
       
   912              First, look past the end of the item to see if there is repeat information
       
   913              following. Then obey similar code to character type repeats - written out
       
   914              again for speed. */
       
   915                 
       
   916             BEGIN_OPCODE(NCLASS):
       
   917             BEGIN_OPCODE(CLASS):
       
   918                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1;                /* Save for matching */
       
   919                 stack.currentFrame->args.instructionPtr += 33;                     /* Advance past the item */
       
   920                 
       
   921                 switch (*stack.currentFrame->args.instructionPtr) {
       
   922                     case OP_CRSTAR:
       
   923                     case OP_CRMINSTAR:
       
   924                     case OP_CRPLUS:
       
   925                     case OP_CRMINPLUS:
       
   926                     case OP_CRQUERY:
       
   927                     case OP_CRMINQUERY:
       
   928                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
       
   929                         break;
       
   930                         
       
   931                     case OP_CRRANGE:
       
   932                     case OP_CRMINRANGE:
       
   933                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
       
   934                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
       
   935                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
       
   936                         if (stack.currentFrame->locals.max == 0)
       
   937                             stack.currentFrame->locals.max = INT_MAX;
       
   938                         stack.currentFrame->args.instructionPtr += 5;
       
   939                         break;
       
   940                         
       
   941                     default:               /* No repeat follows */
       
   942                         min = stack.currentFrame->locals.max = 1;
       
   943                         break;
       
   944                 }
       
   945                 
       
   946                 /* First, ensure the minimum number of matches are present. */
       
   947                 
       
   948                 for (int i = 1; i <= min; i++) {
       
   949                     if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
   950                         RRETURN_NO_MATCH;
       
   951                     int c = *stack.currentFrame->args.subjectPtr++;
       
   952                     if (c > 255) {
       
   953                         if (stack.currentFrame->locals.data[-1] == OP_CLASS)
       
   954                             RRETURN_NO_MATCH;
       
   955                     } else {
       
   956                         if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
       
   957                             RRETURN_NO_MATCH;
       
   958                     }
       
   959                 }
       
   960                 
       
   961                 /* If max == min we can continue with the main loop without the
       
   962                  need to recurse. */
       
   963                 
       
   964                 if (min == stack.currentFrame->locals.max)
       
   965                     NEXT_OPCODE;      
       
   966                 
       
   967                 /* If minimizing, keep testing the rest of the expression and advancing
       
   968                  the pointer while it matches the class. */
       
   969                 if (minimize) {
       
   970                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
       
   971                         RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
   972                         if (isMatch)
       
   973                             RRETURN;
       
   974                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
       
   975                             RRETURN;
       
   976                         int c = *stack.currentFrame->args.subjectPtr++;
       
   977                         if (c > 255) {
       
   978                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
       
   979                                 RRETURN;
       
   980                         } else {
       
   981                             if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
       
   982                                 RRETURN;
       
   983                         }
       
   984                     }
       
   985                     /* Control never reaches here */
       
   986                 }
       
   987                 /* If maximizing, find the longest possible run, then work backwards. */
       
   988                 else {
       
   989                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
       
   990                     
       
   991                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
   992                         if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
   993                             break;
       
   994                         int c = *stack.currentFrame->args.subjectPtr;
       
   995                         if (c > 255) {
       
   996                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
       
   997                                 break;
       
   998                         } else {
       
   999                             if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
       
  1000                                 break;
       
  1001                         }
       
  1002                         ++stack.currentFrame->args.subjectPtr;
       
  1003                     }
       
  1004                     for (;;) {
       
  1005                         RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1006                         if (isMatch)
       
  1007                             RRETURN;
       
  1008                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
       
  1009                             break;        /* Stop if tried at original pos */
       
  1010                     }
       
  1011                     
       
  1012                     RRETURN;
       
  1013                 }
       
  1014                 /* Control never reaches here */
       
  1015                 
       
  1016             /* Match an extended character class. */
       
  1017                 
       
  1018             BEGIN_OPCODE(XCLASS):
       
  1019                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE;                /* Save for matching */
       
  1020                 stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);                      /* Advance past the item */
       
  1021                 
       
  1022                 switch (*stack.currentFrame->args.instructionPtr) {
       
  1023                     case OP_CRSTAR:
       
  1024                     case OP_CRMINSTAR:
       
  1025                     case OP_CRPLUS:
       
  1026                     case OP_CRMINPLUS:
       
  1027                     case OP_CRQUERY:
       
  1028                     case OP_CRMINQUERY:
       
  1029                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
       
  1030                         break;
       
  1031                         
       
  1032                     case OP_CRRANGE:
       
  1033                     case OP_CRMINRANGE:
       
  1034                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
       
  1035                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
       
  1036                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
       
  1037                         if (stack.currentFrame->locals.max == 0)
       
  1038                             stack.currentFrame->locals.max = INT_MAX;
       
  1039                         stack.currentFrame->args.instructionPtr += 5;
       
  1040                         break;
       
  1041                         
       
  1042                     default:               /* No repeat follows */
       
  1043                         min = stack.currentFrame->locals.max = 1;
       
  1044                 }
       
  1045                 
       
  1046                 /* First, ensure the minimum number of matches are present. */
       
  1047                 
       
  1048                 for (int i = 1; i <= min; i++) {
       
  1049                     if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1050                         RRETURN_NO_MATCH;
       
  1051                     int c = *stack.currentFrame->args.subjectPtr++;
       
  1052                     if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
       
  1053                         RRETURN_NO_MATCH;
       
  1054                 }
       
  1055                 
       
  1056                 /* If max == min we can continue with the main loop without the
       
  1057                  need to recurse. */
       
  1058                 
       
  1059                 if (min == stack.currentFrame->locals.max)
       
  1060                     NEXT_OPCODE;
       
  1061                 
       
  1062                 /* If minimizing, keep testing the rest of the expression and advancing
       
  1063                  the pointer while it matches the class. */
       
  1064                 
       
  1065                 if (minimize) {
       
  1066                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
       
  1067                         RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1068                         if (isMatch)
       
  1069                             RRETURN;
       
  1070                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1071                             RRETURN;
       
  1072                         int c = *stack.currentFrame->args.subjectPtr++;
       
  1073                         if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
       
  1074                             RRETURN;
       
  1075                     }
       
  1076                     /* Control never reaches here */
       
  1077                 }
       
  1078                 
       
  1079                 /* If maximizing, find the longest possible run, then work backwards. */
       
  1080                 
       
  1081                 else {
       
  1082                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
       
  1083                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1084                         if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1085                             break;
       
  1086                         int c = *stack.currentFrame->args.subjectPtr;
       
  1087                         if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
       
  1088                             break;
       
  1089                         ++stack.currentFrame->args.subjectPtr;
       
  1090                     }
       
  1091                     for(;;) {
       
  1092                         RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1093                         if (isMatch)
       
  1094                             RRETURN;
       
  1095                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
       
  1096                             break;        /* Stop if tried at original pos */
       
  1097                     }
       
  1098                     RRETURN;
       
  1099                 }
       
  1100                 
       
  1101                 /* Control never reaches here */
       
  1102                 
       
  1103             /* Match a single character, casefully */
       
  1104                 
       
  1105             BEGIN_OPCODE(CHAR):
       
  1106                 stack.currentFrame->locals.length = 1;
       
  1107                 stack.currentFrame->args.instructionPtr++;
       
  1108                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
       
  1109                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
       
  1110                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1111                     RRETURN_NO_MATCH;
       
  1112                 if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++)
       
  1113                     RRETURN_NO_MATCH;
       
  1114                 NEXT_OPCODE;
       
  1115                 
       
  1116             /* Match a single character, caselessly */
       
  1117                 
       
  1118             BEGIN_OPCODE(CHAR_IGNORING_CASE): {
       
  1119                 stack.currentFrame->locals.length = 1;
       
  1120                 stack.currentFrame->args.instructionPtr++;
       
  1121                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
       
  1122                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
       
  1123                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1124                     RRETURN_NO_MATCH;
       
  1125                 int dc = *stack.currentFrame->args.subjectPtr++;
       
  1126                 if (stack.currentFrame->locals.fc != dc && jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) != dc)
       
  1127                     RRETURN_NO_MATCH;
       
  1128                 NEXT_OPCODE;
       
  1129             }
       
  1130                 
       
  1131             /* Match a single ASCII character. */
       
  1132                 
       
  1133             BEGIN_OPCODE(ASCII_CHAR):
       
  1134                 if (md.endSubject == stack.currentFrame->args.subjectPtr)
       
  1135                     RRETURN_NO_MATCH;
       
  1136                 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
       
  1137                     RRETURN_NO_MATCH;
       
  1138                 ++stack.currentFrame->args.subjectPtr;
       
  1139                 stack.currentFrame->args.instructionPtr += 2;
       
  1140                 NEXT_OPCODE;
       
  1141                 
       
  1142             /* Match one of two cases of an ASCII letter. */
       
  1143                 
       
  1144             BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
       
  1145                 if (md.endSubject == stack.currentFrame->args.subjectPtr)
       
  1146                     RRETURN_NO_MATCH;
       
  1147                 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
       
  1148                     RRETURN_NO_MATCH;
       
  1149                 ++stack.currentFrame->args.subjectPtr;
       
  1150                 stack.currentFrame->args.instructionPtr += 2;
       
  1151                 NEXT_OPCODE;
       
  1152                 
       
  1153             /* Match a single character repeatedly; different opcodes share code. */
       
  1154                 
       
  1155             BEGIN_OPCODE(EXACT):
       
  1156                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
       
  1157                 minimize = false;
       
  1158                 stack.currentFrame->args.instructionPtr += 3;
       
  1159                 goto REPEATCHAR;
       
  1160                 
       
  1161             BEGIN_OPCODE(UPTO):
       
  1162             BEGIN_OPCODE(MINUPTO):
       
  1163                 min = 0;
       
  1164                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
       
  1165                 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
       
  1166                 stack.currentFrame->args.instructionPtr += 3;
       
  1167                 goto REPEATCHAR;
       
  1168                 
       
  1169             BEGIN_OPCODE(STAR):
       
  1170             BEGIN_OPCODE(MINSTAR):
       
  1171             BEGIN_OPCODE(PLUS):
       
  1172             BEGIN_OPCODE(MINPLUS):
       
  1173             BEGIN_OPCODE(QUERY):
       
  1174             BEGIN_OPCODE(MINQUERY):
       
  1175                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
       
  1176                 
       
  1177                 /* Common code for all repeated single-character matches. We can give
       
  1178                  up quickly if there are fewer than the minimum number of characters left in
       
  1179                  the subject. */
       
  1180                 
       
  1181             REPEATCHAR:
       
  1182                 
       
  1183                 stack.currentFrame->locals.length = 1;
       
  1184                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
       
  1185                 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr)
       
  1186                     RRETURN_NO_MATCH;
       
  1187                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
       
  1188                 
       
  1189                 if (stack.currentFrame->locals.fc <= 0xFFFF) {
       
  1190                     othercase = md.ignoreCase ? jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
       
  1191                     
       
  1192                     for (int i = 1; i <= min; i++) {
       
  1193                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
       
  1194                             RRETURN_NO_MATCH;
       
  1195                         ++stack.currentFrame->args.subjectPtr;
       
  1196                     }
       
  1197                     
       
  1198                     if (min == stack.currentFrame->locals.max)
       
  1199                         NEXT_OPCODE;
       
  1200                     
       
  1201                     if (minimize) {
       
  1202                         stack.currentFrame->locals.repeatOthercase = othercase;
       
  1203                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
       
  1204                             RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1205                             if (isMatch)
       
  1206                                 RRETURN;
       
  1207                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1208                                 RRETURN;
       
  1209                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeatOthercase)
       
  1210                                 RRETURN;
       
  1211                             ++stack.currentFrame->args.subjectPtr;
       
  1212                         }
       
  1213                         /* Control never reaches here */
       
  1214                     } else {
       
  1215                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
       
  1216                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1217                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1218                                 break;
       
  1219                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
       
  1220                                 break;
       
  1221                             ++stack.currentFrame->args.subjectPtr;
       
  1222                         }
       
  1223                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
       
  1224                             RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1225                             if (isMatch)
       
  1226                                 RRETURN;
       
  1227                             --stack.currentFrame->args.subjectPtr;
       
  1228                         }
       
  1229                         RRETURN_NO_MATCH;
       
  1230                     }
       
  1231                     /* Control never reaches here */
       
  1232                 } else {
       
  1233                     /* No case on surrogate pairs, so no need to bother with "othercase". */
       
  1234                     
       
  1235                     for (int i = 1; i <= min; i++) {
       
  1236                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
       
  1237                             RRETURN_NO_MATCH;
       
  1238                         stack.currentFrame->args.subjectPtr += 2;
       
  1239                     }
       
  1240                     
       
  1241                     if (min == stack.currentFrame->locals.max)
       
  1242                         NEXT_OPCODE;
       
  1243                     
       
  1244                     if (minimize) {
       
  1245                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
       
  1246                             RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1247                             if (isMatch)
       
  1248                                 RRETURN;
       
  1249                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1250                                 RRETURN;
       
  1251                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
       
  1252                                 RRETURN;
       
  1253                             stack.currentFrame->args.subjectPtr += 2;
       
  1254                         }
       
  1255                         /* Control never reaches here */
       
  1256                     } else {
       
  1257                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
       
  1258                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1259                             if (stack.currentFrame->args.subjectPtr > md.endSubject - 2)
       
  1260                                 break;
       
  1261                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
       
  1262                                 break;
       
  1263                             stack.currentFrame->args.subjectPtr += 2;
       
  1264                         }
       
  1265                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
       
  1266                             RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1267                             if (isMatch)
       
  1268                                 RRETURN;
       
  1269                             stack.currentFrame->args.subjectPtr -= 2;
       
  1270                         }
       
  1271                         RRETURN_NO_MATCH;
       
  1272                     }
       
  1273                     /* Control never reaches here */
       
  1274                 }
       
  1275                 /* Control never reaches here */
       
  1276                 
       
  1277             /* Match a negated single one-byte character. */
       
  1278                 
       
  1279             BEGIN_OPCODE(NOT): {
       
  1280                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1281                     RRETURN_NO_MATCH;
       
  1282                 int b = stack.currentFrame->args.instructionPtr[1];
       
  1283                 int c = *stack.currentFrame->args.subjectPtr++;
       
  1284                 stack.currentFrame->args.instructionPtr += 2;
       
  1285                 if (md.ignoreCase) {
       
  1286                     if (c < 128)
       
  1287                         c = toLowerCase(c);
       
  1288                     if (toLowerCase(b) == c)
       
  1289                         RRETURN_NO_MATCH;
       
  1290                 } else {
       
  1291                     if (b == c)
       
  1292                         RRETURN_NO_MATCH;
       
  1293                 }
       
  1294                 NEXT_OPCODE;
       
  1295             }
       
  1296                 
       
  1297             /* Match a negated single one-byte character repeatedly. This is almost a
       
  1298              repeat of the code for a repeated single character, but I haven't found a
       
  1299              nice way of commoning these up that doesn't require a test of the
       
  1300              positive/negative option for each character match. Maybe that wouldn't add
       
  1301              very much to the time taken, but character matching *is* what this is all
       
  1302              about... */
       
  1303                 
       
  1304             BEGIN_OPCODE(NOTEXACT):
       
  1305                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
       
  1306                 minimize = false;
       
  1307                 stack.currentFrame->args.instructionPtr += 3;
       
  1308                 goto REPEATNOTCHAR;
       
  1309                 
       
  1310             BEGIN_OPCODE(NOTUPTO):
       
  1311             BEGIN_OPCODE(NOTMINUPTO):
       
  1312                 min = 0;
       
  1313                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
       
  1314                 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
       
  1315                 stack.currentFrame->args.instructionPtr += 3;
       
  1316                 goto REPEATNOTCHAR;
       
  1317                 
       
  1318             BEGIN_OPCODE(NOTSTAR):
       
  1319             BEGIN_OPCODE(NOTMINSTAR):
       
  1320             BEGIN_OPCODE(NOTPLUS):
       
  1321             BEGIN_OPCODE(NOTMINPLUS):
       
  1322             BEGIN_OPCODE(NOTQUERY):
       
  1323             BEGIN_OPCODE(NOTMINQUERY):
       
  1324                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
       
  1325                 
       
  1326             /* Common code for all repeated single-byte matches. We can give up quickly
       
  1327              if there are fewer than the minimum number of bytes left in the
       
  1328              subject. */
       
  1329                 
       
  1330             REPEATNOTCHAR:
       
  1331                 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
       
  1332                     RRETURN_NO_MATCH;
       
  1333                 stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
       
  1334                 
       
  1335                 /* The code is duplicated for the caseless and caseful cases, for speed,
       
  1336                  since matching characters is likely to be quite common. First, ensure the
       
  1337                  minimum number of matches are present. If min = max, continue at the same
       
  1338                  level without recursing. Otherwise, if minimizing, keep trying the rest of
       
  1339                  the expression and advancing one matching character if failing, up to the
       
  1340                  maximum. Alternatively, if maximizing, find the maximum number of
       
  1341                  characters and work backwards. */
       
  1342                 
       
  1343                 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
       
  1344                 
       
  1345                 if (md.ignoreCase) {
       
  1346                     if (stack.currentFrame->locals.fc < 128)
       
  1347                         stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
       
  1348                     
       
  1349                     for (int i = 1; i <= min; i++) {
       
  1350                         int d = *stack.currentFrame->args.subjectPtr++;
       
  1351                         if (d < 128)
       
  1352                             d = toLowerCase(d);
       
  1353                         if (stack.currentFrame->locals.fc == d)
       
  1354                             RRETURN_NO_MATCH;
       
  1355                     }
       
  1356                     
       
  1357                     if (min == stack.currentFrame->locals.max)
       
  1358                         NEXT_OPCODE;      
       
  1359                     
       
  1360                     if (minimize) {
       
  1361                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
       
  1362                             RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1363                             if (isMatch)
       
  1364                                 RRETURN;
       
  1365                             int d = *stack.currentFrame->args.subjectPtr++;
       
  1366                             if (d < 128)
       
  1367                                 d = toLowerCase(d);
       
  1368                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
       
  1369                                 RRETURN;
       
  1370                         }
       
  1371                         /* Control never reaches here */
       
  1372                     }
       
  1373                     
       
  1374                     /* Maximize case */
       
  1375                     
       
  1376                     else {
       
  1377                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
       
  1378                         
       
  1379                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1380                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1381                                 break;
       
  1382                             int d = *stack.currentFrame->args.subjectPtr;
       
  1383                             if (d < 128)
       
  1384                                 d = toLowerCase(d);
       
  1385                             if (stack.currentFrame->locals.fc == d)
       
  1386                                 break;
       
  1387                             ++stack.currentFrame->args.subjectPtr;
       
  1388                         }
       
  1389                         for (;;) {
       
  1390                             RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1391                             if (isMatch)
       
  1392                                 RRETURN;
       
  1393                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
       
  1394                                 break;        /* Stop if tried at original pos */
       
  1395                         }
       
  1396                         
       
  1397                         RRETURN;
       
  1398                     }
       
  1399                     /* Control never reaches here */
       
  1400                 }
       
  1401                 
       
  1402                 /* Caseful comparisons */
       
  1403                 
       
  1404                 else {
       
  1405                     for (int i = 1; i <= min; i++) {
       
  1406                         int d = *stack.currentFrame->args.subjectPtr++;
       
  1407                         if (stack.currentFrame->locals.fc == d)
       
  1408                             RRETURN_NO_MATCH;
       
  1409                     }
       
  1410 
       
  1411                     if (min == stack.currentFrame->locals.max)
       
  1412                         NEXT_OPCODE;
       
  1413                     
       
  1414                     if (minimize) {
       
  1415                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
       
  1416                             RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1417                             if (isMatch)
       
  1418                                 RRETURN;
       
  1419                             int d = *stack.currentFrame->args.subjectPtr++;
       
  1420                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
       
  1421                                 RRETURN;
       
  1422                         }
       
  1423                         /* Control never reaches here */
       
  1424                     }
       
  1425                     
       
  1426                     /* Maximize case */
       
  1427                     
       
  1428                     else {
       
  1429                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
       
  1430                         
       
  1431                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1432                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1433                                 break;
       
  1434                             int d = *stack.currentFrame->args.subjectPtr;
       
  1435                             if (stack.currentFrame->locals.fc == d)
       
  1436                                 break;
       
  1437                             ++stack.currentFrame->args.subjectPtr;
       
  1438                         }
       
  1439                         for (;;) {
       
  1440                             RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1441                             if (isMatch)
       
  1442                                 RRETURN;
       
  1443                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
       
  1444                                 break;        /* Stop if tried at original pos */
       
  1445                         }
       
  1446 
       
  1447                         RRETURN;
       
  1448                     }
       
  1449                 }
       
  1450                 /* Control never reaches here */
       
  1451                 
       
  1452             /* Match a single character type repeatedly; several different opcodes
       
  1453              share code. This is very similar to the code for single characters, but we
       
  1454              repeat it in the interests of efficiency. */
       
  1455                 
       
  1456             BEGIN_OPCODE(TYPEEXACT):
       
  1457                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
       
  1458                 minimize = true;
       
  1459                 stack.currentFrame->args.instructionPtr += 3;
       
  1460                 goto REPEATTYPE;
       
  1461                 
       
  1462             BEGIN_OPCODE(TYPEUPTO):
       
  1463             BEGIN_OPCODE(TYPEMINUPTO):
       
  1464                 min = 0;
       
  1465                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
       
  1466                 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
       
  1467                 stack.currentFrame->args.instructionPtr += 3;
       
  1468                 goto REPEATTYPE;
       
  1469                 
       
  1470             BEGIN_OPCODE(TYPESTAR):
       
  1471             BEGIN_OPCODE(TYPEMINSTAR):
       
  1472             BEGIN_OPCODE(TYPEPLUS):
       
  1473             BEGIN_OPCODE(TYPEMINPLUS):
       
  1474             BEGIN_OPCODE(TYPEQUERY):
       
  1475             BEGIN_OPCODE(TYPEMINQUERY):
       
  1476                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
       
  1477                 
       
  1478                 /* Common code for all repeated single character type matches. Note that
       
  1479                  in UTF-8 mode, '.' matches a character of any length, but for the other
       
  1480                  character types, the valid characters are all one-byte long. */
       
  1481                 
       
  1482             REPEATTYPE:
       
  1483                 stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++;      /* Code for the character type */
       
  1484                 
       
  1485                 /* First, ensure the minimum number of matches are present. Use inline
       
  1486                  code for maximizing the speed, and do the type test once at the start
       
  1487                  (i.e. keep it out of the loop). Also we can test that there are at least
       
  1488                  the minimum number of characters before we start. */
       
  1489                 
       
  1490                 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
       
  1491                     RRETURN_NO_MATCH;
       
  1492                 if (min > 0) {
       
  1493                     switch (stack.currentFrame->locals.ctype) {
       
  1494                         case OP_NOT_NEWLINE:
       
  1495                             for (int i = 1; i <= min; i++) {
       
  1496                                 if (isNewline(*stack.currentFrame->args.subjectPtr))
       
  1497                                     RRETURN_NO_MATCH;
       
  1498                                 ++stack.currentFrame->args.subjectPtr;
       
  1499                             }
       
  1500                             break;
       
  1501                             
       
  1502                         case OP_NOT_DIGIT:
       
  1503                             for (int i = 1; i <= min; i++) {
       
  1504                                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
       
  1505                                     RRETURN_NO_MATCH;
       
  1506                                 ++stack.currentFrame->args.subjectPtr;
       
  1507                             }
       
  1508                             break;
       
  1509                             
       
  1510                         case OP_DIGIT:
       
  1511                             for (int i = 1; i <= min; i++) {
       
  1512                                 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr))
       
  1513                                     RRETURN_NO_MATCH;
       
  1514                                 ++stack.currentFrame->args.subjectPtr;
       
  1515                             }
       
  1516                             break;
       
  1517                             
       
  1518                         case OP_NOT_WHITESPACE:
       
  1519                             for (int i = 1; i <= min; i++) {
       
  1520                                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
       
  1521                                     RRETURN_NO_MATCH;
       
  1522                                 ++stack.currentFrame->args.subjectPtr;
       
  1523                             }
       
  1524                             break;
       
  1525                             
       
  1526                         case OP_WHITESPACE:
       
  1527                             for (int i = 1; i <= min; i++) {
       
  1528                                 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr))
       
  1529                                     RRETURN_NO_MATCH;
       
  1530                                 ++stack.currentFrame->args.subjectPtr;
       
  1531                             }
       
  1532                             break;
       
  1533                             
       
  1534                         case OP_NOT_WORDCHAR:
       
  1535                             for (int i = 1; i <= min; i++) {
       
  1536                                 if (isWordChar(*stack.currentFrame->args.subjectPtr))
       
  1537                                     RRETURN_NO_MATCH;
       
  1538                                 ++stack.currentFrame->args.subjectPtr;
       
  1539                             }
       
  1540                             break;
       
  1541                             
       
  1542                         case OP_WORDCHAR:
       
  1543                             for (int i = 1; i <= min; i++) {
       
  1544                                 if (!isWordChar(*stack.currentFrame->args.subjectPtr))
       
  1545                                     RRETURN_NO_MATCH;
       
  1546                                 ++stack.currentFrame->args.subjectPtr;
       
  1547                             }
       
  1548                             break;
       
  1549                             
       
  1550                         default:
       
  1551                             ASSERT_NOT_REACHED();
       
  1552                             return matchError(JSRegExpErrorInternal, stack);
       
  1553                     }  /* End switch(stack.currentFrame->locals.ctype) */
       
  1554                 }
       
  1555                 
       
  1556                 /* If min = max, continue at the same level without recursing */
       
  1557                 
       
  1558                 if (min == stack.currentFrame->locals.max)
       
  1559                     NEXT_OPCODE;    
       
  1560                 
       
  1561                 /* If minimizing, we have to test the rest of the pattern before each
       
  1562                  subsequent match. */
       
  1563                 
       
  1564                 if (minimize) {
       
  1565                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
       
  1566                         RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1567                         if (isMatch)
       
  1568                             RRETURN;
       
  1569                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1570                             RRETURN;
       
  1571                         
       
  1572                         int c = *stack.currentFrame->args.subjectPtr++;
       
  1573                         switch (stack.currentFrame->locals.ctype) {
       
  1574                             case OP_NOT_NEWLINE:
       
  1575                                 if (isNewline(c))
       
  1576                                     RRETURN;
       
  1577                                 break;
       
  1578                                 
       
  1579                             case OP_NOT_DIGIT:
       
  1580                                 if (isASCIIDigit(c))
       
  1581                                     RRETURN;
       
  1582                                 break;
       
  1583                                 
       
  1584                             case OP_DIGIT:
       
  1585                                 if (!isASCIIDigit(c))
       
  1586                                     RRETURN;
       
  1587                                 break;
       
  1588                                 
       
  1589                             case OP_NOT_WHITESPACE:
       
  1590                                 if (isSpaceChar(c))
       
  1591                                     RRETURN;
       
  1592                                 break;
       
  1593                                 
       
  1594                             case OP_WHITESPACE:
       
  1595                                 if (!isSpaceChar(c))
       
  1596                                     RRETURN;
       
  1597                                 break;
       
  1598                                 
       
  1599                             case OP_NOT_WORDCHAR:
       
  1600                                 if (isWordChar(c))
       
  1601                                     RRETURN;
       
  1602                                 break;
       
  1603                                 
       
  1604                             case OP_WORDCHAR:
       
  1605                                 if (!isWordChar(c))
       
  1606                                     RRETURN;
       
  1607                                 break;
       
  1608                                 
       
  1609                             default:
       
  1610                                 ASSERT_NOT_REACHED();
       
  1611                                 return matchError(JSRegExpErrorInternal, stack);
       
  1612                         }
       
  1613                     }
       
  1614                     /* Control never reaches here */
       
  1615                 }
       
  1616                 
       
  1617                 /* If maximizing it is worth using inline code for speed, doing the type
       
  1618                  test once at the start (i.e. keep it out of the loop). */
       
  1619                 
       
  1620                 else {
       
  1621                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;  /* Remember where we started */
       
  1622                     
       
  1623                     switch (stack.currentFrame->locals.ctype) {
       
  1624                         case OP_NOT_NEWLINE:
       
  1625                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1626                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject || isNewline(*stack.currentFrame->args.subjectPtr))
       
  1627                                     break;
       
  1628                                 stack.currentFrame->args.subjectPtr++;
       
  1629                             }
       
  1630                             break;
       
  1631                             
       
  1632                         case OP_NOT_DIGIT:
       
  1633                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1634                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1635                                     break;
       
  1636                                 int c = *stack.currentFrame->args.subjectPtr;
       
  1637                                 if (isASCIIDigit(c))
       
  1638                                     break;
       
  1639                                 ++stack.currentFrame->args.subjectPtr;
       
  1640                             }
       
  1641                             break;
       
  1642                             
       
  1643                         case OP_DIGIT:
       
  1644                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1645                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1646                                     break;
       
  1647                                 int c = *stack.currentFrame->args.subjectPtr;
       
  1648                                 if (!isASCIIDigit(c))
       
  1649                                     break;
       
  1650                                 ++stack.currentFrame->args.subjectPtr;
       
  1651                             }
       
  1652                             break;
       
  1653                             
       
  1654                         case OP_NOT_WHITESPACE:
       
  1655                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1656                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1657                                     break;
       
  1658                                 int c = *stack.currentFrame->args.subjectPtr;
       
  1659                                 if (isSpaceChar(c))
       
  1660                                     break;
       
  1661                                 ++stack.currentFrame->args.subjectPtr;
       
  1662                             }
       
  1663                             break;
       
  1664                             
       
  1665                         case OP_WHITESPACE:
       
  1666                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1667                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1668                                     break;
       
  1669                                 int c = *stack.currentFrame->args.subjectPtr;
       
  1670                                 if (!isSpaceChar(c))
       
  1671                                     break;
       
  1672                                 ++stack.currentFrame->args.subjectPtr;
       
  1673                             }
       
  1674                             break;
       
  1675                             
       
  1676                         case OP_NOT_WORDCHAR:
       
  1677                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1678                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1679                                     break;
       
  1680                                 int c = *stack.currentFrame->args.subjectPtr;
       
  1681                                 if (isWordChar(c))
       
  1682                                     break;
       
  1683                                 ++stack.currentFrame->args.subjectPtr;
       
  1684                             }
       
  1685                             break;
       
  1686                             
       
  1687                         case OP_WORDCHAR:
       
  1688                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
       
  1689                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
       
  1690                                     break;
       
  1691                                 int c = *stack.currentFrame->args.subjectPtr;
       
  1692                                 if (!isWordChar(c))
       
  1693                                     break;
       
  1694                                 ++stack.currentFrame->args.subjectPtr;
       
  1695                             }
       
  1696                             break;
       
  1697                             
       
  1698                         default:
       
  1699                             ASSERT_NOT_REACHED();
       
  1700                             return matchError(JSRegExpErrorInternal, stack);
       
  1701                     }
       
  1702                     
       
  1703                     /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
       
  1704                     
       
  1705                     for (;;) {
       
  1706                         RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
       
  1707                         if (isMatch)
       
  1708                             RRETURN;
       
  1709                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
       
  1710                             break;        /* Stop if tried at original pos */
       
  1711                     }
       
  1712                     
       
  1713                     /* Get here if we can't make it match with any permitted repetitions */
       
  1714                     
       
  1715                     RRETURN;
       
  1716                 }
       
  1717                 /* Control never reaches here */
       
  1718                 
       
  1719             BEGIN_OPCODE(CRMINPLUS):
       
  1720             BEGIN_OPCODE(CRMINQUERY):
       
  1721             BEGIN_OPCODE(CRMINRANGE):
       
  1722             BEGIN_OPCODE(CRMINSTAR):
       
  1723             BEGIN_OPCODE(CRPLUS):
       
  1724             BEGIN_OPCODE(CRQUERY):
       
  1725             BEGIN_OPCODE(CRRANGE):
       
  1726             BEGIN_OPCODE(CRSTAR):
       
  1727                 ASSERT_NOT_REACHED();
       
  1728                 return matchError(JSRegExpErrorInternal, stack);
       
  1729                 
       
  1730 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
       
  1731             CAPTURING_BRACKET:
       
  1732 #else
       
  1733             default:
       
  1734 #endif
       
  1735                 /* Opening capturing bracket. If there is space in the offset vector, save
       
  1736                  the current subject position in the working slot at the top of the vector. We
       
  1737                  mustn't change the current values of the data slot, because they may be set
       
  1738                  from a previous iteration of this group, and be referred to by a reference
       
  1739                  inside the group.
       
  1740                  
       
  1741                  If the bracket fails to match, we need to restore this value and also the
       
  1742                  values of the final offsets, in case they were set by a previous iteration of
       
  1743                  the same bracket.
       
  1744                  
       
  1745                  If there isn't enough space in the offset vector, treat this as if it were a
       
  1746                  non-capturing bracket. Don't worry about setting the flag for the error case
       
  1747                  here; that is handled in the code for KET. */
       
  1748                 
       
  1749                 ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
       
  1750                 
       
  1751                 stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA;
       
  1752                 
       
  1753                 /* For extended extraction brackets (large number), we have to fish out the
       
  1754                  number from a dummy opcode at the start. */
       
  1755                 
       
  1756                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
       
  1757                     stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 2 + LINK_SIZE);
       
  1758                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
       
  1759                 
       
  1760 #ifdef DEBUG
       
  1761                 printf("start bracket %d subject=", stack.currentFrame->locals.number);
       
  1762                 pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
       
  1763                 printf("\n");
       
  1764 #endif
       
  1765                 
       
  1766                 if (stack.currentFrame->locals.offset < md.offsetMax) {
       
  1767                     stack.currentFrame->locals.saveOffset1 = md.offsetVector[stack.currentFrame->locals.offset];
       
  1768                     stack.currentFrame->locals.saveOffset2 = md.offsetVector[stack.currentFrame->locals.offset + 1];
       
  1769                     stack.currentFrame->locals.saveOffset3 = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
       
  1770                     
       
  1771                     DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.saveOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.saveOffset3));
       
  1772                     md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject;
       
  1773                     
       
  1774                     do {
       
  1775                         RECURSIVE_MATCH_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
       
  1776                         if (isMatch)
       
  1777                             RRETURN;
       
  1778                         stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
       
  1779                     } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
       
  1780                     
       
  1781                     DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
       
  1782                     
       
  1783                     md.offsetVector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.saveOffset1;
       
  1784                     md.offsetVector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.saveOffset2;
       
  1785                     md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.saveOffset3;
       
  1786                     
       
  1787                     RRETURN;
       
  1788                 }
       
  1789                 
       
  1790                 /* Insufficient room for saving captured contents */
       
  1791                 
       
  1792                 goto NON_CAPTURING_BRACKET;
       
  1793         }
       
  1794         
       
  1795         /* Do not stick any code in here without much thought; it is assumed
       
  1796          that "continue" in the code above comes out to here to repeat the main
       
  1797          loop. */
       
  1798         
       
  1799     } /* End of main loop */
       
  1800     
       
  1801     ASSERT_NOT_REACHED();
       
  1802     
       
  1803 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
       
  1804     
       
  1805 RRETURN_SWITCH:
       
  1806     switch (stack.currentFrame->returnLocation) {
       
  1807         case 0: goto RETURN;
       
  1808         case 1: goto RRETURN_1;
       
  1809         case 2: goto RRETURN_2;
       
  1810         case 6: goto RRETURN_6;
       
  1811         case 7: goto RRETURN_7;
       
  1812         case 14: goto RRETURN_14;
       
  1813         case 15: goto RRETURN_15;
       
  1814         case 16: goto RRETURN_16;
       
  1815         case 17: goto RRETURN_17;
       
  1816         case 18: goto RRETURN_18;
       
  1817         case 19: goto RRETURN_19;
       
  1818         case 20: goto RRETURN_20;
       
  1819         case 21: goto RRETURN_21;
       
  1820         case 22: goto RRETURN_22;
       
  1821         case 24: goto RRETURN_24;
       
  1822         case 26: goto RRETURN_26;
       
  1823         case 27: goto RRETURN_27;
       
  1824         case 28: goto RRETURN_28;
       
  1825         case 29: goto RRETURN_29;
       
  1826         case 30: goto RRETURN_30;
       
  1827         case 31: goto RRETURN_31;
       
  1828         case 38: goto RRETURN_38;
       
  1829         case 40: goto RRETURN_40;
       
  1830         case 42: goto RRETURN_42;
       
  1831         case 44: goto RRETURN_44;
       
  1832         case 48: goto RRETURN_48;
       
  1833         case 52: goto RRETURN_52;
       
  1834     }
       
  1835     
       
  1836     ASSERT_NOT_REACHED();
       
  1837     return matchError(JSRegExpErrorInternal, stack);
       
  1838     
       
  1839 #endif
       
  1840     
       
  1841 RETURN:
       
  1842     return isMatch;
       
  1843 }
       
  1844 
       
  1845 
       
  1846 /*************************************************
       
  1847 *         Execute a Regular Expression           *
       
  1848 *************************************************/
       
  1849 
       
  1850 /* This function applies a compiled re to a subject string and picks out
       
  1851 portions of the string if it matches. Two elements in the vector are set for
       
  1852 each substring: the offsets to the start and end of the substring.
       
  1853 
       
  1854 Arguments:
       
  1855   re              points to the compiled expression
       
  1856   extra_data      points to extra data or is NULL
       
  1857   subject         points to the subject string
       
  1858   length          length of subject string (may contain binary zeros)
       
  1859   start_offset    where to start in the subject string
       
  1860   options         option bits
       
  1861   offsets         points to a vector of ints to be filled in with offsets
       
  1862   offsetCount     the number of elements in the vector
       
  1863 
       
  1864 Returns:          > 0 => success; value is the number of elements filled in
       
  1865                   = 0 => success, but offsets is not big enough
       
  1866                    -1 => failed to match
       
  1867                  < -1 => some kind of unexpected problem
       
  1868 */
       
  1869 
       
  1870 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int firstByte, bool firstByteIsCaseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
       
  1871 {
       
  1872     // If firstByte is set, try scanning to the first instance of that byte
       
  1873     // no need to try and match against any earlier part of the subject string.
       
  1874     if (firstByte >= 0) {
       
  1875         UChar firstChar = firstByte;
       
  1876         if (firstByteIsCaseless)
       
  1877             while (subjectPtr < endSubject) {
       
  1878                 int c = *subjectPtr;
       
  1879                 if (c > 127)
       
  1880                     break;
       
  1881                 if (toLowerCase(c) == firstChar)
       
  1882                     break;
       
  1883                 subjectPtr++;
       
  1884             }
       
  1885         else {
       
  1886             while (subjectPtr < endSubject && *subjectPtr != firstChar)
       
  1887                 subjectPtr++;
       
  1888         }
       
  1889     } else if (useMultiLineFirstCharOptimization) {
       
  1890         /* Or to just after \n for a multiline match if possible */
       
  1891         // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07
       
  1892         if (subjectPtr > originalSubjectStart) {
       
  1893             while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
       
  1894                 subjectPtr++;
       
  1895         }
       
  1896     }
       
  1897 }
       
  1898 
       
  1899 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int reqByte, int reqByte2, bool reqByteIsCaseless, bool hasFirstByte, const UChar*& reqBytePtr)
       
  1900 {
       
  1901     /* If reqByte is set, we know that that character must appear in the subject
       
  1902      for the match to succeed. If the first character is set, reqByte must be
       
  1903      later in the subject; otherwise the test starts at the match point. This
       
  1904      optimization can save a huge amount of backtracking in patterns with nested
       
  1905      unlimited repeats that aren't going to match. Writing separate code for
       
  1906      cased/caseless versions makes it go faster, as does using an autoincrement
       
  1907      and backing off on a match.
       
  1908      
       
  1909      HOWEVER: when the subject string is very, very long, searching to its end can
       
  1910      take a long time, and give bad performance on quite ordinary patterns. This
       
  1911      showed up when somebody was matching /^C/ on a 32-megabyte string... so we
       
  1912      don't do this when the string is sufficiently long.
       
  1913     */
       
  1914 
       
  1915     if (reqByte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
       
  1916         const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
       
  1917 
       
  1918         /* We don't need to repeat the search if we haven't yet reached the
       
  1919          place we found it at last time. */
       
  1920 
       
  1921         if (p > reqBytePtr) {
       
  1922             if (reqByteIsCaseless) {
       
  1923                 while (p < endSubject) {
       
  1924                     int pp = *p++;
       
  1925                     if (pp == reqByte || pp == reqByte2) {
       
  1926                         p--;
       
  1927                         break;
       
  1928                     }
       
  1929                 }
       
  1930             } else {
       
  1931                 while (p < endSubject) {
       
  1932                     if (*p++ == reqByte) {
       
  1933                         p--;
       
  1934                         break;
       
  1935                     }
       
  1936                 }
       
  1937             }
       
  1938 
       
  1939             /* If we can't find the required character, break the matching loop */
       
  1940 
       
  1941             if (p >= endSubject)
       
  1942                 return true;
       
  1943 
       
  1944             /* If we have found the required character, save the point where we
       
  1945              found it, so that we don't search again next time round the loop if
       
  1946              the start hasn't passed this character yet. */
       
  1947 
       
  1948             reqBytePtr = p;
       
  1949         }
       
  1950     }
       
  1951     return false;
       
  1952 }
       
  1953 
       
  1954 int jsRegExpExecute(const JSRegExp* re,
       
  1955                     const UChar* subject, int length, int start_offset, int* offsets,
       
  1956                     int offsetCount)
       
  1957 {
       
  1958     ASSERT(re);
       
  1959     ASSERT(subject || !length);
       
  1960     ASSERT(offsetCount >= 0);
       
  1961     ASSERT(offsets || offsetCount == 0);
       
  1962 
       
  1963     HistogramTimeLogger logger(re);
       
  1964 
       
  1965     MatchData matchBlock;
       
  1966     matchBlock.startSubject = subject;
       
  1967     matchBlock.endSubject = matchBlock.startSubject + length;
       
  1968     const UChar* endSubject = matchBlock.endSubject;
       
  1969     
       
  1970     matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
       
  1971     matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
       
  1972     
       
  1973     /* If the expression has got more back references than the offsets supplied can
       
  1974      hold, we get a temporary chunk of working store to use during the matching.
       
  1975      Otherwise, we can use the vector supplied, rounding down its size to a multiple
       
  1976      of 3. */
       
  1977     
       
  1978     int ocount = offsetCount - (offsetCount % 3);
       
  1979     
       
  1980     // FIXME: This is lame that we have to second-guess our caller here.
       
  1981     // The API should change to either fail-hard when we don't have enough offset space
       
  1982     // or that we shouldn't ask our callers to pre-allocate in the first place.
       
  1983     bool usingTemporaryOffsets = false;
       
  1984     if (re->topBackref > 0 && re->topBackref >= ocount/3) {
       
  1985         ocount = re->topBackref * 3 + 3;
       
  1986         matchBlock.offsetVector = new int[ocount];
       
  1987         if (!matchBlock.offsetVector)
       
  1988             return JSRegExpErrorNoMemory;
       
  1989         usingTemporaryOffsets = true;
       
  1990     } else
       
  1991         matchBlock.offsetVector = offsets;
       
  1992     
       
  1993     matchBlock.offsetEnd = ocount;
       
  1994     matchBlock.offsetMax = (2*ocount)/3;
       
  1995     matchBlock.offsetOverflow = false;
       
  1996     
       
  1997     /* Compute the minimum number of offsets that we need to reset each time. Doing
       
  1998      this makes a huge difference to execution time when there aren't many brackets
       
  1999      in the pattern. */
       
  2000     
       
  2001     int resetCount = 2 + re->topBracket * 2;
       
  2002     if (resetCount > offsetCount)
       
  2003         resetCount = ocount;
       
  2004     
       
  2005     /* Reset the working variable associated with each extraction. These should
       
  2006      never be used unless previously set, but they get saved and restored, and so we
       
  2007      initialize them to avoid reading uninitialized locations. */
       
  2008     
       
  2009     if (matchBlock.offsetVector) {
       
  2010         int* iptr = matchBlock.offsetVector + ocount;
       
  2011         int* iend = iptr - resetCount/2 + 1;
       
  2012         while (--iptr >= iend)
       
  2013             *iptr = -1;
       
  2014     }
       
  2015     
       
  2016     /* Set up the first character to match, if available. The firstByte value is
       
  2017      never set for an anchored regular expression, but the anchoring may be forced
       
  2018      at run time, so we have to test for anchoring. The first char may be unset for
       
  2019      an unanchored pattern, of course. If there's no first char and the pattern was
       
  2020      studied, there may be a bitmap of possible first characters. */
       
  2021     
       
  2022     bool firstByteIsCaseless = false;
       
  2023     int firstByte = -1;
       
  2024     if (re->options & UseFirstByteOptimizationOption) {
       
  2025         firstByte = re->firstByte & 255;
       
  2026         if ((firstByteIsCaseless = (re->firstByte & REQ_IGNORE_CASE)))
       
  2027             firstByte = toLowerCase(firstByte);
       
  2028     }
       
  2029     
       
  2030     /* For anchored or unanchored matches, there may be a "last known required
       
  2031      character" set. */
       
  2032     
       
  2033     bool reqByteIsCaseless = false;
       
  2034     int reqByte = -1;
       
  2035     int reqByte2 = -1;
       
  2036     if (re->options & UseRequiredByteOptimizationOption) {
       
  2037         reqByte = re->reqByte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
       
  2038         reqByteIsCaseless = (re->reqByte & REQ_IGNORE_CASE);
       
  2039         reqByte2 = flipCase(reqByte);
       
  2040     }
       
  2041     
       
  2042     /* Loop for handling unanchored repeated matching attempts; for anchored regexs
       
  2043      the loop runs just once. */
       
  2044     
       
  2045     const UChar* startMatch = subject + start_offset;
       
  2046     const UChar* reqBytePtr = startMatch - 1;
       
  2047     bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
       
  2048     
       
  2049     do {
       
  2050         /* Reset the maximum number of extractions we might see. */
       
  2051         if (matchBlock.offsetVector) {
       
  2052             int* iptr = matchBlock.offsetVector;
       
  2053             int* iend = iptr + resetCount;
       
  2054             while (iptr < iend)
       
  2055                 *iptr++ = -1;
       
  2056         }
       
  2057         
       
  2058         tryFirstByteOptimization(startMatch, endSubject, firstByte, firstByteIsCaseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
       
  2059         if (tryRequiredByteOptimization(startMatch, endSubject, reqByte, reqByte2, reqByteIsCaseless, firstByte >= 0, reqBytePtr))
       
  2060             break;
       
  2061                 
       
  2062         /* When a match occurs, substrings will be set for all internal extractions;
       
  2063          we just need to set up the whole thing as substring 0 before returning. If
       
  2064          there were too many extractions, set the return code to zero. In the case
       
  2065          where we had to get some local store to hold offsets for backreferences, copy
       
  2066          those back references that we can. In this case there need not be overflow
       
  2067          if certain parts of the pattern were not used. */
       
  2068         
       
  2069         /* The code starts after the JSRegExp block and the capture name table. */
       
  2070         const unsigned char* start_code = (const unsigned char*)(re + 1);
       
  2071         
       
  2072         int returnCode = match(startMatch, start_code, 2, matchBlock);
       
  2073         
       
  2074         /* When the result is no match, advance the pointer to the next character
       
  2075          and continue. */
       
  2076         if (returnCode == 0) {
       
  2077             startMatch++;
       
  2078             continue;
       
  2079         }
       
  2080 
       
  2081         if (returnCode != 1) {
       
  2082             ASSERT(returnCode == JSRegExpErrorHitLimit || returnCode == JSRegExpErrorNoMemory);
       
  2083             DPRINTF((">>>> error: returning %d\n", returnCode));
       
  2084             return returnCode;
       
  2085         }
       
  2086         
       
  2087         /* We have a match! Copy the offset information from temporary store if
       
  2088          necessary */
       
  2089         
       
  2090         if (usingTemporaryOffsets) {
       
  2091             if (offsetCount >= 4) {
       
  2092                 memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetCount - 2) * sizeof(int));
       
  2093                 DPRINTF(("Copied offsets from temporary memory\n"));
       
  2094             }
       
  2095             if (matchBlock.endOffsetTop > offsetCount)
       
  2096                 matchBlock.offsetOverflow = true;
       
  2097             
       
  2098             DPRINTF(("Freeing temporary memory\n"));
       
  2099             delete [] matchBlock.offsetVector;
       
  2100         }
       
  2101         
       
  2102         returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2;
       
  2103         
       
  2104         if (offsetCount < 2)
       
  2105             returnCode = 0;
       
  2106         else {
       
  2107             offsets[0] = startMatch - matchBlock.startSubject;
       
  2108             offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
       
  2109         }
       
  2110         
       
  2111         DPRINTF((">>>> returning %d\n", returnCode));
       
  2112         return returnCode;
       
  2113     } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject);
       
  2114     
       
  2115     if (usingTemporaryOffsets) {
       
  2116         DPRINTF(("Freeing temporary memory\n"));
       
  2117         delete [] matchBlock.offsetVector;
       
  2118     }
       
  2119     
       
  2120     DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
       
  2121     return JSRegExpErrorNoMatch;
       
  2122 }
       
  2123 
       
  2124 #if REGEXP_HISTOGRAM
       
  2125 
       
  2126 class CompareHistogramEntries {
       
  2127 public:
       
  2128     bool operator()(const pair<UString, double>& a, const pair<UString, double>& b)
       
  2129     {
       
  2130         if (a.second == b.second)
       
  2131             return a.first < b.first;
       
  2132         return a.second < b.second;
       
  2133     }
       
  2134 };
       
  2135 
       
  2136 Histogram::~Histogram()
       
  2137 {
       
  2138     Vector<pair<UString, double> > values;
       
  2139     Map::iterator end = times.end();
       
  2140     for (Map::iterator it = times.begin(); it != end; ++it)
       
  2141         values.append(*it);
       
  2142     sort(values.begin(), values.end(), CompareHistogramEntries());
       
  2143     size_t size = values.size();
       
  2144     printf("Regular Expressions, sorted by time spent evaluating them:\n");
       
  2145     for (size_t i = 0; i < size; ++i)
       
  2146         printf("    %f - %s\n", values[size - i - 1].second, values[size - i - 1].first.UTF8String().c_str());
       
  2147 }
       
  2148 
       
  2149 void Histogram::add(const JSRegExp* re, double elapsedTime)
       
  2150 {
       
  2151     UString string(reinterpret_cast<const UChar*>(reinterpret_cast<const char*>(re) + re->stringOffset), re->stringLength);
       
  2152     if (re->options & IgnoreCaseOption && re->options & MatchAcrossMultipleLinesOption)
       
  2153         string += " (multi-line, ignore case)";
       
  2154     else {
       
  2155         if (re->options & IgnoreCaseOption)
       
  2156             string += " (ignore case)";
       
  2157         if (re->options & MatchAcrossMultipleLinesOption)
       
  2158             string += " (multi-line)";
       
  2159     }
       
  2160     pair<Map::iterator, bool> result = times.add(string.rep(), elapsedTime);
       
  2161     if (!result.second)
       
  2162         result.first->second += elapsedTime;
       
  2163 }
       
  2164 
       
  2165 HistogramTimeLogger::HistogramTimeLogger(const JSRegExp* re)
       
  2166     : m_re(re)
       
  2167     , m_startTime(currentTimeMS())
       
  2168 {
       
  2169 }
       
  2170 
       
  2171 HistogramTimeLogger::~HistogramTimeLogger()
       
  2172 {
       
  2173     static Histogram histogram;
       
  2174     histogram.add(m_re, currentTimeMS() - m_startTime);
       
  2175 }
       
  2176 
       
  2177 #endif