JavaScriptCore/runtime/RegExp.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  *  Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
       
     3  *  Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
       
     4  *  Copyright (C) 2009 Torch Mobile, Inc.
       
     5  *
       
     6  *  This library is free software; you can redistribute it and/or
       
     7  *  modify it under the terms of the GNU Lesser General Public
       
     8  *  License as published by the Free Software Foundation; either
       
     9  *  version 2 of the License, or (at your option) any later version.
       
    10  *
       
    11  *  This library is distributed in the hope that it will be useful,
       
    12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    14  *  Lesser General Public License for more details.
       
    15  *
       
    16  *  You should have received a copy of the GNU Lesser General Public
       
    17  *  License along with this library; if not, write to the Free Software
       
    18  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
       
    19  *
       
    20  */
       
    21 
       
    22 #include "config.h"
       
    23 #include "RegExp.h"
       
    24 #include "Lexer.h"
       
    25 #include <stdio.h>
       
    26 #include <stdlib.h>
       
    27 #include <string.h>
       
    28 #include <wtf/Assertions.h>
       
    29 #include <wtf/OwnArrayPtr.h>
       
    30 
       
    31 
       
    32 #if ENABLE(YARR)
       
    33 
       
    34 #include "yarr/RegexCompiler.h"
       
    35 #if ENABLE(YARR_JIT)
       
    36 #include "yarr/RegexJIT.h"
       
    37 #else
       
    38 #include "yarr/RegexInterpreter.h"
       
    39 #endif
       
    40 
       
    41 #else
       
    42 
       
    43 #include <pcre/pcre.h>
       
    44 
       
    45 #endif
       
    46 
       
    47 namespace JSC {
       
    48 
       
    49 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
       
    50     : m_pattern(pattern)
       
    51     , m_flagBits(0)
       
    52     , m_constructionError(0)
       
    53     , m_numSubpatterns(0)
       
    54     , m_lastMatchStart(-1)
       
    55 {
       
    56     // NOTE: The global flag is handled on a case-by-case basis by functions like
       
    57     // String::match and RegExpObject::match.
       
    58     if (!flags.isNull()) {
       
    59         if (flags.find('g') != UString::NotFound)
       
    60             m_flagBits |= Global;
       
    61         if (flags.find('i') != UString::NotFound)
       
    62             m_flagBits |= IgnoreCase;
       
    63         if (flags.find('m') != UString::NotFound)
       
    64             m_flagBits |= Multiline;
       
    65     }
       
    66     compile(globalData);
       
    67 }
       
    68 
       
    69 #if !ENABLE(YARR)
       
    70 RegExp::~RegExp()
       
    71 {
       
    72     jsRegExpFree(m_regExp);
       
    73 }
       
    74 #endif
       
    75 
       
    76 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
       
    77 {
       
    78     return adoptRef(new RegExp(globalData, pattern, flags));
       
    79 }
       
    80 
       
    81 #if ENABLE(YARR)
       
    82 
       
    83 void RegExp::compile(JSGlobalData* globalData)
       
    84 {
       
    85 #if ENABLE(YARR_JIT)
       
    86     Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
       
    87 #else
       
    88     UNUSED_PARAM(globalData);
       
    89     m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
       
    90 #endif
       
    91 }
       
    92 
       
    93 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
       
    94 {
       
    95     if (startOffset < 0)
       
    96         startOffset = 0;
       
    97     if (ovector)
       
    98         ovector->resize(0);
       
    99 
       
   100     if (static_cast<unsigned>(startOffset) > s.size() || s.isNull()) {
       
   101         m_lastMatchString = UString();
       
   102         m_lastMatchStart = -1;
       
   103         m_lastOVector.shrink(0);
       
   104         return -1;
       
   105     }
       
   106     
       
   107     // Perform check to see if this match call is the same as the last match invocation
       
   108     // and if it is return the prior result.
       
   109     if ((startOffset == m_lastMatchStart) && (s.rep() == m_lastMatchString.rep())) {
       
   110         if (ovector)
       
   111             *ovector = m_lastOVector;
       
   112         
       
   113         if (m_lastOVector.isEmpty())
       
   114             return -1;
       
   115 
       
   116         return m_lastOVector.at(0);
       
   117     }
       
   118 
       
   119 #if ENABLE(YARR_JIT)
       
   120     if (!!m_regExpJITCode) {
       
   121 #else
       
   122     if (m_regExpBytecode) {
       
   123 #endif
       
   124         int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
       
   125         int* offsetVector;
       
   126         Vector<int, 32> nonReturnedOvector;
       
   127         if (ovector) {
       
   128             ovector->resize(offsetVectorSize);
       
   129             offsetVector = ovector->data();
       
   130         } else {
       
   131             nonReturnedOvector.resize(offsetVectorSize);
       
   132             offsetVector = nonReturnedOvector.data();
       
   133         }
       
   134 
       
   135         ASSERT(offsetVector);
       
   136         for (int j = 0; j < offsetVectorSize; ++j)
       
   137             offsetVector[j] = -1;
       
   138 
       
   139 #if ENABLE(YARR_JIT)
       
   140         int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize);
       
   141 #else
       
   142         int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
       
   143 #endif
       
   144 
       
   145         if (result < 0) {
       
   146 #ifndef NDEBUG
       
   147             // TODO: define up a symbol, rather than magic -1
       
   148             if (result != -1)
       
   149                 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
       
   150 #endif
       
   151             if (ovector)
       
   152                 ovector->clear();
       
   153         }
       
   154         
       
   155         m_lastMatchString = s;
       
   156         m_lastMatchStart = startOffset;
       
   157 
       
   158         if (ovector)
       
   159             m_lastOVector = *ovector;
       
   160         else
       
   161             m_lastOVector = nonReturnedOvector;
       
   162 
       
   163         return result;
       
   164     }
       
   165 
       
   166     m_lastMatchString = UString();
       
   167     m_lastMatchStart = -1;
       
   168     m_lastOVector.shrink(0);
       
   169 
       
   170     return -1;
       
   171 }
       
   172 
       
   173 #else
       
   174 
       
   175 void RegExp::compile(JSGlobalData*)
       
   176 {
       
   177     m_regExp = 0;
       
   178     JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
       
   179     JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
       
   180     m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
       
   181 }
       
   182 
       
   183 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
       
   184 {
       
   185     if (startOffset < 0)
       
   186         startOffset = 0;
       
   187     if (ovector)
       
   188         ovector->clear();
       
   189 
       
   190     if (static_cast<unsigned>(startOffset) > s.size() || s.isNull())
       
   191         return -1;
       
   192 
       
   193     if (m_regExp) {
       
   194         // Set up the offset vector for the result.
       
   195         // First 2/3 used for result, the last third used by PCRE.
       
   196         int* offsetVector;
       
   197         int offsetVectorSize;
       
   198         int fixedSizeOffsetVector[3];
       
   199         if (!ovector) {
       
   200             offsetVectorSize = 3;
       
   201             offsetVector = fixedSizeOffsetVector;
       
   202         } else {
       
   203             offsetVectorSize = (m_numSubpatterns + 1) * 3;
       
   204             ovector->resize(offsetVectorSize);
       
   205             offsetVector = ovector->data();
       
   206         }
       
   207 
       
   208         int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
       
   209     
       
   210         if (numMatches < 0) {
       
   211 #ifndef NDEBUG
       
   212             if (numMatches != JSRegExpErrorNoMatch)
       
   213                 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
       
   214 #endif
       
   215             if (ovector)
       
   216                 ovector->clear();
       
   217             return -1;
       
   218         }
       
   219 
       
   220         return offsetVector[0];
       
   221     }
       
   222 
       
   223     return -1;
       
   224 }
       
   225 
       
   226 #endif
       
   227 
       
   228 } // namespace JSC