WebCore/xml/XPathParser.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright 2005 Maksim Orlovich <maksim@kde.org>
       
     3  * Copyright (C) 2006 Apple Computer, Inc.
       
     4  * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
       
     5  *
       
     6  * Redistribution and use in source and binary forms, with or without
       
     7  * modification, are permitted provided that the following conditions
       
     8  * are met:
       
     9  * 
       
    10  * 1. Redistributions of source code must retain the above copyright
       
    11  *    notice, this list of conditions and the following disclaimer.
       
    12  * 2. Redistributions in binary form must reproduce the above copyright
       
    13  *    notice, this list of conditions and the following disclaimer in the
       
    14  *    documentation and/or other materials provided with the distribution.
       
    15  * 
       
    16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       
    17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       
    18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       
    19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       
    20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       
    21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       
    22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       
    25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    26  */
       
    27 
       
    28 #include "config.h"
       
    29 #include "XPathParser.h"
       
    30 
       
    31 #if ENABLE(XPATH)
       
    32 
       
    33 #include "ExceptionCode.h"
       
    34 #include "StringHash.h"
       
    35 #include "XPathEvaluator.h"
       
    36 #include "XPathException.h"
       
    37 #include "XPathNSResolver.h"
       
    38 #include "XPathStep.h"
       
    39 #include <wtf/StdLibExtras.h>
       
    40 
       
    41 int xpathyyparse(void*);
       
    42 
       
    43 using namespace WTF;
       
    44 using namespace Unicode;
       
    45 
       
    46 namespace WebCore {
       
    47 namespace XPath {
       
    48 
       
    49 class LocationPath;
       
    50 
       
    51 #include "XPathGrammar.h"    
       
    52 
       
    53 Parser* Parser::currentParser = 0;
       
    54     
       
    55 enum XMLCat { NameStart, NameCont, NotPartOfName };
       
    56 
       
    57 typedef HashMap<String, Step::Axis> AxisNamesMap;
       
    58 
       
    59 static XMLCat charCat(UChar aChar)
       
    60 {
       
    61     //### might need to add some special cases from the XML spec.
       
    62 
       
    63     if (aChar == '_')
       
    64         return NameStart;
       
    65 
       
    66     if (aChar == '.' || aChar == '-')
       
    67         return NameCont;
       
    68     CharCategory category = Unicode::category(aChar);
       
    69     if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
       
    70         return NameStart;
       
    71     if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
       
    72         return NameCont;
       
    73     return NotPartOfName;
       
    74 }
       
    75 
       
    76 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
       
    77 {
       
    78     struct AxisName {
       
    79         const char* name;
       
    80         Step::Axis axis;
       
    81     };
       
    82     const AxisName axisNameList[] = {
       
    83         { "ancestor", Step::AncestorAxis },
       
    84         { "ancestor-or-self", Step::AncestorOrSelfAxis },
       
    85         { "attribute", Step::AttributeAxis },
       
    86         { "child", Step::ChildAxis },
       
    87         { "descendant", Step::DescendantAxis },
       
    88         { "descendant-or-self", Step::DescendantOrSelfAxis },
       
    89         { "following", Step::FollowingAxis },
       
    90         { "following-sibling", Step::FollowingSiblingAxis },
       
    91         { "namespace", Step::NamespaceAxis },
       
    92         { "parent", Step::ParentAxis },
       
    93         { "preceding", Step::PrecedingAxis },
       
    94         { "preceding-sibling", Step::PrecedingSiblingAxis },
       
    95         { "self", Step::SelfAxis }
       
    96     };
       
    97     for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
       
    98         axisNames.set(axisNameList[i].name, axisNameList[i].axis);
       
    99 }
       
   100 
       
   101 static bool isAxisName(const String& name, Step::Axis& type)
       
   102 {
       
   103     DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
       
   104 
       
   105     if (axisNames.isEmpty())
       
   106         setUpAxisNamesMap(axisNames);
       
   107 
       
   108     AxisNamesMap::iterator it = axisNames.find(name);
       
   109     if (it == axisNames.end())
       
   110         return false;
       
   111     type = it->second;
       
   112     return true;
       
   113 }
       
   114 
       
   115 static bool isNodeTypeName(const String& name)
       
   116 {
       
   117     DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
       
   118     if (nodeTypeNames.isEmpty()) {
       
   119         nodeTypeNames.add("comment");
       
   120         nodeTypeNames.add("text");
       
   121         nodeTypeNames.add("processing-instruction");
       
   122         nodeTypeNames.add("node");
       
   123     }
       
   124     return nodeTypeNames.contains(name);
       
   125 }
       
   126 
       
   127 /* Returns whether the last parsed token matches the [32] Operator rule
       
   128  * (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate
       
   129  * the tokens.
       
   130  */
       
   131 bool Parser::isOperatorContext() const
       
   132 {
       
   133     if (m_nextPos == 0)
       
   134         return false;
       
   135 
       
   136     switch (m_lastTokenType) {
       
   137         case AND: case OR: case MULOP:
       
   138         case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
       
   139         case EQOP: case RELOP:
       
   140         case '@': case AXISNAME:   case '(': case '[':
       
   141             return false;
       
   142         default:
       
   143             return true;
       
   144     }
       
   145 }
       
   146 
       
   147 void Parser::skipWS()
       
   148 {
       
   149     while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
       
   150         ++m_nextPos;
       
   151 }
       
   152 
       
   153 Token Parser::makeTokenAndAdvance(int code, int advance)
       
   154 {
       
   155     m_nextPos += advance;
       
   156     return Token(code);
       
   157 }
       
   158 
       
   159 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
       
   160 {
       
   161     m_nextPos += advance;
       
   162     return Token(code, val);
       
   163 }
       
   164 
       
   165 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
       
   166 {
       
   167     m_nextPos += advance;
       
   168     return Token(code, val);
       
   169 }
       
   170 
       
   171 // Returns next char if it's there and interesting, 0 otherwise
       
   172 char Parser::peekAheadHelper()
       
   173 {
       
   174     if (m_nextPos + 1 >= m_data.length())
       
   175         return 0;
       
   176     UChar next = m_data[m_nextPos + 1];
       
   177     if (next >= 0xff)
       
   178         return 0;
       
   179     return next;
       
   180 }
       
   181 
       
   182 char Parser::peekCurHelper()
       
   183 {
       
   184     if (m_nextPos >= m_data.length())
       
   185         return 0;
       
   186     UChar next = m_data[m_nextPos];
       
   187     if (next >= 0xff)
       
   188         return 0;
       
   189     return next;
       
   190 }
       
   191 
       
   192 Token Parser::lexString()
       
   193 {
       
   194     UChar delimiter = m_data[m_nextPos];
       
   195     int startPos = m_nextPos + 1;
       
   196 
       
   197     for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
       
   198         if (m_data[m_nextPos] == delimiter) {
       
   199             String value = m_data.substring(startPos, m_nextPos - startPos);
       
   200             if (value.isNull())
       
   201                 value = "";
       
   202             ++m_nextPos; // Consume the char.
       
   203             return Token(LITERAL, value);
       
   204         }
       
   205     }
       
   206 
       
   207     // Ouch, went off the end -- report error.
       
   208     return Token(XPATH_ERROR);
       
   209 }
       
   210 
       
   211 Token Parser::lexNumber()
       
   212 {
       
   213     int startPos = m_nextPos;
       
   214     bool seenDot = false;
       
   215 
       
   216     // Go until end or a non-digits character.
       
   217     for (; m_nextPos < m_data.length(); ++m_nextPos) {
       
   218         UChar aChar = m_data[m_nextPos];
       
   219         if (aChar >= 0xff) break;
       
   220 
       
   221         if (aChar < '0' || aChar > '9') {
       
   222             if (aChar == '.' && !seenDot)
       
   223                 seenDot = true;
       
   224             else
       
   225                 break;
       
   226         }
       
   227     }
       
   228 
       
   229     return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
       
   230 }
       
   231 
       
   232 bool Parser::lexNCName(String& name)
       
   233 {
       
   234     int startPos = m_nextPos;
       
   235     if (m_nextPos >= m_data.length())
       
   236         return false;
       
   237 
       
   238     if (charCat(m_data[m_nextPos]) != NameStart)
       
   239         return false;
       
   240 
       
   241     // Keep going until we get a character that's not good for names.
       
   242     for (; m_nextPos < m_data.length(); ++m_nextPos)
       
   243         if (charCat(m_data[m_nextPos]) == NotPartOfName)
       
   244             break;
       
   245 
       
   246     name = m_data.substring(startPos, m_nextPos - startPos);
       
   247     return true;
       
   248 }
       
   249 
       
   250 bool Parser::lexQName(String& name)
       
   251 {
       
   252     String n1;
       
   253     if (!lexNCName(n1))
       
   254         return false;
       
   255 
       
   256     skipWS();
       
   257 
       
   258     // If the next character is :, what we just got it the prefix, if not,
       
   259     // it's the whole thing.
       
   260     if (peekAheadHelper() != ':') {
       
   261         name = n1;
       
   262         return true;
       
   263     }
       
   264 
       
   265     String n2;
       
   266     if (!lexNCName(n2))
       
   267         return false;
       
   268 
       
   269     name = n1 + ":" + n2;
       
   270     return true;
       
   271 }
       
   272 
       
   273 Token Parser::nextTokenInternal()
       
   274 {
       
   275     skipWS();
       
   276 
       
   277     if (m_nextPos >= m_data.length())
       
   278         return Token(0);
       
   279 
       
   280     char code = peekCurHelper();
       
   281     switch (code) {
       
   282         case '(': case ')': case '[': case ']':
       
   283         case '@': case ',': case '|':
       
   284             return makeTokenAndAdvance(code);
       
   285         case '\'':
       
   286         case '\"':
       
   287             return lexString();
       
   288         case '0': case '1': case '2': case '3': case '4':
       
   289         case '5': case '6': case '7': case '8': case '9':
       
   290             return lexNumber();
       
   291         case '.': {
       
   292             char next = peekAheadHelper();
       
   293             if (next == '.')
       
   294                 return makeTokenAndAdvance(DOTDOT, 2);
       
   295             if (next >= '0' && next <= '9')
       
   296                 return lexNumber();
       
   297             return makeTokenAndAdvance('.');
       
   298         }
       
   299         case '/':
       
   300             if (peekAheadHelper() == '/')
       
   301                 return makeTokenAndAdvance(SLASHSLASH, 2);
       
   302             return makeTokenAndAdvance('/');
       
   303         case '+':
       
   304             return makeTokenAndAdvance(PLUS);
       
   305         case '-':
       
   306             return makeTokenAndAdvance(MINUS);
       
   307         case '=':
       
   308             return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
       
   309         case '!':
       
   310             if (peekAheadHelper() == '=')
       
   311                 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
       
   312             return Token(XPATH_ERROR);
       
   313         case '<':
       
   314             if (peekAheadHelper() == '=')
       
   315                 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
       
   316             return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
       
   317         case '>':
       
   318             if (peekAheadHelper() == '=')
       
   319                 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
       
   320             return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
       
   321         case '*':
       
   322             if (isOperatorContext())
       
   323                 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
       
   324             ++m_nextPos;
       
   325             return Token(NAMETEST, "*");
       
   326         case '$': { // $ QName
       
   327             m_nextPos++;
       
   328             String name;
       
   329             if (!lexQName(name))
       
   330                 return Token(XPATH_ERROR);
       
   331             return Token(VARIABLEREFERENCE, name);
       
   332         }
       
   333     }
       
   334 
       
   335     String name;
       
   336     if (!lexNCName(name))
       
   337         return Token(XPATH_ERROR);
       
   338 
       
   339     skipWS();
       
   340     // If we're in an operator context, check for any operator names
       
   341     if (isOperatorContext()) {
       
   342         if (name == "and") //### hash?
       
   343             return Token(AND);
       
   344         if (name == "or")
       
   345             return Token(OR);
       
   346         if (name == "mod")
       
   347             return Token(MULOP, NumericOp::OP_Mod);
       
   348         if (name == "div")
       
   349             return Token(MULOP, NumericOp::OP_Div);
       
   350     }
       
   351 
       
   352     // See whether we are at a :
       
   353     if (peekCurHelper() == ':') {
       
   354         m_nextPos++;
       
   355         // Any chance it's an axis name?
       
   356         if (peekCurHelper() == ':') {
       
   357             m_nextPos++;
       
   358             
       
   359             //It might be an axis name.
       
   360             Step::Axis axis;
       
   361             if (isAxisName(name, axis))
       
   362                 return Token(AXISNAME, axis);
       
   363             // Ugh, :: is only valid in axis names -> error
       
   364             return Token(XPATH_ERROR);
       
   365         }
       
   366 
       
   367         // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
       
   368         skipWS();
       
   369         if (peekCurHelper() == '*') {
       
   370             m_nextPos++;
       
   371             return Token(NAMETEST, name + ":*");
       
   372         }
       
   373         
       
   374         // Make a full qname.
       
   375         String n2;
       
   376         if (!lexNCName(n2))
       
   377             return Token(XPATH_ERROR);
       
   378         
       
   379         name = name + ":" + n2;
       
   380     }
       
   381 
       
   382     skipWS();
       
   383     if (peekCurHelper() == '(') {
       
   384         //note: we don't swallow the (here!
       
   385         
       
   386         //either node type of function name
       
   387         if (isNodeTypeName(name)) {
       
   388             if (name == "processing-instruction")
       
   389                 return Token(PI, name);
       
   390 
       
   391             return Token(NODETYPE, name);
       
   392         }
       
   393         //must be a function name.
       
   394         return Token(FUNCTIONNAME, name);
       
   395     }
       
   396 
       
   397     // At this point, it must be NAMETEST.
       
   398     return Token(NAMETEST, name);
       
   399 }
       
   400 
       
   401 Token Parser::nextToken()
       
   402 {
       
   403     Token toRet = nextTokenInternal();
       
   404     m_lastTokenType = toRet.type;
       
   405     return toRet;
       
   406 }
       
   407 
       
   408 Parser::Parser()
       
   409 {
       
   410     reset(String());
       
   411 }
       
   412 
       
   413 void Parser::reset(const String& data)
       
   414 {
       
   415     m_nextPos = 0;
       
   416     m_data = data;
       
   417     m_lastTokenType = 0;
       
   418     
       
   419     m_topExpr = 0;
       
   420     m_gotNamespaceError = false;
       
   421 }
       
   422 
       
   423 int Parser::lex(void* data)
       
   424 {
       
   425     YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
       
   426     Token tok = nextToken();
       
   427 
       
   428     switch (tok.type) {
       
   429         case AXISNAME:
       
   430             yylval->axis = tok.axis;
       
   431             break;
       
   432         case MULOP:
       
   433             yylval->numop = tok.numop;
       
   434             break;
       
   435         case RELOP:
       
   436         case EQOP:
       
   437             yylval->eqop = tok.eqop;
       
   438             break;
       
   439         case NODETYPE:
       
   440         case PI:
       
   441         case FUNCTIONNAME:
       
   442         case LITERAL:
       
   443         case VARIABLEREFERENCE:
       
   444         case NUMBER:
       
   445         case NAMETEST:
       
   446             yylval->str = new String(tok.str);
       
   447             registerString(yylval->str);
       
   448             break;
       
   449     }
       
   450 
       
   451     return tok.type;
       
   452 }
       
   453 
       
   454 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
       
   455 {
       
   456     int colon = qName.find(':');
       
   457     if (colon >= 0) {
       
   458         if (!m_resolver)
       
   459             return false;
       
   460         namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
       
   461         if (namespaceURI.isNull())
       
   462             return false;
       
   463         localName = qName.substring(colon + 1);
       
   464     } else
       
   465         localName = qName;
       
   466     
       
   467     return true;
       
   468 }
       
   469 
       
   470 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
       
   471 {
       
   472     reset(statement);
       
   473 
       
   474     m_resolver = resolver;
       
   475     
       
   476     Parser* oldParser = currentParser;
       
   477     currentParser = this;
       
   478     int parseError = xpathyyparse(this);
       
   479     currentParser = oldParser;
       
   480 
       
   481     if (parseError) {
       
   482         deleteAllValues(m_parseNodes);
       
   483         m_parseNodes.clear();
       
   484 
       
   485         HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
       
   486         for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
       
   487             deleteAllValues(**it);
       
   488             delete *it;
       
   489         }
       
   490         m_predicateVectors.clear();
       
   491 
       
   492         HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
       
   493         for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
       
   494             deleteAllValues(**it);
       
   495             delete *it;
       
   496         }
       
   497         m_expressionVectors.clear();
       
   498 
       
   499         deleteAllValues(m_strings);
       
   500         m_strings.clear();
       
   501 
       
   502         deleteAllValues(m_nodeTests);
       
   503         m_nodeTests.clear();
       
   504 
       
   505         m_topExpr = 0;
       
   506 
       
   507         if (m_gotNamespaceError)
       
   508             ec = NAMESPACE_ERR;
       
   509         else
       
   510             ec = XPathException::INVALID_EXPRESSION_ERR;
       
   511         return 0;
       
   512     }
       
   513 
       
   514     ASSERT(m_parseNodes.size() == 1);
       
   515     ASSERT(*m_parseNodes.begin() == m_topExpr);
       
   516     ASSERT(m_expressionVectors.size() == 0);
       
   517     ASSERT(m_predicateVectors.size() == 0);
       
   518     ASSERT(m_strings.size() == 0);
       
   519     ASSERT(m_nodeTests.size() == 0);
       
   520 
       
   521     m_parseNodes.clear();
       
   522     Expression* result = m_topExpr;
       
   523     m_topExpr = 0;
       
   524 
       
   525     return result;
       
   526 }
       
   527 
       
   528 void Parser::registerParseNode(ParseNode* node)
       
   529 {
       
   530     if (node == 0)
       
   531         return;
       
   532     
       
   533     ASSERT(!m_parseNodes.contains(node));
       
   534     
       
   535     m_parseNodes.add(node);
       
   536 }
       
   537 
       
   538 void Parser::unregisterParseNode(ParseNode* node)
       
   539 {
       
   540     if (node == 0)
       
   541         return;
       
   542     
       
   543     ASSERT(m_parseNodes.contains(node));
       
   544 
       
   545     m_parseNodes.remove(node);
       
   546 }
       
   547 
       
   548 void Parser::registerPredicateVector(Vector<Predicate*>* vector)
       
   549 {
       
   550     if (vector == 0)
       
   551         return;
       
   552 
       
   553     ASSERT(!m_predicateVectors.contains(vector));
       
   554     
       
   555     m_predicateVectors.add(vector);
       
   556 }
       
   557 
       
   558 void Parser::deletePredicateVector(Vector<Predicate*>* vector)
       
   559 {
       
   560     if (vector == 0)
       
   561         return;
       
   562 
       
   563     ASSERT(m_predicateVectors.contains(vector));
       
   564     
       
   565     m_predicateVectors.remove(vector);
       
   566     delete vector;
       
   567 }
       
   568 
       
   569 
       
   570 void Parser::registerExpressionVector(Vector<Expression*>* vector)
       
   571 {
       
   572     if (vector == 0)
       
   573         return;
       
   574 
       
   575     ASSERT(!m_expressionVectors.contains(vector));
       
   576     
       
   577     m_expressionVectors.add(vector);    
       
   578 }
       
   579 
       
   580 void Parser::deleteExpressionVector(Vector<Expression*>* vector)
       
   581 {
       
   582     if (vector == 0)
       
   583         return;
       
   584 
       
   585     ASSERT(m_expressionVectors.contains(vector));
       
   586     
       
   587     m_expressionVectors.remove(vector);
       
   588     delete vector;
       
   589 }
       
   590 
       
   591 void Parser::registerString(String* s)
       
   592 {
       
   593     if (s == 0)
       
   594         return;
       
   595     
       
   596     ASSERT(!m_strings.contains(s));
       
   597     
       
   598     m_strings.add(s);        
       
   599 }
       
   600 
       
   601 void Parser::deleteString(String* s)
       
   602 {
       
   603     if (s == 0)
       
   604         return;
       
   605     
       
   606     ASSERT(m_strings.contains(s));
       
   607     
       
   608     m_strings.remove(s);
       
   609     delete s;
       
   610 }
       
   611 
       
   612 void Parser::registerNodeTest(Step::NodeTest* t)
       
   613 {
       
   614     if (t == 0)
       
   615         return;
       
   616     
       
   617     ASSERT(!m_nodeTests.contains(t));
       
   618     
       
   619     m_nodeTests.add(t);        
       
   620 }
       
   621 
       
   622 void Parser::deleteNodeTest(Step::NodeTest* t)
       
   623 {
       
   624     if (t == 0)
       
   625         return;
       
   626     
       
   627     ASSERT(m_nodeTests.contains(t));
       
   628     
       
   629     m_nodeTests.remove(t);
       
   630     delete t;
       
   631 }
       
   632 
       
   633 }
       
   634 }
       
   635 
       
   636 #endif // ENABLE(XPATH)