WebCore/xml/XPathParser.cpp
changeset 0 4f2f89ce4247
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/WebCore/xml/XPathParser.cpp	Fri Sep 17 09:02:29 2010 +0300
@@ -0,0 +1,636 @@
+/*
+ * Copyright 2005 Maksim Orlovich <maksim@kde.org>
+ * Copyright (C) 2006 Apple Computer, Inc.
+ * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "XPathParser.h"
+
+#if ENABLE(XPATH)
+
+#include "ExceptionCode.h"
+#include "StringHash.h"
+#include "XPathEvaluator.h"
+#include "XPathException.h"
+#include "XPathNSResolver.h"
+#include "XPathStep.h"
+#include <wtf/StdLibExtras.h>
+
+int xpathyyparse(void*);
+
+using namespace WTF;
+using namespace Unicode;
+
+namespace WebCore {
+namespace XPath {
+
+class LocationPath;
+
+#include "XPathGrammar.h"    
+
+Parser* Parser::currentParser = 0;
+    
+enum XMLCat { NameStart, NameCont, NotPartOfName };
+
+typedef HashMap<String, Step::Axis> AxisNamesMap;
+
+static XMLCat charCat(UChar aChar)
+{
+    //### might need to add some special cases from the XML spec.
+
+    if (aChar == '_')
+        return NameStart;
+
+    if (aChar == '.' || aChar == '-')
+        return NameCont;
+    CharCategory category = Unicode::category(aChar);
+    if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
+        return NameStart;
+    if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
+        return NameCont;
+    return NotPartOfName;
+}
+
+static void setUpAxisNamesMap(AxisNamesMap& axisNames)
+{
+    struct AxisName {
+        const char* name;
+        Step::Axis axis;
+    };
+    const AxisName axisNameList[] = {
+        { "ancestor", Step::AncestorAxis },
+        { "ancestor-or-self", Step::AncestorOrSelfAxis },
+        { "attribute", Step::AttributeAxis },
+        { "child", Step::ChildAxis },
+        { "descendant", Step::DescendantAxis },
+        { "descendant-or-self", Step::DescendantOrSelfAxis },
+        { "following", Step::FollowingAxis },
+        { "following-sibling", Step::FollowingSiblingAxis },
+        { "namespace", Step::NamespaceAxis },
+        { "parent", Step::ParentAxis },
+        { "preceding", Step::PrecedingAxis },
+        { "preceding-sibling", Step::PrecedingSiblingAxis },
+        { "self", Step::SelfAxis }
+    };
+    for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
+        axisNames.set(axisNameList[i].name, axisNameList[i].axis);
+}
+
+static bool isAxisName(const String& name, Step::Axis& type)
+{
+    DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
+
+    if (axisNames.isEmpty())
+        setUpAxisNamesMap(axisNames);
+
+    AxisNamesMap::iterator it = axisNames.find(name);
+    if (it == axisNames.end())
+        return false;
+    type = it->second;
+    return true;
+}
+
+static bool isNodeTypeName(const String& name)
+{
+    DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
+    if (nodeTypeNames.isEmpty()) {
+        nodeTypeNames.add("comment");
+        nodeTypeNames.add("text");
+        nodeTypeNames.add("processing-instruction");
+        nodeTypeNames.add("node");
+    }
+    return nodeTypeNames.contains(name);
+}
+
+/* Returns whether the last parsed token matches the [32] Operator rule
+ * (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate
+ * the tokens.
+ */
+bool Parser::isOperatorContext() const
+{
+    if (m_nextPos == 0)
+        return false;
+
+    switch (m_lastTokenType) {
+        case AND: case OR: case MULOP:
+        case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
+        case EQOP: case RELOP:
+        case '@': case AXISNAME:   case '(': case '[':
+            return false;
+        default:
+            return true;
+    }
+}
+
+void Parser::skipWS()
+{
+    while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
+        ++m_nextPos;
+}
+
+Token Parser::makeTokenAndAdvance(int code, int advance)
+{
+    m_nextPos += advance;
+    return Token(code);
+}
+
+Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
+{
+    m_nextPos += advance;
+    return Token(code, val);
+}
+
+Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
+{
+    m_nextPos += advance;
+    return Token(code, val);
+}
+
+// Returns next char if it's there and interesting, 0 otherwise
+char Parser::peekAheadHelper()
+{
+    if (m_nextPos + 1 >= m_data.length())
+        return 0;
+    UChar next = m_data[m_nextPos + 1];
+    if (next >= 0xff)
+        return 0;
+    return next;
+}
+
+char Parser::peekCurHelper()
+{
+    if (m_nextPos >= m_data.length())
+        return 0;
+    UChar next = m_data[m_nextPos];
+    if (next >= 0xff)
+        return 0;
+    return next;
+}
+
+Token Parser::lexString()
+{
+    UChar delimiter = m_data[m_nextPos];
+    int startPos = m_nextPos + 1;
+
+    for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
+        if (m_data[m_nextPos] == delimiter) {
+            String value = m_data.substring(startPos, m_nextPos - startPos);
+            if (value.isNull())
+                value = "";
+            ++m_nextPos; // Consume the char.
+            return Token(LITERAL, value);
+        }
+    }
+
+    // Ouch, went off the end -- report error.
+    return Token(XPATH_ERROR);
+}
+
+Token Parser::lexNumber()
+{
+    int startPos = m_nextPos;
+    bool seenDot = false;
+
+    // Go until end or a non-digits character.
+    for (; m_nextPos < m_data.length(); ++m_nextPos) {
+        UChar aChar = m_data[m_nextPos];
+        if (aChar >= 0xff) break;
+
+        if (aChar < '0' || aChar > '9') {
+            if (aChar == '.' && !seenDot)
+                seenDot = true;
+            else
+                break;
+        }
+    }
+
+    return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
+}
+
+bool Parser::lexNCName(String& name)
+{
+    int startPos = m_nextPos;
+    if (m_nextPos >= m_data.length())
+        return false;
+
+    if (charCat(m_data[m_nextPos]) != NameStart)
+        return false;
+
+    // Keep going until we get a character that's not good for names.
+    for (; m_nextPos < m_data.length(); ++m_nextPos)
+        if (charCat(m_data[m_nextPos]) == NotPartOfName)
+            break;
+
+    name = m_data.substring(startPos, m_nextPos - startPos);
+    return true;
+}
+
+bool Parser::lexQName(String& name)
+{
+    String n1;
+    if (!lexNCName(n1))
+        return false;
+
+    skipWS();
+
+    // If the next character is :, what we just got it the prefix, if not,
+    // it's the whole thing.
+    if (peekAheadHelper() != ':') {
+        name = n1;
+        return true;
+    }
+
+    String n2;
+    if (!lexNCName(n2))
+        return false;
+
+    name = n1 + ":" + n2;
+    return true;
+}
+
+Token Parser::nextTokenInternal()
+{
+    skipWS();
+
+    if (m_nextPos >= m_data.length())
+        return Token(0);
+
+    char code = peekCurHelper();
+    switch (code) {
+        case '(': case ')': case '[': case ']':
+        case '@': case ',': case '|':
+            return makeTokenAndAdvance(code);
+        case '\'':
+        case '\"':
+            return lexString();
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+            return lexNumber();
+        case '.': {
+            char next = peekAheadHelper();
+            if (next == '.')
+                return makeTokenAndAdvance(DOTDOT, 2);
+            if (next >= '0' && next <= '9')
+                return lexNumber();
+            return makeTokenAndAdvance('.');
+        }
+        case '/':
+            if (peekAheadHelper() == '/')
+                return makeTokenAndAdvance(SLASHSLASH, 2);
+            return makeTokenAndAdvance('/');
+        case '+':
+            return makeTokenAndAdvance(PLUS);
+        case '-':
+            return makeTokenAndAdvance(MINUS);
+        case '=':
+            return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
+        case '!':
+            if (peekAheadHelper() == '=')
+                return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
+            return Token(XPATH_ERROR);
+        case '<':
+            if (peekAheadHelper() == '=')
+                return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
+            return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
+        case '>':
+            if (peekAheadHelper() == '=')
+                return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
+            return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
+        case '*':
+            if (isOperatorContext())
+                return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
+            ++m_nextPos;
+            return Token(NAMETEST, "*");
+        case '$': { // $ QName
+            m_nextPos++;
+            String name;
+            if (!lexQName(name))
+                return Token(XPATH_ERROR);
+            return Token(VARIABLEREFERENCE, name);
+        }
+    }
+
+    String name;
+    if (!lexNCName(name))
+        return Token(XPATH_ERROR);
+
+    skipWS();
+    // If we're in an operator context, check for any operator names
+    if (isOperatorContext()) {
+        if (name == "and") //### hash?
+            return Token(AND);
+        if (name == "or")
+            return Token(OR);
+        if (name == "mod")
+            return Token(MULOP, NumericOp::OP_Mod);
+        if (name == "div")
+            return Token(MULOP, NumericOp::OP_Div);
+    }
+
+    // See whether we are at a :
+    if (peekCurHelper() == ':') {
+        m_nextPos++;
+        // Any chance it's an axis name?
+        if (peekCurHelper() == ':') {
+            m_nextPos++;
+            
+            //It might be an axis name.
+            Step::Axis axis;
+            if (isAxisName(name, axis))
+                return Token(AXISNAME, axis);
+            // Ugh, :: is only valid in axis names -> error
+            return Token(XPATH_ERROR);
+        }
+
+        // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
+        skipWS();
+        if (peekCurHelper() == '*') {
+            m_nextPos++;
+            return Token(NAMETEST, name + ":*");
+        }
+        
+        // Make a full qname.
+        String n2;
+        if (!lexNCName(n2))
+            return Token(XPATH_ERROR);
+        
+        name = name + ":" + n2;
+    }
+
+    skipWS();
+    if (peekCurHelper() == '(') {
+        //note: we don't swallow the (here!
+        
+        //either node type of function name
+        if (isNodeTypeName(name)) {
+            if (name == "processing-instruction")
+                return Token(PI, name);
+
+            return Token(NODETYPE, name);
+        }
+        //must be a function name.
+        return Token(FUNCTIONNAME, name);
+    }
+
+    // At this point, it must be NAMETEST.
+    return Token(NAMETEST, name);
+}
+
+Token Parser::nextToken()
+{
+    Token toRet = nextTokenInternal();
+    m_lastTokenType = toRet.type;
+    return toRet;
+}
+
+Parser::Parser()
+{
+    reset(String());
+}
+
+void Parser::reset(const String& data)
+{
+    m_nextPos = 0;
+    m_data = data;
+    m_lastTokenType = 0;
+    
+    m_topExpr = 0;
+    m_gotNamespaceError = false;
+}
+
+int Parser::lex(void* data)
+{
+    YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
+    Token tok = nextToken();
+
+    switch (tok.type) {
+        case AXISNAME:
+            yylval->axis = tok.axis;
+            break;
+        case MULOP:
+            yylval->numop = tok.numop;
+            break;
+        case RELOP:
+        case EQOP:
+            yylval->eqop = tok.eqop;
+            break;
+        case NODETYPE:
+        case PI:
+        case FUNCTIONNAME:
+        case LITERAL:
+        case VARIABLEREFERENCE:
+        case NUMBER:
+        case NAMETEST:
+            yylval->str = new String(tok.str);
+            registerString(yylval->str);
+            break;
+    }
+
+    return tok.type;
+}
+
+bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
+{
+    int colon = qName.find(':');
+    if (colon >= 0) {
+        if (!m_resolver)
+            return false;
+        namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
+        if (namespaceURI.isNull())
+            return false;
+        localName = qName.substring(colon + 1);
+    } else
+        localName = qName;
+    
+    return true;
+}
+
+Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
+{
+    reset(statement);
+
+    m_resolver = resolver;
+    
+    Parser* oldParser = currentParser;
+    currentParser = this;
+    int parseError = xpathyyparse(this);
+    currentParser = oldParser;
+
+    if (parseError) {
+        deleteAllValues(m_parseNodes);
+        m_parseNodes.clear();
+
+        HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
+        for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
+            deleteAllValues(**it);
+            delete *it;
+        }
+        m_predicateVectors.clear();
+
+        HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
+        for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
+            deleteAllValues(**it);
+            delete *it;
+        }
+        m_expressionVectors.clear();
+
+        deleteAllValues(m_strings);
+        m_strings.clear();
+
+        deleteAllValues(m_nodeTests);
+        m_nodeTests.clear();
+
+        m_topExpr = 0;
+
+        if (m_gotNamespaceError)
+            ec = NAMESPACE_ERR;
+        else
+            ec = XPathException::INVALID_EXPRESSION_ERR;
+        return 0;
+    }
+
+    ASSERT(m_parseNodes.size() == 1);
+    ASSERT(*m_parseNodes.begin() == m_topExpr);
+    ASSERT(m_expressionVectors.size() == 0);
+    ASSERT(m_predicateVectors.size() == 0);
+    ASSERT(m_strings.size() == 0);
+    ASSERT(m_nodeTests.size() == 0);
+
+    m_parseNodes.clear();
+    Expression* result = m_topExpr;
+    m_topExpr = 0;
+
+    return result;
+}
+
+void Parser::registerParseNode(ParseNode* node)
+{
+    if (node == 0)
+        return;
+    
+    ASSERT(!m_parseNodes.contains(node));
+    
+    m_parseNodes.add(node);
+}
+
+void Parser::unregisterParseNode(ParseNode* node)
+{
+    if (node == 0)
+        return;
+    
+    ASSERT(m_parseNodes.contains(node));
+
+    m_parseNodes.remove(node);
+}
+
+void Parser::registerPredicateVector(Vector<Predicate*>* vector)
+{
+    if (vector == 0)
+        return;
+
+    ASSERT(!m_predicateVectors.contains(vector));
+    
+    m_predicateVectors.add(vector);
+}
+
+void Parser::deletePredicateVector(Vector<Predicate*>* vector)
+{
+    if (vector == 0)
+        return;
+
+    ASSERT(m_predicateVectors.contains(vector));
+    
+    m_predicateVectors.remove(vector);
+    delete vector;
+}
+
+
+void Parser::registerExpressionVector(Vector<Expression*>* vector)
+{
+    if (vector == 0)
+        return;
+
+    ASSERT(!m_expressionVectors.contains(vector));
+    
+    m_expressionVectors.add(vector);    
+}
+
+void Parser::deleteExpressionVector(Vector<Expression*>* vector)
+{
+    if (vector == 0)
+        return;
+
+    ASSERT(m_expressionVectors.contains(vector));
+    
+    m_expressionVectors.remove(vector);
+    delete vector;
+}
+
+void Parser::registerString(String* s)
+{
+    if (s == 0)
+        return;
+    
+    ASSERT(!m_strings.contains(s));
+    
+    m_strings.add(s);        
+}
+
+void Parser::deleteString(String* s)
+{
+    if (s == 0)
+        return;
+    
+    ASSERT(m_strings.contains(s));
+    
+    m_strings.remove(s);
+    delete s;
+}
+
+void Parser::registerNodeTest(Step::NodeTest* t)
+{
+    if (t == 0)
+        return;
+    
+    ASSERT(!m_nodeTests.contains(t));
+    
+    m_nodeTests.add(t);        
+}
+
+void Parser::deleteNodeTest(Step::NodeTest* t)
+{
+    if (t == 0)
+        return;
+    
+    ASSERT(m_nodeTests.contains(t));
+    
+    m_nodeTests.remove(t);
+    delete t;
+}
+
+}
+}
+
+#endif // ENABLE(XPATH)