|
1 /* |
|
2 * Copyright 2005 Maksim Orlovich <maksim@kde.org> |
|
3 * Copyright (C) 2006 Apple Computer, Inc. |
|
4 * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org> |
|
5 * |
|
6 * Redistribution and use in source and binary forms, with or without |
|
7 * modification, are permitted provided that the following conditions |
|
8 * are met: |
|
9 * |
|
10 * 1. Redistributions of source code must retain the above copyright |
|
11 * notice, this list of conditions and the following disclaimer. |
|
12 * 2. Redistributions in binary form must reproduce the above copyright |
|
13 * notice, this list of conditions and the following disclaimer in the |
|
14 * documentation and/or other materials provided with the distribution. |
|
15 * |
|
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
|
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
|
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
|
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
|
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
|
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
26 */ |
|
27 |
|
28 #include "config.h" |
|
29 #include "XPathParser.h" |
|
30 |
|
31 #if ENABLE(XPATH) |
|
32 |
|
33 #include "ExceptionCode.h" |
|
34 #include "StringHash.h" |
|
35 #include "XPathEvaluator.h" |
|
36 #include "XPathException.h" |
|
37 #include "XPathNSResolver.h" |
|
38 #include "XPathStep.h" |
|
39 #include <wtf/StdLibExtras.h> |
|
40 |
|
41 int xpathyyparse(void*); |
|
42 |
|
43 using namespace WTF; |
|
44 using namespace Unicode; |
|
45 |
|
46 namespace WebCore { |
|
47 namespace XPath { |
|
48 |
|
49 class LocationPath; |
|
50 |
|
51 #include "XPathGrammar.h" |
|
52 |
|
53 Parser* Parser::currentParser = 0; |
|
54 |
|
55 enum XMLCat { NameStart, NameCont, NotPartOfName }; |
|
56 |
|
57 typedef HashMap<String, Step::Axis> AxisNamesMap; |
|
58 |
|
59 static XMLCat charCat(UChar aChar) |
|
60 { |
|
61 //### might need to add some special cases from the XML spec. |
|
62 |
|
63 if (aChar == '_') |
|
64 return NameStart; |
|
65 |
|
66 if (aChar == '.' || aChar == '-') |
|
67 return NameCont; |
|
68 CharCategory category = Unicode::category(aChar); |
|
69 if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter)) |
|
70 return NameStart; |
|
71 if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit)) |
|
72 return NameCont; |
|
73 return NotPartOfName; |
|
74 } |
|
75 |
|
76 static void setUpAxisNamesMap(AxisNamesMap& axisNames) |
|
77 { |
|
78 struct AxisName { |
|
79 const char* name; |
|
80 Step::Axis axis; |
|
81 }; |
|
82 const AxisName axisNameList[] = { |
|
83 { "ancestor", Step::AncestorAxis }, |
|
84 { "ancestor-or-self", Step::AncestorOrSelfAxis }, |
|
85 { "attribute", Step::AttributeAxis }, |
|
86 { "child", Step::ChildAxis }, |
|
87 { "descendant", Step::DescendantAxis }, |
|
88 { "descendant-or-self", Step::DescendantOrSelfAxis }, |
|
89 { "following", Step::FollowingAxis }, |
|
90 { "following-sibling", Step::FollowingSiblingAxis }, |
|
91 { "namespace", Step::NamespaceAxis }, |
|
92 { "parent", Step::ParentAxis }, |
|
93 { "preceding", Step::PrecedingAxis }, |
|
94 { "preceding-sibling", Step::PrecedingSiblingAxis }, |
|
95 { "self", Step::SelfAxis } |
|
96 }; |
|
97 for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i) |
|
98 axisNames.set(axisNameList[i].name, axisNameList[i].axis); |
|
99 } |
|
100 |
|
101 static bool isAxisName(const String& name, Step::Axis& type) |
|
102 { |
|
103 DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ()); |
|
104 |
|
105 if (axisNames.isEmpty()) |
|
106 setUpAxisNamesMap(axisNames); |
|
107 |
|
108 AxisNamesMap::iterator it = axisNames.find(name); |
|
109 if (it == axisNames.end()) |
|
110 return false; |
|
111 type = it->second; |
|
112 return true; |
|
113 } |
|
114 |
|
115 static bool isNodeTypeName(const String& name) |
|
116 { |
|
117 DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ()); |
|
118 if (nodeTypeNames.isEmpty()) { |
|
119 nodeTypeNames.add("comment"); |
|
120 nodeTypeNames.add("text"); |
|
121 nodeTypeNames.add("processing-instruction"); |
|
122 nodeTypeNames.add("node"); |
|
123 } |
|
124 return nodeTypeNames.contains(name); |
|
125 } |
|
126 |
|
127 /* Returns whether the last parsed token matches the [32] Operator rule |
|
128 * (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate |
|
129 * the tokens. |
|
130 */ |
|
131 bool Parser::isOperatorContext() const |
|
132 { |
|
133 if (m_nextPos == 0) |
|
134 return false; |
|
135 |
|
136 switch (m_lastTokenType) { |
|
137 case AND: case OR: case MULOP: |
|
138 case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: |
|
139 case EQOP: case RELOP: |
|
140 case '@': case AXISNAME: case '(': case '[': |
|
141 return false; |
|
142 default: |
|
143 return true; |
|
144 } |
|
145 } |
|
146 |
|
147 void Parser::skipWS() |
|
148 { |
|
149 while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) |
|
150 ++m_nextPos; |
|
151 } |
|
152 |
|
153 Token Parser::makeTokenAndAdvance(int code, int advance) |
|
154 { |
|
155 m_nextPos += advance; |
|
156 return Token(code); |
|
157 } |
|
158 |
|
159 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) |
|
160 { |
|
161 m_nextPos += advance; |
|
162 return Token(code, val); |
|
163 } |
|
164 |
|
165 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) |
|
166 { |
|
167 m_nextPos += advance; |
|
168 return Token(code, val); |
|
169 } |
|
170 |
|
171 // Returns next char if it's there and interesting, 0 otherwise |
|
172 char Parser::peekAheadHelper() |
|
173 { |
|
174 if (m_nextPos + 1 >= m_data.length()) |
|
175 return 0; |
|
176 UChar next = m_data[m_nextPos + 1]; |
|
177 if (next >= 0xff) |
|
178 return 0; |
|
179 return next; |
|
180 } |
|
181 |
|
182 char Parser::peekCurHelper() |
|
183 { |
|
184 if (m_nextPos >= m_data.length()) |
|
185 return 0; |
|
186 UChar next = m_data[m_nextPos]; |
|
187 if (next >= 0xff) |
|
188 return 0; |
|
189 return next; |
|
190 } |
|
191 |
|
192 Token Parser::lexString() |
|
193 { |
|
194 UChar delimiter = m_data[m_nextPos]; |
|
195 int startPos = m_nextPos + 1; |
|
196 |
|
197 for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { |
|
198 if (m_data[m_nextPos] == delimiter) { |
|
199 String value = m_data.substring(startPos, m_nextPos - startPos); |
|
200 if (value.isNull()) |
|
201 value = ""; |
|
202 ++m_nextPos; // Consume the char. |
|
203 return Token(LITERAL, value); |
|
204 } |
|
205 } |
|
206 |
|
207 // Ouch, went off the end -- report error. |
|
208 return Token(XPATH_ERROR); |
|
209 } |
|
210 |
|
211 Token Parser::lexNumber() |
|
212 { |
|
213 int startPos = m_nextPos; |
|
214 bool seenDot = false; |
|
215 |
|
216 // Go until end or a non-digits character. |
|
217 for (; m_nextPos < m_data.length(); ++m_nextPos) { |
|
218 UChar aChar = m_data[m_nextPos]; |
|
219 if (aChar >= 0xff) break; |
|
220 |
|
221 if (aChar < '0' || aChar > '9') { |
|
222 if (aChar == '.' && !seenDot) |
|
223 seenDot = true; |
|
224 else |
|
225 break; |
|
226 } |
|
227 } |
|
228 |
|
229 return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); |
|
230 } |
|
231 |
|
232 bool Parser::lexNCName(String& name) |
|
233 { |
|
234 int startPos = m_nextPos; |
|
235 if (m_nextPos >= m_data.length()) |
|
236 return false; |
|
237 |
|
238 if (charCat(m_data[m_nextPos]) != NameStart) |
|
239 return false; |
|
240 |
|
241 // Keep going until we get a character that's not good for names. |
|
242 for (; m_nextPos < m_data.length(); ++m_nextPos) |
|
243 if (charCat(m_data[m_nextPos]) == NotPartOfName) |
|
244 break; |
|
245 |
|
246 name = m_data.substring(startPos, m_nextPos - startPos); |
|
247 return true; |
|
248 } |
|
249 |
|
250 bool Parser::lexQName(String& name) |
|
251 { |
|
252 String n1; |
|
253 if (!lexNCName(n1)) |
|
254 return false; |
|
255 |
|
256 skipWS(); |
|
257 |
|
258 // If the next character is :, what we just got it the prefix, if not, |
|
259 // it's the whole thing. |
|
260 if (peekAheadHelper() != ':') { |
|
261 name = n1; |
|
262 return true; |
|
263 } |
|
264 |
|
265 String n2; |
|
266 if (!lexNCName(n2)) |
|
267 return false; |
|
268 |
|
269 name = n1 + ":" + n2; |
|
270 return true; |
|
271 } |
|
272 |
|
273 Token Parser::nextTokenInternal() |
|
274 { |
|
275 skipWS(); |
|
276 |
|
277 if (m_nextPos >= m_data.length()) |
|
278 return Token(0); |
|
279 |
|
280 char code = peekCurHelper(); |
|
281 switch (code) { |
|
282 case '(': case ')': case '[': case ']': |
|
283 case '@': case ',': case '|': |
|
284 return makeTokenAndAdvance(code); |
|
285 case '\'': |
|
286 case '\"': |
|
287 return lexString(); |
|
288 case '0': case '1': case '2': case '3': case '4': |
|
289 case '5': case '6': case '7': case '8': case '9': |
|
290 return lexNumber(); |
|
291 case '.': { |
|
292 char next = peekAheadHelper(); |
|
293 if (next == '.') |
|
294 return makeTokenAndAdvance(DOTDOT, 2); |
|
295 if (next >= '0' && next <= '9') |
|
296 return lexNumber(); |
|
297 return makeTokenAndAdvance('.'); |
|
298 } |
|
299 case '/': |
|
300 if (peekAheadHelper() == '/') |
|
301 return makeTokenAndAdvance(SLASHSLASH, 2); |
|
302 return makeTokenAndAdvance('/'); |
|
303 case '+': |
|
304 return makeTokenAndAdvance(PLUS); |
|
305 case '-': |
|
306 return makeTokenAndAdvance(MINUS); |
|
307 case '=': |
|
308 return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ); |
|
309 case '!': |
|
310 if (peekAheadHelper() == '=') |
|
311 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2); |
|
312 return Token(XPATH_ERROR); |
|
313 case '<': |
|
314 if (peekAheadHelper() == '=') |
|
315 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2); |
|
316 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT); |
|
317 case '>': |
|
318 if (peekAheadHelper() == '=') |
|
319 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2); |
|
320 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT); |
|
321 case '*': |
|
322 if (isOperatorContext()) |
|
323 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); |
|
324 ++m_nextPos; |
|
325 return Token(NAMETEST, "*"); |
|
326 case '$': { // $ QName |
|
327 m_nextPos++; |
|
328 String name; |
|
329 if (!lexQName(name)) |
|
330 return Token(XPATH_ERROR); |
|
331 return Token(VARIABLEREFERENCE, name); |
|
332 } |
|
333 } |
|
334 |
|
335 String name; |
|
336 if (!lexNCName(name)) |
|
337 return Token(XPATH_ERROR); |
|
338 |
|
339 skipWS(); |
|
340 // If we're in an operator context, check for any operator names |
|
341 if (isOperatorContext()) { |
|
342 if (name == "and") //### hash? |
|
343 return Token(AND); |
|
344 if (name == "or") |
|
345 return Token(OR); |
|
346 if (name == "mod") |
|
347 return Token(MULOP, NumericOp::OP_Mod); |
|
348 if (name == "div") |
|
349 return Token(MULOP, NumericOp::OP_Div); |
|
350 } |
|
351 |
|
352 // See whether we are at a : |
|
353 if (peekCurHelper() == ':') { |
|
354 m_nextPos++; |
|
355 // Any chance it's an axis name? |
|
356 if (peekCurHelper() == ':') { |
|
357 m_nextPos++; |
|
358 |
|
359 //It might be an axis name. |
|
360 Step::Axis axis; |
|
361 if (isAxisName(name, axis)) |
|
362 return Token(AXISNAME, axis); |
|
363 // Ugh, :: is only valid in axis names -> error |
|
364 return Token(XPATH_ERROR); |
|
365 } |
|
366 |
|
367 // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest |
|
368 skipWS(); |
|
369 if (peekCurHelper() == '*') { |
|
370 m_nextPos++; |
|
371 return Token(NAMETEST, name + ":*"); |
|
372 } |
|
373 |
|
374 // Make a full qname. |
|
375 String n2; |
|
376 if (!lexNCName(n2)) |
|
377 return Token(XPATH_ERROR); |
|
378 |
|
379 name = name + ":" + n2; |
|
380 } |
|
381 |
|
382 skipWS(); |
|
383 if (peekCurHelper() == '(') { |
|
384 //note: we don't swallow the (here! |
|
385 |
|
386 //either node type of function name |
|
387 if (isNodeTypeName(name)) { |
|
388 if (name == "processing-instruction") |
|
389 return Token(PI, name); |
|
390 |
|
391 return Token(NODETYPE, name); |
|
392 } |
|
393 //must be a function name. |
|
394 return Token(FUNCTIONNAME, name); |
|
395 } |
|
396 |
|
397 // At this point, it must be NAMETEST. |
|
398 return Token(NAMETEST, name); |
|
399 } |
|
400 |
|
401 Token Parser::nextToken() |
|
402 { |
|
403 Token toRet = nextTokenInternal(); |
|
404 m_lastTokenType = toRet.type; |
|
405 return toRet; |
|
406 } |
|
407 |
|
408 Parser::Parser() |
|
409 { |
|
410 reset(String()); |
|
411 } |
|
412 |
|
413 void Parser::reset(const String& data) |
|
414 { |
|
415 m_nextPos = 0; |
|
416 m_data = data; |
|
417 m_lastTokenType = 0; |
|
418 |
|
419 m_topExpr = 0; |
|
420 m_gotNamespaceError = false; |
|
421 } |
|
422 |
|
423 int Parser::lex(void* data) |
|
424 { |
|
425 YYSTYPE* yylval = static_cast<YYSTYPE*>(data); |
|
426 Token tok = nextToken(); |
|
427 |
|
428 switch (tok.type) { |
|
429 case AXISNAME: |
|
430 yylval->axis = tok.axis; |
|
431 break; |
|
432 case MULOP: |
|
433 yylval->numop = tok.numop; |
|
434 break; |
|
435 case RELOP: |
|
436 case EQOP: |
|
437 yylval->eqop = tok.eqop; |
|
438 break; |
|
439 case NODETYPE: |
|
440 case PI: |
|
441 case FUNCTIONNAME: |
|
442 case LITERAL: |
|
443 case VARIABLEREFERENCE: |
|
444 case NUMBER: |
|
445 case NAMETEST: |
|
446 yylval->str = new String(tok.str); |
|
447 registerString(yylval->str); |
|
448 break; |
|
449 } |
|
450 |
|
451 return tok.type; |
|
452 } |
|
453 |
|
454 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI) |
|
455 { |
|
456 int colon = qName.find(':'); |
|
457 if (colon >= 0) { |
|
458 if (!m_resolver) |
|
459 return false; |
|
460 namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon)); |
|
461 if (namespaceURI.isNull()) |
|
462 return false; |
|
463 localName = qName.substring(colon + 1); |
|
464 } else |
|
465 localName = qName; |
|
466 |
|
467 return true; |
|
468 } |
|
469 |
|
470 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec) |
|
471 { |
|
472 reset(statement); |
|
473 |
|
474 m_resolver = resolver; |
|
475 |
|
476 Parser* oldParser = currentParser; |
|
477 currentParser = this; |
|
478 int parseError = xpathyyparse(this); |
|
479 currentParser = oldParser; |
|
480 |
|
481 if (parseError) { |
|
482 deleteAllValues(m_parseNodes); |
|
483 m_parseNodes.clear(); |
|
484 |
|
485 HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end(); |
|
486 for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) { |
|
487 deleteAllValues(**it); |
|
488 delete *it; |
|
489 } |
|
490 m_predicateVectors.clear(); |
|
491 |
|
492 HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end(); |
|
493 for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) { |
|
494 deleteAllValues(**it); |
|
495 delete *it; |
|
496 } |
|
497 m_expressionVectors.clear(); |
|
498 |
|
499 deleteAllValues(m_strings); |
|
500 m_strings.clear(); |
|
501 |
|
502 deleteAllValues(m_nodeTests); |
|
503 m_nodeTests.clear(); |
|
504 |
|
505 m_topExpr = 0; |
|
506 |
|
507 if (m_gotNamespaceError) |
|
508 ec = NAMESPACE_ERR; |
|
509 else |
|
510 ec = XPathException::INVALID_EXPRESSION_ERR; |
|
511 return 0; |
|
512 } |
|
513 |
|
514 ASSERT(m_parseNodes.size() == 1); |
|
515 ASSERT(*m_parseNodes.begin() == m_topExpr); |
|
516 ASSERT(m_expressionVectors.size() == 0); |
|
517 ASSERT(m_predicateVectors.size() == 0); |
|
518 ASSERT(m_strings.size() == 0); |
|
519 ASSERT(m_nodeTests.size() == 0); |
|
520 |
|
521 m_parseNodes.clear(); |
|
522 Expression* result = m_topExpr; |
|
523 m_topExpr = 0; |
|
524 |
|
525 return result; |
|
526 } |
|
527 |
|
528 void Parser::registerParseNode(ParseNode* node) |
|
529 { |
|
530 if (node == 0) |
|
531 return; |
|
532 |
|
533 ASSERT(!m_parseNodes.contains(node)); |
|
534 |
|
535 m_parseNodes.add(node); |
|
536 } |
|
537 |
|
538 void Parser::unregisterParseNode(ParseNode* node) |
|
539 { |
|
540 if (node == 0) |
|
541 return; |
|
542 |
|
543 ASSERT(m_parseNodes.contains(node)); |
|
544 |
|
545 m_parseNodes.remove(node); |
|
546 } |
|
547 |
|
548 void Parser::registerPredicateVector(Vector<Predicate*>* vector) |
|
549 { |
|
550 if (vector == 0) |
|
551 return; |
|
552 |
|
553 ASSERT(!m_predicateVectors.contains(vector)); |
|
554 |
|
555 m_predicateVectors.add(vector); |
|
556 } |
|
557 |
|
558 void Parser::deletePredicateVector(Vector<Predicate*>* vector) |
|
559 { |
|
560 if (vector == 0) |
|
561 return; |
|
562 |
|
563 ASSERT(m_predicateVectors.contains(vector)); |
|
564 |
|
565 m_predicateVectors.remove(vector); |
|
566 delete vector; |
|
567 } |
|
568 |
|
569 |
|
570 void Parser::registerExpressionVector(Vector<Expression*>* vector) |
|
571 { |
|
572 if (vector == 0) |
|
573 return; |
|
574 |
|
575 ASSERT(!m_expressionVectors.contains(vector)); |
|
576 |
|
577 m_expressionVectors.add(vector); |
|
578 } |
|
579 |
|
580 void Parser::deleteExpressionVector(Vector<Expression*>* vector) |
|
581 { |
|
582 if (vector == 0) |
|
583 return; |
|
584 |
|
585 ASSERT(m_expressionVectors.contains(vector)); |
|
586 |
|
587 m_expressionVectors.remove(vector); |
|
588 delete vector; |
|
589 } |
|
590 |
|
591 void Parser::registerString(String* s) |
|
592 { |
|
593 if (s == 0) |
|
594 return; |
|
595 |
|
596 ASSERT(!m_strings.contains(s)); |
|
597 |
|
598 m_strings.add(s); |
|
599 } |
|
600 |
|
601 void Parser::deleteString(String* s) |
|
602 { |
|
603 if (s == 0) |
|
604 return; |
|
605 |
|
606 ASSERT(m_strings.contains(s)); |
|
607 |
|
608 m_strings.remove(s); |
|
609 delete s; |
|
610 } |
|
611 |
|
612 void Parser::registerNodeTest(Step::NodeTest* t) |
|
613 { |
|
614 if (t == 0) |
|
615 return; |
|
616 |
|
617 ASSERT(!m_nodeTests.contains(t)); |
|
618 |
|
619 m_nodeTests.add(t); |
|
620 } |
|
621 |
|
622 void Parser::deleteNodeTest(Step::NodeTest* t) |
|
623 { |
|
624 if (t == 0) |
|
625 return; |
|
626 |
|
627 ASSERT(m_nodeTests.contains(t)); |
|
628 |
|
629 m_nodeTests.remove(t); |
|
630 delete t; |
|
631 } |
|
632 |
|
633 } |
|
634 } |
|
635 |
|
636 #endif // ENABLE(XPATH) |