|
1 /* |
|
2 * Copyright (C) 2000 Peter Kelly (pmk@post.com) |
|
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved. |
|
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org) |
|
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org) |
|
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) |
|
7 * Copyright (C) 2008 Holger Hans Peter Freyther |
|
8 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) |
|
9 * |
|
10 * This library is free software; you can redistribute it and/or |
|
11 * modify it under the terms of the GNU Library General Public |
|
12 * License as published by the Free Software Foundation; either |
|
13 * version 2 of the License, or (at your option) any later version. |
|
14 * |
|
15 * This library is distributed in the hope that it will be useful, |
|
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
18 * Library General Public License for more details. |
|
19 * |
|
20 * You should have received a copy of the GNU Library General Public License |
|
21 * along with this library; see the file COPYING.LIB. If not, write to |
|
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
|
23 * Boston, MA 02110-1301, USA. |
|
24 */ |
|
25 |
|
26 #include "config.h" |
|
27 #include "XMLDocumentParser.h" |
|
28 |
|
29 #include "CDATASection.h" |
|
30 #include "CachedScript.h" |
|
31 #include "Comment.h" |
|
32 #include "DocLoader.h" |
|
33 #include "Document.h" |
|
34 #include "DocumentFragment.h" |
|
35 #include "DocumentType.h" |
|
36 #include "Frame.h" |
|
37 #include "FrameLoader.h" |
|
38 #include "FrameView.h" |
|
39 #include "HTMLLinkElement.h" |
|
40 #include "HTMLStyleElement.h" |
|
41 #include "LegacyHTMLDocumentParser.h" // for decodeNamedEntity |
|
42 #include "ProcessingInstruction.h" |
|
43 #include "ResourceError.h" |
|
44 #include "ResourceHandle.h" |
|
45 #include "ResourceRequest.h" |
|
46 #include "ResourceResponse.h" |
|
47 #include "ScriptElement.h" |
|
48 #include "ScriptSourceCode.h" |
|
49 #include "ScriptValue.h" |
|
50 #include "TextResourceDecoder.h" |
|
51 #include "TransformSource.h" |
|
52 #include "XMLNSNames.h" |
|
53 #include "XMLDocumentParserScope.h" |
|
54 #include <libxml/parser.h> |
|
55 #include <libxml/parserInternals.h> |
|
56 #include <wtf/text/CString.h> |
|
57 #include <wtf/StringExtras.h> |
|
58 #include <wtf/Threading.h> |
|
59 #include <wtf/UnusedParam.h> |
|
60 #include <wtf/Vector.h> |
|
61 |
|
62 #if ENABLE(XSLT) |
|
63 #include <libxslt/xslt.h> |
|
64 #endif |
|
65 |
|
66 #if ENABLE(XHTMLMP) |
|
67 #include "HTMLNames.h" |
|
68 #include "HTMLScriptElement.h" |
|
69 #endif |
|
70 |
|
71 using namespace std; |
|
72 |
|
73 namespace WebCore { |
|
74 |
|
75 class PendingCallbacks : public Noncopyable { |
|
76 public: |
|
77 ~PendingCallbacks() |
|
78 { |
|
79 deleteAllValues(m_callbacks); |
|
80 } |
|
81 |
|
82 void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces, |
|
83 const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes) |
|
84 { |
|
85 PendingStartElementNSCallback* callback = new PendingStartElementNSCallback; |
|
86 |
|
87 callback->xmlLocalName = xmlStrdup(xmlLocalName); |
|
88 callback->xmlPrefix = xmlStrdup(xmlPrefix); |
|
89 callback->xmlURI = xmlStrdup(xmlURI); |
|
90 callback->nb_namespaces = nb_namespaces; |
|
91 callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2)); |
|
92 for (int i = 0; i < nb_namespaces * 2 ; i++) |
|
93 callback->namespaces[i] = xmlStrdup(namespaces[i]); |
|
94 callback->nb_attributes = nb_attributes; |
|
95 callback->nb_defaulted = nb_defaulted; |
|
96 callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5)); |
|
97 for (int i = 0; i < nb_attributes; i++) { |
|
98 // Each attribute has 5 elements in the array: |
|
99 // name, prefix, uri, value and an end pointer. |
|
100 |
|
101 for (int j = 0; j < 3; j++) |
|
102 callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]); |
|
103 |
|
104 int len = attributes[i * 5 + 4] - attributes[i * 5 + 3]; |
|
105 |
|
106 callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len); |
|
107 callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len; |
|
108 } |
|
109 |
|
110 m_callbacks.append(callback); |
|
111 } |
|
112 |
|
113 void appendEndElementNSCallback() |
|
114 { |
|
115 PendingEndElementNSCallback* callback = new PendingEndElementNSCallback; |
|
116 |
|
117 m_callbacks.append(callback); |
|
118 } |
|
119 |
|
120 void appendCharactersCallback(const xmlChar* s, int len) |
|
121 { |
|
122 PendingCharactersCallback* callback = new PendingCharactersCallback; |
|
123 |
|
124 callback->s = xmlStrndup(s, len); |
|
125 callback->len = len; |
|
126 |
|
127 m_callbacks.append(callback); |
|
128 } |
|
129 |
|
130 void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data) |
|
131 { |
|
132 PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback; |
|
133 |
|
134 callback->target = xmlStrdup(target); |
|
135 callback->data = xmlStrdup(data); |
|
136 |
|
137 m_callbacks.append(callback); |
|
138 } |
|
139 |
|
140 void appendCDATABlockCallback(const xmlChar* s, int len) |
|
141 { |
|
142 PendingCDATABlockCallback* callback = new PendingCDATABlockCallback; |
|
143 |
|
144 callback->s = xmlStrndup(s, len); |
|
145 callback->len = len; |
|
146 |
|
147 m_callbacks.append(callback); |
|
148 } |
|
149 |
|
150 void appendCommentCallback(const xmlChar* s) |
|
151 { |
|
152 PendingCommentCallback* callback = new PendingCommentCallback; |
|
153 |
|
154 callback->s = xmlStrdup(s); |
|
155 |
|
156 m_callbacks.append(callback); |
|
157 } |
|
158 |
|
159 void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID) |
|
160 { |
|
161 PendingInternalSubsetCallback* callback = new PendingInternalSubsetCallback; |
|
162 |
|
163 callback->name = xmlStrdup(name); |
|
164 callback->externalID = xmlStrdup(externalID); |
|
165 callback->systemID = xmlStrdup(systemID); |
|
166 |
|
167 m_callbacks.append(callback); |
|
168 } |
|
169 |
|
170 void appendErrorCallback(XMLDocumentParser::ErrorType type, const xmlChar* message, int lineNumber, int columnNumber) |
|
171 { |
|
172 PendingErrorCallback* callback = new PendingErrorCallback; |
|
173 |
|
174 callback->message = xmlStrdup(message); |
|
175 callback->type = type; |
|
176 callback->lineNumber = lineNumber; |
|
177 callback->columnNumber = columnNumber; |
|
178 |
|
179 m_callbacks.append(callback); |
|
180 } |
|
181 |
|
182 void callAndRemoveFirstCallback(XMLDocumentParser* parser) |
|
183 { |
|
184 OwnPtr<PendingCallback> callback(m_callbacks.takeFirst()); |
|
185 callback->call(parser); |
|
186 } |
|
187 |
|
188 bool isEmpty() const { return m_callbacks.isEmpty(); } |
|
189 |
|
190 private: |
|
191 struct PendingCallback { |
|
192 virtual ~PendingCallback() { } |
|
193 virtual void call(XMLDocumentParser* parser) = 0; |
|
194 }; |
|
195 |
|
196 struct PendingStartElementNSCallback : public PendingCallback { |
|
197 virtual ~PendingStartElementNSCallback() |
|
198 { |
|
199 xmlFree(xmlLocalName); |
|
200 xmlFree(xmlPrefix); |
|
201 xmlFree(xmlURI); |
|
202 for (int i = 0; i < nb_namespaces * 2; i++) |
|
203 xmlFree(namespaces[i]); |
|
204 xmlFree(namespaces); |
|
205 for (int i = 0; i < nb_attributes; i++) |
|
206 for (int j = 0; j < 4; j++) |
|
207 xmlFree(attributes[i * 5 + j]); |
|
208 xmlFree(attributes); |
|
209 } |
|
210 |
|
211 virtual void call(XMLDocumentParser* parser) |
|
212 { |
|
213 parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI, |
|
214 nb_namespaces, const_cast<const xmlChar**>(namespaces), |
|
215 nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes)); |
|
216 } |
|
217 |
|
218 xmlChar* xmlLocalName; |
|
219 xmlChar* xmlPrefix; |
|
220 xmlChar* xmlURI; |
|
221 int nb_namespaces; |
|
222 xmlChar** namespaces; |
|
223 int nb_attributes; |
|
224 int nb_defaulted; |
|
225 xmlChar** attributes; |
|
226 }; |
|
227 |
|
228 struct PendingEndElementNSCallback : public PendingCallback { |
|
229 virtual void call(XMLDocumentParser* parser) |
|
230 { |
|
231 parser->endElementNs(); |
|
232 } |
|
233 }; |
|
234 |
|
235 struct PendingCharactersCallback : public PendingCallback { |
|
236 virtual ~PendingCharactersCallback() |
|
237 { |
|
238 xmlFree(s); |
|
239 } |
|
240 |
|
241 virtual void call(XMLDocumentParser* parser) |
|
242 { |
|
243 parser->characters(s, len); |
|
244 } |
|
245 |
|
246 xmlChar* s; |
|
247 int len; |
|
248 }; |
|
249 |
|
250 struct PendingProcessingInstructionCallback : public PendingCallback { |
|
251 virtual ~PendingProcessingInstructionCallback() |
|
252 { |
|
253 xmlFree(target); |
|
254 xmlFree(data); |
|
255 } |
|
256 |
|
257 virtual void call(XMLDocumentParser* parser) |
|
258 { |
|
259 parser->processingInstruction(target, data); |
|
260 } |
|
261 |
|
262 xmlChar* target; |
|
263 xmlChar* data; |
|
264 }; |
|
265 |
|
266 struct PendingCDATABlockCallback : public PendingCallback { |
|
267 virtual ~PendingCDATABlockCallback() |
|
268 { |
|
269 xmlFree(s); |
|
270 } |
|
271 |
|
272 virtual void call(XMLDocumentParser* parser) |
|
273 { |
|
274 parser->cdataBlock(s, len); |
|
275 } |
|
276 |
|
277 xmlChar* s; |
|
278 int len; |
|
279 }; |
|
280 |
|
281 struct PendingCommentCallback : public PendingCallback { |
|
282 virtual ~PendingCommentCallback() |
|
283 { |
|
284 xmlFree(s); |
|
285 } |
|
286 |
|
287 virtual void call(XMLDocumentParser* parser) |
|
288 { |
|
289 parser->comment(s); |
|
290 } |
|
291 |
|
292 xmlChar* s; |
|
293 }; |
|
294 |
|
295 struct PendingInternalSubsetCallback : public PendingCallback { |
|
296 virtual ~PendingInternalSubsetCallback() |
|
297 { |
|
298 xmlFree(name); |
|
299 xmlFree(externalID); |
|
300 xmlFree(systemID); |
|
301 } |
|
302 |
|
303 virtual void call(XMLDocumentParser* parser) |
|
304 { |
|
305 parser->internalSubset(name, externalID, systemID); |
|
306 } |
|
307 |
|
308 xmlChar* name; |
|
309 xmlChar* externalID; |
|
310 xmlChar* systemID; |
|
311 }; |
|
312 |
|
313 struct PendingErrorCallback: public PendingCallback { |
|
314 virtual ~PendingErrorCallback() |
|
315 { |
|
316 xmlFree(message); |
|
317 } |
|
318 |
|
319 virtual void call(XMLDocumentParser* parser) |
|
320 { |
|
321 parser->handleError(type, reinterpret_cast<char*>(message), lineNumber, columnNumber); |
|
322 } |
|
323 |
|
324 XMLDocumentParser::ErrorType type; |
|
325 xmlChar* message; |
|
326 int lineNumber; |
|
327 int columnNumber; |
|
328 }; |
|
329 |
|
330 Deque<PendingCallback*> m_callbacks; |
|
331 }; |
|
332 // -------------------------------- |
|
333 |
|
334 static int globalDescriptor = 0; |
|
335 static ThreadIdentifier libxmlLoaderThread = 0; |
|
336 |
|
337 static int matchFunc(const char*) |
|
338 { |
|
339 // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid |
|
340 // interfering with client applications that also use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353 |
|
341 return XMLDocumentParserScope::currentDocLoader && currentThread() == libxmlLoaderThread; |
|
342 } |
|
343 |
|
344 class OffsetBuffer { |
|
345 public: |
|
346 OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { } |
|
347 |
|
348 int readOutBytes(char* outputBuffer, unsigned askedToRead) |
|
349 { |
|
350 unsigned bytesLeft = m_buffer.size() - m_currentOffset; |
|
351 unsigned lenToCopy = min(askedToRead, bytesLeft); |
|
352 if (lenToCopy) { |
|
353 memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy); |
|
354 m_currentOffset += lenToCopy; |
|
355 } |
|
356 return lenToCopy; |
|
357 } |
|
358 |
|
359 private: |
|
360 Vector<char> m_buffer; |
|
361 unsigned m_currentOffset; |
|
362 }; |
|
363 |
|
364 static bool shouldAllowExternalLoad(const KURL& url) |
|
365 { |
|
366 String urlString = url.string(); |
|
367 |
|
368 // On non-Windows platforms libxml asks for this URL, the |
|
369 // "XML_XML_DEFAULT_CATALOG", on initialization. |
|
370 if (urlString == "file:///etc/xml/catalog") |
|
371 return false; |
|
372 |
|
373 // On Windows, libxml computes a URL relative to where its DLL resides. |
|
374 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false)) |
|
375 return false; |
|
376 |
|
377 // The most common DTD. There isn't much point in hammering www.w3c.org |
|
378 // by requesting this URL for every XHTML document. |
|
379 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false)) |
|
380 return false; |
|
381 |
|
382 // Similarly, there isn't much point in requesting the SVG DTD. |
|
383 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false)) |
|
384 return false; |
|
385 |
|
386 // The libxml doesn't give us a lot of context for deciding whether to |
|
387 // allow this request. In the worst case, this load could be for an |
|
388 // external entity and the resulting document could simply read the |
|
389 // retrieved content. If we had more context, we could potentially allow |
|
390 // the parser to load a DTD. As things stand, we take the conservative |
|
391 // route and allow same-origin requests only. |
|
392 if (!XMLDocumentParserScope::currentDocLoader->doc()->securityOrigin()->canRequest(url)) { |
|
393 XMLDocumentParserScope::currentDocLoader->printAccessDeniedMessage(url); |
|
394 return false; |
|
395 } |
|
396 |
|
397 return true; |
|
398 } |
|
399 |
|
400 static void* openFunc(const char* uri) |
|
401 { |
|
402 ASSERT(XMLDocumentParserScope::currentDocLoader); |
|
403 ASSERT(currentThread() == libxmlLoaderThread); |
|
404 |
|
405 KURL url(KURL(), uri); |
|
406 |
|
407 if (!shouldAllowExternalLoad(url)) |
|
408 return &globalDescriptor; |
|
409 |
|
410 ResourceError error; |
|
411 ResourceResponse response; |
|
412 Vector<char> data; |
|
413 |
|
414 |
|
415 { |
|
416 DocLoader* docLoader = XMLDocumentParserScope::currentDocLoader; |
|
417 XMLDocumentParserScope scope(0); |
|
418 // FIXME: We should restore the original global error handler as well. |
|
419 |
|
420 if (docLoader->frame()) |
|
421 docLoader->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data); |
|
422 } |
|
423 |
|
424 // We have to check the URL again after the load to catch redirects. |
|
425 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>. |
|
426 if (!shouldAllowExternalLoad(response.url())) |
|
427 return &globalDescriptor; |
|
428 |
|
429 return new OffsetBuffer(data); |
|
430 } |
|
431 |
|
432 static int readFunc(void* context, char* buffer, int len) |
|
433 { |
|
434 // Do 0-byte reads in case of a null descriptor |
|
435 if (context == &globalDescriptor) |
|
436 return 0; |
|
437 |
|
438 OffsetBuffer* data = static_cast<OffsetBuffer*>(context); |
|
439 return data->readOutBytes(buffer, len); |
|
440 } |
|
441 |
|
// libxml2 "write" callback: ignores its arguments and reports that zero bytes
// were written.
static int writeFunc(void*, const char*, int)
{
    const int bytesWritten = 0; // Always just do 0-byte writes
    return bytesWritten;
}
|
447 |
|
448 static int closeFunc(void* context) |
|
449 { |
|
450 if (context != &globalDescriptor) { |
|
451 OffsetBuffer* data = static_cast<OffsetBuffer*>(context); |
|
452 delete data; |
|
453 } |
|
454 return 0; |
|
455 } |
|
456 |
|
457 #if ENABLE(XSLT) |
|
458 static void errorFunc(void*, const char*, ...) |
|
459 { |
|
460 // FIXME: It would be nice to display error messages somewhere. |
|
461 } |
|
462 #endif |
|
463 |
|
464 static bool didInit = false; |
|
465 |
|
466 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData) |
|
467 { |
|
468 if (!didInit) { |
|
469 xmlInitParser(); |
|
470 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); |
|
471 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); |
|
472 libxmlLoaderThread = currentThread(); |
|
473 didInit = true; |
|
474 } |
|
475 |
|
476 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0); |
|
477 parser->_private = userData; |
|
478 parser->replaceEntities = true; |
|
479 const UChar BOM = 0xFEFF; |
|
480 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); |
|
481 xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); |
|
482 |
|
483 return adoptRef(new XMLParserContext(parser)); |
|
484 } |
|
485 |
|
486 |
|
487 // Chunk should be encoded in UTF-8 |
|
488 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk) |
|
489 { |
|
490 if (!didInit) { |
|
491 xmlInitParser(); |
|
492 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc); |
|
493 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc); |
|
494 libxmlLoaderThread = currentThread(); |
|
495 didInit = true; |
|
496 } |
|
497 |
|
498 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk, xmlStrlen((const xmlChar*)chunk)); |
|
499 |
|
500 if (!parser) |
|
501 return 0; |
|
502 |
|
503 // Copy the sax handler |
|
504 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler)); |
|
505 |
|
506 // Set parser options. |
|
507 // XML_PARSE_NODICT: default dictionary option. |
|
508 // XML_PARSE_NOENT: force entities substitutions. |
|
509 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT); |
|
510 |
|
511 // Internal initialization |
|
512 parser->sax2 = 1; |
|
513 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT |
|
514 parser->depth = 0; |
|
515 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3); |
|
516 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5); |
|
517 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36); |
|
518 parser->_private = userData; |
|
519 |
|
520 return adoptRef(new XMLParserContext(parser)); |
|
521 } |
|
522 |
|
523 // -------------------------------- |
|
524 |
|
525 XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView) |
|
526 : ScriptableDocumentParser(document) |
|
527 , m_view(frameView) |
|
528 , m_context(0) |
|
529 , m_pendingCallbacks(new PendingCallbacks) |
|
530 , m_currentNode(document) |
|
531 , m_sawError(false) |
|
532 , m_sawXSLTransform(false) |
|
533 , m_sawFirstElement(false) |
|
534 , m_isXHTMLDocument(false) |
|
535 #if ENABLE(XHTMLMP) |
|
536 , m_isXHTMLMPDocument(false) |
|
537 , m_hasDocTypeDeclaration(false) |
|
538 #endif |
|
539 , m_parserPaused(false) |
|
540 , m_requestingScript(false) |
|
541 , m_finishCalled(false) |
|
542 , m_errorCount(0) |
|
543 , m_lastErrorLine(0) |
|
544 , m_lastErrorColumn(0) |
|
545 , m_pendingScript(0) |
|
546 , m_scriptStartLine(0) |
|
547 , m_parsingFragment(false) |
|
548 , m_scriptingPermission(FragmentScriptingAllowed) |
|
549 { |
|
550 } |
|
551 |
|
552 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, FragmentScriptingPermission scriptingPermission) |
|
553 : ScriptableDocumentParser(fragment->document()) |
|
554 , m_view(0) |
|
555 , m_context(0) |
|
556 , m_pendingCallbacks(new PendingCallbacks) |
|
557 , m_currentNode(fragment) |
|
558 , m_sawError(false) |
|
559 , m_sawXSLTransform(false) |
|
560 , m_sawFirstElement(false) |
|
561 , m_isXHTMLDocument(false) |
|
562 #if ENABLE(XHTMLMP) |
|
563 , m_isXHTMLMPDocument(false) |
|
564 , m_hasDocTypeDeclaration(false) |
|
565 #endif |
|
566 , m_parserPaused(false) |
|
567 , m_requestingScript(false) |
|
568 , m_finishCalled(false) |
|
569 , m_errorCount(0) |
|
570 , m_lastErrorLine(0) |
|
571 , m_lastErrorColumn(0) |
|
572 , m_pendingScript(0) |
|
573 , m_scriptStartLine(0) |
|
574 , m_parsingFragment(true) |
|
575 , m_scriptingPermission(scriptingPermission) |
|
576 { |
|
577 fragment->ref(); |
|
578 |
|
579 // Add namespaces based on the parent node |
|
580 Vector<Element*> elemStack; |
|
581 while (parentElement) { |
|
582 elemStack.append(parentElement); |
|
583 |
|
584 Node* n = parentElement->parentNode(); |
|
585 if (!n || !n->isElementNode()) |
|
586 break; |
|
587 parentElement = static_cast<Element*>(n); |
|
588 } |
|
589 |
|
590 if (elemStack.isEmpty()) |
|
591 return; |
|
592 |
|
593 for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) { |
|
594 if (NamedNodeMap* attrs = element->attributes()) { |
|
595 for (unsigned i = 0; i < attrs->length(); i++) { |
|
596 Attribute* attr = attrs->attributeItem(i); |
|
597 if (attr->localName() == xmlnsAtom) |
|
598 m_defaultNamespaceURI = attr->value(); |
|
599 else if (attr->prefix() == xmlnsAtom) |
|
600 m_prefixToNamespaceMap.set(attr->localName(), attr->value()); |
|
601 } |
|
602 } |
|
603 } |
|
604 |
|
605 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace. |
|
606 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument()) |
|
607 m_defaultNamespaceURI = parentElement->namespaceURI(); |
|
608 } |
|
609 |
|
610 XMLParserContext::~XMLParserContext() |
|
611 { |
|
612 if (m_context->myDoc) |
|
613 xmlFreeDoc(m_context->myDoc); |
|
614 xmlFreeParserCtxt(m_context); |
|
615 } |
|
616 |
|
617 XMLDocumentParser::~XMLDocumentParser() |
|
618 { |
|
619 clearCurrentNodeStack(); |
|
620 if (m_pendingScript) |
|
621 m_pendingScript->removeClient(this); |
|
622 } |
|
623 |
|
624 void XMLDocumentParser::doWrite(const String& parseString) |
|
625 { |
|
626 if (!m_context) |
|
627 initializeParserContext(); |
|
628 |
|
629 // Protect the libxml context from deletion during a callback |
|
630 RefPtr<XMLParserContext> context = m_context; |
|
631 |
|
632 // libXML throws an error if you try to switch the encoding for an empty string. |
|
633 if (parseString.length()) { |
|
634 // Hack around libxml2's lack of encoding overide support by manually |
|
635 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml |
|
636 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks |
|
637 // and switch encodings, causing the parse to fail. |
|
638 const UChar BOM = 0xFEFF; |
|
639 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); |
|
640 xmlSwitchEncoding(context->context(), BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE); |
|
641 |
|
642 XMLDocumentParserScope scope(document()->docLoader()); |
|
643 xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0); |
|
644 } |
|
645 |
|
646 if (document()->decoder() && document()->decoder()->sawError()) { |
|
647 // If the decoder saw an error, report it as fatal (stops parsing) |
|
648 handleError(fatal, "Encoding error", context->context()->input->line, context->context()->input->col); |
|
649 } |
|
650 |
|
651 return; |
|
652 } |
|
653 |
|
654 static inline String toString(const xmlChar* str, unsigned len) |
|
655 { |
|
656 return UTF8Encoding().decode(reinterpret_cast<const char*>(str), len); |
|
657 } |
|
658 |
|
659 static inline String toString(const xmlChar* str) |
|
660 { |
|
661 if (!str) |
|
662 return String(); |
|
663 |
|
664 return UTF8Encoding().decode(reinterpret_cast<const char*>(str), strlen(reinterpret_cast<const char*>(str))); |
|
665 } |
|
666 |
|
667 struct _xmlSAX2Namespace { |
|
668 const xmlChar* prefix; |
|
669 const xmlChar* uri; |
|
670 }; |
|
671 typedef struct _xmlSAX2Namespace xmlSAX2Namespace; |
|
672 |
|
673 static inline void handleElementNamespaces(Element* newElement, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission) |
|
674 { |
|
675 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces); |
|
676 for (int i = 0; i < nb_namespaces; i++) { |
|
677 AtomicString namespaceQName = xmlnsAtom; |
|
678 String namespaceURI = toString(namespaces[i].uri); |
|
679 if (namespaces[i].prefix) |
|
680 namespaceQName = "xmlns:" + toString(namespaces[i].prefix); |
|
681 newElement->setAttributeNS(XMLNSNames::xmlnsNamespaceURI, namespaceQName, namespaceURI, ec, scriptingPermission); |
|
682 if (ec) // exception setting attributes |
|
683 return; |
|
684 } |
|
685 } |
|
686 |
|
687 struct _xmlSAX2Attributes { |
|
688 const xmlChar* localname; |
|
689 const xmlChar* prefix; |
|
690 const xmlChar* uri; |
|
691 const xmlChar* value; |
|
692 const xmlChar* end; |
|
693 }; |
|
694 typedef struct _xmlSAX2Attributes xmlSAX2Attributes; |
|
695 |
|
696 static inline void handleElementAttributes(Element* newElement, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission) |
|
697 { |
|
698 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); |
|
699 for (int i = 0; i < nb_attributes; i++) { |
|
700 String attrLocalName = toString(attributes[i].localname); |
|
701 int valueLength = (int) (attributes[i].end - attributes[i].value); |
|
702 String attrValue = toString(attributes[i].value, valueLength); |
|
703 String attrPrefix = toString(attributes[i].prefix); |
|
704 String attrURI = attrPrefix.isEmpty() ? String() : toString(attributes[i].uri); |
|
705 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName; |
|
706 |
|
707 newElement->setAttributeNS(attrURI, attrQName, attrValue, ec, scriptingPermission); |
|
708 if (ec) // exception setting attributes |
|
709 return; |
|
710 } |
|
711 } |
|
712 |
|
713 void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces, |
|
714 const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes) |
|
715 { |
|
716 if (m_parserStopped) |
|
717 return; |
|
718 |
|
719 if (m_parserPaused) { |
|
720 m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces, |
|
721 nb_attributes, nb_defaulted, libxmlAttributes); |
|
722 return; |
|
723 } |
|
724 |
|
725 #if ENABLE(XHTMLMP) |
|
726 // check if the DOCTYPE Declaration of XHTMLMP document exists |
|
727 if (!m_hasDocTypeDeclaration && document()->isXHTMLMPDocument()) { |
|
728 handleError(fatal, "DOCTYPE declaration lost.", lineNumber(), columnNumber()); |
|
729 return; |
|
730 } |
|
731 #endif |
|
732 |
|
733 exitText(); |
|
734 |
|
735 String localName = toString(xmlLocalName); |
|
736 String uri = toString(xmlURI); |
|
737 String prefix = toString(xmlPrefix); |
|
738 |
|
739 if (m_parsingFragment && uri.isNull()) { |
|
740 if (!prefix.isNull()) |
|
741 uri = m_prefixToNamespaceMap.get(prefix); |
|
742 else |
|
743 uri = m_defaultNamespaceURI; |
|
744 } |
|
745 |
|
746 #if ENABLE(XHTMLMP) |
|
747 if (!m_sawFirstElement && isXHTMLMPDocument()) { |
|
748 // As per the section 7.1 of OMA-WAP-XHTMLMP-V1_1-20061020-A.pdf, |
|
749 // we should make sure that the root element MUST be 'html' and |
|
750 // ensure the name of the default namespace on the root elment 'html' |
|
751 // MUST be 'http://www.w3.org/1999/xhtml' |
|
752 if (localName != HTMLNames::htmlTag.localName()) { |
|
753 handleError(fatal, "XHTMLMP document expects 'html' as root element.", lineNumber(), columnNumber()); |
|
754 return; |
|
755 } |
|
756 |
|
757 if (uri.isNull()) { |
|
758 m_defaultNamespaceURI = HTMLNames::xhtmlNamespaceURI; |
|
759 uri = m_defaultNamespaceURI; |
|
760 } |
|
761 } |
|
762 #endif |
|
763 |
|
764 bool isFirstElement = !m_sawFirstElement; |
|
765 m_sawFirstElement = true; |
|
766 |
|
767 QualifiedName qName(prefix, localName, uri); |
|
768 RefPtr<Element> newElement = document()->createElement(qName, true); |
|
769 if (!newElement) { |
|
770 stopParsing(); |
|
771 return; |
|
772 } |
|
773 |
|
774 ExceptionCode ec = 0; |
|
775 handleElementNamespaces(newElement.get(), libxmlNamespaces, nb_namespaces, ec, m_scriptingPermission); |
|
776 if (ec) { |
|
777 stopParsing(); |
|
778 return; |
|
779 } |
|
780 |
|
781 handleElementAttributes(newElement.get(), libxmlAttributes, nb_attributes, ec, m_scriptingPermission); |
|
782 if (ec) { |
|
783 stopParsing(); |
|
784 return; |
|
785 } |
|
786 |
|
787 newElement->beginParsingChildren(); |
|
788 |
|
789 ScriptElement* scriptElement = toScriptElement(newElement.get()); |
|
790 if (scriptElement) |
|
791 m_scriptStartLine = lineNumber(); |
|
792 |
|
793 if (!m_currentNode->legacyParserAddChild(newElement.get())) { |
|
794 stopParsing(); |
|
795 return; |
|
796 } |
|
797 |
|
798 pushCurrentNode(newElement.get()); |
|
799 if (m_view && !newElement->attached()) |
|
800 newElement->attach(); |
|
801 |
|
802 if (!m_parsingFragment && isFirstElement && document()->frame()) |
|
803 document()->frame()->loader()->dispatchDocumentElementAvailable(); |
|
804 } |
|
805 |
|
806 void XMLDocumentParser::endElementNs() |
|
807 { |
|
808 if (m_parserStopped) |
|
809 return; |
|
810 |
|
811 if (m_parserPaused) { |
|
812 m_pendingCallbacks->appendEndElementNSCallback(); |
|
813 return; |
|
814 } |
|
815 |
|
816 exitText(); |
|
817 |
|
818 Node* n = m_currentNode; |
|
819 n->finishParsingChildren(); |
|
820 |
|
821 if (m_scriptingPermission == FragmentScriptingNotAllowed && n->isElementNode() && toScriptElement(static_cast<Element*>(n))) { |
|
822 popCurrentNode(); |
|
823 ExceptionCode ec; |
|
824 n->remove(ec); |
|
825 return; |
|
826 } |
|
827 |
|
828 if (!n->isElementNode() || !m_view) { |
|
829 popCurrentNode(); |
|
830 return; |
|
831 } |
|
832 |
|
833 Element* element = static_cast<Element*>(n); |
|
834 |
|
835 // The element's parent may have already been removed from document. |
|
836 // Parsing continues in this case, but scripts aren't executed. |
|
837 if (!element->inDocument()) { |
|
838 popCurrentNode(); |
|
839 return; |
|
840 } |
|
841 |
|
842 ScriptElement* scriptElement = toScriptElement(element); |
|
843 if (!scriptElement) { |
|
844 popCurrentNode(); |
|
845 return; |
|
846 } |
|
847 |
|
848 // Don't load external scripts for standalone documents (for now). |
|
849 ASSERT(!m_pendingScript); |
|
850 m_requestingScript = true; |
|
851 |
|
852 #if ENABLE(XHTMLMP) |
|
853 if (!scriptElement->shouldExecuteAsJavaScript()) |
|
854 document()->setShouldProcessNoscriptElement(true); |
|
855 else |
|
856 #endif |
|
857 { |
|
858 String scriptHref = scriptElement->sourceAttributeValue(); |
|
859 if (!scriptHref.isEmpty()) { |
|
860 // we have a src attribute |
|
861 String scriptCharset = scriptElement->scriptCharset(); |
|
862 if (element->dispatchBeforeLoadEvent(scriptHref) && |
|
863 (m_pendingScript = document()->docLoader()->requestScript(scriptHref, scriptCharset))) { |
|
864 m_scriptElement = element; |
|
865 m_pendingScript->addClient(this); |
|
866 |
|
867 // m_pendingScript will be 0 if script was already loaded and ref() executed it |
|
868 if (m_pendingScript) |
|
869 pauseParsing(); |
|
870 } else |
|
871 m_scriptElement = 0; |
|
872 } else |
|
873 m_view->frame()->script()->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartLine)); |
|
874 } |
|
875 m_requestingScript = false; |
|
876 popCurrentNode(); |
|
877 } |
|
878 |
|
879 void XMLDocumentParser::characters(const xmlChar* s, int len) |
|
880 { |
|
881 if (m_parserStopped) |
|
882 return; |
|
883 |
|
884 if (m_parserPaused) { |
|
885 m_pendingCallbacks->appendCharactersCallback(s, len); |
|
886 return; |
|
887 } |
|
888 |
|
889 if (m_currentNode->isTextNode() || enterText()) |
|
890 m_bufferedText.append(s, len); |
|
891 } |
|
892 |
|
893 void XMLDocumentParser::error(ErrorType type, const char* message, va_list args) |
|
894 { |
|
895 if (m_parserStopped) |
|
896 return; |
|
897 |
|
898 #if COMPILER(MSVC) || COMPILER(RVCT) |
|
899 char m[1024]; |
|
900 vsnprintf(m, sizeof(m) - 1, message, args); |
|
901 #else |
|
902 char* m; |
|
903 if (vasprintf(&m, message, args) == -1) |
|
904 return; |
|
905 #endif |
|
906 |
|
907 if (m_parserPaused) |
|
908 m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(m), lineNumber(), columnNumber()); |
|
909 else |
|
910 handleError(type, m, lineNumber(), columnNumber()); |
|
911 |
|
912 #if !COMPILER(MSVC) && !COMPILER(RVCT) |
|
913 free(m); |
|
914 #endif |
|
915 } |
|
916 |
|
917 void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data) |
|
918 { |
|
919 if (m_parserStopped) |
|
920 return; |
|
921 |
|
922 if (m_parserPaused) { |
|
923 m_pendingCallbacks->appendProcessingInstructionCallback(target, data); |
|
924 return; |
|
925 } |
|
926 |
|
927 exitText(); |
|
928 |
|
929 // ### handle exceptions |
|
930 int exception = 0; |
|
931 RefPtr<ProcessingInstruction> pi = document()->createProcessingInstruction( |
|
932 toString(target), toString(data), exception); |
|
933 if (exception) |
|
934 return; |
|
935 |
|
936 pi->setCreatedByParser(true); |
|
937 |
|
938 if (!m_currentNode->legacyParserAddChild(pi.get())) |
|
939 return; |
|
940 if (m_view && !pi->attached()) |
|
941 pi->attach(); |
|
942 |
|
943 pi->finishParsingChildren(); |
|
944 |
|
945 #if ENABLE(XSLT) |
|
946 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL(); |
|
947 if (m_sawXSLTransform && !document()->transformSourceDocument()) |
|
948 stopParsing(); |
|
949 #endif |
|
950 } |
|
951 |
|
952 void XMLDocumentParser::cdataBlock(const xmlChar* s, int len) |
|
953 { |
|
954 if (m_parserStopped) |
|
955 return; |
|
956 |
|
957 if (m_parserPaused) { |
|
958 m_pendingCallbacks->appendCDATABlockCallback(s, len); |
|
959 return; |
|
960 } |
|
961 |
|
962 exitText(); |
|
963 |
|
964 RefPtr<Node> newNode = CDATASection::create(document(), toString(s, len)); |
|
965 if (!m_currentNode->legacyParserAddChild(newNode.get())) |
|
966 return; |
|
967 if (m_view && !newNode->attached()) |
|
968 newNode->attach(); |
|
969 } |
|
970 |
|
971 void XMLDocumentParser::comment(const xmlChar* s) |
|
972 { |
|
973 if (m_parserStopped) |
|
974 return; |
|
975 |
|
976 if (m_parserPaused) { |
|
977 m_pendingCallbacks->appendCommentCallback(s); |
|
978 return; |
|
979 } |
|
980 |
|
981 exitText(); |
|
982 |
|
983 RefPtr<Node> newNode = Comment::create(document(), toString(s)); |
|
984 m_currentNode->legacyParserAddChild(newNode.get()); |
|
985 if (m_view && !newNode->attached()) |
|
986 newNode->attach(); |
|
987 } |
|
988 |
|
989 void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone) |
|
990 { |
|
991 ExceptionCode ec = 0; |
|
992 |
|
993 if (version) |
|
994 document()->setXMLVersion(toString(version), ec); |
|
995 document()->setXMLStandalone(standalone == 1, ec); // possible values are 0, 1, and -1 |
|
996 if (encoding) |
|
997 document()->setXMLEncoding(toString(encoding)); |
|
998 } |
|
999 |
|
1000 void XMLDocumentParser::endDocument() |
|
1001 { |
|
1002 exitText(); |
|
1003 #if ENABLE(XHTMLMP) |
|
1004 m_hasDocTypeDeclaration = false; |
|
1005 #endif |
|
1006 } |
|
1007 |
|
1008 void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID) |
|
1009 { |
|
1010 if (m_parserStopped) |
|
1011 return; |
|
1012 |
|
1013 if (m_parserPaused) { |
|
1014 m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID); |
|
1015 return; |
|
1016 } |
|
1017 |
|
1018 if (document()) { |
|
1019 #if ENABLE(WML) || ENABLE(XHTMLMP) |
|
1020 String extId = toString(externalID); |
|
1021 #endif |
|
1022 #if ENABLE(WML) |
|
1023 if (isWMLDocument() |
|
1024 && extId != "-//WAPFORUM//DTD WML 1.3//EN" |
|
1025 && extId != "-//WAPFORUM//DTD WML 1.2//EN" |
|
1026 && extId != "-//WAPFORUM//DTD WML 1.1//EN" |
|
1027 && extId != "-//WAPFORUM//DTD WML 1.0//EN") |
|
1028 handleError(fatal, "Invalid DTD Public ID", lineNumber(), columnNumber()); |
|
1029 #endif |
|
1030 #if ENABLE(XHTMLMP) |
|
1031 String dtdName = toString(name); |
|
1032 if (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" |
|
1033 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") { |
|
1034 if (dtdName != HTMLNames::htmlTag.localName()) { |
|
1035 handleError(fatal, "Invalid DOCTYPE declaration, expected 'html' as root element.", lineNumber(), columnNumber()); |
|
1036 return; |
|
1037 } |
|
1038 |
|
1039 if (document()->isXHTMLMPDocument()) |
|
1040 setIsXHTMLMPDocument(true); |
|
1041 else |
|
1042 setIsXHTMLDocument(true); |
|
1043 |
|
1044 m_hasDocTypeDeclaration = true; |
|
1045 } |
|
1046 #endif |
|
1047 |
|
1048 document()->legacyParserAddChild(DocumentType::create(document(), toString(name), toString(externalID), toString(systemID))); |
|
1049 } |
|
1050 } |
|
1051 |
|
1052 static inline XMLDocumentParser* getParser(void* closure) |
|
1053 { |
|
1054 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); |
|
1055 return static_cast<XMLDocumentParser*>(ctxt->_private); |
|
1056 } |
|
1057 |
|
1058 // This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219 |
|
1059 // Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity. |
|
1060 static inline bool hackAroundLibXMLEntityBug(void* closure) |
|
1061 { |
|
1062 #if LIBXML_VERSION >= 20627 |
|
1063 UNUSED_PARAM(closure); |
|
1064 |
|
1065 // This bug has been fixed in libxml 2.6.27. |
|
1066 return false; |
|
1067 #else |
|
1068 return static_cast<xmlParserCtxtPtr>(closure)->node; |
|
1069 #endif |
|
1070 } |
|
1071 |
|
1072 static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes) |
|
1073 { |
|
1074 if (hackAroundLibXMLEntityBug(closure)) |
|
1075 return; |
|
1076 |
|
1077 getParser(closure)->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes); |
|
1078 } |
|
1079 |
|
1080 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*) |
|
1081 { |
|
1082 if (hackAroundLibXMLEntityBug(closure)) |
|
1083 return; |
|
1084 |
|
1085 getParser(closure)->endElementNs(); |
|
1086 } |
|
1087 |
|
1088 static void charactersHandler(void* closure, const xmlChar* s, int len) |
|
1089 { |
|
1090 if (hackAroundLibXMLEntityBug(closure)) |
|
1091 return; |
|
1092 |
|
1093 getParser(closure)->characters(s, len); |
|
1094 } |
|
1095 |
|
1096 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data) |
|
1097 { |
|
1098 if (hackAroundLibXMLEntityBug(closure)) |
|
1099 return; |
|
1100 |
|
1101 getParser(closure)->processingInstruction(target, data); |
|
1102 } |
|
1103 |
|
1104 static void cdataBlockHandler(void* closure, const xmlChar* s, int len) |
|
1105 { |
|
1106 if (hackAroundLibXMLEntityBug(closure)) |
|
1107 return; |
|
1108 |
|
1109 getParser(closure)->cdataBlock(s, len); |
|
1110 } |
|
1111 |
|
1112 static void commentHandler(void* closure, const xmlChar* comment) |
|
1113 { |
|
1114 if (hackAroundLibXMLEntityBug(closure)) |
|
1115 return; |
|
1116 |
|
1117 getParser(closure)->comment(comment); |
|
1118 } |
|
1119 |
|
1120 WTF_ATTRIBUTE_PRINTF(2, 3) |
|
1121 static void warningHandler(void* closure, const char* message, ...) |
|
1122 { |
|
1123 va_list args; |
|
1124 va_start(args, message); |
|
1125 getParser(closure)->error(XMLDocumentParser::warning, message, args); |
|
1126 va_end(args); |
|
1127 } |
|
1128 |
|
1129 WTF_ATTRIBUTE_PRINTF(2, 3) |
|
1130 static void fatalErrorHandler(void* closure, const char* message, ...) |
|
1131 { |
|
1132 va_list args; |
|
1133 va_start(args, message); |
|
1134 getParser(closure)->error(XMLDocumentParser::fatal, message, args); |
|
1135 va_end(args); |
|
1136 } |
|
1137 |
|
1138 WTF_ATTRIBUTE_PRINTF(2, 3) |
|
1139 static void normalErrorHandler(void* closure, const char* message, ...) |
|
1140 { |
|
1141 va_list args; |
|
1142 va_start(args, message); |
|
1143 getParser(closure)->error(XMLDocumentParser::nonFatal, message, args); |
|
1144 va_end(args); |
|
1145 } |
|
1146 |
|
1147 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is |
|
1148 // a hack to avoid malloc/free. Using a global variable like this could cause trouble |
|
1149 // if libxml implementation details were to change |
|
1150 static xmlChar sharedXHTMLEntityResult[5] = {0, 0, 0, 0, 0}; |
|
1151 |
|
1152 static xmlEntityPtr sharedXHTMLEntity() |
|
1153 { |
|
1154 static xmlEntity entity; |
|
1155 if (!entity.type) { |
|
1156 entity.type = XML_ENTITY_DECL; |
|
1157 entity.orig = sharedXHTMLEntityResult; |
|
1158 entity.content = sharedXHTMLEntityResult; |
|
1159 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY; |
|
1160 } |
|
1161 return &entity; |
|
1162 } |
|
1163 |
|
1164 static xmlEntityPtr getXHTMLEntity(const xmlChar* name) |
|
1165 { |
|
1166 UChar c = decodeNamedEntity(reinterpret_cast<const char*>(name)); |
|
1167 if (!c) |
|
1168 return 0; |
|
1169 |
|
1170 CString value = String(&c, 1).utf8(); |
|
1171 ASSERT(value.length() < 5); |
|
1172 xmlEntityPtr entity = sharedXHTMLEntity(); |
|
1173 entity->length = value.length(); |
|
1174 entity->name = name; |
|
1175 memcpy(sharedXHTMLEntityResult, value.data(), entity->length + 1); |
|
1176 |
|
1177 return entity; |
|
1178 } |
|
1179 |
|
1180 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name) |
|
1181 { |
|
1182 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); |
|
1183 xmlEntityPtr ent = xmlGetPredefinedEntity(name); |
|
1184 if (ent) { |
|
1185 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY; |
|
1186 return ent; |
|
1187 } |
|
1188 |
|
1189 ent = xmlGetDocEntity(ctxt->myDoc, name); |
|
1190 if (!ent && (getParser(closure)->isXHTMLDocument() |
|
1191 #if ENABLE(XHTMLMP) |
|
1192 || getParser(closure)->isXHTMLMPDocument() |
|
1193 #endif |
|
1194 #if ENABLE(WML) |
|
1195 || getParser(closure)->isWMLDocument() |
|
1196 #endif |
|
1197 )) { |
|
1198 ent = getXHTMLEntity(name); |
|
1199 if (ent) |
|
1200 ent->etype = XML_INTERNAL_GENERAL_ENTITY; |
|
1201 } |
|
1202 |
|
1203 return ent; |
|
1204 } |
|
1205 |
|
1206 static void startDocumentHandler(void* closure) |
|
1207 { |
|
1208 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure); |
|
1209 getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone); |
|
1210 xmlSAX2StartDocument(closure); |
|
1211 } |
|
1212 |
|
1213 static void endDocumentHandler(void* closure) |
|
1214 { |
|
1215 getParser(closure)->endDocument(); |
|
1216 xmlSAX2EndDocument(closure); |
|
1217 } |
|
1218 |
|
1219 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID) |
|
1220 { |
|
1221 getParser(closure)->internalSubset(name, externalID, systemID); |
|
1222 xmlSAX2InternalSubset(closure, name, externalID, systemID); |
|
1223 } |
|
1224 |
|
1225 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*) |
|
1226 { |
|
1227 String extId = toString(externalId); |
|
1228 if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN") |
|
1229 || (extId == "-//W3C//DTD XHTML 1.1//EN") |
|
1230 || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN") |
|
1231 || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN") |
|
1232 || (extId == "-//W3C//DTD XHTML Basic 1.0//EN") |
|
1233 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN") |
|
1234 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN") |
|
1235 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN") |
|
1236 ) |
|
1237 getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not. |
|
1238 } |
|
1239 |
|
1240 static void ignorableWhitespaceHandler(void*, const xmlChar*, int) |
|
1241 { |
|
1242 // nothing to do, but we need this to work around a crasher |
|
1243 // http://bugzilla.gnome.org/show_bug.cgi?id=172255 |
|
1244 // http://bugs.webkit.org/show_bug.cgi?id=5792 |
|
1245 } |
|
1246 |
|
1247 void XMLDocumentParser::initializeParserContext(const char* chunk) |
|
1248 { |
|
1249 xmlSAXHandler sax; |
|
1250 memset(&sax, 0, sizeof(sax)); |
|
1251 |
|
1252 sax.error = normalErrorHandler; |
|
1253 sax.fatalError = fatalErrorHandler; |
|
1254 sax.characters = charactersHandler; |
|
1255 sax.processingInstruction = processingInstructionHandler; |
|
1256 sax.cdataBlock = cdataBlockHandler; |
|
1257 sax.comment = commentHandler; |
|
1258 sax.warning = warningHandler; |
|
1259 sax.startElementNs = startElementNsHandler; |
|
1260 sax.endElementNs = endElementNsHandler; |
|
1261 sax.getEntity = getEntityHandler; |
|
1262 sax.startDocument = startDocumentHandler; |
|
1263 sax.endDocument = endDocumentHandler; |
|
1264 sax.internalSubset = internalSubsetHandler; |
|
1265 sax.externalSubset = externalSubsetHandler; |
|
1266 sax.ignorableWhitespace = ignorableWhitespaceHandler; |
|
1267 sax.entityDecl = xmlSAX2EntityDecl; |
|
1268 sax.initialized = XML_SAX2_MAGIC; |
|
1269 m_parserStopped = false; |
|
1270 m_sawError = false; |
|
1271 m_sawXSLTransform = false; |
|
1272 m_sawFirstElement = false; |
|
1273 |
|
1274 XMLDocumentParserScope scope(document()->docLoader()); |
|
1275 if (m_parsingFragment) |
|
1276 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk); |
|
1277 else |
|
1278 m_context = XMLParserContext::createStringParser(&sax, this); |
|
1279 } |
|
1280 |
|
1281 void XMLDocumentParser::doEnd() |
|
1282 { |
|
1283 #if ENABLE(XSLT) |
|
1284 if (m_sawXSLTransform) { |
|
1285 void* doc = xmlDocPtrForString(document()->docLoader(), m_originalSourceForTransform, document()->url().string()); |
|
1286 document()->setTransformSource(new TransformSource(doc)); |
|
1287 |
|
1288 document()->setParsing(false); // Make the doc think it's done, so it will apply xsl sheets. |
|
1289 document()->updateStyleSelector(); |
|
1290 document()->setParsing(true); |
|
1291 m_parserStopped = true; |
|
1292 } |
|
1293 #endif |
|
1294 |
|
1295 if (m_parserStopped) |
|
1296 return; |
|
1297 |
|
1298 if (m_context) { |
|
1299 // Tell libxml we're done. |
|
1300 { |
|
1301 XMLDocumentParserScope scope(document()->docLoader()); |
|
1302 xmlParseChunk(context(), 0, 0, 1); |
|
1303 } |
|
1304 |
|
1305 m_context = 0; |
|
1306 } |
|
1307 } |
|
1308 |
|
1309 #if ENABLE(XSLT) |
|
1310 void* xmlDocPtrForString(DocLoader* docLoader, const String& source, const String& url) |
|
1311 { |
|
1312 if (source.isEmpty()) |
|
1313 return 0; |
|
1314 |
|
1315 // Parse in a single chunk into an xmlDocPtr |
|
1316 // FIXME: Hook up error handlers so that a failure to parse the main document results in |
|
1317 // good error messages. |
|
1318 const UChar BOM = 0xFEFF; |
|
1319 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); |
|
1320 |
|
1321 XMLDocumentParserScope scope(docLoader, errorFunc, 0); |
|
1322 xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()), |
|
1323 source.length() * sizeof(UChar), |
|
1324 url.latin1().data(), |
|
1325 BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE", |
|
1326 XSLT_PARSE_OPTIONS); |
|
1327 return sourceDoc; |
|
1328 } |
|
1329 #endif |
|
1330 |
|
1331 int XMLDocumentParser::lineNumber() const |
|
1332 { |
|
1333 return context() ? context()->input->line : 1; |
|
1334 } |
|
1335 |
|
1336 int XMLDocumentParser::columnNumber() const |
|
1337 { |
|
1338 return context() ? context()->input->col : 1; |
|
1339 } |
|
1340 |
|
1341 void XMLDocumentParser::stopParsing() |
|
1342 { |
|
1343 DocumentParser::stopParsing(); |
|
1344 if (context()) |
|
1345 xmlStopParser(context()); |
|
1346 } |
|
1347 |
|
1348 void XMLDocumentParser::resumeParsing() |
|
1349 { |
|
1350 ASSERT(m_parserPaused); |
|
1351 |
|
1352 m_parserPaused = false; |
|
1353 |
|
1354 // First, execute any pending callbacks |
|
1355 while (!m_pendingCallbacks->isEmpty()) { |
|
1356 m_pendingCallbacks->callAndRemoveFirstCallback(this); |
|
1357 |
|
1358 // A callback paused the parser |
|
1359 if (m_parserPaused) |
|
1360 return; |
|
1361 } |
|
1362 |
|
1363 // Then, write any pending data |
|
1364 SegmentedString rest = m_pendingSrc; |
|
1365 m_pendingSrc.clear(); |
|
1366 append(rest); |
|
1367 |
|
1368 // Finally, if finish() has been called and write() didn't result |
|
1369 // in any further callbacks being queued, call end() |
|
1370 if (m_finishCalled && m_pendingCallbacks->isEmpty()) |
|
1371 end(); |
|
1372 } |
|
1373 |
|
1374 // FIXME: This method should be possible to implement using the DocumentParser |
|
1375 // API, instead of needing to grab at libxml2 state directly. |
|
1376 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* parent, FragmentScriptingPermission scriptingPermission) |
|
1377 { |
|
1378 if (!chunk.length()) |
|
1379 return true; |
|
1380 |
|
1381 XMLDocumentParser parser(fragment, parent, scriptingPermission); |
|
1382 |
|
1383 CString chunkAsUtf8 = chunk.utf8(); |
|
1384 parser.initializeParserContext(chunkAsUtf8.data()); |
|
1385 |
|
1386 xmlParseContent(parser.context()); |
|
1387 |
|
1388 parser.endDocument(); |
|
1389 |
|
1390 // Check if all the chunk has been processed. |
|
1391 long bytesProcessed = xmlByteConsumed(parser.context()); |
|
1392 if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) |
|
1393 return false; |
|
1394 |
|
1395 // No error if the chunk is well formed or it is not but we have no error. |
|
1396 return parser.context()->wellFormed || xmlCtxtGetLastError(parser.context()) == 0; |
|
1397 } |
|
1398 |
|
1399 // -------------------------------- |
|
1400 |
|
1401 struct AttributeParseState { |
|
1402 HashMap<String, String> attributes; |
|
1403 bool gotAttributes; |
|
1404 }; |
|
1405 |
|
1406 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/, |
|
1407 const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/, |
|
1408 int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes) |
|
1409 { |
|
1410 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0) |
|
1411 return; |
|
1412 |
|
1413 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure); |
|
1414 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private); |
|
1415 |
|
1416 state->gotAttributes = true; |
|
1417 |
|
1418 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes); |
|
1419 for (int i = 0; i < nb_attributes; i++) { |
|
1420 String attrLocalName = toString(attributes[i].localname); |
|
1421 int valueLength = (int) (attributes[i].end - attributes[i].value); |
|
1422 String attrValue = toString(attributes[i].value, valueLength); |
|
1423 String attrPrefix = toString(attributes[i].prefix); |
|
1424 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName; |
|
1425 |
|
1426 state->attributes.set(attrQName, attrValue); |
|
1427 } |
|
1428 } |
|
1429 |
|
1430 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK) |
|
1431 { |
|
1432 AttributeParseState state; |
|
1433 state.gotAttributes = false; |
|
1434 |
|
1435 xmlSAXHandler sax; |
|
1436 memset(&sax, 0, sizeof(sax)); |
|
1437 sax.startElementNs = attributesStartElementNsHandler; |
|
1438 sax.initialized = XML_SAX2_MAGIC; |
|
1439 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state); |
|
1440 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />"; |
|
1441 xmlParseChunk(parser->context(), reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1); |
|
1442 attrsOK = state.gotAttributes; |
|
1443 return state.attributes; |
|
1444 } |
|
1445 |
|
1446 } |