WebCore/dom/XMLDocumentParserLibxml2.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2000 Peter Kelly (pmk@post.com)
       
     3  * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
       
     4  * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
       
     5  * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
       
     6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
       
     7  * Copyright (C) 2008 Holger Hans Peter Freyther
       
     8  * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
       
     9  *
       
    10  * This library is free software; you can redistribute it and/or
       
    11  * modify it under the terms of the GNU Library General Public
       
    12  * License as published by the Free Software Foundation; either
       
    13  * version 2 of the License, or (at your option) any later version.
       
    14  *
       
    15  * This library is distributed in the hope that it will be useful,
       
    16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    18  * Library General Public License for more details.
       
    19  *
       
    20  * You should have received a copy of the GNU Library General Public License
       
    21  * along with this library; see the file COPYING.LIB.  If not, write to
       
    22  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
       
    23  * Boston, MA 02110-1301, USA.
       
    24  */
       
    25 
       
    26 #include "config.h"
       
    27 #include "XMLDocumentParser.h"
       
    28 
       
    29 #include "CDATASection.h"
       
    30 #include "CachedScript.h"
       
    31 #include "Comment.h"
       
    32 #include "DocLoader.h"
       
    33 #include "Document.h"
       
    34 #include "DocumentFragment.h"
       
    35 #include "DocumentType.h"
       
    36 #include "Frame.h"
       
    37 #include "FrameLoader.h"
       
    38 #include "FrameView.h"
       
    39 #include "HTMLLinkElement.h"
       
    40 #include "HTMLStyleElement.h"
       
    41 #include "LegacyHTMLDocumentParser.h" // for decodeNamedEntity
       
    42 #include "ProcessingInstruction.h"
       
    43 #include "ResourceError.h"
       
    44 #include "ResourceHandle.h"
       
    45 #include "ResourceRequest.h"
       
    46 #include "ResourceResponse.h"
       
    47 #include "ScriptElement.h"
       
    48 #include "ScriptSourceCode.h"
       
    49 #include "ScriptValue.h"
       
    50 #include "TextResourceDecoder.h"
       
    51 #include "TransformSource.h"
       
    52 #include "XMLNSNames.h"
       
    53 #include "XMLDocumentParserScope.h"
       
    54 #include <libxml/parser.h>
       
    55 #include <libxml/parserInternals.h>
       
    56 #include <wtf/text/CString.h>
       
    57 #include <wtf/StringExtras.h>
       
    58 #include <wtf/Threading.h>
       
    59 #include <wtf/UnusedParam.h>
       
    60 #include <wtf/Vector.h>
       
    61 
       
    62 #if ENABLE(XSLT)
       
    63 #include <libxslt/xslt.h>
       
    64 #endif
       
    65 
       
    66 #if ENABLE(XHTMLMP)
       
    67 #include "HTMLNames.h"
       
    68 #include "HTMLScriptElement.h"
       
    69 #endif
       
    70 
       
    71 using namespace std;
       
    72 
       
    73 namespace WebCore {
       
    74 
       
    75 class PendingCallbacks : public Noncopyable {
       
    76 public:
       
    77     ~PendingCallbacks()
       
    78     {
       
    79         deleteAllValues(m_callbacks);
       
    80     }
       
    81 
       
    82     void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
       
    83                                       const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes)
       
    84     {
       
    85         PendingStartElementNSCallback* callback = new PendingStartElementNSCallback;
       
    86 
       
    87         callback->xmlLocalName = xmlStrdup(xmlLocalName);
       
    88         callback->xmlPrefix = xmlStrdup(xmlPrefix);
       
    89         callback->xmlURI = xmlStrdup(xmlURI);
       
    90         callback->nb_namespaces = nb_namespaces;
       
    91         callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2));
       
    92         for (int i = 0; i < nb_namespaces * 2 ; i++)
       
    93             callback->namespaces[i] = xmlStrdup(namespaces[i]);
       
    94         callback->nb_attributes = nb_attributes;
       
    95         callback->nb_defaulted = nb_defaulted;
       
    96         callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5));
       
    97         for (int i = 0; i < nb_attributes; i++) {
       
    98             // Each attribute has 5 elements in the array:
       
    99             // name, prefix, uri, value and an end pointer.
       
   100 
       
   101             for (int j = 0; j < 3; j++)
       
   102                 callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
       
   103 
       
   104             int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
       
   105 
       
   106             callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
       
   107             callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
       
   108         }
       
   109 
       
   110         m_callbacks.append(callback);
       
   111     }
       
   112 
       
   113     void appendEndElementNSCallback()
       
   114     {
       
   115         PendingEndElementNSCallback* callback = new PendingEndElementNSCallback;
       
   116 
       
   117         m_callbacks.append(callback);
       
   118     }
       
   119 
       
   120     void appendCharactersCallback(const xmlChar* s, int len)
       
   121     {
       
   122         PendingCharactersCallback* callback = new PendingCharactersCallback;
       
   123 
       
   124         callback->s = xmlStrndup(s, len);
       
   125         callback->len = len;
       
   126 
       
   127         m_callbacks.append(callback);
       
   128     }
       
   129 
       
   130     void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
       
   131     {
       
   132         PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback;
       
   133 
       
   134         callback->target = xmlStrdup(target);
       
   135         callback->data = xmlStrdup(data);
       
   136 
       
   137         m_callbacks.append(callback);
       
   138     }
       
   139 
       
   140     void appendCDATABlockCallback(const xmlChar* s, int len)
       
   141     {
       
   142         PendingCDATABlockCallback* callback = new PendingCDATABlockCallback;
       
   143 
       
   144         callback->s = xmlStrndup(s, len);
       
   145         callback->len = len;
       
   146 
       
   147         m_callbacks.append(callback);
       
   148     }
       
   149 
       
   150     void appendCommentCallback(const xmlChar* s)
       
   151     {
       
   152         PendingCommentCallback* callback = new PendingCommentCallback;
       
   153 
       
   154         callback->s = xmlStrdup(s);
       
   155 
       
   156         m_callbacks.append(callback);
       
   157     }
       
   158 
       
   159     void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
       
   160     {
       
   161         PendingInternalSubsetCallback* callback = new PendingInternalSubsetCallback;
       
   162 
       
   163         callback->name = xmlStrdup(name);
       
   164         callback->externalID = xmlStrdup(externalID);
       
   165         callback->systemID = xmlStrdup(systemID);
       
   166 
       
   167         m_callbacks.append(callback);
       
   168     }
       
   169 
       
   170     void appendErrorCallback(XMLDocumentParser::ErrorType type, const xmlChar* message, int lineNumber, int columnNumber)
       
   171     {
       
   172         PendingErrorCallback* callback = new PendingErrorCallback;
       
   173 
       
   174         callback->message = xmlStrdup(message);
       
   175         callback->type = type;
       
   176         callback->lineNumber = lineNumber;
       
   177         callback->columnNumber = columnNumber;
       
   178 
       
   179         m_callbacks.append(callback);
       
   180     }
       
   181 
       
   182     void callAndRemoveFirstCallback(XMLDocumentParser* parser)
       
   183     {
       
   184         OwnPtr<PendingCallback> callback(m_callbacks.takeFirst());
       
   185         callback->call(parser);
       
   186     }
       
   187 
       
   188     bool isEmpty() const { return m_callbacks.isEmpty(); }
       
   189 
       
   190 private:
       
   191     struct PendingCallback {
       
   192         virtual ~PendingCallback() { }
       
   193         virtual void call(XMLDocumentParser* parser) = 0;
       
   194     };
       
   195 
       
   196     struct PendingStartElementNSCallback : public PendingCallback {
       
   197         virtual ~PendingStartElementNSCallback()
       
   198         {
       
   199             xmlFree(xmlLocalName);
       
   200             xmlFree(xmlPrefix);
       
   201             xmlFree(xmlURI);
       
   202             for (int i = 0; i < nb_namespaces * 2; i++)
       
   203                 xmlFree(namespaces[i]);
       
   204             xmlFree(namespaces);
       
   205             for (int i = 0; i < nb_attributes; i++)
       
   206                 for (int j = 0; j < 4; j++)
       
   207                     xmlFree(attributes[i * 5 + j]);
       
   208             xmlFree(attributes);
       
   209         }
       
   210 
       
   211         virtual void call(XMLDocumentParser* parser)
       
   212         {
       
   213             parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI,
       
   214                                       nb_namespaces, const_cast<const xmlChar**>(namespaces),
       
   215                                       nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes));
       
   216         }
       
   217 
       
   218         xmlChar* xmlLocalName;
       
   219         xmlChar* xmlPrefix;
       
   220         xmlChar* xmlURI;
       
   221         int nb_namespaces;
       
   222         xmlChar** namespaces;
       
   223         int nb_attributes;
       
   224         int nb_defaulted;
       
   225         xmlChar** attributes;
       
   226     };
       
   227 
       
   228     struct PendingEndElementNSCallback : public PendingCallback {
       
   229         virtual void call(XMLDocumentParser* parser)
       
   230         {
       
   231             parser->endElementNs();
       
   232         }
       
   233     };
       
   234 
       
   235     struct PendingCharactersCallback : public PendingCallback {
       
   236         virtual ~PendingCharactersCallback()
       
   237         {
       
   238             xmlFree(s);
       
   239         }
       
   240 
       
   241         virtual void call(XMLDocumentParser* parser)
       
   242         {
       
   243             parser->characters(s, len);
       
   244         }
       
   245 
       
   246         xmlChar* s;
       
   247         int len;
       
   248     };
       
   249 
       
   250     struct PendingProcessingInstructionCallback : public PendingCallback {
       
   251         virtual ~PendingProcessingInstructionCallback()
       
   252         {
       
   253             xmlFree(target);
       
   254             xmlFree(data);
       
   255         }
       
   256 
       
   257         virtual void call(XMLDocumentParser* parser)
       
   258         {
       
   259             parser->processingInstruction(target, data);
       
   260         }
       
   261 
       
   262         xmlChar* target;
       
   263         xmlChar* data;
       
   264     };
       
   265 
       
   266     struct PendingCDATABlockCallback : public PendingCallback {
       
   267         virtual ~PendingCDATABlockCallback()
       
   268         {
       
   269             xmlFree(s);
       
   270         }
       
   271 
       
   272         virtual void call(XMLDocumentParser* parser)
       
   273         {
       
   274             parser->cdataBlock(s, len);
       
   275         }
       
   276 
       
   277         xmlChar* s;
       
   278         int len;
       
   279     };
       
   280 
       
   281     struct PendingCommentCallback : public PendingCallback {
       
   282         virtual ~PendingCommentCallback()
       
   283         {
       
   284             xmlFree(s);
       
   285         }
       
   286 
       
   287         virtual void call(XMLDocumentParser* parser)
       
   288         {
       
   289             parser->comment(s);
       
   290         }
       
   291 
       
   292         xmlChar* s;
       
   293     };
       
   294 
       
   295     struct PendingInternalSubsetCallback : public PendingCallback {
       
   296         virtual ~PendingInternalSubsetCallback()
       
   297         {
       
   298             xmlFree(name);
       
   299             xmlFree(externalID);
       
   300             xmlFree(systemID);
       
   301         }
       
   302 
       
   303         virtual void call(XMLDocumentParser* parser)
       
   304         {
       
   305             parser->internalSubset(name, externalID, systemID);
       
   306         }
       
   307 
       
   308         xmlChar* name;
       
   309         xmlChar* externalID;
       
   310         xmlChar* systemID;
       
   311     };
       
   312 
       
   313     struct PendingErrorCallback: public PendingCallback {
       
   314         virtual ~PendingErrorCallback()
       
   315         {
       
   316             xmlFree(message);
       
   317         }
       
   318 
       
   319         virtual void call(XMLDocumentParser* parser)
       
   320         {
       
   321             parser->handleError(type, reinterpret_cast<char*>(message), lineNumber, columnNumber);
       
   322         }
       
   323 
       
   324         XMLDocumentParser::ErrorType type;
       
   325         xmlChar* message;
       
   326         int lineNumber;
       
   327         int columnNumber;
       
   328     };
       
   329 
       
   330     Deque<PendingCallback*> m_callbacks;
       
   331 };
       
   332 // --------------------------------
       
   333 
       
   334 static int globalDescriptor = 0;
       
   335 static ThreadIdentifier libxmlLoaderThread = 0;
       
   336 
       
   337 static int matchFunc(const char*)
       
   338 {
       
   339     // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
       
   340     // interfering with client applications that also use libxml2.  http://bugs.webkit.org/show_bug.cgi?id=17353
       
   341     return XMLDocumentParserScope::currentDocLoader && currentThread() == libxmlLoaderThread;
       
   342 }
       
   343 
       
   344 class OffsetBuffer {
       
   345 public:
       
   346     OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
       
   347 
       
   348     int readOutBytes(char* outputBuffer, unsigned askedToRead)
       
   349     {
       
   350         unsigned bytesLeft = m_buffer.size() - m_currentOffset;
       
   351         unsigned lenToCopy = min(askedToRead, bytesLeft);
       
   352         if (lenToCopy) {
       
   353             memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
       
   354             m_currentOffset += lenToCopy;
       
   355         }
       
   356         return lenToCopy;
       
   357     }
       
   358 
       
   359 private:
       
   360     Vector<char> m_buffer;
       
   361     unsigned m_currentOffset;
       
   362 };
       
   363 
       
   364 static bool shouldAllowExternalLoad(const KURL& url)
       
   365 {
       
   366     String urlString = url.string();
       
   367 
       
   368     // On non-Windows platforms libxml asks for this URL, the
       
   369     // "XML_XML_DEFAULT_CATALOG", on initialization.
       
   370     if (urlString == "file:///etc/xml/catalog")
       
   371         return false;
       
   372 
       
   373     // On Windows, libxml computes a URL relative to where its DLL resides.
       
   374     if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
       
   375         return false;
       
   376 
       
   377     // The most common DTD.  There isn't much point in hammering www.w3c.org
       
   378     // by requesting this URL for every XHTML document.
       
   379     if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
       
   380         return false;
       
   381 
       
   382     // Similarly, there isn't much point in requesting the SVG DTD.
       
   383     if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
       
   384         return false;
       
   385 
       
   386     // The libxml doesn't give us a lot of context for deciding whether to
       
   387     // allow this request.  In the worst case, this load could be for an
       
   388     // external entity and the resulting document could simply read the
       
   389     // retrieved content.  If we had more context, we could potentially allow
       
   390     // the parser to load a DTD.  As things stand, we take the conservative
       
   391     // route and allow same-origin requests only.
       
   392     if (!XMLDocumentParserScope::currentDocLoader->doc()->securityOrigin()->canRequest(url)) {
       
   393         XMLDocumentParserScope::currentDocLoader->printAccessDeniedMessage(url);
       
   394         return false;
       
   395     }
       
   396 
       
   397     return true;
       
   398 }
       
   399 
       
   400 static void* openFunc(const char* uri)
       
   401 {
       
   402     ASSERT(XMLDocumentParserScope::currentDocLoader);
       
   403     ASSERT(currentThread() == libxmlLoaderThread);
       
   404 
       
   405     KURL url(KURL(), uri);
       
   406 
       
   407     if (!shouldAllowExternalLoad(url))
       
   408         return &globalDescriptor;
       
   409 
       
   410     ResourceError error;
       
   411     ResourceResponse response;
       
   412     Vector<char> data;
       
   413 
       
   414 
       
   415     {
       
   416         DocLoader* docLoader = XMLDocumentParserScope::currentDocLoader;
       
   417         XMLDocumentParserScope scope(0);
       
   418         // FIXME: We should restore the original global error handler as well.
       
   419 
       
   420         if (docLoader->frame())
       
   421             docLoader->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data);
       
   422     }
       
   423 
       
   424     // We have to check the URL again after the load to catch redirects.
       
   425     // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
       
   426     if (!shouldAllowExternalLoad(response.url()))
       
   427         return &globalDescriptor;
       
   428 
       
   429     return new OffsetBuffer(data);
       
   430 }
       
   431 
       
   432 static int readFunc(void* context, char* buffer, int len)
       
   433 {
       
   434     // Do 0-byte reads in case of a null descriptor
       
   435     if (context == &globalDescriptor)
       
   436         return 0;
       
   437 
       
   438     OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
       
   439     return data->readOutBytes(buffer, len);
       
   440 }
       
   441 
       
   442 static int writeFunc(void*, const char*, int)
       
   443 {
       
   444     // Always just do 0-byte writes
       
   445     return 0;
       
   446 }
       
   447 
       
   448 static int closeFunc(void* context)
       
   449 {
       
   450     if (context != &globalDescriptor) {
       
   451         OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
       
   452         delete data;
       
   453     }
       
   454     return 0;
       
   455 }
       
   456 
       
   457 #if ENABLE(XSLT)
       
   458 static void errorFunc(void*, const char*, ...)
       
   459 {
       
   460     // FIXME: It would be nice to display error messages somewhere.
       
   461 }
       
   462 #endif
       
   463 
       
   464 static bool didInit = false;
       
   465 
       
   466 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
       
   467 {
       
   468     if (!didInit) {
       
   469         xmlInitParser();
       
   470         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
       
   471         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
       
   472         libxmlLoaderThread = currentThread();
       
   473         didInit = true;
       
   474     }
       
   475 
       
   476     xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
       
   477     parser->_private = userData;
       
   478     parser->replaceEntities = true;
       
   479     const UChar BOM = 0xFEFF;
       
   480     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
       
   481     xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
       
   482 
       
   483     return adoptRef(new XMLParserContext(parser));
       
   484 }
       
   485 
       
   486 
       
   487 // Chunk should be encoded in UTF-8
       
   488 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk)
       
   489 {
       
   490     if (!didInit) {
       
   491         xmlInitParser();
       
   492         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
       
   493         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
       
   494         libxmlLoaderThread = currentThread();
       
   495         didInit = true;
       
   496     }
       
   497 
       
   498     xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk, xmlStrlen((const xmlChar*)chunk));
       
   499 
       
   500     if (!parser)
       
   501         return 0;
       
   502 
       
   503     // Copy the sax handler
       
   504     memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
       
   505 
       
   506     // Set parser options.
       
   507     // XML_PARSE_NODICT: default dictionary option.
       
   508     // XML_PARSE_NOENT: force entities substitutions.
       
   509     xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
       
   510 
       
   511     // Internal initialization
       
   512     parser->sax2 = 1;
       
   513     parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
       
   514     parser->depth = 0;
       
   515     parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
       
   516     parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
       
   517     parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
       
   518     parser->_private = userData;
       
   519 
       
   520     return adoptRef(new XMLParserContext(parser));
       
   521 }
       
   522 
       
   523 // --------------------------------
       
   524 
       
   525 XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
       
   526     : ScriptableDocumentParser(document)
       
   527     , m_view(frameView)
       
   528     , m_context(0)
       
   529     , m_pendingCallbacks(new PendingCallbacks)
       
   530     , m_currentNode(document)
       
   531     , m_sawError(false)
       
   532     , m_sawXSLTransform(false)
       
   533     , m_sawFirstElement(false)
       
   534     , m_isXHTMLDocument(false)
       
   535 #if ENABLE(XHTMLMP)
       
   536     , m_isXHTMLMPDocument(false)
       
   537     , m_hasDocTypeDeclaration(false)
       
   538 #endif
       
   539     , m_parserPaused(false)
       
   540     , m_requestingScript(false)
       
   541     , m_finishCalled(false)
       
   542     , m_errorCount(0)
       
   543     , m_lastErrorLine(0)
       
   544     , m_lastErrorColumn(0)
       
   545     , m_pendingScript(0)
       
   546     , m_scriptStartLine(0)
       
   547     , m_parsingFragment(false)
       
   548     , m_scriptingPermission(FragmentScriptingAllowed)
       
   549 {
       
   550 }
       
   551 
       
   552 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, FragmentScriptingPermission scriptingPermission)
       
   553     : ScriptableDocumentParser(fragment->document())
       
   554     , m_view(0)
       
   555     , m_context(0)
       
   556     , m_pendingCallbacks(new PendingCallbacks)
       
   557     , m_currentNode(fragment)
       
   558     , m_sawError(false)
       
   559     , m_sawXSLTransform(false)
       
   560     , m_sawFirstElement(false)
       
   561     , m_isXHTMLDocument(false)
       
   562 #if ENABLE(XHTMLMP)
       
   563     , m_isXHTMLMPDocument(false)
       
   564     , m_hasDocTypeDeclaration(false)
       
   565 #endif
       
   566     , m_parserPaused(false)
       
   567     , m_requestingScript(false)
       
   568     , m_finishCalled(false)
       
   569     , m_errorCount(0)
       
   570     , m_lastErrorLine(0)
       
   571     , m_lastErrorColumn(0)
       
   572     , m_pendingScript(0)
       
   573     , m_scriptStartLine(0)
       
   574     , m_parsingFragment(true)
       
   575     , m_scriptingPermission(scriptingPermission)
       
   576 {
       
   577     fragment->ref();
       
   578 
       
   579     // Add namespaces based on the parent node
       
   580     Vector<Element*> elemStack;
       
   581     while (parentElement) {
       
   582         elemStack.append(parentElement);
       
   583 
       
   584         Node* n = parentElement->parentNode();
       
   585         if (!n || !n->isElementNode())
       
   586             break;
       
   587         parentElement = static_cast<Element*>(n);
       
   588     }
       
   589 
       
   590     if (elemStack.isEmpty())
       
   591         return;
       
   592 
       
   593     for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
       
   594         if (NamedNodeMap* attrs = element->attributes()) {
       
   595             for (unsigned i = 0; i < attrs->length(); i++) {
       
   596                 Attribute* attr = attrs->attributeItem(i);
       
   597                 if (attr->localName() == xmlnsAtom)
       
   598                     m_defaultNamespaceURI = attr->value();
       
   599                 else if (attr->prefix() == xmlnsAtom)
       
   600                     m_prefixToNamespaceMap.set(attr->localName(), attr->value());
       
   601             }
       
   602         }
       
   603     }
       
   604 
       
   605     // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
       
   606     if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
       
   607         m_defaultNamespaceURI = parentElement->namespaceURI();
       
   608 }
       
   609 
       
   610 XMLParserContext::~XMLParserContext()
       
   611 {
       
   612     if (m_context->myDoc)
       
   613         xmlFreeDoc(m_context->myDoc);
       
   614     xmlFreeParserCtxt(m_context);
       
   615 }
       
   616 
       
   617 XMLDocumentParser::~XMLDocumentParser()
       
   618 {
       
   619     clearCurrentNodeStack();
       
   620     if (m_pendingScript)
       
   621         m_pendingScript->removeClient(this);
       
   622 }
       
   623 
       
   624 void XMLDocumentParser::doWrite(const String& parseString)
       
   625 {
       
   626     if (!m_context)
       
   627         initializeParserContext();
       
   628 
       
   629     // Protect the libxml context from deletion during a callback
       
   630     RefPtr<XMLParserContext> context = m_context;
       
   631 
       
   632     // libXML throws an error if you try to switch the encoding for an empty string.
       
   633     if (parseString.length()) {
       
   634         // Hack around libxml2's lack of encoding overide support by manually
       
   635         // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
       
   636         // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
       
   637         // and switch encodings, causing the parse to fail.
       
   638         const UChar BOM = 0xFEFF;
       
   639         const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
       
   640         xmlSwitchEncoding(context->context(), BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
       
   641 
       
   642         XMLDocumentParserScope scope(document()->docLoader());
       
   643         xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
       
   644     }
       
   645 
       
   646     if (document()->decoder() && document()->decoder()->sawError()) {
       
   647         // If the decoder saw an error, report it as fatal (stops parsing)
       
   648         handleError(fatal, "Encoding error", context->context()->input->line, context->context()->input->col);
       
   649     }
       
   650 
       
   651     return;
       
   652 }
       
   653 
       
   654 static inline String toString(const xmlChar* str, unsigned len)
       
   655 {
       
   656     return UTF8Encoding().decode(reinterpret_cast<const char*>(str), len);
       
   657 }
       
   658 
       
   659 static inline String toString(const xmlChar* str)
       
   660 {
       
   661     if (!str)
       
   662         return String();
       
   663 
       
   664     return UTF8Encoding().decode(reinterpret_cast<const char*>(str), strlen(reinterpret_cast<const char*>(str)));
       
   665 }
       
   666 
       
   667 struct _xmlSAX2Namespace {
       
   668     const xmlChar* prefix;
       
   669     const xmlChar* uri;
       
   670 };
       
   671 typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
       
   672 
       
   673 static inline void handleElementNamespaces(Element* newElement, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
       
   674 {
       
   675     xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
       
   676     for (int i = 0; i < nb_namespaces; i++) {
       
   677         AtomicString namespaceQName = xmlnsAtom;
       
   678         String namespaceURI = toString(namespaces[i].uri);
       
   679         if (namespaces[i].prefix)
       
   680             namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
       
   681         newElement->setAttributeNS(XMLNSNames::xmlnsNamespaceURI, namespaceQName, namespaceURI, ec, scriptingPermission);
       
   682         if (ec) // exception setting attributes
       
   683             return;
       
   684     }
       
   685 }
       
   686 
       
   687 struct _xmlSAX2Attributes {
       
   688     const xmlChar* localname;
       
   689     const xmlChar* prefix;
       
   690     const xmlChar* uri;
       
   691     const xmlChar* value;
       
   692     const xmlChar* end;
       
   693 };
       
   694 typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
       
   695 
       
   696 static inline void handleElementAttributes(Element* newElement, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
       
   697 {
       
   698     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
       
   699     for (int i = 0; i < nb_attributes; i++) {
       
   700         String attrLocalName = toString(attributes[i].localname);
       
   701         int valueLength = (int) (attributes[i].end - attributes[i].value);
       
   702         String attrValue = toString(attributes[i].value, valueLength);
       
   703         String attrPrefix = toString(attributes[i].prefix);
       
   704         String attrURI = attrPrefix.isEmpty() ? String() : toString(attributes[i].uri);
       
   705         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
       
   706 
       
   707         newElement->setAttributeNS(attrURI, attrQName, attrValue, ec, scriptingPermission);
       
   708         if (ec) // exception setting attributes
       
   709             return;
       
   710     }
       
   711 }
       
   712 
       
   713 void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
       
   714                                   const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
       
   715 {
       
   716     if (m_parserStopped)
       
   717         return;
       
   718 
       
   719     if (m_parserPaused) {
       
   720         m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces,
       
   721                                                          nb_attributes, nb_defaulted, libxmlAttributes);
       
   722         return;
       
   723     }
       
   724 
       
   725 #if ENABLE(XHTMLMP)
       
   726     // check if the DOCTYPE Declaration of XHTMLMP document exists
       
   727     if (!m_hasDocTypeDeclaration && document()->isXHTMLMPDocument()) {
       
   728         handleError(fatal, "DOCTYPE declaration lost.", lineNumber(), columnNumber());
       
   729         return;
       
   730     }
       
   731 #endif
       
   732 
       
   733     exitText();
       
   734 
       
   735     String localName = toString(xmlLocalName);
       
   736     String uri = toString(xmlURI);
       
   737     String prefix = toString(xmlPrefix);
       
   738 
       
   739     if (m_parsingFragment && uri.isNull()) {
       
   740         if (!prefix.isNull())
       
   741             uri = m_prefixToNamespaceMap.get(prefix);
       
   742         else
       
   743             uri = m_defaultNamespaceURI;
       
   744     }
       
   745 
       
   746 #if ENABLE(XHTMLMP)
       
   747     if (!m_sawFirstElement && isXHTMLMPDocument()) {
       
   748         // As per the section 7.1 of OMA-WAP-XHTMLMP-V1_1-20061020-A.pdf,
       
   749         // we should make sure that the root element MUST be 'html' and
       
   750         // ensure the name of the default namespace on the root elment 'html'
       
   751         // MUST be 'http://www.w3.org/1999/xhtml'
       
   752         if (localName != HTMLNames::htmlTag.localName()) {
       
   753             handleError(fatal, "XHTMLMP document expects 'html' as root element.", lineNumber(), columnNumber());
       
   754             return;
       
   755         }
       
   756 
       
   757         if (uri.isNull()) {
       
   758             m_defaultNamespaceURI = HTMLNames::xhtmlNamespaceURI;
       
   759             uri = m_defaultNamespaceURI;
       
   760         }
       
   761     }
       
   762 #endif
       
   763 
       
   764     bool isFirstElement = !m_sawFirstElement;
       
   765     m_sawFirstElement = true;
       
   766 
       
   767     QualifiedName qName(prefix, localName, uri);
       
   768     RefPtr<Element> newElement = document()->createElement(qName, true);
       
   769     if (!newElement) {
       
   770         stopParsing();
       
   771         return;
       
   772     }
       
   773 
       
   774     ExceptionCode ec = 0;
       
   775     handleElementNamespaces(newElement.get(), libxmlNamespaces, nb_namespaces, ec, m_scriptingPermission);
       
   776     if (ec) {
       
   777         stopParsing();
       
   778         return;
       
   779     }
       
   780 
       
   781     handleElementAttributes(newElement.get(), libxmlAttributes, nb_attributes, ec, m_scriptingPermission);
       
   782     if (ec) {
       
   783         stopParsing();
       
   784         return;
       
   785     }
       
   786 
       
   787     newElement->beginParsingChildren();
       
   788 
       
   789     ScriptElement* scriptElement = toScriptElement(newElement.get());
       
   790     if (scriptElement)
       
   791         m_scriptStartLine = lineNumber();
       
   792 
       
   793     if (!m_currentNode->legacyParserAddChild(newElement.get())) {
       
   794         stopParsing();
       
   795         return;
       
   796     }
       
   797 
       
   798     pushCurrentNode(newElement.get());
       
   799     if (m_view && !newElement->attached())
       
   800         newElement->attach();
       
   801 
       
   802     if (!m_parsingFragment && isFirstElement && document()->frame())
       
   803         document()->frame()->loader()->dispatchDocumentElementAvailable();
       
   804 }
       
   805 
       
   806 void XMLDocumentParser::endElementNs()
       
   807 {
       
   808     if (m_parserStopped)
       
   809         return;
       
   810 
       
   811     if (m_parserPaused) {
       
   812         m_pendingCallbacks->appendEndElementNSCallback();
       
   813         return;
       
   814     }
       
   815 
       
   816     exitText();
       
   817 
       
   818     Node* n = m_currentNode;
       
   819     n->finishParsingChildren();
       
   820 
       
   821     if (m_scriptingPermission == FragmentScriptingNotAllowed && n->isElementNode() && toScriptElement(static_cast<Element*>(n))) {
       
   822         popCurrentNode();
       
   823         ExceptionCode ec;
       
   824         n->remove(ec);
       
   825         return;
       
   826     }
       
   827 
       
   828     if (!n->isElementNode() || !m_view) {
       
   829         popCurrentNode();
       
   830         return;
       
   831     }
       
   832 
       
   833     Element* element = static_cast<Element*>(n);
       
   834 
       
   835     // The element's parent may have already been removed from document.
       
   836     // Parsing continues in this case, but scripts aren't executed.
       
   837     if (!element->inDocument()) {
       
   838         popCurrentNode();
       
   839         return;
       
   840     }
       
   841 
       
   842     ScriptElement* scriptElement = toScriptElement(element);
       
   843     if (!scriptElement) {
       
   844         popCurrentNode();
       
   845         return;
       
   846     }
       
   847 
       
   848     // Don't load external scripts for standalone documents (for now).
       
   849     ASSERT(!m_pendingScript);
       
   850     m_requestingScript = true;
       
   851 
       
   852 #if ENABLE(XHTMLMP)
       
   853     if (!scriptElement->shouldExecuteAsJavaScript())
       
   854         document()->setShouldProcessNoscriptElement(true);
       
   855     else
       
   856 #endif
       
   857     {
       
   858         String scriptHref = scriptElement->sourceAttributeValue();
       
   859         if (!scriptHref.isEmpty()) {
       
   860             // we have a src attribute
       
   861             String scriptCharset = scriptElement->scriptCharset();
       
   862             if (element->dispatchBeforeLoadEvent(scriptHref) &&
       
   863                 (m_pendingScript = document()->docLoader()->requestScript(scriptHref, scriptCharset))) {
       
   864                 m_scriptElement = element;
       
   865                 m_pendingScript->addClient(this);
       
   866 
       
   867                 // m_pendingScript will be 0 if script was already loaded and ref() executed it
       
   868                 if (m_pendingScript)
       
   869                     pauseParsing();
       
   870             } else
       
   871                 m_scriptElement = 0;
       
   872         } else
       
   873             m_view->frame()->script()->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartLine));
       
   874     }
       
   875     m_requestingScript = false;
       
   876     popCurrentNode();
       
   877 }
       
   878 
       
   879 void XMLDocumentParser::characters(const xmlChar* s, int len)
       
   880 {
       
   881     if (m_parserStopped)
       
   882         return;
       
   883 
       
   884     if (m_parserPaused) {
       
   885         m_pendingCallbacks->appendCharactersCallback(s, len);
       
   886         return;
       
   887     }
       
   888 
       
   889     if (m_currentNode->isTextNode() || enterText())
       
   890         m_bufferedText.append(s, len);
       
   891 }
       
   892 
       
   893 void XMLDocumentParser::error(ErrorType type, const char* message, va_list args)
       
   894 {
       
   895     if (m_parserStopped)
       
   896         return;
       
   897 
       
   898 #if COMPILER(MSVC) || COMPILER(RVCT)
       
   899     char m[1024];
       
   900     vsnprintf(m, sizeof(m) - 1, message, args);
       
   901 #else
       
   902     char* m;
       
   903     if (vasprintf(&m, message, args) == -1)
       
   904         return;
       
   905 #endif
       
   906 
       
   907     if (m_parserPaused)
       
   908         m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(m), lineNumber(), columnNumber());
       
   909     else
       
   910         handleError(type, m, lineNumber(), columnNumber());
       
   911 
       
   912 #if !COMPILER(MSVC) && !COMPILER(RVCT)
       
   913     free(m);
       
   914 #endif
       
   915 }
       
   916 
       
   917 void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data)
       
   918 {
       
   919     if (m_parserStopped)
       
   920         return;
       
   921 
       
   922     if (m_parserPaused) {
       
   923         m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
       
   924         return;
       
   925     }
       
   926 
       
   927     exitText();
       
   928 
       
   929     // ### handle exceptions
       
   930     int exception = 0;
       
   931     RefPtr<ProcessingInstruction> pi = document()->createProcessingInstruction(
       
   932         toString(target), toString(data), exception);
       
   933     if (exception)
       
   934         return;
       
   935 
       
   936     pi->setCreatedByParser(true);
       
   937 
       
   938     if (!m_currentNode->legacyParserAddChild(pi.get()))
       
   939         return;
       
   940     if (m_view && !pi->attached())
       
   941         pi->attach();
       
   942 
       
   943     pi->finishParsingChildren();
       
   944 
       
   945 #if ENABLE(XSLT)
       
   946     m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
       
   947     if (m_sawXSLTransform && !document()->transformSourceDocument())
       
   948         stopParsing();
       
   949 #endif
       
   950 }
       
   951 
       
   952 void XMLDocumentParser::cdataBlock(const xmlChar* s, int len)
       
   953 {
       
   954     if (m_parserStopped)
       
   955         return;
       
   956 
       
   957     if (m_parserPaused) {
       
   958         m_pendingCallbacks->appendCDATABlockCallback(s, len);
       
   959         return;
       
   960     }
       
   961 
       
   962     exitText();
       
   963 
       
   964     RefPtr<Node> newNode = CDATASection::create(document(), toString(s, len));
       
   965     if (!m_currentNode->legacyParserAddChild(newNode.get()))
       
   966         return;
       
   967     if (m_view && !newNode->attached())
       
   968         newNode->attach();
       
   969 }
       
   970 
       
   971 void XMLDocumentParser::comment(const xmlChar* s)
       
   972 {
       
   973     if (m_parserStopped)
       
   974         return;
       
   975 
       
   976     if (m_parserPaused) {
       
   977         m_pendingCallbacks->appendCommentCallback(s);
       
   978         return;
       
   979     }
       
   980 
       
   981     exitText();
       
   982 
       
   983     RefPtr<Node> newNode = Comment::create(document(), toString(s));
       
   984     m_currentNode->legacyParserAddChild(newNode.get());
       
   985     if (m_view && !newNode->attached())
       
   986         newNode->attach();
       
   987 }
       
   988 
       
   989 void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
       
   990 {
       
   991     ExceptionCode ec = 0;
       
   992 
       
   993     if (version)
       
   994         document()->setXMLVersion(toString(version), ec);
       
   995     document()->setXMLStandalone(standalone == 1, ec); // possible values are 0, 1, and -1
       
   996     if (encoding)
       
   997         document()->setXMLEncoding(toString(encoding));
       
   998 }
       
   999 
       
  1000 void XMLDocumentParser::endDocument()
       
  1001 {
       
  1002     exitText();
       
  1003 #if ENABLE(XHTMLMP)
       
  1004     m_hasDocTypeDeclaration = false;
       
  1005 #endif
       
  1006 }
       
  1007 
       
  1008 void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
       
  1009 {
       
  1010     if (m_parserStopped)
       
  1011         return;
       
  1012 
       
  1013     if (m_parserPaused) {
       
  1014         m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
       
  1015         return;
       
  1016     }
       
  1017 
       
  1018     if (document()) {
       
  1019 #if ENABLE(WML) || ENABLE(XHTMLMP)
       
  1020         String extId = toString(externalID);
       
  1021 #endif
       
  1022 #if ENABLE(WML)
       
  1023         if (isWMLDocument()
       
  1024             && extId != "-//WAPFORUM//DTD WML 1.3//EN"
       
  1025             && extId != "-//WAPFORUM//DTD WML 1.2//EN"
       
  1026             && extId != "-//WAPFORUM//DTD WML 1.1//EN"
       
  1027             && extId != "-//WAPFORUM//DTD WML 1.0//EN")
       
  1028             handleError(fatal, "Invalid DTD Public ID", lineNumber(), columnNumber());
       
  1029 #endif
       
  1030 #if ENABLE(XHTMLMP)
       
  1031         String dtdName = toString(name);
       
  1032         if (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
       
  1033             || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") {
       
  1034             if (dtdName != HTMLNames::htmlTag.localName()) {
       
  1035                 handleError(fatal, "Invalid DOCTYPE declaration, expected 'html' as root element.", lineNumber(), columnNumber());
       
  1036                 return;
       
  1037             }
       
  1038 
       
  1039             if (document()->isXHTMLMPDocument())
       
  1040                 setIsXHTMLMPDocument(true);
       
  1041             else
       
  1042                 setIsXHTMLDocument(true);
       
  1043 
       
  1044             m_hasDocTypeDeclaration = true;
       
  1045         }
       
  1046 #endif
       
  1047 
       
  1048         document()->legacyParserAddChild(DocumentType::create(document(), toString(name), toString(externalID), toString(systemID)));
       
  1049     }
       
  1050 }
       
  1051 
       
  1052 static inline XMLDocumentParser* getParser(void* closure)
       
  1053 {
       
  1054     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
       
  1055     return static_cast<XMLDocumentParser*>(ctxt->_private);
       
  1056 }
       
  1057 
       
  1058 // This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
       
  1059 // Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
       
  1060 static inline bool hackAroundLibXMLEntityBug(void* closure)
       
  1061 {
       
  1062 #if LIBXML_VERSION >= 20627
       
  1063     UNUSED_PARAM(closure);
       
  1064 
       
  1065     // This bug has been fixed in libxml 2.6.27.
       
  1066     return false;
       
  1067 #else
       
  1068     return static_cast<xmlParserCtxtPtr>(closure)->node;
       
  1069 #endif
       
  1070 }
       
  1071 
       
  1072 static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
       
  1073 {
       
  1074     if (hackAroundLibXMLEntityBug(closure))
       
  1075         return;
       
  1076 
       
  1077     getParser(closure)->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes);
       
  1078 }
       
  1079 
       
  1080 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
       
  1081 {
       
  1082     if (hackAroundLibXMLEntityBug(closure))
       
  1083         return;
       
  1084 
       
  1085     getParser(closure)->endElementNs();
       
  1086 }
       
  1087 
       
  1088 static void charactersHandler(void* closure, const xmlChar* s, int len)
       
  1089 {
       
  1090     if (hackAroundLibXMLEntityBug(closure))
       
  1091         return;
       
  1092 
       
  1093     getParser(closure)->characters(s, len);
       
  1094 }
       
  1095 
       
  1096 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
       
  1097 {
       
  1098     if (hackAroundLibXMLEntityBug(closure))
       
  1099         return;
       
  1100 
       
  1101     getParser(closure)->processingInstruction(target, data);
       
  1102 }
       
  1103 
       
  1104 static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
       
  1105 {
       
  1106     if (hackAroundLibXMLEntityBug(closure))
       
  1107         return;
       
  1108 
       
  1109     getParser(closure)->cdataBlock(s, len);
       
  1110 }
       
  1111 
       
  1112 static void commentHandler(void* closure, const xmlChar* comment)
       
  1113 {
       
  1114     if (hackAroundLibXMLEntityBug(closure))
       
  1115         return;
       
  1116 
       
  1117     getParser(closure)->comment(comment);
       
  1118 }
       
  1119 
       
  1120 WTF_ATTRIBUTE_PRINTF(2, 3)
       
  1121 static void warningHandler(void* closure, const char* message, ...)
       
  1122 {
       
  1123     va_list args;
       
  1124     va_start(args, message);
       
  1125     getParser(closure)->error(XMLDocumentParser::warning, message, args);
       
  1126     va_end(args);
       
  1127 }
       
  1128 
       
  1129 WTF_ATTRIBUTE_PRINTF(2, 3)
       
  1130 static void fatalErrorHandler(void* closure, const char* message, ...)
       
  1131 {
       
  1132     va_list args;
       
  1133     va_start(args, message);
       
  1134     getParser(closure)->error(XMLDocumentParser::fatal, message, args);
       
  1135     va_end(args);
       
  1136 }
       
  1137 
       
  1138 WTF_ATTRIBUTE_PRINTF(2, 3)
       
  1139 static void normalErrorHandler(void* closure, const char* message, ...)
       
  1140 {
       
  1141     va_list args;
       
  1142     va_start(args, message);
       
  1143     getParser(closure)->error(XMLDocumentParser::nonFatal, message, args);
       
  1144     va_end(args);
       
  1145 }
       
  1146 
       
  1147 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
       
  1148 // a hack to avoid malloc/free. Using a global variable like this could cause trouble
       
  1149 // if libxml implementation details were to change
       
  1150 static xmlChar sharedXHTMLEntityResult[5] = {0, 0, 0, 0, 0};
       
  1151 
       
  1152 static xmlEntityPtr sharedXHTMLEntity()
       
  1153 {
       
  1154     static xmlEntity entity;
       
  1155     if (!entity.type) {
       
  1156         entity.type = XML_ENTITY_DECL;
       
  1157         entity.orig = sharedXHTMLEntityResult;
       
  1158         entity.content = sharedXHTMLEntityResult;
       
  1159         entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
       
  1160     }
       
  1161     return &entity;
       
  1162 }
       
  1163 
       
  1164 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
       
  1165 {
       
  1166     UChar c = decodeNamedEntity(reinterpret_cast<const char*>(name));
       
  1167     if (!c)
       
  1168         return 0;
       
  1169 
       
  1170     CString value = String(&c, 1).utf8();
       
  1171     ASSERT(value.length() < 5);
       
  1172     xmlEntityPtr entity = sharedXHTMLEntity();
       
  1173     entity->length = value.length();
       
  1174     entity->name = name;
       
  1175     memcpy(sharedXHTMLEntityResult, value.data(), entity->length + 1);
       
  1176 
       
  1177     return entity;
       
  1178 }
       
  1179 
       
  1180 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
       
  1181 {
       
  1182     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
       
  1183     xmlEntityPtr ent = xmlGetPredefinedEntity(name);
       
  1184     if (ent) {
       
  1185         ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
       
  1186         return ent;
       
  1187     }
       
  1188 
       
  1189     ent = xmlGetDocEntity(ctxt->myDoc, name);
       
  1190     if (!ent && (getParser(closure)->isXHTMLDocument()
       
  1191 #if ENABLE(XHTMLMP)
       
  1192                  || getParser(closure)->isXHTMLMPDocument()
       
  1193 #endif
       
  1194 #if ENABLE(WML)
       
  1195                  || getParser(closure)->isWMLDocument()
       
  1196 #endif
       
  1197        )) {
       
  1198         ent = getXHTMLEntity(name);
       
  1199         if (ent)
       
  1200             ent->etype = XML_INTERNAL_GENERAL_ENTITY;
       
  1201     }
       
  1202 
       
  1203     return ent;
       
  1204 }
       
  1205 
       
  1206 static void startDocumentHandler(void* closure)
       
  1207 {
       
  1208     xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
       
  1209     getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
       
  1210     xmlSAX2StartDocument(closure);
       
  1211 }
       
  1212 
       
  1213 static void endDocumentHandler(void* closure)
       
  1214 {
       
  1215     getParser(closure)->endDocument();
       
  1216     xmlSAX2EndDocument(closure);
       
  1217 }
       
  1218 
       
  1219 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
       
  1220 {
       
  1221     getParser(closure)->internalSubset(name, externalID, systemID);
       
  1222     xmlSAX2InternalSubset(closure, name, externalID, systemID);
       
  1223 }
       
  1224 
       
  1225 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
       
  1226 {
       
  1227     String extId = toString(externalId);
       
  1228     if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
       
  1229         || (extId == "-//W3C//DTD XHTML 1.1//EN")
       
  1230         || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
       
  1231         || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
       
  1232         || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
       
  1233         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
       
  1234         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
       
  1235         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
       
  1236        )
       
  1237         getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
       
  1238 }
       
  1239 
       
  1240 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
       
  1241 {
       
  1242     // nothing to do, but we need this to work around a crasher
       
  1243     // http://bugzilla.gnome.org/show_bug.cgi?id=172255
       
  1244     // http://bugs.webkit.org/show_bug.cgi?id=5792
       
  1245 }
       
  1246 
       
  1247 void XMLDocumentParser::initializeParserContext(const char* chunk)
       
  1248 {
       
  1249     xmlSAXHandler sax;
       
  1250     memset(&sax, 0, sizeof(sax));
       
  1251 
       
  1252     sax.error = normalErrorHandler;
       
  1253     sax.fatalError = fatalErrorHandler;
       
  1254     sax.characters = charactersHandler;
       
  1255     sax.processingInstruction = processingInstructionHandler;
       
  1256     sax.cdataBlock = cdataBlockHandler;
       
  1257     sax.comment = commentHandler;
       
  1258     sax.warning = warningHandler;
       
  1259     sax.startElementNs = startElementNsHandler;
       
  1260     sax.endElementNs = endElementNsHandler;
       
  1261     sax.getEntity = getEntityHandler;
       
  1262     sax.startDocument = startDocumentHandler;
       
  1263     sax.endDocument = endDocumentHandler;
       
  1264     sax.internalSubset = internalSubsetHandler;
       
  1265     sax.externalSubset = externalSubsetHandler;
       
  1266     sax.ignorableWhitespace = ignorableWhitespaceHandler;
       
  1267     sax.entityDecl = xmlSAX2EntityDecl;
       
  1268     sax.initialized = XML_SAX2_MAGIC;
       
  1269     m_parserStopped = false;
       
  1270     m_sawError = false;
       
  1271     m_sawXSLTransform = false;
       
  1272     m_sawFirstElement = false;
       
  1273 
       
  1274     XMLDocumentParserScope scope(document()->docLoader());
       
  1275     if (m_parsingFragment)
       
  1276         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
       
  1277     else
       
  1278         m_context = XMLParserContext::createStringParser(&sax, this);
       
  1279 }
       
  1280 
       
  1281 void XMLDocumentParser::doEnd()
       
  1282 {
       
  1283 #if ENABLE(XSLT)
       
  1284     if (m_sawXSLTransform) {
       
  1285         void* doc = xmlDocPtrForString(document()->docLoader(), m_originalSourceForTransform, document()->url().string());
       
  1286         document()->setTransformSource(new TransformSource(doc));
       
  1287 
       
  1288         document()->setParsing(false); // Make the doc think it's done, so it will apply xsl sheets.
       
  1289         document()->updateStyleSelector();
       
  1290         document()->setParsing(true);
       
  1291         m_parserStopped = true;
       
  1292     }
       
  1293 #endif
       
  1294 
       
  1295     if (m_parserStopped)
       
  1296         return;
       
  1297 
       
  1298     if (m_context) {
       
  1299         // Tell libxml we're done.
       
  1300         {
       
  1301             XMLDocumentParserScope scope(document()->docLoader());
       
  1302             xmlParseChunk(context(), 0, 0, 1);
       
  1303         }
       
  1304 
       
  1305         m_context = 0;
       
  1306     }
       
  1307 }
       
  1308 
       
  1309 #if ENABLE(XSLT)
       
  1310 void* xmlDocPtrForString(DocLoader* docLoader, const String& source, const String& url)
       
  1311 {
       
  1312     if (source.isEmpty())
       
  1313         return 0;
       
  1314 
       
  1315     // Parse in a single chunk into an xmlDocPtr
       
  1316     // FIXME: Hook up error handlers so that a failure to parse the main document results in
       
  1317     // good error messages.
       
  1318     const UChar BOM = 0xFEFF;
       
  1319     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
       
  1320 
       
  1321     XMLDocumentParserScope scope(docLoader, errorFunc, 0);
       
  1322     xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()),
       
  1323                                         source.length() * sizeof(UChar),
       
  1324                                         url.latin1().data(),
       
  1325                                         BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE",
       
  1326                                         XSLT_PARSE_OPTIONS);
       
  1327     return sourceDoc;
       
  1328 }
       
  1329 #endif
       
  1330 
       
  1331 int XMLDocumentParser::lineNumber() const
       
  1332 {
       
  1333     return context() ? context()->input->line : 1;
       
  1334 }
       
  1335 
       
  1336 int XMLDocumentParser::columnNumber() const
       
  1337 {
       
  1338     return context() ? context()->input->col : 1;
       
  1339 }
       
  1340 
       
  1341 void XMLDocumentParser::stopParsing()
       
  1342 {
       
  1343     DocumentParser::stopParsing();
       
  1344     if (context())
       
  1345         xmlStopParser(context());
       
  1346 }
       
  1347 
       
  1348 void XMLDocumentParser::resumeParsing()
       
  1349 {
       
  1350     ASSERT(m_parserPaused);
       
  1351 
       
  1352     m_parserPaused = false;
       
  1353 
       
  1354     // First, execute any pending callbacks
       
  1355     while (!m_pendingCallbacks->isEmpty()) {
       
  1356         m_pendingCallbacks->callAndRemoveFirstCallback(this);
       
  1357 
       
  1358         // A callback paused the parser
       
  1359         if (m_parserPaused)
       
  1360             return;
       
  1361     }
       
  1362 
       
  1363     // Then, write any pending data
       
  1364     SegmentedString rest = m_pendingSrc;
       
  1365     m_pendingSrc.clear();
       
  1366     append(rest);
       
  1367 
       
  1368     // Finally, if finish() has been called and write() didn't result
       
  1369     // in any further callbacks being queued, call end()
       
  1370     if (m_finishCalled && m_pendingCallbacks->isEmpty())
       
  1371         end();
       
  1372 }
       
  1373 
       
  1374 // FIXME: This method should be possible to implement using the DocumentParser
       
  1375 // API, instead of needing to grab at libxml2 state directly.
       
  1376 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* parent, FragmentScriptingPermission scriptingPermission)
       
  1377 {
       
  1378     if (!chunk.length())
       
  1379         return true;
       
  1380 
       
  1381     XMLDocumentParser parser(fragment, parent, scriptingPermission);
       
  1382 
       
  1383     CString chunkAsUtf8 = chunk.utf8();
       
  1384     parser.initializeParserContext(chunkAsUtf8.data());
       
  1385 
       
  1386     xmlParseContent(parser.context());
       
  1387 
       
  1388     parser.endDocument();
       
  1389 
       
  1390     // Check if all the chunk has been processed.
       
  1391     long bytesProcessed = xmlByteConsumed(parser.context());
       
  1392     if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length())
       
  1393         return false;
       
  1394 
       
  1395     // No error if the chunk is well formed or it is not but we have no error.
       
  1396     return parser.context()->wellFormed || xmlCtxtGetLastError(parser.context()) == 0;
       
  1397 }
       
  1398 
       
  1399 // --------------------------------
       
  1400 
       
  1401 struct AttributeParseState {
       
  1402     HashMap<String, String> attributes;
       
  1403     bool gotAttributes;
       
  1404 };
       
  1405 
       
  1406 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
       
  1407                                             const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/,
       
  1408                                             int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes)
       
  1409 {
       
  1410     if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
       
  1411         return;
       
  1412 
       
  1413     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
       
  1414     AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
       
  1415 
       
  1416     state->gotAttributes = true;
       
  1417 
       
  1418     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
       
  1419     for (int i = 0; i < nb_attributes; i++) {
       
  1420         String attrLocalName = toString(attributes[i].localname);
       
  1421         int valueLength = (int) (attributes[i].end - attributes[i].value);
       
  1422         String attrValue = toString(attributes[i].value, valueLength);
       
  1423         String attrPrefix = toString(attributes[i].prefix);
       
  1424         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
       
  1425 
       
  1426         state->attributes.set(attrQName, attrValue);
       
  1427     }
       
  1428 }
       
  1429 
       
  1430 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
       
  1431 {
       
  1432     AttributeParseState state;
       
  1433     state.gotAttributes = false;
       
  1434 
       
  1435     xmlSAXHandler sax;
       
  1436     memset(&sax, 0, sizeof(sax));
       
  1437     sax.startElementNs = attributesStartElementNsHandler;
       
  1438     sax.initialized = XML_SAX2_MAGIC;
       
  1439     RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
       
  1440     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
       
  1441     xmlParseChunk(parser->context(), reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1);
       
  1442     attrsOK = state.gotAttributes;
       
  1443     return state.attributes;
       
  1444 }
       
  1445 
       
  1446 }