FCL/sf/mw/qtwebkit: comparison WebCore/html/LegacyHTMLDocumentParser.cpp

equal deleted inserted replaced

--1:000000000000
+:4f2f89ce4247
+/*
+Copyright (C) 1997 Martin Jones (mjones@kde.org)
+(C) 1997 Torben Weis (weis@kde.org)
+(C) 1998 Waldo Bastian (bastian@kde.org)
+(C) 1999 Lars Knoll (knoll@kde.org)
+(C) 1999 Antti Koivisto (koivisto@kde.org)
+(C) 2001 Dirk Mueller (mueller@kde.org)
+Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
+Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public
+License as published by the Free Software Foundation; either
+version 2 of the License, or (at your option) any later version.
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.
+You should have received a copy of the GNU Library General Public License
+along with this library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.
+*/
+#include "config.h"
+#include "LegacyHTMLDocumentParser.h"
+#include "Attribute.h"
+#include "CSSHelper.h"
+#include "Cache.h"
+#include "CachedScript.h"
+#include "DocLoader.h"
+#include "DocumentFragment.h"
+#include "Event.h"
+#include "EventNames.h"
+#include "Frame.h"
+#include "FrameLoader.h"
+#include "FrameView.h"
+#include "HTMLElement.h"
+#include "HTMLNames.h"
+#include "LegacyHTMLTreeBuilder.h"
+#include "HTMLScriptElement.h"
+#include "HTMLViewSourceDocument.h"
+#include "ImageLoader.h"
+#include "InspectorTimelineAgent.h"
+#include "Page.h"
+#include "LegacyPreloadScanner.h"
+#include "ScriptSourceCode.h"
+#include "ScriptValue.h"
+#include "XSSAuditor.h"
+#include <wtf/ASCIICType.h>
+#include <wtf/CurrentTime.h>
+#include "HTMLEntityNames.cpp"
+#define PRELOAD_SCANNER_ENABLED 1
+using namespace WTF;
+using namespace std;
+namespace WebCore {
+using namespace HTMLNames;
+// This value is used to define how many loops (approximately tokens)
+// the parser will make before checking if it should yield.
+// To increase responsiveness reduce both ChunkSize and TimeDelay contants.
+static const int defaultTokenizerChunkSize = 4096;
+// FIXME: We would like this constant to be 200ms.
+// Yielding more aggressively results in increased responsiveness and better incremental rendering.
+// It slows down overall page-load on slower machines, though, so for now we set a value of 500.
+// TimeDelay controls the maximum time the parser will run before yielding.
+// Inline script execution can cause the parser to excede this limit.
+static const double defaultTokenizerTimeDelay = 0.500;
+static const char commentStart [] = "<!--";
+static const char doctypeStart [] = "<!doctype";
+static const char publicStart [] = "public";
+static const char systemStart [] = "system";
+static const char scriptEnd [] = "</script";
+static const char xmpEnd [] = "</xmp";
+static const char styleEnd [] =  "</style";
+static const char textareaEnd [] = "</textarea";
+static const char titleEnd [] = "</title";
+static const char iframeEnd [] = "</iframe";
+// Full support for MS Windows extensions to Latin-1.
+// Technically these extensions should only be activated for pages
+// marked "windows-1252" or "cp1252", but
+// in the standard Microsoft way, these extensions infect hundreds of thousands
+// of web pages.  Note that people with non-latin-1 Microsoft extensions
+// are SOL.
+//
+// See: http://www.microsoft.com/globaldev/reference/WinCP.asp
+//      http://www.bbsinc.com/iso8859.html
+//      http://www.obviously.com/
+//
+// There may be better equivalents
+// We only need this for entities. For non-entity text, we handle this in the text encoding.
+static const UChar windowsLatin1ExtensionArray[32] = {
+0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
+0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
+0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
+0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 98-9F
+};
+static inline UChar fixUpChar(UChar c)
+{
+if ((c & ~0x1F) != 0x0080)
+return c;
+return windowsLatin1ExtensionArray[c - 0x80];
+}
+static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length)
+{
+for (unsigned i = 0; i != length; ++i) {
+unsigned char c1 = s1[i];
+unsigned char uc1 = toASCIIUpper(static_cast<char>(c1));
+UChar c2 = s2[i];
+if (c1 != c2 && uc1 != c2)
+return false;
+}
+return true;
+}
+inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode)
+{
+if (!attrName.isEmpty()) {
+ASSERT(!attrName.contains('/'));
+RefPtr<Attribute> a = Attribute::createMapped(attrName, attributeValue);
+if (!attrs) {
+attrs = NamedNodeMap::create();
+attrs->reserveInitialCapacity(10);
+}
+attrs->insertAttribute(a.release(), viewSourceMode);
+}
+attrName = emptyAtom;
+}
+// ----------------------------------------------------------------------------
+LegacyHTMLDocumentParser::LegacyHTMLDocumentParser(HTMLDocument* document, bool reportErrors)
+: ScriptableDocumentParser(document)
+, m_buffer(0)
+, m_scriptCode(0)
+, m_scriptCodeSize(0)
+, m_scriptCodeCapacity(0)
+, m_scriptCodeResync(0)
+, m_executingScript(0)
+, m_requestingScript(false)
+, m_hasScriptsWaitingForStylesheets(false)
+, m_timer(this, &LegacyHTMLDocumentParser::timerFired)
+, m_externalScriptsTimer(this, &LegacyHTMLDocumentParser::executeExternalScriptsTimerFired)
+, m_treeBuilder(new LegacyHTMLTreeBuilder(document, reportErrors))
+, m_inWrite(false)
+, m_fragment(false)
+, m_scriptingPermission(FragmentScriptingAllowed)
+{
+begin();
+}
+LegacyHTMLDocumentParser::LegacyHTMLDocumentParser(HTMLViewSourceDocument* document)
+: ScriptableDocumentParser(document, true)
+, m_buffer(0)
+, m_scriptCode(0)
+, m_scriptCodeSize(0)
+, m_scriptCodeCapacity(0)
+, m_scriptCodeResync(0)
+, m_executingScript(0)
+, m_requestingScript(false)
+, m_hasScriptsWaitingForStylesheets(false)
+, m_timer(this, &LegacyHTMLDocumentParser::timerFired)
+, m_externalScriptsTimer(this, &LegacyHTMLDocumentParser::executeExternalScriptsTimerFired)
+, m_inWrite(false)
+, m_fragment(false)
+, m_scriptingPermission(FragmentScriptingAllowed)
+{
+begin();
+}
+LegacyHTMLDocumentParser::LegacyHTMLDocumentParser(DocumentFragment* frag, FragmentScriptingPermission scriptingPermission)
+: ScriptableDocumentParser(frag->document())
+, m_buffer(0)
+, m_scriptCode(0)
+, m_scriptCodeSize(0)
+, m_scriptCodeCapacity(0)
+, m_scriptCodeResync(0)
+, m_executingScript(0)
+, m_requestingScript(false)
+, m_hasScriptsWaitingForStylesheets(false)
+, m_timer(this, &LegacyHTMLDocumentParser::timerFired)
+, m_externalScriptsTimer(this, &LegacyHTMLDocumentParser::executeExternalScriptsTimerFired)
+, m_treeBuilder(new LegacyHTMLTreeBuilder(frag, scriptingPermission))
+, m_inWrite(false)
+, m_fragment(true)
+, m_scriptingPermission(scriptingPermission)
+{
+begin();
+}
+void LegacyHTMLDocumentParser::reset()
+{
+ASSERT(m_executingScript == 0);
+while (!m_pendingScripts.isEmpty()) {
+CachedResourceHandle<CachedScript> cs = m_pendingScripts.takeFirst();
+ASSERT(cache()->disabled() || cs->accessCount() > 0);
+cs->removeClient(this);
+}
+fastFree(m_buffer);
+m_buffer = m_dest = 0;
+m_bufferSize = 0;
+fastFree(m_scriptCode);
+m_scriptCode = 0;
+m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
+m_timer.stop();
+m_externalScriptsTimer.stop();
+m_state.setAllowYield(false);
+m_state.setForceSynchronous(false);
+m_currentToken.reset();
+m_doctypeToken.reset();
+m_doctypeSearchCount = 0;
+m_doctypeSecondarySearchCount = 0;
+m_hasScriptsWaitingForStylesheets = false;
+}
+void LegacyHTMLDocumentParser::begin()
+{
+m_executingScript = 0;
+m_requestingScript = false;
+m_hasScriptsWaitingForStylesheets = false;
+m_state.setLoadingExtScript(false);
+reset();
+m_bufferSize = 254;
+m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * 254));
+m_dest = m_buffer;
+tquote = NoQuote;
+searchCount = 0;
+m_state.setEntityState(NoEntity);
+m_scriptTagSrcAttrValue = String();
+m_pendingSrc.clear();
+m_currentPrependingSrc = 0;
+m_noMoreData = false;
+m_brokenComments = false;
+m_brokenServer = false;
+m_lineNumber = 0;
+m_currentScriptTagStartLineNumber = 0;
+m_currentTagStartLineNumber = 0;
+m_state.setForceSynchronous(false);
+Page* page = document()->page();
+if (page && page->hasCustomHTMLTokenizerTimeDelay())
+m_tokenizerTimeDelay = page->customHTMLTokenizerTimeDelay();
+else
+m_tokenizerTimeDelay = defaultTokenizerTimeDelay;
+if (page && page->hasCustomHTMLTokenizerChunkSize())
+m_tokenizerChunkSize = page->customHTMLTokenizerChunkSize();
+else
+m_tokenizerChunkSize = defaultTokenizerChunkSize;
+}
+void LegacyHTMLDocumentParser::setForceSynchronous(bool force)
+{
+m_state.setForceSynchronous(force);
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::processListing(SegmentedString list, State state)
+{
+// This function adds the listing 'list' as
+// preformatted text-tokens to the token-collection
+while (!list.isEmpty()) {
+if (state.skipLF()) {
+state.setSkipLF(false);
+if (*list == '\n') {
+list.advance();
+continue;
+}
+}
+checkBuffer();
+if (*list == '\n' || *list == '\r') {
+if (state.discardLF())
+// Ignore this LF
+state.setDiscardLF(false); // We have discarded 1 LF
+else
+*m_dest++ = '\n';
+/* Check for MS-DOS CRLF sequence */
+if (*list == '\r')
+state.setSkipLF(true);
+list.advance();
+} else {
+state.setDiscardLF(false);
+*m_dest++ = *list;
+list.advance();
+}
+}
+return state;
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseNonHTMLText(SegmentedString& src, State state)
+{
+ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState());
+ASSERT(!state.hasTagState());
+ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1);
+if (state.inScript() && !m_currentScriptTagStartLineNumber)
+m_currentScriptTagStartLineNumber = m_lineNumber;
+if (state.inComment())
+state = parseComment(src, state);
+int lastDecodedEntityPosition = -1;
+while (!src.isEmpty()) {
+checkScriptBuffer();
+UChar ch = *src;
+if (!m_scriptCodeResync && !m_brokenComments &&
+!state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() &&
+m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' &&
+(lastDecodedEntityPosition < m_scriptCodeSize - 3)) {
+state.setInComment(true);
+state = parseComment(src, state);
+continue;
+}
+if (m_scriptCodeResync && !tquote && ch == '>') {
+src.advancePastNonNewline();
+m_scriptCodeSize = m_scriptCodeResync - 1;
+m_scriptCodeResync = 0;
+m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0;
+if (state.inScript())
+state = scriptHandler(state);
+else {
+state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
+processToken();
+if (state.inStyle()) {
+m_currentToken.tagName = styleTag.localName();
+m_currentToken.beginTag = false;
+} else if (state.inTextArea()) {
+m_currentToken.tagName = textareaTag.localName();
+m_currentToken.beginTag = false;
+} else if (state.inTitle()) {
+m_currentToken.tagName = titleTag.localName();
+m_currentToken.beginTag = false;
+} else if (state.inXmp()) {
+m_currentToken.tagName = xmpTag.localName();
+m_currentToken.beginTag = false;
+} else if (state.inIFrame()) {
+m_currentToken.tagName = iframeTag.localName();
+m_currentToken.beginTag = false;
+}
+processToken();
+state.setInStyle(false);
+state.setInScript(false);
+state.setInTextArea(false);
+state.setInTitle(false);
+state.setInXmp(false);
+state.setInIFrame(false);
+tquote = NoQuote;
+m_scriptCodeSize = m_scriptCodeResync = 0;
+}
+return state;
+}
+// possible end of tagname, lets check.
+if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) &&
+m_scriptCodeSize >= m_searchStopperLength &&
+tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) &&
+(lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) {
+m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1;
+tquote = NoQuote;
+continue;
+}
+if (m_scriptCodeResync && !state.escaped()) {
+if (ch == '\"')
+tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
+else if (ch == '\'')
+tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
+else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
+tquote = NoQuote;
+}
+state.setEscaped(!state.escaped() && ch == '\\');
+if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') {
+UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize;
+src.advancePastNonNewline();
+state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
+if (scriptCodeDest == m_scriptCode + m_scriptCodeSize)
+lastDecodedEntityPosition = m_scriptCodeSize;
+else
+m_scriptCodeSize = scriptCodeDest - m_scriptCode;
+} else {
+m_scriptCode[m_scriptCodeSize++] = ch;
+src.advance(m_lineNumber);
+}
+}
+return state;
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::scriptHandler(State state)
+{
+// We are inside a <script>
+bool doScriptExec = false;
+int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenzier line numbers are 0 based
+// Reset m_currentScriptTagStartLineNumber to indicate that we've finished parsing the current script element
+m_currentScriptTagStartLineNumber = 0;
+// (Bugzilla 3837) Scripts following a frameset element should not execute or,
+// in the case of extern scripts, even load.
+bool followingFrameset = (document()->body() && document()->body()->hasTagName(framesetTag));
+CachedScript* cs = 0;
+// don't load external scripts for standalone documents (for now)
+if (!inViewSourceMode()) {
+if (!m_scriptTagSrcAttrValue.isEmpty() && document()->frame()) {
+// forget what we just got; load from src url instead
+if (!m_treeBuilder->skipMode() && !followingFrameset) {
+// The parser might have been stopped by for example a window.close call in an earlier script.
+// If so, we don't want to load scripts.
+if (!m_parserStopped && m_scriptNode->dispatchBeforeLoadEvent(m_scriptTagSrcAttrValue) &&
+(cs = document()->docLoader()->requestScript(m_scriptTagSrcAttrValue, m_scriptTagCharsetAttrValue)))
+m_pendingScripts.append(cs);
+else
+m_scriptNode = 0;
+} else
+m_scriptNode = 0;
+m_scriptTagSrcAttrValue = String();
+} else {
+// Parse m_scriptCode containing <script> info
+doScriptExec = m_scriptNode->shouldExecuteAsJavaScript();
+#if ENABLE(XHTMLMP)
+if (!doScriptExec)
+document()->setShouldProcessNoscriptElement(true);
+#endif
+m_scriptNode = 0;
+}
+}
+state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
+RefPtr<Node> node = processToken();
+if (node && m_scriptingPermission == FragmentScriptingNotAllowed) {
+ExceptionCode ec;
+node->remove(ec);
+node = 0;
+}
+String scriptString = node ? node->textContent() : "";
+m_currentToken.tagName = scriptTag.localName();
+m_currentToken.beginTag = false;
+processToken();
+state.setInScript(false);
+m_scriptCodeSize = m_scriptCodeResync = 0;
+// FIXME: The script should be syntax highlighted.
+if (inViewSourceMode())
+return state;
+SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
+SegmentedString prependingSrc;
+m_currentPrependingSrc = &prependingSrc;
+if (!m_treeBuilder->skipMode() && !followingFrameset) {
+if (cs) {
+if (savedPrependingSrc)
+savedPrependingSrc->append(m_src);
+else
+m_pendingSrc.prepend(m_src);
+setSrc(SegmentedString());
+// the ref() call below may call notifyFinished if the script is already in cache,
+// and that mucks with the state directly, so we must write it back to the object.
+m_state = state;
+bool savedRequestingScript = m_requestingScript;
+m_requestingScript = true;
+cs->addClient(this);
+m_requestingScript = savedRequestingScript;
+state = m_state;
+// will be 0 if script was already loaded and ref() executed it
+if (!m_pendingScripts.isEmpty())
+state.setLoadingExtScript(true);
+} else if (!m_fragment && doScriptExec) {
+if (!m_executingScript)
+m_pendingSrc.prepend(m_src);
+else
+prependingSrc = m_src;
+setSrc(SegmentedString());
+state = scriptExecution(ScriptSourceCode(scriptString, document()->frame() ? document()->frame()->document()->url() : KURL(), startLine), state);
+}
+}
+if (!m_executingScript && !state.loadingExtScript()) {
+m_src.append(m_pendingSrc);
+m_pendingSrc.clear();
+} else if (!prependingSrc.isEmpty()) {
+// restore first so that the write appends in the right place
+// (does not hurt to do it again below)
+m_currentPrependingSrc = savedPrependingSrc;
+// we need to do this slightly modified bit of one of the write() cases
+// because we want to prepend to m_pendingSrc rather than appending
+// if there's no previous prependingSrc
+if (!m_pendingScripts.isEmpty()) {
+if (m_currentPrependingSrc)
+m_currentPrependingSrc->append(prependingSrc);
+else
+m_pendingSrc.prepend(prependingSrc);
+} else {
+m_state = state;
+write(prependingSrc, false);
+state = m_state;
+}
+}
+#if PRELOAD_SCANNER_ENABLED
+if (!m_pendingScripts.isEmpty() && !m_executingScript) {
+if (!m_preloadScanner)
+m_preloadScanner.set(new LegacyPreloadScanner(document()));
+if (!m_preloadScanner->inProgress()) {
+m_preloadScanner->begin();
+m_preloadScanner->write(m_pendingSrc);
+}
+}
+#endif
+m_currentPrependingSrc = savedPrependingSrc;
+return state;
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::scriptExecution(const ScriptSourceCode& sourceCode, State state)
+{
+if (m_fragment || !document()->frame())
+return state;
+m_executingScript++;
+SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
+SegmentedString prependingSrc;
+m_currentPrependingSrc = &prependingSrc;
+m_state = state;
+document()->frame()->script()->executeScript(sourceCode);
+state = m_state;
+state.setAllowYield(true);
+m_executingScript--;
+if (!m_executingScript && !state.loadingExtScript()) {
+m_pendingSrc.prepend(prependingSrc);
+m_src.append(m_pendingSrc);
+m_pendingSrc.clear();
+} else if (!prependingSrc.isEmpty()) {
+// restore first so that the write appends in the right place
+// (does not hurt to do it again below)
+m_currentPrependingSrc = savedPrependingSrc;
+// we need to do this slightly modified bit of one of the write() cases
+// because we want to prepend to m_pendingSrc rather than appending
+// if there's no previous prependingSrc
+if (!m_pendingScripts.isEmpty()) {
+if (m_currentPrependingSrc)
+m_currentPrependingSrc->append(prependingSrc);
+else
+m_pendingSrc.prepend(prependingSrc);
+#if PRELOAD_SCANNER_ENABLED
+// We are stuck waiting for another script. Lets check the source that
+// was just document.write()n for anything to load.
+LegacyPreloadScanner documentWritePreloadScanner(document());
+documentWritePreloadScanner.begin();
+documentWritePreloadScanner.write(prependingSrc);
+documentWritePreloadScanner.end();
+#endif
+} else {
+m_state = state;
+write(prependingSrc, false);
+state = m_state;
+}
+}
+m_currentPrependingSrc = savedPrependingSrc;
+return state;
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseComment(SegmentedString& src, State state)
+{
+// FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.
+checkScriptBuffer(src.length());
+while (!src.isEmpty()) {
+UChar ch = *src;
+m_scriptCode[m_scriptCodeSize++] = ch;
+if (ch == '>') {
+bool handleBrokenComments = m_brokenComments && !(state.inScript() || state.inStyle());
+int endCharsCount = 1; // start off with one for the '>' character
+if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize-3] == '-' && m_scriptCode[m_scriptCodeSize-2] == '-') {
+endCharsCount = 3;
+} else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize-4] == '-' && m_scriptCode[m_scriptCodeSize-3] == '-' &&
+m_scriptCode[m_scriptCodeSize-2] == '!') {
+// Other browsers will accept --!> as a close comment, even though it's
+// not technically valid.
+endCharsCount = 4;
+}
+if (handleBrokenComments || endCharsCount > 1) {
+src.advancePastNonNewline();
+if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle() || state.inIFrame())) {
+checkScriptBuffer();
+m_scriptCode[m_scriptCodeSize] = 0;
+m_scriptCode[m_scriptCodeSize + 1] = 0;
+m_currentToken.tagName = commentAtom;
+m_currentToken.beginTag = true;
+state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - endCharsCount), state);
+processToken();
+m_currentToken.tagName = commentAtom;
+m_currentToken.beginTag = false;
+processToken();
+m_scriptCodeSize = 0;
+}
+state.setInComment(false);
+return state; // Finished parsing comment
+}
+}
+src.advance(m_lineNumber);
+}
+return state;
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseServer(SegmentedString& src, State state)
+{
+checkScriptBuffer(src.length());
+while (!src.isEmpty()) {
+UChar ch = *src;
+m_scriptCode[m_scriptCodeSize++] = ch;
+if (ch == '>' && m_scriptCodeSize > 1 && m_scriptCode[m_scriptCodeSize - 2] == '%') {
+src.advancePastNonNewline();
+state.setInServer(false);
+m_scriptCodeSize = 0;
+return state; // Finished parsing server include
+}
+src.advance(m_lineNumber);
+}
+return state;
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseProcessingInstruction(SegmentedString& src, State state)
+{
+UChar oldchar = 0;
+while (!src.isEmpty()) {
+UChar chbegin = *src;
+if (chbegin == '\'')
+tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
+else if (chbegin == '\"')
+tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
+// Look for '?>'
+// Some crappy sites omit the "?" before it, so
+// we look for an unquoted '>' instead. (IE compatible)
+else if (chbegin == '>' && (!tquote || oldchar == '?')) {
+// We got a '?>' sequence
+state.setInProcessingInstruction(false);
+src.advancePastNonNewline();
+state.setDiscardLF(true);
+return state; // Finished parsing comment!
+}
+src.advance(m_lineNumber);
+oldchar = chbegin;
+}
+return state;
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseText(SegmentedString& src, State state)
+{
+while (!src.isEmpty()) {
+UChar cc = *src;
+if (state.skipLF()) {
+state.setSkipLF(false);
+if (cc == '\n') {
+src.advancePastNewline(m_lineNumber);
+continue;
+}
+}
+// do we need to enlarge the buffer?
+checkBuffer();
+if (cc == '\r') {
+state.setSkipLF(true);
+*m_dest++ = '\n';
+} else
+*m_dest++ = cc;
+src.advance(m_lineNumber);
+}
+return state;
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseEntity(SegmentedString& src, UChar*& dest, State state, unsigned& cBufferPos, bool start, bool parsingTag)
+{
+if (start) {
+cBufferPos = 0;
+state.setEntityState(SearchEntity);
+EntityUnicodeValue = 0;
+}
+while (!src.isEmpty()) {
+UChar cc = *src;
+switch (state.entityState()) {
+case NoEntity:
+ASSERT(state.entityState() != NoEntity);
+return state;
+case SearchEntity:
+if (cc == '#') {
+m_cBuffer[cBufferPos++] = cc;
+src.advancePastNonNewline();
+state.setEntityState(NumericSearch);
+} else
+state.setEntityState(EntityName);
+break;
+case NumericSearch:
+if (cc == 'x' || cc == 'X') {
+m_cBuffer[cBufferPos++] = cc;
+src.advancePastNonNewline();
+state.setEntityState(Hexadecimal);
+} else if (cc >= '0' && cc <= '9')
+state.setEntityState(Decimal);
+else
+state.setEntityState(SearchSemicolon);
+break;
+case Hexadecimal: {
+int ll = min(src.length(), 10 - cBufferPos);
+while (ll--) {
+cc = *src;
+if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) {
+state.setEntityState(SearchSemicolon);
+break;
+}
+int digit;
+if (cc < 'A')
+digit = cc - '0';
+else
+digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch
+EntityUnicodeValue = EntityUnicodeValue * 16 + digit;
+m_cBuffer[cBufferPos++] = cc;
+src.advancePastNonNewline();
+}
+if (cBufferPos == 10)
+state.setEntityState(SearchSemicolon);
+break;
+}
+case Decimal:
+{
+int ll = min(src.length(), 9-cBufferPos);
+while (ll--) {
+cc = *src;
+if (!(cc >= '0' && cc <= '9')) {
+state.setEntityState(SearchSemicolon);
+break;
+}
+EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
+m_cBuffer[cBufferPos++] = cc;
+src.advancePastNonNewline();
+}
+if (cBufferPos == 9)
+state.setEntityState(SearchSemicolon);
+break;
+}
+case EntityName:
+{
+int ll = min(src.length(), 9-cBufferPos);
+while (ll--) {
+cc = *src;
+if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
+state.setEntityState(SearchSemicolon);
+break;
+}
+m_cBuffer[cBufferPos++] = cc;
+src.advancePastNonNewline();
+}
+if (cBufferPos == 9)
+state.setEntityState(SearchSemicolon);
+if (state.entityState() == SearchSemicolon) {
+if (cBufferPos > 1) {
+// Since the maximum length of entity name is 9,
+// so a single char array which is allocated on
+// the stack, its length is 10, should be OK.
+// Also if we have an illegal character, we treat it
+// as illegal entity name.
+unsigned testedEntityNameLen = 0;
+char tmpEntityNameBuffer[10];
+ASSERT(cBufferPos < 10);
+for (; testedEntityNameLen < cBufferPos; ++testedEntityNameLen) {
+if (m_cBuffer[testedEntityNameLen] > 0x7e)
+break;
+tmpEntityNameBuffer[testedEntityNameLen] = m_cBuffer[testedEntityNameLen];
+}
+const Entity *e;
+if (testedEntityNameLen == cBufferPos)
+e = findEntity(tmpEntityNameBuffer, cBufferPos);
+else
+e = 0;
+if (e)
+EntityUnicodeValue = e->code;
+// be IE compatible
+if (parsingTag && EntityUnicodeValue > 255 && *src != ';')
+EntityUnicodeValue = 0;
+}
+}
+else
+break;
+}
+case SearchSemicolon:
+// Don't allow values that are more than 21 bits.
+if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) {
+if (!inViewSourceMode()) {
+if (*src == ';')
+src.advancePastNonNewline();
+if (EntityUnicodeValue <= 0xFFFF) {
+checkBuffer();
+src.push(fixUpChar(EntityUnicodeValue));
+} else {
+// Convert to UTF-16, using surrogate code points.
+checkBuffer(2);
+src.push(U16_LEAD(EntityUnicodeValue));
+src.push(U16_TRAIL(EntityUnicodeValue));
+}
+} else {
+// FIXME: We should eventually colorize entities by sending them as a special token.
+// 12 bytes required: up to 10 bytes in m_cBuffer plus the
+// leading '&' and trailing ';'
+checkBuffer(12);
+*dest++ = '&';
+for (unsigned i = 0; i < cBufferPos; i++)
+dest[i] = m_cBuffer[i];
+dest += cBufferPos;
+if (*src == ';') {
+*dest++ = ';';
+src.advancePastNonNewline();
+}
+}
+} else {
+// 11 bytes required: up to 10 bytes in m_cBuffer plus the
+// leading '&'
+checkBuffer(11);
+// ignore the sequence, add it to the buffer as plaintext
+*dest++ = '&';
+for (unsigned i = 0; i < cBufferPos; i++)
+dest[i] = m_cBuffer[i];
+dest += cBufferPos;
+}
+state.setEntityState(NoEntity);
+return state;
+}
+}
+return state;
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseDoctype(SegmentedString& src, State state)
+{
+ASSERT(state.inDoctype());
+while (!src.isEmpty() && state.inDoctype()) {
+UChar c = *src;
+bool isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' ';
+switch (m_doctypeToken.state()) {
+case DoctypeBegin: {
+m_doctypeToken.setState(DoctypeBeforeName);
+if (isWhitespace) {
+src.advance(m_lineNumber);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+}
+break;
+}
+case DoctypeBeforeName: {
+if (c == '>') {
+// Malformed.  Just exit.
+src.advancePastNonNewline();
+state.setInDoctype(false);
+if (inViewSourceMode())
+processDoctypeToken();
+} else if (isWhitespace) {
+src.advance(m_lineNumber);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else
+m_doctypeToken.setState(DoctypeName);
+break;
+}
+case DoctypeName: {
+if (c == '>') {
+// Valid doctype. Emit it.
+src.advancePastNonNewline();
+state.setInDoctype(false);
+processDoctypeToken();
+} else if (isWhitespace) {
+m_doctypeSearchCount = 0; // Used now to scan for PUBLIC
+m_doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM
+m_doctypeToken.setState(DoctypeAfterName);
+src.advance(m_lineNumber);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else {
+src.advancePastNonNewline();
+m_doctypeToken.m_name.append(c);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+}
+break;
+}
+case DoctypeAfterName: {
+if (c == '>') {
+// Valid doctype. Emit it.
+src.advancePastNonNewline();
+state.setInDoctype(false);
+processDoctypeToken();
+} else if (!isWhitespace) {
+src.advancePastNonNewline();
+if (toASCIILower(c) == publicStart[m_doctypeSearchCount]) {
+m_doctypeSearchCount++;
+if (m_doctypeSearchCount == 6)
+// Found 'PUBLIC' sequence
+m_doctypeToken.setState(DoctypeBeforePublicID);
+} else if (m_doctypeSearchCount > 0) {
+m_doctypeSearchCount = 0;
+m_doctypeToken.setState(DoctypeBogus);
+} else if (toASCIILower(c) == systemStart[m_doctypeSecondarySearchCount]) {
+m_doctypeSecondarySearchCount++;
+if (m_doctypeSecondarySearchCount == 6)
+// Found 'SYSTEM' sequence
+m_doctypeToken.setState(DoctypeBeforeSystemID);
+} else {
+m_doctypeSecondarySearchCount = 0;
+m_doctypeToken.setState(DoctypeBogus);
+}
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else {
+src.advance(m_lineNumber); // Whitespace keeps us in the after name state.
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+}
+break;
+}
+case DoctypeBeforePublicID: {
+if (c == '\"' || c == '\'') {
+tquote = c == '\"' ? DoubleQuote : SingleQuote;
+m_doctypeToken.setState(DoctypePublicID);
+src.advancePastNonNewline();
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else if (c == '>') {
+// Considered bogus.  Don't process the doctype.
+src.advancePastNonNewline();
+state.setInDoctype(false);
+if (inViewSourceMode())
+processDoctypeToken();
+} else if (isWhitespace) {
+src.advance(m_lineNumber);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else
+m_doctypeToken.setState(DoctypeBogus);
+break;
+}
+case DoctypePublicID: {
+if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
+src.advancePastNonNewline();
+m_doctypeToken.setState(DoctypeAfterPublicID);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else if (c == '>') {
+// Considered bogus.  Don't process the doctype.
+src.advancePastNonNewline();
+state.setInDoctype(false);
+if (inViewSourceMode())
+processDoctypeToken();
+} else {
+m_doctypeToken.m_publicID.append(c);
+src.advance(m_lineNumber);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+}
+break;
+}
+case DoctypeAfterPublicID:
+if (c == '\"' || c == '\'') {
+tquote = c == '\"' ? DoubleQuote : SingleQuote;
+m_doctypeToken.setState(DoctypeSystemID);
+src.advancePastNonNewline();
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else if (c == '>') {
+// Valid doctype. Emit it now.
+src.advancePastNonNewline();
+state.setInDoctype(false);
+processDoctypeToken();
+} else if (isWhitespace) {
+src.advance(m_lineNumber);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else
+m_doctypeToken.setState(DoctypeBogus);
+break;
+case DoctypeBeforeSystemID:
+if (c == '\"' || c == '\'') {
+tquote = c == '\"' ? DoubleQuote : SingleQuote;
+m_doctypeToken.setState(DoctypeSystemID);
+src.advancePastNonNewline();
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else if (c == '>') {
+// Considered bogus.  Don't process the doctype.
+src.advancePastNonNewline();
+state.setInDoctype(false);
+} else if (isWhitespace) {
+src.advance(m_lineNumber);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else
+m_doctypeToken.setState(DoctypeBogus);
+break;
+case DoctypeSystemID:
+if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
+src.advancePastNonNewline();
+m_doctypeToken.setState(DoctypeAfterSystemID);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else if (c == '>') {
+// Considered bogus.  Don't process the doctype.
+src.advancePastNonNewline();
+state.setInDoctype(false);
+if (inViewSourceMode())
+processDoctypeToken();
+} else {
+m_doctypeToken.m_systemID.append(c);
+src.advance(m_lineNumber);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+}
+break;
+case DoctypeAfterSystemID:
+if (c == '>') {
+// Valid doctype. Emit it now.
+src.advancePastNonNewline();
+state.setInDoctype(false);
+processDoctypeToken();
+} else if (isWhitespace) {
+src.advance(m_lineNumber);
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+} else
+m_doctypeToken.setState(DoctypeBogus);
+break;
+case DoctypeBogus:
+if (c == '>') {
+// Done with the bogus doctype.
+src.advancePastNonNewline();
+state.setInDoctype(false);
+if (inViewSourceMode())
+processDoctypeToken();
+} else {
+src.advance(m_lineNumber); // Just keep scanning for '>'
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(c);
+}
+break;
+default:
+break;
+}
+}
+return state;
+}
+LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseTag(SegmentedString& src, State state)
+{
+ASSERT(!state.hasEntityState());
+unsigned cBufferPos = m_cBufferPos;
+bool lastIsSlash = false;
+while (!src.isEmpty()) {
+checkBuffer();
+switch (state.tagState()) {
+case NoTag:
+{
+m_cBufferPos = cBufferPos;
+return state;
+}
+case TagName:
+{
+if (searchCount > 0) {
+if (*src == commentStart[searchCount]) {
+searchCount++;
+if (searchCount == 2)
+m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well.
+else
+m_doctypeSearchCount = 0;
+if (searchCount == 4) {
+// Found '<!--' sequence
+src.advancePastNonNewline();
+m_dest = m_buffer; // ignore the previous part of this tag
+state.setInComment(true);
+state.setTagState(NoTag);
+// Fix bug 34302 at kde.bugs.org.  Go ahead and treat
+// <!--> as a valid comment, since both mozilla and IE on windows
+// can handle this case.  Only do this in quirks mode. -dwh
+if (!src.isEmpty() && *src == '>' && document()->inCompatMode()) {
+state.setInComment(false);
+src.advancePastNonNewline();
+if (!src.isEmpty())
+m_cBuffer[cBufferPos++] = *src;
+} else
+state = parseComment(src, state);
+m_cBufferPos = cBufferPos;
+return state; // Finished parsing tag!
+}
+m_cBuffer[cBufferPos++] = *src;
+src.advancePastNonNewline();
+break;
+} else
+searchCount = 0; // Stop looking for '<!--' sequence
+}
+if (m_doctypeSearchCount > 0) {
+if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) {
+m_doctypeSearchCount++;
+m_cBuffer[cBufferPos++] = *src;
+src.advancePastNonNewline();
+if (m_doctypeSearchCount == 9) {
+// Found '<!DOCTYPE' sequence
+state.setInDoctype(true);
+state.setTagState(NoTag);
+m_doctypeToken.reset();
+if (inViewSourceMode())
+m_doctypeToken.m_source.append(m_cBuffer, cBufferPos);
+state = parseDoctype(src, state);
+m_cBufferPos = cBufferPos;
+return state;
+}
+break;
+} else
+m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence
+}
+bool finish = false;
+unsigned int ll = min(src.length(), CBUFLEN - cBufferPos);
+while (ll--) {
+UChar curchar = *src;
+if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') {
+finish = true;
+break;
+}
+// tolower() shows up on profiles. This is faster!
+if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
+m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
+else
+m_cBuffer[cBufferPos++] = curchar;
+src.advancePastNonNewline();
+}
+// Disadvantage: we add the possible rest of the tag
+// as attribute names. ### judge if this causes problems
+if (finish || CBUFLEN == cBufferPos) {
+bool beginTag;
+UChar* ptr = m_cBuffer;
+unsigned int len = cBufferPos;
+m_cBuffer[cBufferPos] = '\0';
+if ((cBufferPos > 0) && (*ptr == '/')) {
+// End Tag
+beginTag = false;
+ptr++;
+len--;
+}
+else
+// Start Tag
+beginTag = true;
+// Ignore the / in fake xml tags like <br/>.  We trim off the "/" so that we'll get "br" as the tag name and not "br/".
+if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode())
+ptr[--len] = '\0';
+// Now that we've shaved off any invalid / that might have followed the name), make the tag.
+// FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html)
+if (ptr[0] != '!' || inViewSourceMode()) {
+m_currentToken.tagName = AtomicString(ptr);
+m_currentToken.beginTag = beginTag;
+}
+m_dest = m_buffer;
+state.setTagState(SearchAttribute);
+cBufferPos = 0;
+}
+break;
+}
+case SearchAttribute:
+while (!src.isEmpty()) {
+UChar curchar = *src;
+// In this mode just ignore any quotes we encounter and treat them like spaces.
+if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') {
+if (curchar == '<' || curchar == '>')
+state.setTagState(SearchEnd);
+else
+state.setTagState(AttributeName);
+cBufferPos = 0;
+break;
+}
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar(curchar);
+src.advance(m_lineNumber);
+}
+break;
+case AttributeName:
+{
+m_rawAttributeBeforeValue.clear();
+int ll = min(src.length(), CBUFLEN - cBufferPos);
+while (ll--) {
+UChar curchar = *src;
+// If we encounter a "/" when scanning an attribute name, treat it as a delimiter.  This allows the
+// cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5).
+if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) {
+m_cBuffer[cBufferPos] = '\0';
+m_attrName = AtomicString(m_cBuffer);
+m_dest = m_buffer;
+*m_dest++ = 0;
+state.setTagState(SearchEqual);
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar('a');
+break;
+}
+// tolower() shows up on profiles. This is faster!
+if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
+m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
+else
+m_cBuffer[cBufferPos++] = curchar;
+m_rawAttributeBeforeValue.append(curchar);
+src.advance(m_lineNumber);
+}
+if (cBufferPos == CBUFLEN) {
+m_cBuffer[cBufferPos] = '\0';
+m_attrName = AtomicString(m_cBuffer);
+m_dest = m_buffer;
+*m_dest++ = 0;
+state.setTagState(SearchEqual);
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar('a');
+}
+break;
+}
+case SearchEqual:
+while (!src.isEmpty()) {
+UChar curchar = *src;
+if (lastIsSlash && curchar == '>') {
+// This is a quirk (with a long sad history).  We have to do this
+// since widgets do <script src="foo.js"/> and expect the tag to close.
+if (m_currentToken.tagName == scriptTag)
+m_currentToken.selfClosingTag = true;
+m_currentToken.brokenXMLStyle = true;
+}
+// In this mode just ignore any quotes or slashes we encounter and treat them like spaces.
+if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') {
+if (curchar == '=') {
+state.setTagState(SearchValue);
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar(curchar);
+m_rawAttributeBeforeValue.append(curchar);
+src.advancePastNonNewline();
+} else {
+m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode());
+m_dest = m_buffer;
+state.setTagState(SearchAttribute);
+lastIsSlash = false;
+}
+break;
+}
+lastIsSlash = curchar == '/';
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar(curchar);
+m_rawAttributeBeforeValue.append(curchar);
+src.advance(m_lineNumber);
+}
+break;
+case SearchValue:
+while (!src.isEmpty()) {
+UChar curchar = *src;
+if (!isASCIISpace(curchar)) {
+if (curchar == '\'' || curchar == '\"') {
+tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
+state.setTagState(QuotedValue);
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar(curchar);
+m_rawAttributeBeforeValue.append(curchar);
+src.advancePastNonNewline();
+} else
+state.setTagState(Value);
+break;
+}
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar(curchar);
+m_rawAttributeBeforeValue.append(curchar);
+src.advance(m_lineNumber);
+}
+break;
+case QuotedValue:
+while (!src.isEmpty()) {
+checkBuffer();
+UChar curchar = *src;
+if (curchar <= '>' && !src.escaped()) {
+if (curchar == '>' && m_attrName.isEmpty()) {
+// Handle a case like <img '>.  Just go ahead and be willing
+// to close the whole tag.  Don't consume the character and
+// just go back into SearchEnd while ignoring the whole
+// value.
+// FIXME: Note that this is actually not a very good solution.
+// It doesn't handle the general case of
+// unmatched quotes among attributes that have names. -dwh
+while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
+m_dest--; // remove trailing newlines
+AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
+if (!attributeValue.contains('/'))
+m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)
+m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar('x');
+state.setTagState(SearchAttribute);
+m_dest = m_buffer;
+tquote = NoQuote;
+break;
+}
+if (curchar == '&') {
+src.advancePastNonNewline();
+state = parseEntity(src, m_dest, state, cBufferPos, true, true);
+break;
+}
+if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) {
+// some <input type=hidden> rely on trailing spaces. argh
+while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
+m_dest--; // remove trailing newlines
+AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
+if (m_attrName.isEmpty() && !attributeValue.contains('/')) {
+m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?)
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar('x');
+} else if (inViewSourceMode())
+m_currentToken.addViewSourceChar('v');
+if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeBuilder->skipMode() && m_attrName == srcAttr) {
+String context(m_rawAttributeBeforeValue.data(), m_rawAttributeBeforeValue.size());
+if (xssAuditor() && !xssAuditor()->canLoadExternalScriptFromSrc(attributeValue))
+attributeValue = blankURL().string();
+}
+m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
+m_dest = m_buffer;
+state.setTagState(SearchAttribute);
+tquote = NoQuote;
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar(curchar);
+src.advancePastNonNewline();
+break;
+}
+}
+*m_dest++ = curchar;
+src.advance(m_lineNumber);
+}
+break;
+case Value:
+while (!src.isEmpty()) {
+checkBuffer();
+UChar curchar = *src;
+if (curchar <= '>' && !src.escaped()) {
+// parse Entities
+if (curchar == '&') {
+src.advancePastNonNewline();
+state = parseEntity(src, m_dest, state, cBufferPos, true, true);
+break;
+}
+// no quotes. Every space means end of value
+// '/' does not delimit in IE!
+if (isASCIISpace(curchar) || curchar == '>') {
+AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
+if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeBuilder->skipMode() && m_attrName == srcAttr) {
+String context(m_rawAttributeBeforeValue.data(), m_rawAttributeBeforeValue.size());
+if (xssAuditor() && !xssAuditor()->canLoadExternalScriptFromSrc(attributeValue))
+attributeValue = blankURL().string();
+}
+m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar('v');
+m_dest = m_buffer;
+state.setTagState(SearchAttribute);
+break;
+}
+}
+*m_dest++ = curchar;
+src.advance(m_lineNumber);
+}
+break;
+case SearchEnd:
+{
+while (!src.isEmpty()) {
+UChar ch = *src;
+if (ch == '>' || ch == '<')
+break;
+if (ch == '/')
+m_currentToken.selfClosingTag = true;
+if (inViewSourceMode())
+m_currentToken.addViewSourceChar(ch);
+src.advance(m_lineNumber);
+}
+if (src.isEmpty())
+break;
+searchCount = 0; // Stop looking for '<!--' sequence
+state.setTagState(NoTag);
+tquote = NoQuote;
+if (*src != '<')
+src.advance(m_lineNumber);
+if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown
+m_cBufferPos = cBufferPos;
+return state;
+}
+AtomicString tagName = m_currentToken.tagName;
+// Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard
+// compatibility.
+bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag;
+bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag;
+if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeBuilder->skipMode()) {
+Attribute* a = 0;
+m_scriptTagSrcAttrValue = String();
+m_scriptTagCharsetAttrValue = String();
+if (m_currentToken.attrs && !m_fragment) {
+if (document()->frame() && document()->frame()->script()->canExecuteScripts(NotAboutToExecuteScript)) {
+if ((a = m_currentToken.attrs->getAttributeItem(srcAttr)))
+m_scriptTagSrcAttrValue = document()->completeURL(deprecatedParseURL(a->value())).string();
+}
+}
+}
+RefPtr<Node> n = processToken();
+m_cBufferPos = cBufferPos;
+if (n || inViewSourceMode()) {
+State savedState = state;
+SegmentedString savedSrc = src;
+long savedLineno = m_lineNumber;
+if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) {
+if (beginTag)
+state.setDiscardLF(true); // Discard the first LF after we open a pre.
+} else if (tagName == scriptTag) {
+ASSERT(!m_scriptNode);
+m_scriptNode = static_pointer_cast<HTMLScriptElement>(n);
+if (m_scriptNode)
+m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset();
+if (beginTag) {
+m_searchStopper = scriptEnd;
+m_searchStopperLength = 8;
+state.setInScript(true);
+state = parseNonHTMLText(src, state);
+} else if (isSelfClosingScript) { // Handle <script src="foo"/>
+state.setInScript(true);
+state = scriptHandler(state);
+}
+} else if (tagName == styleTag) {
+if (beginTag) {
+m_searchStopper = styleEnd;
+m_searchStopperLength = 7;
+state.setInStyle(true);
+state = parseNonHTMLText(src, state);
+}
+} else if (tagName == textareaTag) {
+if (beginTag) {
+m_searchStopper = textareaEnd;
+m_searchStopperLength = 10;
+state.setInTextArea(true);
+state = parseNonHTMLText(src, state);
+}
+} else if (tagName == titleTag) {
+if (beginTag) {
+m_searchStopper = titleEnd;
+m_searchStopperLength = 7;
+state.setInTitle(true);
+state = parseNonHTMLText(src, state);
+}
+} else if (tagName == xmpTag) {
+if (beginTag) {
+m_searchStopper = xmpEnd;
+m_searchStopperLength = 5;
+state.setInXmp(true);
+state = parseNonHTMLText(src, state);
+}
+} else if (tagName == iframeTag) {
+if (beginTag) {
+m_searchStopper = iframeEnd;
+m_searchStopperLength = 8;
+state.setInIFrame(true);
+state = parseNonHTMLText(src, state);
+}
+}
+if (src.isEmpty() && (state.inTitle() || inViewSourceMode()) && !state.inComment() && !(state.inScript() && m_currentScriptTagStartLineNumber)) {
+// We just ate the rest of the document as the #text node under the special tag!
+// Reset the state then retokenize without special handling.
+// Let the parser clean up the missing close tag.
+// FIXME: This is incorrect, because src.isEmpty() doesn't mean we're
+// at the end of the document unless m_noMoreData is also true. We need
+// to detect this case elsewhere, and save the state somewhere other
+// than a local variable.
+state = savedState;
+src = savedSrc;
+m_lineNumber = savedLineno;
+m_scriptCodeSize = 0;
+}
+}
+if (tagName == plaintextTag)
+state.setInPlainText(beginTag);
+return state; // Finished parsing tag!
+}
+} // end switch
+}
+m_cBufferPos = cBufferPos;
+return state;
+}
+inline bool LegacyHTMLDocumentParser::continueProcessing(int& processedCount, double startTime, State &state)
+{
+// We don't want to be checking elapsed time with every character, so we only check after we've
+// processed a certain number of characters.
+bool allowedYield = state.allowYield();
+state.setAllowYield(false);
+if (!state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript && (processedCount > m_tokenizerChunkSize || allowedYield)) {
+processedCount = 0;
+if (currentTime() - startTime > m_tokenizerTimeDelay) {
+/* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to
+load, but this hurts overall performance on slower machines.  For now turn this
+off.
+|| (!document()->haveStylesheetsLoaded() &&
+(document()->documentElement()->id() != ID_HTML || document()->body()))) {*/
+// Schedule the timer to keep processing as soon as possible.
+m_timer.startOneShot(0);
+return false;
+}
+}
+processedCount++;
+return true;
+}
+// Turns the statemachine one crank using the passed in State object.
+// This does not modify m_state directly in order to be reentrant.
+ALWAYS_INLINE void LegacyHTMLDocumentParser::advance(State& state)
+{
+// do we need to enlarge the buffer?
+checkBuffer();
+UChar cc = *m_src;
+bool wasSkipLF = state.skipLF();
+if (wasSkipLF)
+state.setSkipLF(false);
+if (wasSkipLF && (cc == '\n'))
+m_src.advance();
+else if (state.needsSpecialWriteHandling()) {
+// it's important to keep needsSpecialWriteHandling with the flags this block tests
+if (state.hasEntityState())
+state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState());
+else if (state.inPlainText())
+state = parseText(m_src, state);
+else if (state.inAnyNonHTMLText())
+state = parseNonHTMLText(m_src, state);
+else if (state.inComment())
+state = parseComment(m_src, state);
+else if (state.inDoctype())
+state = parseDoctype(m_src, state);
+else if (state.inServer())
+state = parseServer(m_src, state);
+else if (state.inProcessingInstruction())
+state = parseProcessingInstruction(m_src, state);
+else if (state.hasTagState())
+state = parseTag(m_src, state);
+else if (state.startTag()) {
+state.setStartTag(false);
+switch (cc) {
+case '/':
+break;
+case '!': {
+// <!-- comment --> or <!DOCTYPE ...>
+searchCount = 1; // Look for '<!--' sequence to start comment or '<!DOCTYPE' sequence to start doctype
+m_doctypeSearchCount = 1;
+break;
+}
+case '?': {
+// xml processing instruction
+state.setInProcessingInstruction(true);
+tquote = NoQuote;
+state = parseProcessingInstruction(m_src, state);
+return;
+}
+case '%':
+if (!m_brokenServer) {
+// <% server stuff, handle as comment %>
+state.setInServer(true);
+tquote = NoQuote;
+state = parseServer(m_src, state);
+return;
+}
+// else fall through
+default: {
+if (((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
+// Start of a Start-Tag
+} else {
+// Invalid tag
+// Add as is
+*m_dest = '<';
+m_dest++;
+return;
+}
+}
+}; // end case
+processToken();
+m_cBufferPos = 0;
+state.setTagState(TagName);
+state = parseTag(m_src, state);
+}
+} else if (cc == '&' && !m_src.escaped()) {
+m_src.advancePastNonNewline();
+state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState());
+} else if (cc == '<' && !m_src.escaped()) {
+m_currentTagStartLineNumber = m_lineNumber;
+m_src.advancePastNonNewline();
+state.setStartTag(true);
+state.setDiscardLF(false);
+} else if (cc == '\n' || cc == '\r') {
+if (state.discardLF())
+// Ignore this LF
+state.setDiscardLF(false); // We have discarded 1 LF
+else {
+// Process this LF
+*m_dest++ = '\n';
+if (cc == '\r' && !m_src.excludeLineNumbers())
+m_lineNumber++;
+}
+/* Check for MS-DOS CRLF sequence */
+if (cc == '\r')
+state.setSkipLF(true);
+m_src.advance(m_lineNumber);
+} else {
+state.setDiscardLF(false);
+*m_dest++ = cc;
+m_src.advancePastNonNewline();
+}
+}
+void LegacyHTMLDocumentParser::willWriteHTML(const SegmentedString& source)
+{
+#if ENABLE(INSPECTOR)
+if (InspectorTimelineAgent* timelineAgent = document()->inspectorTimelineAgent())
+timelineAgent->willWriteHTML(source.length(), m_lineNumber);
+#endif
+}
+void LegacyHTMLDocumentParser::didWriteHTML()
+{
+#if ENABLE(INSPECTOR)
+if (InspectorTimelineAgent* timelineAgent = document()->inspectorTimelineAgent())
+timelineAgent->didWriteHTML(m_lineNumber);
+#endif
+}
+void LegacyHTMLDocumentParser::write(const SegmentedString& str, bool appendData)
+{
+if (!m_buffer)
+return;
+if (m_parserStopped)
+return;
+SegmentedString source(str);
+if (m_executingScript)
+source.setExcludeLineNumbers();
+if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) {
+// don't parse; we will do this later
+if (m_currentPrependingSrc)
+m_currentPrependingSrc->append(source);
+else {
+m_pendingSrc.append(source);
+#if PRELOAD_SCANNER_ENABLED
+if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
+m_preloadScanner->write(source);
+#endif
+}
+return;
+}
+#if PRELOAD_SCANNER_ENABLED
+if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
+m_preloadScanner->end();
+#endif
+if (!m_src.isEmpty())
+m_src.append(source);
+else
+setSrc(source);
+// Once a timer is set, it has control of when the parser continues.
+if (m_timer.isActive())
+return;
+bool wasInWrite = m_inWrite;
+m_inWrite = true;
+willWriteHTML(source);
+Frame* frame = document()->frame();
+State state = m_state;
+int processedCount = 0;
+double startTime = currentTime();
+while (!m_src.isEmpty() && (!frame || !frame->redirectScheduler()->locationChangePending())) {
+if (!continueProcessing(processedCount, startTime, state))
+break;
+advance(state);
+}
+didWriteHTML();
+m_inWrite = wasInWrite;
+m_state = state;
+if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
+end(); // this actually causes us to be deleted
+// After parsing, go ahead and dispatch image beforeload events, but only if we're doing
+// document parsing.  For document fragments we wait, since they'll likely end up in the document by the time
+// the beforeload events fire.
+if (!m_fragment)
+ImageLoader::dispatchPendingBeforeLoadEvents();
+}
+void LegacyHTMLDocumentParser::insert(const SegmentedString& source)
+{
+// FIXME: forceSynchronous should always be the same as the bool passed to
+// write().  However LegacyHTMLDocumentParser uses write("", false) to pump
+// the parser (after running external scripts, etc.) thus necessitating a
+// separate state for forceSynchronous.
+bool wasForcedSynchronous = forceSynchronous();
+setForceSynchronous(true);
+write(source, false);
+setForceSynchronous(wasForcedSynchronous);
+}
+void LegacyHTMLDocumentParser::append(const SegmentedString& source)
+{
+write(source, true);
+}
+void LegacyHTMLDocumentParser::stopParsing()
+{
+DocumentParser::stopParsing();
+m_timer.stop();
+// FIXME: Why is LegacyHTMLDocumentParser the only DocumentParser which calls checkCompleted?
+// The FrameLoader needs to know that the parser has finished with its data,
+// regardless of whether it happened naturally or due to manual intervention.
+if (!m_fragment && document()->frame())
+document()->frame()->loader()->checkCompleted();
+}
+bool LegacyHTMLDocumentParser::processingData() const
+{
+return m_timer.isActive() || m_inWrite;
+}
+void LegacyHTMLDocumentParser::timerFired(Timer<LegacyHTMLDocumentParser>*)
+{
+if (document()->view() && document()->view()->layoutPending() && !document()->minimumLayoutDelay()) {
+// Restart the timer and let layout win.  This is basically a way of ensuring that the layout
+// timer has higher priority than our timer.
+m_timer.startOneShot(0);
+return;
+}
+// Invoke write() as though more data came in. This might cause us to get deleted.
+write(SegmentedString(), true);
+}
+void LegacyHTMLDocumentParser::end()
+{
+ASSERT(!m_timer.isActive());
+m_timer.stop(); // Only helps if assertion above fires, but do it anyway.
+if (m_buffer) {
+// parseTag is using the buffer for different matters
+if (!m_state.hasTagState())
+processToken();
+fastFree(m_scriptCode);
+m_scriptCode = 0;
+m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
+fastFree(m_buffer);
+m_buffer = 0;
+}
+if (!inViewSourceMode())
+m_treeBuilder->finished();
+else
+document()->finishedParsing();
+}
+void LegacyHTMLDocumentParser::finish()
+{
+// do this as long as we don't find matching comment ends
+while ((m_state.inComment() || m_state.inServer()) && m_scriptCode && m_scriptCodeSize) {
+// we've found an unmatched comment start
+if (m_state.inComment())
+m_brokenComments = true;
+else
+m_brokenServer = true;
+checkScriptBuffer();
+m_scriptCode[m_scriptCodeSize] = 0;
+m_scriptCode[m_scriptCodeSize + 1] = 0;
+int pos;
+String food;
+if (m_state.inScript() || m_state.inStyle() || m_state.inTextArea())
+food = String(m_scriptCode, m_scriptCodeSize);
+else if (m_state.inServer()) {
+food = "<";
+food.append(m_scriptCode, m_scriptCodeSize);
+} else {
+pos = find(m_scriptCode, m_scriptCodeSize, '>');
+food = String(m_scriptCode + pos + 1, m_scriptCodeSize - pos - 1);
+}
+fastFree(m_scriptCode);
+m_scriptCode = 0;
+m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
+m_state.setInComment(false);
+m_state.setInServer(false);
+if (!food.isEmpty())
+write(food, true);
+}
+// this indicates we will not receive any more data... but if we are waiting on
+// an external script to load, we can't finish parsing until that is done
+m_noMoreData = true;
+if (!m_inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
+end(); // this actually causes us to be deleted
+}
+bool LegacyHTMLDocumentParser::finishWasCalled()
+{
+return m_noMoreData;
+}
+PassRefPtr<Node> LegacyHTMLDocumentParser::processToken()
+{
+if (m_dest > m_buffer) {
+m_currentToken.text = StringImpl::createStrippingNullCharacters(m_buffer, m_dest - m_buffer);
+if (m_currentToken.tagName != commentAtom)
+m_currentToken.tagName = textAtom;
+} else if (m_currentToken.tagName == nullAtom) {
+m_currentToken.reset();
+return 0;
+}
+m_dest = m_buffer;
+RefPtr<Node> n;
+if (!m_parserStopped) {
+if (NamedNodeMap* map = m_currentToken.attrs.get())
+map->shrinkToLength();
+if (inViewSourceMode())
+static_cast<HTMLViewSourceDocument*>(document())->addViewSourceToken(&m_currentToken);
+else
+// pass the token over to the parser, the parser DOES NOT delete the token
+n = m_treeBuilder->parseToken(&m_currentToken);
+}
+m_currentToken.reset();
+return n.release();
+}
+void LegacyHTMLDocumentParser::processDoctypeToken()
+{
+if (inViewSourceMode())
+static_cast<HTMLViewSourceDocument*>(document())->addViewSourceDoctypeToken(&m_doctypeToken);
+else
+m_treeBuilder->parseDoctypeToken(&m_doctypeToken);
+}
+LegacyHTMLDocumentParser::~LegacyHTMLDocumentParser()
+{
+ASSERT(!m_inWrite);
+reset();
+}
+void LegacyHTMLDocumentParser::enlargeBuffer(int len)
+{
+// Resize policy: Always at least double the size of the buffer each time.
+int delta = max(len, m_bufferSize);
+// Check for overflow.
+// For now, handle overflow the same way we handle fastRealloc failure, with CRASH.
+static const int maxSize = INT_MAX / sizeof(UChar);
+if (delta > maxSize - m_bufferSize)
+CRASH();
+int newSize = m_bufferSize + delta;
+int oldOffset = m_dest - m_buffer;
+m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
+m_dest = m_buffer + oldOffset;
+m_bufferSize = newSize;
+}
+void LegacyHTMLDocumentParser::enlargeScriptBuffer(int len)
+{
+// Resize policy: Always at least double the size of the buffer each time.
+int delta = max(len, m_scriptCodeCapacity);
+// Check for overflow.
+// For now, handle overflow the same way we handle fastRealloc failure, with CRASH.
+static const int maxSize = INT_MAX / sizeof(UChar);
+if (delta > maxSize - m_scriptCodeCapacity)
+CRASH();
+int newSize = m_scriptCodeCapacity + delta;
+// If we allow fastRealloc(ptr, 0), it will call CRASH(). We run into this
+// case if the HTML being parsed begins with "<!--" and there's more data
+// coming.
+if (!newSize) {
+ASSERT(!m_scriptCode);
+return;
+}
+m_scriptCode = static_cast<UChar*>(fastRealloc(m_scriptCode, newSize * sizeof(UChar)));
+m_scriptCodeCapacity = newSize;
+}
+void LegacyHTMLDocumentParser::executeScriptsWaitingForStylesheets()
+{
+ASSERT(document()->haveStylesheetsLoaded());
+if (m_hasScriptsWaitingForStylesheets)
+notifyFinished(0);
+}
+void LegacyHTMLDocumentParser::notifyFinished(CachedResource*)
+{
+executeExternalScriptsIfReady();
+}
+void LegacyHTMLDocumentParser::executeExternalScriptsIfReady()
+{
+ASSERT(!m_pendingScripts.isEmpty());
+// Make external scripts wait for external stylesheets.
+// FIXME: This needs to be done for inline scripts too.
+m_hasScriptsWaitingForStylesheets = !document()->haveStylesheetsLoaded();
+if (m_hasScriptsWaitingForStylesheets)
+return;
+bool finished = false;
+double startTime = currentTime();
+while (!finished && m_pendingScripts.first()->isLoaded()) {
+if (!continueExecutingExternalScripts(startTime))
+break;
+CachedResourceHandle<CachedScript> cs = m_pendingScripts.takeFirst();
+ASSERT(cache()->disabled() || cs->accessCount() > 0);
+setSrc(SegmentedString());
+// make sure we forget about the script before we execute the new one
+// infinite recursion might happen otherwise
+ScriptSourceCode sourceCode(cs.get());
+bool errorOccurred = cs->errorOccurred();
+cs->removeClient(this);
+RefPtr<Node> n = m_scriptNode.release();
+if (errorOccurred)
+n->dispatchEvent(Event::create(eventNames().errorEvent, true, false));
+else {
+if (static_cast<HTMLScriptElement*>(n.get())->shouldExecuteAsJavaScript())
+m_state = scriptExecution(sourceCode, m_state);
+#if ENABLE(XHTMLMP)
+else
+document()->setShouldProcessNoscriptElement(true);
+#endif
+n->dispatchEvent(Event::create(eventNames().loadEvent, false, false));
+}
+// The state of m_pendingScripts.isEmpty() can change inside the scriptExecution()
+// call above, so test afterwards.
+finished = m_pendingScripts.isEmpty();
+if (finished) {
+ASSERT(!m_hasScriptsWaitingForStylesheets);
+m_state.setLoadingExtScript(false);
+} else if (m_hasScriptsWaitingForStylesheets) {
+// m_hasScriptsWaitingForStylesheets flag might have changed during the script execution.
+// If it did we are now blocked waiting for stylesheets and should not execute more scripts until they arrive.
+finished = true;
+}
+// 'm_requestingScript' is true when we are called synchronously from
+// scriptHandler(). In that case scriptHandler() will take care
+// of m_pendingSrc.
+if (!m_requestingScript) {
+SegmentedString rest = m_pendingSrc;
+m_pendingSrc.clear();
+write(rest, false);
+// we might be deleted at this point, do not access any members.
+}
+}
+}
+void LegacyHTMLDocumentParser::executeExternalScriptsTimerFired(Timer<LegacyHTMLDocumentParser>*)
+{
+if (document()->view() && document()->view()->layoutPending() && !document()->minimumLayoutDelay()) {
+// Restart the timer and do layout first.
+m_externalScriptsTimer.startOneShot(0);
+return;
+}
+// Continue executing external scripts.
+executeExternalScriptsIfReady();
+}
+bool LegacyHTMLDocumentParser::continueExecutingExternalScripts(double startTime)
+{
+if (m_externalScriptsTimer.isActive())
+return false;
+if (currentTime() - startTime > m_tokenizerTimeDelay) {
+// Schedule the timer to keep processing as soon as possible.
+m_externalScriptsTimer.startOneShot(0);
+return false;
+}
+return true;
+}
+bool LegacyHTMLDocumentParser::isWaitingForScripts() const
+{
+return m_state.loadingExtScript();
+}
+void LegacyHTMLDocumentParser::setSrc(const SegmentedString& source)
+{
+m_src = source;
+}
+void LegacyHTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
+{
+LegacyHTMLDocumentParser parser(fragment, scriptingPermission);
+parser.setForceSynchronous(true);
+parser.write(source, true);
+parser.finish();
+ASSERT(!parser.processingData()); // make sure we're done (see 3963151)
+}
+UChar decodeNamedEntity(const char* name)
+{
+const Entity* e = findEntity(name, strlen(name));
+return e ? e->code : 0;
+}
+}