WebCore/html/LegacyHTMLDocumentParser.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2     Copyright (C) 1997 Martin Jones (mjones@kde.org)
       
     3               (C) 1997 Torben Weis (weis@kde.org)
       
     4               (C) 1998 Waldo Bastian (bastian@kde.org)
       
     5               (C) 1999 Lars Knoll (knoll@kde.org)
       
     6               (C) 1999 Antti Koivisto (koivisto@kde.org)
       
     7               (C) 2001 Dirk Mueller (mueller@kde.org)
       
     8     Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
       
     9     Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)
       
    10     Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
       
    11 
       
    12     This library is free software; you can redistribute it and/or
       
    13     modify it under the terms of the GNU Library General Public
       
    14     License as published by the Free Software Foundation; either
       
    15     version 2 of the License, or (at your option) any later version.
       
    16 
       
    17     This library is distributed in the hope that it will be useful,
       
    18     but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    19     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    20     Library General Public License for more details.
       
    21 
       
    22     You should have received a copy of the GNU Library General Public License
       
    23     along with this library; see the file COPYING.LIB.  If not, write to
       
    24     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
       
    25     Boston, MA 02110-1301, USA.
       
    26 */
       
    27 
       
    28 #include "config.h"
       
    29 #include "LegacyHTMLDocumentParser.h"
       
    30 
       
    31 #include "Attribute.h"
       
    32 #include "CSSHelper.h"
       
    33 #include "Cache.h"
       
    34 #include "CachedScript.h"
       
    35 #include "DocLoader.h"
       
    36 #include "DocumentFragment.h"
       
    37 #include "Event.h"
       
    38 #include "EventNames.h"
       
    39 #include "Frame.h"
       
    40 #include "FrameLoader.h"
       
    41 #include "FrameView.h"
       
    42 #include "HTMLElement.h"
       
    43 #include "HTMLNames.h"
       
    44 #include "LegacyHTMLTreeBuilder.h"
       
    45 #include "HTMLScriptElement.h"
       
    46 #include "HTMLViewSourceDocument.h"
       
    47 #include "ImageLoader.h"
       
    48 #include "InspectorTimelineAgent.h"
       
    49 #include "Page.h"
       
    50 #include "LegacyPreloadScanner.h"
       
    51 #include "ScriptSourceCode.h"
       
    52 #include "ScriptValue.h"
       
    53 #include "XSSAuditor.h"
       
    54 #include <wtf/ASCIICType.h>
       
    55 #include <wtf/CurrentTime.h>
       
    56 
       
    57 #include "HTMLEntityNames.cpp"
       
    58 
       
    59 #define PRELOAD_SCANNER_ENABLED 1
       
    60 
       
    61 using namespace WTF;
       
    62 using namespace std;
       
    63 
       
    64 namespace WebCore {
       
    65 
       
    66 using namespace HTMLNames;
       
    67 
       
     68 // This value is used to define how many loops (approximately tokens)
       
     69 // the parser will make before checking if it should yield.
       
     70 // To increase responsiveness reduce both the ChunkSize and TimeDelay constants.
       
     71 static const int defaultTokenizerChunkSize = 4096;
       
    72 
       
     73 // FIXME: We would like this constant to be 200ms.
       
     74 // Yielding more aggressively results in increased responsiveness and better incremental rendering.
       
     75 // It slows down overall page-load on slower machines, though, so for now we set a value of 500ms
        // (the constant below is written as 0.500, i.e. it is expressed in seconds).
       
     76 // TimeDelay controls the maximum time the parser will run before yielding.
       
     77 // Inline script execution can cause the parser to exceed this limit.
       
     78 static const double defaultTokenizerTimeDelay = 0.500;
       
    79 
       
    80 static const char commentStart [] = "<!--";
       
    81 static const char doctypeStart [] = "<!doctype";
       
    82 static const char publicStart [] = "public";
       
    83 static const char systemStart [] = "system";
       
    84 static const char scriptEnd [] = "</script";
       
    85 static const char xmpEnd [] = "</xmp";
       
    86 static const char styleEnd [] =  "</style";
       
    87 static const char textareaEnd [] = "</textarea";
       
    88 static const char titleEnd [] = "</title";
       
    89 static const char iframeEnd [] = "</iframe";
       
    90 
       
    91 // Full support for MS Windows extensions to Latin-1.
       
    92 // Technically these extensions should only be activated for pages
       
    93 // marked "windows-1252" or "cp1252", but
       
    94 // in the standard Microsoft way, these extensions infect hundreds of thousands
       
    95 // of web pages.  Note that people with non-latin-1 Microsoft extensions
       
    96 // are SOL.
       
    97 //
       
    98 // See: http://www.microsoft.com/globaldev/reference/WinCP.asp
       
    99 //      http://www.bbsinc.com/iso8859.html
       
   100 //      http://www.obviously.com/
       
   101 //
       
   102 // There may be better equivalents
       
   103 
       
    104 // We only need this for entities. For non-entity text, we handle this in the text encoding.
       
    105 
       
        // Replacement code units for the 32 values 0x80-0x9F, indexed by (value - 0x80);
        // fixUpChar() performs the lookup. Each row below covers eight consecutive values.
    106 static const UChar windowsLatin1ExtensionArray[32] = {
       
    107     0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
       
    108     0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
       
    109     0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
       
    110     0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 98-9F
       
    111 };
       
   112 
       
   113 static inline UChar fixUpChar(UChar c)
       
   114 {
       
   115     if ((c & ~0x1F) != 0x0080)
       
   116         return c;
       
   117     return windowsLatin1ExtensionArray[c - 0x80];
       
   118 }
       
   119 
       
   120 static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length)
       
   121 {
       
   122     for (unsigned i = 0; i != length; ++i) {
       
   123         unsigned char c1 = s1[i];
       
   124         unsigned char uc1 = toASCIIUpper(static_cast<char>(c1));
       
   125         UChar c2 = s2[i];
       
   126         if (c1 != c2 && uc1 != c2)
       
   127             return false;
       
   128     }
       
   129     return true;
       
   130 }
       
   131 
       
   132 inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode)
       
   133 {
       
   134     if (!attrName.isEmpty()) {
       
   135         ASSERT(!attrName.contains('/'));
       
   136         RefPtr<Attribute> a = Attribute::createMapped(attrName, attributeValue);
       
   137         if (!attrs) {
       
   138             attrs = NamedNodeMap::create();
       
   139             attrs->reserveInitialCapacity(10);
       
   140         }
       
   141         attrs->insertAttribute(a.release(), viewSourceMode);
       
   142     }
       
   143 
       
   144     attrName = emptyAtom;
       
   145 }
       
   146 
       
   147 // ----------------------------------------------------------------------------
       
   148 
       
   149 LegacyHTMLDocumentParser::LegacyHTMLDocumentParser(HTMLDocument* document, bool reportErrors)
       
   150     : ScriptableDocumentParser(document)
       
   151     , m_buffer(0)
       
   152     , m_scriptCode(0)
       
   153     , m_scriptCodeSize(0)
       
   154     , m_scriptCodeCapacity(0)
       
   155     , m_scriptCodeResync(0)
       
   156     , m_executingScript(0)
       
   157     , m_requestingScript(false)
       
   158     , m_hasScriptsWaitingForStylesheets(false)
       
   159     , m_timer(this, &LegacyHTMLDocumentParser::timerFired)
       
   160     , m_externalScriptsTimer(this, &LegacyHTMLDocumentParser::executeExternalScriptsTimerFired)
       
   161     , m_treeBuilder(new LegacyHTMLTreeBuilder(document, reportErrors))
       
   162     , m_inWrite(false)
       
   163     , m_fragment(false)
       
   164     , m_scriptingPermission(FragmentScriptingAllowed)
       
   165 {
       
   166     begin();
       
   167 }
       
   168 
       
   169 LegacyHTMLDocumentParser::LegacyHTMLDocumentParser(HTMLViewSourceDocument* document)
       
   170     : ScriptableDocumentParser(document, true)
       
   171     , m_buffer(0)
       
   172     , m_scriptCode(0)
       
   173     , m_scriptCodeSize(0)
       
   174     , m_scriptCodeCapacity(0)
       
   175     , m_scriptCodeResync(0)
       
   176     , m_executingScript(0)
       
   177     , m_requestingScript(false)
       
   178     , m_hasScriptsWaitingForStylesheets(false)
       
   179     , m_timer(this, &LegacyHTMLDocumentParser::timerFired)
       
   180     , m_externalScriptsTimer(this, &LegacyHTMLDocumentParser::executeExternalScriptsTimerFired)
       
   181     , m_inWrite(false)
       
   182     , m_fragment(false)
       
   183     , m_scriptingPermission(FragmentScriptingAllowed)
       
   184 {
       
   185     begin();
       
   186 }
       
   187 
       
   188 LegacyHTMLDocumentParser::LegacyHTMLDocumentParser(DocumentFragment* frag, FragmentScriptingPermission scriptingPermission)
       
   189     : ScriptableDocumentParser(frag->document())
       
   190     , m_buffer(0)
       
   191     , m_scriptCode(0)
       
   192     , m_scriptCodeSize(0)
       
   193     , m_scriptCodeCapacity(0)
       
   194     , m_scriptCodeResync(0)
       
   195     , m_executingScript(0)
       
   196     , m_requestingScript(false)
       
   197     , m_hasScriptsWaitingForStylesheets(false)
       
   198     , m_timer(this, &LegacyHTMLDocumentParser::timerFired)
       
   199     , m_externalScriptsTimer(this, &LegacyHTMLDocumentParser::executeExternalScriptsTimerFired)
       
   200     , m_treeBuilder(new LegacyHTMLTreeBuilder(frag, scriptingPermission))
       
   201     , m_inWrite(false)
       
   202     , m_fragment(true)
       
   203     , m_scriptingPermission(scriptingPermission)
       
   204 {
       
   205     begin();
       
   206 }
       
   207 
       
   208 void LegacyHTMLDocumentParser::reset()
       
   209 {
       
   210     ASSERT(m_executingScript == 0);
       
   211 
       
   212     while (!m_pendingScripts.isEmpty()) {
       
   213         CachedResourceHandle<CachedScript> cs = m_pendingScripts.takeFirst();
       
   214         ASSERT(cache()->disabled() || cs->accessCount() > 0);
       
   215         cs->removeClient(this);
       
   216     }
       
   217 
       
   218     fastFree(m_buffer);
       
   219     m_buffer = m_dest = 0;
       
   220     m_bufferSize = 0;
       
   221 
       
   222     fastFree(m_scriptCode);
       
   223     m_scriptCode = 0;
       
   224     m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
       
   225 
       
   226     m_timer.stop();
       
   227     m_externalScriptsTimer.stop();
       
   228 
       
   229     m_state.setAllowYield(false);
       
   230     m_state.setForceSynchronous(false);
       
   231 
       
   232     m_currentToken.reset();
       
   233     m_doctypeToken.reset();
       
   234     m_doctypeSearchCount = 0;
       
   235     m_doctypeSecondarySearchCount = 0;
       
   236     m_hasScriptsWaitingForStylesheets = false;
       
   237 }
       
   238 
       
   239 void LegacyHTMLDocumentParser::begin()
       
   240 {
       
   241     m_executingScript = 0;
       
   242     m_requestingScript = false;
       
   243     m_hasScriptsWaitingForStylesheets = false;
       
   244     m_state.setLoadingExtScript(false);
       
   245     reset();
       
   246     m_bufferSize = 254;
       
   247     m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * 254));
       
   248     m_dest = m_buffer;
       
   249     tquote = NoQuote;
       
   250     searchCount = 0;
       
   251     m_state.setEntityState(NoEntity);
       
   252     m_scriptTagSrcAttrValue = String();
       
   253     m_pendingSrc.clear();
       
   254     m_currentPrependingSrc = 0;
       
   255     m_noMoreData = false;
       
   256     m_brokenComments = false;
       
   257     m_brokenServer = false;
       
   258     m_lineNumber = 0;
       
   259     m_currentScriptTagStartLineNumber = 0;
       
   260     m_currentTagStartLineNumber = 0;
       
   261     m_state.setForceSynchronous(false);
       
   262 
       
   263     Page* page = document()->page();
       
   264     if (page && page->hasCustomHTMLTokenizerTimeDelay())
       
   265         m_tokenizerTimeDelay = page->customHTMLTokenizerTimeDelay();
       
   266     else
       
   267         m_tokenizerTimeDelay = defaultTokenizerTimeDelay;
       
   268 
       
   269     if (page && page->hasCustomHTMLTokenizerChunkSize())
       
   270         m_tokenizerChunkSize = page->customHTMLTokenizerChunkSize();
       
   271     else
       
   272         m_tokenizerChunkSize = defaultTokenizerChunkSize;
       
   273 }
       
   274 
       
   275 void LegacyHTMLDocumentParser::setForceSynchronous(bool force)
       
   276 {
       
   277     m_state.setForceSynchronous(force);
       
   278 }
       
   279 
       
   280 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::processListing(SegmentedString list, State state)
       
   281 {
       
   282     // This function adds the listing 'list' as
       
   283     // preformatted text-tokens to the token-collection
       
   284     while (!list.isEmpty()) {
       
   285         if (state.skipLF()) {
       
   286             state.setSkipLF(false);
       
   287             if (*list == '\n') {
       
   288                 list.advance();
       
   289                 continue;
       
   290             }
       
   291         }
       
   292 
       
   293         checkBuffer();
       
   294 
       
   295         if (*list == '\n' || *list == '\r') {
       
   296             if (state.discardLF())
       
   297                 // Ignore this LF
       
   298                 state.setDiscardLF(false); // We have discarded 1 LF
       
   299             else
       
   300                 *m_dest++ = '\n';
       
   301 
       
   302             /* Check for MS-DOS CRLF sequence */
       
   303             if (*list == '\r')
       
   304                 state.setSkipLF(true);
       
   305 
       
   306             list.advance();
       
   307         } else {
       
   308             state.setDiscardLF(false);
       
   309             *m_dest++ = *list;
       
   310             list.advance();
       
   311         }
       
   312     }
       
   313 
       
   314     return state;
       
   315 }
       
   316 
       
    317 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseNonHTMLText(SegmentedString& src, State state)
       
    318 {
       
            // Consumes characters from |src| into m_scriptCode while the tokenizer is inside
            // exactly one of <xmp>, <textarea>, <title>, <style>, <script> or <iframe> (asserted
            // below), watching for the closing tag held in m_searchStopper. Returns the updated
            // state; returns early as soon as the closing tag has been fully consumed.
    319     ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState());
       
    320     ASSERT(!state.hasTagState());
       
    321     ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1);
       
            // Remember the line on which the script content starts, unless it is already recorded.
    322     if (state.inScript() && !m_currentScriptTagStartLineNumber)
       
    323         m_currentScriptTagStartLineNumber = m_lineNumber;
       
    324 
       
            // A comment was still open when the previous chunk of input ran out: continue it first.
    325     if (state.inComment())
       
    326         state = parseComment(src, state);
       
    327 
       
            // Buffer position recorded when parseEntity() left the output pointer unchanged; the
            // "<!--" and closing-tag checks below require their match to lie beyond this position.
    328     int lastDecodedEntityPosition = -1;
       
    329     while (!src.isEmpty()) {
       
    330         checkScriptBuffer();
       
    331         UChar ch = *src;
       
    332 
       
                // Start of a comment: the buffer ends in "<!-" and the current character is '-'.
                // Not done inside <xmp>, while resyncing on a closing tag, or when m_brokenComments is set.
    333         if (!m_scriptCodeResync && !m_brokenComments &&
       
    334             !state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() &&
       
    335             m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' &&
       
    336             (lastDecodedEntityPosition < m_scriptCodeSize - 3)) {
       
    337             state.setInComment(true);
       
    338             state = parseComment(src, state);
       
    339             continue;
       
    340         }
       
                // The closing tag name was already matched (m_scriptCodeResync is set) and we are
                // outside any quoted attribute value: '>' finishes the element.
    341         if (m_scriptCodeResync && !tquote && ch == '>') {
       
    342             src.advancePastNonNewline();
       
                    // m_scriptCodeResync - 1 is the index where the closing tag text begins, so this
                    // truncates the buffer to the element's content only.
    343             m_scriptCodeSize = m_scriptCodeResync - 1;
       
    344             m_scriptCodeResync = 0;
       
    345             m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0;
       
    346             if (state.inScript())
       
    347                 state = scriptHandler(state);
       
    348             else {
       
                        // Emit the buffered content as a text token, then synthesize the end tag token
                        // for whichever element we were in.
    349                 state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
       
    350                 processToken();
       
    351                 if (state.inStyle()) {
       
    352                     m_currentToken.tagName = styleTag.localName();
       
    353                     m_currentToken.beginTag = false;
       
    354                 } else if (state.inTextArea()) {
       
    355                     m_currentToken.tagName = textareaTag.localName();
       
    356                     m_currentToken.beginTag = false;
       
    357                 } else if (state.inTitle()) {
       
    358                     m_currentToken.tagName = titleTag.localName();
       
    359                     m_currentToken.beginTag = false;
       
    360                 } else if (state.inXmp()) {
       
    361                     m_currentToken.tagName = xmpTag.localName();
       
    362                     m_currentToken.beginTag = false;
       
    363                 } else if (state.inIFrame()) {
       
    364                     m_currentToken.tagName = iframeTag.localName();
       
    365                     m_currentToken.beginTag = false;
       
    366                 }
       
    367                 processToken();
       
    368                 state.setInStyle(false);
       
    369                 state.setInScript(false);
       
    370                 state.setInTextArea(false);
       
    371                 state.setInTitle(false);
       
    372                 state.setInXmp(false);
       
    373                 state.setInIFrame(false);
       
    374                 tquote = NoQuote;
       
    375                 m_scriptCodeSize = m_scriptCodeResync = 0;
       
    376             }
       
    377             return state;
       
    378         }
       
    379         // possible end of tagname, lets check.
       
                // The buffer must end with m_searchStopper (tagMatch() also accepts the ASCII upper-case
                // form of each character) and the current character must be '>', '/' or whitespace.
                // The character is not consumed here: the loop runs again with m_scriptCodeResync set.
    380         if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) &&
       
    381              m_scriptCodeSize >= m_searchStopperLength &&
       
    382              tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) &&
       
    383              (lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) {
       
    384             m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1;
       
    385             tquote = NoQuote;
       
    386             continue;
       
    387         }
       
                // While inside the closing tag, track quoting so that a '>' inside a quoted value does
                // not end the tag. A CR or LF drops any open quote.
    388         if (m_scriptCodeResync && !state.escaped()) {
       
    389             if (ch == '\"')
       
    390                 tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
       
    391             else if (ch == '\'')
       
    392                 tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
       
    393             else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
       
    394                 tquote = NoQuote;
       
    395         }
       
                // A backslash marks the next character as escaped, unless the backslash itself was escaped.
    396         state.setEscaped(!state.escaped() && ch == '\\');
       
                // Entities are decoded only inside <textarea>, <title> and <iframe>.
    397         if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') {
       
    398             UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize;
       
    399             src.advancePastNonNewline();
       
    400             state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
       
                    // parseEntity() receives the output pointer by name; if it is unchanged afterwards,
                    // only the position is recorded, otherwise the buffer size is updated to match it.
    401             if (scriptCodeDest == m_scriptCode + m_scriptCodeSize)
       
    402                 lastDecodedEntityPosition = m_scriptCodeSize;
       
    403             else
       
    404                 m_scriptCodeSize = scriptCodeDest - m_scriptCode;
       
    405         } else {
       
                    // Ordinary character: buffer it and advance, keeping m_lineNumber up to date.
    406             m_scriptCode[m_scriptCodeSize++] = ch;
       
    407             src.advance(m_lineNumber);
       
    408         }
       
    409     }
       
    410 
       
    411     return state;
       
    412 }
       
   413 
       
    414 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::scriptHandler(State state)
       
    415 {
       
            // Called once the closing tag of a <script> element has been seen. Emits the buffered
            // script text and the </script> end tag as tokens, then either requests the external
            // script named by the src attribute or executes the inline script, and finally puts the
            // not-yet-parsed input back where parsing should resume. Returns the updated state.
    416     // We are inside a <script>
       
    417     bool doScriptExec = false;
       
    418     int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenizer line numbers are 0 based
       
    419 
       
    420     // Reset m_currentScriptTagStartLineNumber to indicate that we've finished parsing the current script element
       
    421     m_currentScriptTagStartLineNumber = 0;
       
    422 
       
    423     // (Bugzilla 3837) Scripts following a frameset element should not execute or,
       
    424     // in the case of extern scripts, even load.
       
    425     bool followingFrameset = (document()->body() && document()->body()->hasTagName(framesetTag));
       
    426 
       
            // Stays null unless an external script is successfully requested below.
    427     CachedScript* cs = 0;
       
    428     // don't load external scripts for standalone documents (for now)
       
    429     if (!inViewSourceMode()) {
       
    430         if (!m_scriptTagSrcAttrValue.isEmpty() && document()->frame()) {
       
    431             // forget what we just got; load from src url instead
       
    432             if (!m_treeBuilder->skipMode() && !followingFrameset) {
       
    433                 // The parser might have been stopped by for example a window.close call in an earlier script.
       
    434                 // If so, we don't want to load scripts.
       
                        // The script is queued only if the beforeload dispatch returns true and the loader
                        // hands back a script; in every other case m_scriptNode is dropped.
    435                 if (!m_parserStopped && m_scriptNode->dispatchBeforeLoadEvent(m_scriptTagSrcAttrValue) &&
       
    436                     (cs = document()->docLoader()->requestScript(m_scriptTagSrcAttrValue, m_scriptTagCharsetAttrValue)))
       
    437                     m_pendingScripts.append(cs);
       
    438                 else
       
    439                     m_scriptNode = 0;
       
    440             } else
       
    441                 m_scriptNode = 0;
       
    442             m_scriptTagSrcAttrValue = String();
       
    443         } else {
       
    444             // Parse m_scriptCode containing <script> info
       
    445             doScriptExec = m_scriptNode->shouldExecuteAsJavaScript();
       
    446 #if ENABLE(XHTMLMP)
       
    447             if (!doScriptExec)
       
    448                 document()->setShouldProcessNoscriptElement(true);
       
    449 #endif
       
    450             m_scriptNode = 0;
       
    451         }
       
    452     }
       
    453 
       
            // Emit the buffered script text as a token; |node| is whatever processToken() returns for it.
    454     state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);
       
    455     RefPtr<Node> node = processToken();
       
    456 
       
            // When fragment scripting is not allowed, the node is removed again and forgotten,
            // which also leaves scriptString empty below.
    457     if (node && m_scriptingPermission == FragmentScriptingNotAllowed) {
       
    458         ExceptionCode ec;
       
    459         node->remove(ec);
       
    460         node = 0;
       
    461     }
       
    462 
       
    463     String scriptString = node ? node->textContent() : "";
       
            // Synthesize the </script> end tag token.
    464     m_currentToken.tagName = scriptTag.localName();
       
    465     m_currentToken.beginTag = false;
       
    466     processToken();
       
    467 
       
    468     state.setInScript(false);
       
    469     m_scriptCodeSize = m_scriptCodeResync = 0;
       
    470 
       
    471     // FIXME: The script should be syntax highlighted.
       
    472     if (inViewSourceMode())
       
    473         return state;
       
    474 
       
            // Point m_currentPrependingSrc at a local buffer for the duration of this call; the
            // previous value is restored before returning.
    475     SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
       
    476     SegmentedString prependingSrc;
       
    477     m_currentPrependingSrc = &prependingSrc;
       
    478 
       
    479     if (!m_treeBuilder->skipMode() && !followingFrameset) {
       
    480         if (cs) {
       
                    // External script: park the remaining input (m_src) until the script is done,
                    // then continue with an empty source.
    481             if (savedPrependingSrc)
       
    482                 savedPrependingSrc->append(m_src);
       
    483             else
       
    484                 m_pendingSrc.prepend(m_src);
       
    485             setSrc(SegmentedString());
       
    486 
       
    487             // the ref() call below may call notifyFinished if the script is already in cache,
       
    488             // and that mucks with the state directly, so we must write it back to the object.
       
                    // NOTE(review): the call actually made below is addClient(); "ref()" in the comment
                    // above presumably refers to it -- confirm.
    489             m_state = state;
       
    490             bool savedRequestingScript = m_requestingScript;
       
    491             m_requestingScript = true;
       
    492             cs->addClient(this);
       
    493             m_requestingScript = savedRequestingScript;
       
    494             state = m_state;
       
    495             // will be 0 if script was already loaded and ref() executed it
       
    496             if (!m_pendingScripts.isEmpty())
       
    497                 state.setLoadingExtScript(true);
       
    498         } else if (!m_fragment && doScriptExec) {
       
                    // Inline script: park the remaining input, then execute the script text now.
    499             if (!m_executingScript)
       
    500                 m_pendingSrc.prepend(m_src);
       
    501             else
       
    502                 prependingSrc = m_src;
       
    503             setSrc(SegmentedString());
       
    504             state = scriptExecution(ScriptSourceCode(scriptString, document()->frame() ? document()->frame()->document()->url() : KURL(), startLine), state);
       
    505         }
       
    506     }
       
    507 
       
            // Nothing is executing or loading any more: move the parked input back into m_src.
    508     if (!m_executingScript && !state.loadingExtScript()) {
       
    509         m_src.append(m_pendingSrc);
       
    510         m_pendingSrc.clear();
       
    511     } else if (!prependingSrc.isEmpty()) {
       
    512         // restore first so that the write appends in the right place
       
    513         // (does not hurt to do it again below)
       
    514         m_currentPrependingSrc = savedPrependingSrc;
       
    515 
       
    516         // we need to do this slightly modified bit of one of the write() cases
       
    517         // because we want to prepend to m_pendingSrc rather than appending
       
    518         // if there's no previous prependingSrc
       
    519         if (!m_pendingScripts.isEmpty()) {
       
    520             if (m_currentPrependingSrc)
       
    521                 m_currentPrependingSrc->append(prependingSrc);
       
    522             else
       
    523                 m_pendingSrc.prepend(prependingSrc);
       
    524         } else {
       
                    // No scripts are pending: feed the collected text straight back through write().
                    // m_state is synced before and re-read after because write() works on the member.
    525             m_state = state;
       
    526             write(prependingSrc, false);
       
    527             state = m_state;
       
    528         }
       
    529     }
       
    530 
       
    531 #if PRELOAD_SCANNER_ENABLED
       
            // Still waiting on a script and not executing one: hand the parked input to the
            // preload scanner, creating the scanner on first use.
    532     if (!m_pendingScripts.isEmpty() && !m_executingScript) {
       
    533         if (!m_preloadScanner)
       
    534             m_preloadScanner.set(new LegacyPreloadScanner(document()));
       
    535         if (!m_preloadScanner->inProgress()) {
       
    536             m_preloadScanner->begin();
       
    537             m_preloadScanner->write(m_pendingSrc);
       
    538         }
       
    539     }
       
    540 #endif
       
    541     m_currentPrependingSrc = savedPrependingSrc;
       
    542 
       
    543     return state;
       
    544 }
       
   545 
       
    546 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::scriptExecution(const ScriptSourceCode& sourceCode, State state)
       
    547 {
       
            // Executes |sourceCode| through the frame's script controller and returns the updated
            // state. Does nothing (returns |state| unchanged) when parsing a fragment or when the
            // document has no frame. m_executingScript is incremented for the duration of the call.
    548     if (m_fragment || !document()->frame())
       
    549         return state;
       
    550     m_executingScript++;
       
    551 
       
            // Point m_currentPrependingSrc at a local buffer while the script runs; the previous
            // value is restored before returning.
            // NOTE(review): presumably write() calls made by the running script append to this
            // buffer -- write() is not visible in this chunk, confirm there.
    552     SegmentedString* savedPrependingSrc = m_currentPrependingSrc;
       
    553     SegmentedString prependingSrc;
       
    554     m_currentPrependingSrc = &prependingSrc;
       
    555 
       
            // The state is written to the member before running the script and read back
            // afterwards, so changes made to m_state during execution are kept.
    556     m_state = state;
       
    557     document()->frame()->script()->executeScript(sourceCode);
       
    558     state = m_state;
       
    559 
       
    560     state.setAllowYield(true);
       
    561 
       
    562     m_executingScript--;
       
    563 
       
            // Outermost script finished and no external script is loading: put the collected
            // text in front of the parked input and move all of it back into m_src.
    564     if (!m_executingScript && !state.loadingExtScript()) {
       
    565         m_pendingSrc.prepend(prependingSrc);
       
    566         m_src.append(m_pendingSrc);
       
    567         m_pendingSrc.clear();
       
    568     } else if (!prependingSrc.isEmpty()) {
       
    569         // restore first so that the write appends in the right place
       
    570         // (does not hurt to do it again below)
       
    571         m_currentPrependingSrc = savedPrependingSrc;
       
    572 
       
    573         // we need to do this slightly modified bit of one of the write() cases
       
    574         // because we want to prepend to m_pendingSrc rather than appending
       
    575         // if there's no previous prependingSrc
       
    576         if (!m_pendingScripts.isEmpty()) {
       
    577             if (m_currentPrependingSrc)
       
    578                 m_currentPrependingSrc->append(prependingSrc);
       
    579             else
       
    580                 m_pendingSrc.prepend(prependingSrc);
       
    581 
       
    582 #if PRELOAD_SCANNER_ENABLED
       
    583             // We are stuck waiting for another script. Lets check the source that
       
    584             // was just document.write()n for anything to load.
       
    585             LegacyPreloadScanner documentWritePreloadScanner(document());
       
    586             documentWritePreloadScanner.begin();
       
    587             documentWritePreloadScanner.write(prependingSrc);
       
    588             documentWritePreloadScanner.end();
       
    589 #endif
       
    590         } else {
       
                    // No scripts are pending: feed the collected text straight back through write(),
                    // syncing m_state before the call and re-reading it afterwards.
    591             m_state = state;
       
    592             write(prependingSrc, false);
       
    593             state = m_state;
       
    594         }
       
    595     }
       
    596 
       
    597     m_currentPrependingSrc = savedPrependingSrc;
       
    598 
       
    599     return state;
       
    600 }
       
   601 
       
   602 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseComment(SegmentedString& src, State state)
       
   603 {
       
   604     // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.
       
   605     checkScriptBuffer(src.length());
       
   606     while (!src.isEmpty()) {
       
   607         UChar ch = *src;
       
   608         m_scriptCode[m_scriptCodeSize++] = ch;
       
   609         if (ch == '>') {
       
   610             bool handleBrokenComments = m_brokenComments && !(state.inScript() || state.inStyle());
       
   611             int endCharsCount = 1; // start off with one for the '>' character
       
   612             if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize-3] == '-' && m_scriptCode[m_scriptCodeSize-2] == '-') {
       
   613                 endCharsCount = 3;
       
   614             } else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize-4] == '-' && m_scriptCode[m_scriptCodeSize-3] == '-' &&
       
   615                 m_scriptCode[m_scriptCodeSize-2] == '!') {
       
   616                 // Other browsers will accept --!> as a close comment, even though it's
       
   617                 // not technically valid.
       
   618                 endCharsCount = 4;
       
   619             }
       
   620             if (handleBrokenComments || endCharsCount > 1) {
       
   621                 src.advancePastNonNewline();
       
   622                 if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle() || state.inIFrame())) {
       
   623                     checkScriptBuffer();
       
   624                     m_scriptCode[m_scriptCodeSize] = 0;
       
   625                     m_scriptCode[m_scriptCodeSize + 1] = 0;
       
   626                     m_currentToken.tagName = commentAtom;
       
   627                     m_currentToken.beginTag = true;
       
   628                     state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - endCharsCount), state);
       
   629                     processToken();
       
   630                     m_currentToken.tagName = commentAtom;
       
   631                     m_currentToken.beginTag = false;
       
   632                     processToken();
       
   633                     m_scriptCodeSize = 0;
       
   634                 }
       
   635                 state.setInComment(false);
       
   636                 return state; // Finished parsing comment
       
   637             }
       
   638         }
       
   639         src.advance(m_lineNumber);
       
   640     }
       
   641 
       
   642     return state;
       
   643 }
       
   644 
       
   645 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseServer(SegmentedString& src, State state)
       
   646 {
       
   647     checkScriptBuffer(src.length());
       
   648     while (!src.isEmpty()) {
       
   649         UChar ch = *src;
       
   650         m_scriptCode[m_scriptCodeSize++] = ch;
       
   651         if (ch == '>' && m_scriptCodeSize > 1 && m_scriptCode[m_scriptCodeSize - 2] == '%') {
       
   652             src.advancePastNonNewline();
       
   653             state.setInServer(false);
       
   654             m_scriptCodeSize = 0;
       
   655             return state; // Finished parsing server include
       
   656         }
       
   657         src.advance(m_lineNumber);
       
   658     }
       
   659     return state;
       
   660 }
       
   661 
       
   662 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseProcessingInstruction(SegmentedString& src, State state)
       
   663 {
       
   664     UChar oldchar = 0;
       
   665     while (!src.isEmpty()) {
       
   666         UChar chbegin = *src;
       
   667         if (chbegin == '\'')
       
   668             tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
       
   669         else if (chbegin == '\"')
       
   670             tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
       
   671         // Look for '?>'
       
   672         // Some crappy sites omit the "?" before it, so
       
   673         // we look for an unquoted '>' instead. (IE compatible)
       
   674         else if (chbegin == '>' && (!tquote || oldchar == '?')) {
       
   675             // We got a '?>' sequence
       
   676             state.setInProcessingInstruction(false);
       
   677             src.advancePastNonNewline();
       
   678             state.setDiscardLF(true);
       
   679             return state; // Finished parsing comment!
       
   680         }
       
   681         src.advance(m_lineNumber);
       
   682         oldchar = chbegin;
       
   683     }
       
   684 
       
   685     return state;
       
   686 }
       
   687 
       
   688 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseText(SegmentedString& src, State state)
       
   689 {
       
   690     while (!src.isEmpty()) {
       
   691         UChar cc = *src;
       
   692 
       
   693         if (state.skipLF()) {
       
   694             state.setSkipLF(false);
       
   695             if (cc == '\n') {
       
   696                 src.advancePastNewline(m_lineNumber);
       
   697                 continue;
       
   698             }
       
   699         }
       
   700 
       
   701         // do we need to enlarge the buffer?
       
   702         checkBuffer();
       
   703 
       
   704         if (cc == '\r') {
       
   705             state.setSkipLF(true);
       
   706             *m_dest++ = '\n';
       
   707         } else
       
   708             *m_dest++ = cc;
       
   709         src.advance(m_lineNumber);
       
   710     }
       
   711 
       
   712     return state;
       
   713 }
       
   714 
       
   715 
       
   716 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseEntity(SegmentedString& src, UChar*& dest, State state, unsigned& cBufferPos, bool start, bool parsingTag)
       
   717 {
       
   718     if (start) {
       
   719         cBufferPos = 0;
       
   720         state.setEntityState(SearchEntity);
       
   721         EntityUnicodeValue = 0;
       
   722     }
       
   723 
       
   724     while (!src.isEmpty()) {
       
   725         UChar cc = *src;
       
   726         switch (state.entityState()) {
       
   727         case NoEntity:
       
   728             ASSERT(state.entityState() != NoEntity);
       
   729             return state;
       
   730 
       
   731         case SearchEntity:
       
   732             if (cc == '#') {
       
   733                 m_cBuffer[cBufferPos++] = cc;
       
   734                 src.advancePastNonNewline();
       
   735                 state.setEntityState(NumericSearch);
       
   736             } else
       
   737                 state.setEntityState(EntityName);
       
   738             break;
       
   739 
       
   740         case NumericSearch:
       
   741             if (cc == 'x' || cc == 'X') {
       
   742                 m_cBuffer[cBufferPos++] = cc;
       
   743                 src.advancePastNonNewline();
       
   744                 state.setEntityState(Hexadecimal);
       
   745             } else if (cc >= '0' && cc <= '9')
       
   746                 state.setEntityState(Decimal);
       
   747             else
       
   748                 state.setEntityState(SearchSemicolon);
       
   749             break;
       
   750 
       
   751         case Hexadecimal: {
       
   752             int ll = min(src.length(), 10 - cBufferPos);
       
   753             while (ll--) {
       
   754                 cc = *src;
       
   755                 if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) {
       
   756                     state.setEntityState(SearchSemicolon);
       
   757                     break;
       
   758                 }
       
   759                 int digit;
       
   760                 if (cc < 'A')
       
   761                     digit = cc - '0';
       
   762                 else
       
   763                     digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch
       
   764                 EntityUnicodeValue = EntityUnicodeValue * 16 + digit;
       
   765                 m_cBuffer[cBufferPos++] = cc;
       
   766                 src.advancePastNonNewline();
       
   767             }
       
   768             if (cBufferPos == 10)
       
   769                 state.setEntityState(SearchSemicolon);
       
   770             break;
       
   771         }
       
   772         case Decimal:
       
   773         {
       
   774             int ll = min(src.length(), 9-cBufferPos);
       
   775             while (ll--) {
       
   776                 cc = *src;
       
   777 
       
   778                 if (!(cc >= '0' && cc <= '9')) {
       
   779                     state.setEntityState(SearchSemicolon);
       
   780                     break;
       
   781                 }
       
   782 
       
   783                 EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
       
   784                 m_cBuffer[cBufferPos++] = cc;
       
   785                 src.advancePastNonNewline();
       
   786             }
       
   787             if (cBufferPos == 9)
       
   788                 state.setEntityState(SearchSemicolon);
       
   789             break;
       
   790         }
       
   791         case EntityName:
       
   792         {
       
   793             int ll = min(src.length(), 9-cBufferPos);
       
   794             while (ll--) {
       
   795                 cc = *src;
       
   796 
       
   797                 if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
       
   798                     state.setEntityState(SearchSemicolon);
       
   799                     break;
       
   800                 }
       
   801 
       
   802                 m_cBuffer[cBufferPos++] = cc;
       
   803                 src.advancePastNonNewline();
       
   804             }
       
   805             if (cBufferPos == 9)
       
   806                 state.setEntityState(SearchSemicolon);
       
   807             if (state.entityState() == SearchSemicolon) {
       
   808                 if (cBufferPos > 1) {
       
   809                     // Since the maximum length of entity name is 9,
       
   810                     // so a single char array which is allocated on
       
   811                     // the stack, its length is 10, should be OK.
       
   812                     // Also if we have an illegal character, we treat it
       
   813                     // as illegal entity name.
       
   814                     unsigned testedEntityNameLen = 0;
       
   815                     char tmpEntityNameBuffer[10];
       
   816 
       
   817                     ASSERT(cBufferPos < 10);
       
   818                     for (; testedEntityNameLen < cBufferPos; ++testedEntityNameLen) {
       
   819                         if (m_cBuffer[testedEntityNameLen] > 0x7e)
       
   820                             break;
       
   821                         tmpEntityNameBuffer[testedEntityNameLen] = m_cBuffer[testedEntityNameLen];
       
   822                     }
       
   823 
       
   824                     const Entity *e;
       
   825 
       
   826                     if (testedEntityNameLen == cBufferPos)
       
   827                         e = findEntity(tmpEntityNameBuffer, cBufferPos);
       
   828                     else
       
   829                         e = 0;
       
   830 
       
   831                     if (e)
       
   832                         EntityUnicodeValue = e->code;
       
   833 
       
   834                     // be IE compatible
       
   835                     if (parsingTag && EntityUnicodeValue > 255 && *src != ';')
       
   836                         EntityUnicodeValue = 0;
       
   837                 }
       
   838             }
       
   839             else
       
   840                 break;
       
   841         }
       
   842         case SearchSemicolon:
       
   843             // Don't allow values that are more than 21 bits.
       
   844             if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) {
       
   845                 if (!inViewSourceMode()) {
       
   846                     if (*src == ';')
       
   847                         src.advancePastNonNewline();
       
   848                     if (EntityUnicodeValue <= 0xFFFF) {
       
   849                         checkBuffer();
       
   850                         src.push(fixUpChar(EntityUnicodeValue));
       
   851                     } else {
       
   852                         // Convert to UTF-16, using surrogate code points.
       
   853                         checkBuffer(2);
       
   854                         src.push(U16_LEAD(EntityUnicodeValue));
       
   855                         src.push(U16_TRAIL(EntityUnicodeValue));
       
   856                     }
       
   857                 } else {
       
   858                     // FIXME: We should eventually colorize entities by sending them as a special token.
       
   859                     // 12 bytes required: up to 10 bytes in m_cBuffer plus the
       
   860                     // leading '&' and trailing ';'
       
   861                     checkBuffer(12);
       
   862                     *dest++ = '&';
       
   863                     for (unsigned i = 0; i < cBufferPos; i++)
       
   864                         dest[i] = m_cBuffer[i];
       
   865                     dest += cBufferPos;
       
   866                     if (*src == ';') {
       
   867                         *dest++ = ';';
       
   868                         src.advancePastNonNewline();
       
   869                     }
       
   870                 }
       
   871             } else {
       
   872                 // 11 bytes required: up to 10 bytes in m_cBuffer plus the
       
   873                 // leading '&'
       
   874                 checkBuffer(11);
       
   875                 // ignore the sequence, add it to the buffer as plaintext
       
   876                 *dest++ = '&';
       
   877                 for (unsigned i = 0; i < cBufferPos; i++)
       
   878                     dest[i] = m_cBuffer[i];
       
   879                 dest += cBufferPos;
       
   880             }
       
   881 
       
   882             state.setEntityState(NoEntity);
       
   883             return state;
       
   884         }
       
   885     }
       
   886 
       
   887     return state;
       
   888 }
       
   889 
       
   890 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseDoctype(SegmentedString& src, State state)
       
   891 {
       
   892     ASSERT(state.inDoctype());
       
   893     while (!src.isEmpty() && state.inDoctype()) {
       
   894         UChar c = *src;
       
   895         bool isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' ';
       
   896         switch (m_doctypeToken.state()) {
       
   897             case DoctypeBegin: {
       
   898                 m_doctypeToken.setState(DoctypeBeforeName);
       
   899                 if (isWhitespace) {
       
   900                     src.advance(m_lineNumber);
       
   901                     if (inViewSourceMode())
       
   902                         m_doctypeToken.m_source.append(c);
       
   903                 }
       
   904                 break;
       
   905             }
       
   906             case DoctypeBeforeName: {
       
   907                 if (c == '>') {
       
   908                     // Malformed.  Just exit.
       
   909                     src.advancePastNonNewline();
       
   910                     state.setInDoctype(false);
       
   911                     if (inViewSourceMode())
       
   912                         processDoctypeToken();
       
   913                 } else if (isWhitespace) {
       
   914                     src.advance(m_lineNumber);
       
   915                     if (inViewSourceMode())
       
   916                         m_doctypeToken.m_source.append(c);
       
   917                 } else
       
   918                     m_doctypeToken.setState(DoctypeName);
       
   919                 break;
       
   920             }
       
   921             case DoctypeName: {
       
   922                 if (c == '>') {
       
   923                     // Valid doctype. Emit it.
       
   924                     src.advancePastNonNewline();
       
   925                     state.setInDoctype(false);
       
   926                     processDoctypeToken();
       
   927                 } else if (isWhitespace) {
       
   928                     m_doctypeSearchCount = 0; // Used now to scan for PUBLIC
       
   929                     m_doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM
       
   930                     m_doctypeToken.setState(DoctypeAfterName);
       
   931                     src.advance(m_lineNumber);
       
   932                     if (inViewSourceMode())
       
   933                         m_doctypeToken.m_source.append(c);
       
   934                 } else {
       
   935                     src.advancePastNonNewline();
       
   936                     m_doctypeToken.m_name.append(c);
       
   937                     if (inViewSourceMode())
       
   938                         m_doctypeToken.m_source.append(c);
       
   939                 }
       
   940                 break;
       
   941             }
       
   942             case DoctypeAfterName: {
       
   943                 if (c == '>') {
       
   944                     // Valid doctype. Emit it.
       
   945                     src.advancePastNonNewline();
       
   946                     state.setInDoctype(false);
       
   947                     processDoctypeToken();
       
   948                 } else if (!isWhitespace) {
       
   949                     src.advancePastNonNewline();
       
   950                     if (toASCIILower(c) == publicStart[m_doctypeSearchCount]) {
       
   951                         m_doctypeSearchCount++;
       
   952                         if (m_doctypeSearchCount == 6)
       
   953                             // Found 'PUBLIC' sequence
       
   954                             m_doctypeToken.setState(DoctypeBeforePublicID);
       
   955                     } else if (m_doctypeSearchCount > 0) {
       
   956                         m_doctypeSearchCount = 0;
       
   957                         m_doctypeToken.setState(DoctypeBogus);
       
   958                     } else if (toASCIILower(c) == systemStart[m_doctypeSecondarySearchCount]) {
       
   959                         m_doctypeSecondarySearchCount++;
       
   960                         if (m_doctypeSecondarySearchCount == 6)
       
   961                             // Found 'SYSTEM' sequence
       
   962                             m_doctypeToken.setState(DoctypeBeforeSystemID);
       
   963                     } else {
       
   964                         m_doctypeSecondarySearchCount = 0;
       
   965                         m_doctypeToken.setState(DoctypeBogus);
       
   966                     }
       
   967                     if (inViewSourceMode())
       
   968                         m_doctypeToken.m_source.append(c);
       
   969                 } else {
       
   970                     src.advance(m_lineNumber); // Whitespace keeps us in the after name state.
       
   971                     if (inViewSourceMode())
       
   972                         m_doctypeToken.m_source.append(c);
       
   973                 }
       
   974                 break;
       
   975             }
       
   976             case DoctypeBeforePublicID: {
       
   977                 if (c == '\"' || c == '\'') {
       
   978                     tquote = c == '\"' ? DoubleQuote : SingleQuote;
       
   979                     m_doctypeToken.setState(DoctypePublicID);
       
   980                     src.advancePastNonNewline();
       
   981                     if (inViewSourceMode())
       
   982                         m_doctypeToken.m_source.append(c);
       
   983                 } else if (c == '>') {
       
   984                     // Considered bogus.  Don't process the doctype.
       
   985                     src.advancePastNonNewline();
       
   986                     state.setInDoctype(false);
       
   987                     if (inViewSourceMode())
       
   988                         processDoctypeToken();
       
   989                 } else if (isWhitespace) {
       
   990                     src.advance(m_lineNumber);
       
   991                     if (inViewSourceMode())
       
   992                         m_doctypeToken.m_source.append(c);
       
   993                 } else
       
   994                     m_doctypeToken.setState(DoctypeBogus);
       
   995                 break;
       
   996             }
       
   997             case DoctypePublicID: {
       
   998                 if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
       
   999                     src.advancePastNonNewline();
       
  1000                     m_doctypeToken.setState(DoctypeAfterPublicID);
       
  1001                     if (inViewSourceMode())
       
  1002                         m_doctypeToken.m_source.append(c);
       
  1003                 } else if (c == '>') {
       
  1004                      // Considered bogus.  Don't process the doctype.
       
  1005                     src.advancePastNonNewline();
       
  1006                     state.setInDoctype(false);
       
  1007                     if (inViewSourceMode())
       
  1008                         processDoctypeToken();
       
  1009                 } else {
       
  1010                     m_doctypeToken.m_publicID.append(c);
       
  1011                     src.advance(m_lineNumber);
       
  1012                     if (inViewSourceMode())
       
  1013                         m_doctypeToken.m_source.append(c);
       
  1014                 }
       
  1015                 break;
       
  1016             }
       
  1017             case DoctypeAfterPublicID:
       
  1018                 if (c == '\"' || c == '\'') {
       
  1019                     tquote = c == '\"' ? DoubleQuote : SingleQuote;
       
  1020                     m_doctypeToken.setState(DoctypeSystemID);
       
  1021                     src.advancePastNonNewline();
       
  1022                     if (inViewSourceMode())
       
  1023                         m_doctypeToken.m_source.append(c);
       
  1024                 } else if (c == '>') {
       
  1025                     // Valid doctype. Emit it now.
       
  1026                     src.advancePastNonNewline();
       
  1027                     state.setInDoctype(false);
       
  1028                     processDoctypeToken();
       
  1029                 } else if (isWhitespace) {
       
  1030                     src.advance(m_lineNumber);
       
  1031                     if (inViewSourceMode())
       
  1032                         m_doctypeToken.m_source.append(c);
       
  1033                 } else
       
  1034                     m_doctypeToken.setState(DoctypeBogus);
       
  1035                 break;
       
  1036             case DoctypeBeforeSystemID:
       
  1037                 if (c == '\"' || c == '\'') {
       
  1038                     tquote = c == '\"' ? DoubleQuote : SingleQuote;
       
  1039                     m_doctypeToken.setState(DoctypeSystemID);
       
  1040                     src.advancePastNonNewline();
       
  1041                     if (inViewSourceMode())
       
  1042                         m_doctypeToken.m_source.append(c);
       
  1043                 } else if (c == '>') {
       
  1044                     // Considered bogus.  Don't process the doctype.
       
  1045                     src.advancePastNonNewline();
       
  1046                     state.setInDoctype(false);
       
  1047                 } else if (isWhitespace) {
       
  1048                     src.advance(m_lineNumber);
       
  1049                     if (inViewSourceMode())
       
  1050                         m_doctypeToken.m_source.append(c);
       
  1051                 } else
       
  1052                     m_doctypeToken.setState(DoctypeBogus);
       
  1053                 break;
       
  1054             case DoctypeSystemID:
       
  1055                 if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
       
  1056                     src.advancePastNonNewline();
       
  1057                     m_doctypeToken.setState(DoctypeAfterSystemID);
       
  1058                     if (inViewSourceMode())
       
  1059                         m_doctypeToken.m_source.append(c);
       
  1060                 } else if (c == '>') {
       
  1061                      // Considered bogus.  Don't process the doctype.
       
  1062                     src.advancePastNonNewline();
       
  1063                     state.setInDoctype(false);
       
  1064                     if (inViewSourceMode())
       
  1065                         processDoctypeToken();
       
  1066                 } else {
       
  1067                     m_doctypeToken.m_systemID.append(c);
       
  1068                     src.advance(m_lineNumber);
       
  1069                     if (inViewSourceMode())
       
  1070                         m_doctypeToken.m_source.append(c);
       
  1071                 }
       
  1072                 break;
       
  1073             case DoctypeAfterSystemID:
       
  1074                 if (c == '>') {
       
  1075                     // Valid doctype. Emit it now.
       
  1076                     src.advancePastNonNewline();
       
  1077                     state.setInDoctype(false);
       
  1078                     processDoctypeToken();
       
  1079                 } else if (isWhitespace) {
       
  1080                     src.advance(m_lineNumber);
       
  1081                     if (inViewSourceMode())
       
  1082                         m_doctypeToken.m_source.append(c);
       
  1083                 } else
       
  1084                     m_doctypeToken.setState(DoctypeBogus);
       
  1085                 break;
       
  1086             case DoctypeBogus:
       
  1087                 if (c == '>') {
       
  1088                     // Done with the bogus doctype.
       
  1089                     src.advancePastNonNewline();
       
  1090                     state.setInDoctype(false);
       
  1091                     if (inViewSourceMode())
       
  1092                        processDoctypeToken();
       
  1093                 } else {
       
  1094                     src.advance(m_lineNumber); // Just keep scanning for '>'
       
  1095                     if (inViewSourceMode())
       
  1096                         m_doctypeToken.m_source.append(c);
       
  1097                 }
       
  1098                 break;
       
  1099             default:
       
  1100                 break;
       
  1101         }
       
  1102     }
       
  1103     return state;
       
  1104 }
       
  1105 
       
  1106 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseTag(SegmentedString& src, State state)
       
  1107 {
       
  1108     ASSERT(!state.hasEntityState());
       
  1109 
       
  1110     unsigned cBufferPos = m_cBufferPos;
       
  1111 
       
  1112     bool lastIsSlash = false;
       
  1113 
       
  1114     while (!src.isEmpty()) {
       
  1115         checkBuffer();
       
  1116         switch (state.tagState()) {
       
  1117         case NoTag:
       
  1118         {
       
  1119             m_cBufferPos = cBufferPos;
       
  1120             return state;
       
  1121         }
       
  1122         case TagName:
       
  1123         {
       
  1124             if (searchCount > 0) {
       
  1125                 if (*src == commentStart[searchCount]) {
       
  1126                     searchCount++;
       
  1127                     if (searchCount == 2)
       
  1128                         m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well.
       
  1129                     else
       
  1130                         m_doctypeSearchCount = 0;
       
  1131                     if (searchCount == 4) {
       
  1132                         // Found '<!--' sequence
       
  1133                         src.advancePastNonNewline();
       
  1134                         m_dest = m_buffer; // ignore the previous part of this tag
       
  1135                         state.setInComment(true);
       
  1136                         state.setTagState(NoTag);
       
  1137 
       
  1138                         // Fix bug 34302 at kde.bugs.org.  Go ahead and treat
       
  1139                         // <!--> as a valid comment, since both mozilla and IE on windows
       
  1140                         // can handle this case.  Only do this in quirks mode. -dwh
       
  1141                         if (!src.isEmpty() && *src == '>' && document()->inCompatMode()) {
       
  1142                             state.setInComment(false);
       
  1143                             src.advancePastNonNewline();
       
  1144                             if (!src.isEmpty())
       
  1145                                 m_cBuffer[cBufferPos++] = *src;
       
  1146                         } else
       
  1147                           state = parseComment(src, state);
       
  1148 
       
  1149                         m_cBufferPos = cBufferPos;
       
  1150                         return state; // Finished parsing tag!
       
  1151                     }
       
  1152                     m_cBuffer[cBufferPos++] = *src;
       
  1153                     src.advancePastNonNewline();
       
  1154                     break;
       
  1155                 } else
       
  1156                     searchCount = 0; // Stop looking for '<!--' sequence
       
  1157             }
       
  1158 
       
  1159             if (m_doctypeSearchCount > 0) {
       
  1160                 if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) {
       
  1161                     m_doctypeSearchCount++;
       
  1162                     m_cBuffer[cBufferPos++] = *src;
       
  1163                     src.advancePastNonNewline();
       
  1164                     if (m_doctypeSearchCount == 9) {
       
  1165                         // Found '<!DOCTYPE' sequence
       
  1166                         state.setInDoctype(true);
       
  1167                         state.setTagState(NoTag);
       
  1168                         m_doctypeToken.reset();
       
  1169                         if (inViewSourceMode())
       
  1170                             m_doctypeToken.m_source.append(m_cBuffer, cBufferPos);
       
  1171                         state = parseDoctype(src, state);
       
  1172                         m_cBufferPos = cBufferPos;
       
  1173                         return state;
       
  1174                     }
       
  1175                     break;
       
  1176                 } else
       
  1177                     m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence
       
  1178             }
       
  1179 
       
  1180             bool finish = false;
       
  1181             unsigned int ll = min(src.length(), CBUFLEN - cBufferPos);
       
  1182             while (ll--) {
       
  1183                 UChar curchar = *src;
       
  1184                 if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') {
       
  1185                     finish = true;
       
  1186                     break;
       
  1187                 }
       
  1188 
       
  1189                 // tolower() shows up on profiles. This is faster!
       
  1190                 if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
       
  1191                     m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
       
  1192                 else
       
  1193                     m_cBuffer[cBufferPos++] = curchar;
       
  1194                 src.advancePastNonNewline();
       
  1195             }
       
  1196 
       
  1197             // Disadvantage: we add the possible rest of the tag
       
  1198             // as attribute names. ### judge if this causes problems
       
  1199             if (finish || CBUFLEN == cBufferPos) {
       
  1200                 bool beginTag;
       
  1201                 UChar* ptr = m_cBuffer;
       
  1202                 unsigned int len = cBufferPos;
       
  1203                 m_cBuffer[cBufferPos] = '\0';
       
  1204                 if ((cBufferPos > 0) && (*ptr == '/')) {
       
  1205                     // End Tag
       
  1206                     beginTag = false;
       
  1207                     ptr++;
       
  1208                     len--;
       
  1209                 }
       
  1210                 else
       
  1211                     // Start Tag
       
  1212                     beginTag = true;
       
  1213 
       
  1214                 // Ignore the / in fake xml tags like <br/>.  We trim off the "/" so that we'll get "br" as the tag name and not "br/".
       
  1215                 if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode())
       
  1216                     ptr[--len] = '\0';
       
  1217 
       
  1218                 // Now that we've shaved off any invalid / that might have followed the name), make the tag.
       
  1219                 // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html)
       
  1220                 if (ptr[0] != '!' || inViewSourceMode()) {
       
  1221                     m_currentToken.tagName = AtomicString(ptr);
       
  1222                     m_currentToken.beginTag = beginTag;
       
  1223                 }
       
  1224                 m_dest = m_buffer;
       
  1225                 state.setTagState(SearchAttribute);
       
  1226                 cBufferPos = 0;
       
  1227             }
       
  1228             break;
       
  1229         }
       
  1230         case SearchAttribute:
       
  1231             while (!src.isEmpty()) {
       
  1232                 UChar curchar = *src;
       
  1233                 // In this mode just ignore any quotes we encounter and treat them like spaces.
       
  1234                 if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') {
       
  1235                     if (curchar == '<' || curchar == '>')
       
  1236                         state.setTagState(SearchEnd);
       
  1237                     else
       
  1238                         state.setTagState(AttributeName);
       
  1239 
       
  1240                     cBufferPos = 0;
       
  1241                     break;
       
  1242                 }
       
  1243                 if (inViewSourceMode())
       
  1244                     m_currentToken.addViewSourceChar(curchar);
       
  1245                 src.advance(m_lineNumber);
       
  1246             }
       
  1247             break;
       
  1248         case AttributeName:
       
  1249         {
       
  1250             m_rawAttributeBeforeValue.clear();
       
  1251             int ll = min(src.length(), CBUFLEN - cBufferPos);
       
  1252             while (ll--) {
       
  1253                 UChar curchar = *src;
       
  1254                 // If we encounter a "/" when scanning an attribute name, treat it as a delimiter.  This allows the
       
  1255                 // cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5).
       
  1256                 if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) {
       
  1257                     m_cBuffer[cBufferPos] = '\0';
       
  1258                     m_attrName = AtomicString(m_cBuffer);
       
  1259                     m_dest = m_buffer;
       
  1260                     *m_dest++ = 0;
       
  1261                     state.setTagState(SearchEqual);
       
  1262                     if (inViewSourceMode())
       
  1263                         m_currentToken.addViewSourceChar('a');
       
  1264                     break;
       
  1265                 }
       
  1266 
       
  1267                 // tolower() shows up on profiles. This is faster!
       
  1268                 if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode())
       
  1269                     m_cBuffer[cBufferPos++] = curchar + ('a' - 'A');
       
  1270                 else
       
  1271                     m_cBuffer[cBufferPos++] = curchar;
       
  1272 
       
  1273                 m_rawAttributeBeforeValue.append(curchar);
       
  1274                 src.advance(m_lineNumber);
       
  1275             }
       
  1276             if (cBufferPos == CBUFLEN) {
       
  1277                 m_cBuffer[cBufferPos] = '\0';
       
  1278                 m_attrName = AtomicString(m_cBuffer);
       
  1279                 m_dest = m_buffer;
       
  1280                 *m_dest++ = 0;
       
  1281                 state.setTagState(SearchEqual);
       
  1282                 if (inViewSourceMode())
       
  1283                     m_currentToken.addViewSourceChar('a');
       
  1284             }
       
  1285             break;
       
  1286         }
       
  1287         case SearchEqual:
       
  1288             while (!src.isEmpty()) {
       
  1289                 UChar curchar = *src;
       
  1290 
       
  1291                 if (lastIsSlash && curchar == '>') {
       
  1292                     // This is a quirk (with a long sad history).  We have to do this
       
  1293                     // since widgets do <script src="foo.js"/> and expect the tag to close.
       
  1294                     if (m_currentToken.tagName == scriptTag)
       
  1295                         m_currentToken.selfClosingTag = true;
       
  1296                     m_currentToken.brokenXMLStyle = true;
       
  1297                 }
       
  1298 
       
  1299                 // In this mode just ignore any quotes or slashes we encounter and treat them like spaces.
       
  1300                 if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') {
       
  1301                     if (curchar == '=') {
       
  1302                         state.setTagState(SearchValue);
       
  1303                         if (inViewSourceMode())
       
  1304                             m_currentToken.addViewSourceChar(curchar);
       
  1305                         m_rawAttributeBeforeValue.append(curchar);
       
  1306                         src.advancePastNonNewline();
       
  1307                     } else {
       
  1308                         m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode());
       
  1309                         m_dest = m_buffer;
       
  1310                         state.setTagState(SearchAttribute);
       
  1311                         lastIsSlash = false;
       
  1312                     }
       
  1313                     break;
       
  1314                 }
       
  1315 
       
  1316                 lastIsSlash = curchar == '/';
       
  1317 
       
  1318                 if (inViewSourceMode())
       
  1319                     m_currentToken.addViewSourceChar(curchar);
       
  1320                 m_rawAttributeBeforeValue.append(curchar);
       
  1321                 src.advance(m_lineNumber);
       
  1322             }
       
  1323             break;
       
  1324         case SearchValue:
       
  1325             while (!src.isEmpty()) {
       
  1326                 UChar curchar = *src;
       
  1327                 if (!isASCIISpace(curchar)) {
       
  1328                     if (curchar == '\'' || curchar == '\"') {
       
  1329                         tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
       
  1330                         state.setTagState(QuotedValue);
       
  1331                         if (inViewSourceMode())
       
  1332                             m_currentToken.addViewSourceChar(curchar);
       
  1333                         m_rawAttributeBeforeValue.append(curchar);
       
  1334                         src.advancePastNonNewline();
       
  1335                     } else
       
  1336                         state.setTagState(Value);
       
  1337 
       
  1338                     break;
       
  1339                 }
       
  1340                 if (inViewSourceMode())
       
  1341                     m_currentToken.addViewSourceChar(curchar);
       
  1342                 m_rawAttributeBeforeValue.append(curchar);
       
  1343                 src.advance(m_lineNumber);
       
  1344             }
       
  1345             break;
       
  1346         case QuotedValue:
       
  1347             while (!src.isEmpty()) {
       
  1348                 checkBuffer();
       
  1349 
       
  1350                 UChar curchar = *src;
       
  1351                 if (curchar <= '>' && !src.escaped()) {
       
  1352                     if (curchar == '>' && m_attrName.isEmpty()) {
       
  1353                         // Handle a case like <img '>.  Just go ahead and be willing
       
  1354                         // to close the whole tag.  Don't consume the character and
       
  1355                         // just go back into SearchEnd while ignoring the whole
       
  1356                         // value.
       
  1357                         // FIXME: Note that this is actually not a very good solution.
       
  1358                         // It doesn't handle the general case of
       
  1359                         // unmatched quotes among attributes that have names. -dwh
       
  1360                         while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
       
  1361                             m_dest--; // remove trailing newlines
       
  1362                         AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
       
  1363                         if (!attributeValue.contains('/'))
       
  1364                             m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)
       
  1365                         m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
       
  1366                         if (inViewSourceMode())
       
  1367                             m_currentToken.addViewSourceChar('x');
       
  1368                         state.setTagState(SearchAttribute);
       
  1369                         m_dest = m_buffer;
       
  1370                         tquote = NoQuote;
       
  1371                         break;
       
  1372                     }
       
  1373 
       
  1374                     if (curchar == '&') {
       
  1375                         src.advancePastNonNewline();
       
  1376                         state = parseEntity(src, m_dest, state, cBufferPos, true, true);
       
  1377                         break;
       
  1378                     }
       
  1379 
       
  1380                     if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) {
       
  1381                         // some <input type=hidden> rely on trailing spaces. argh
       
  1382                         while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))
       
  1383                             m_dest--; // remove trailing newlines
       
  1384                         AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
       
  1385                         if (m_attrName.isEmpty() && !attributeValue.contains('/')) {
       
  1386                             m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?)
       
  1387                             if (inViewSourceMode())
       
  1388                                 m_currentToken.addViewSourceChar('x');
       
  1389                         } else if (inViewSourceMode())
       
  1390                             m_currentToken.addViewSourceChar('v');
       
  1391 
       
  1392                         if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeBuilder->skipMode() && m_attrName == srcAttr) {
       
  1393                             String context(m_rawAttributeBeforeValue.data(), m_rawAttributeBeforeValue.size());
       
  1394                             if (xssAuditor() && !xssAuditor()->canLoadExternalScriptFromSrc(attributeValue))
       
  1395                                 attributeValue = blankURL().string();
       
  1396                         }
       
  1397 
       
  1398                         m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
       
  1399                         m_dest = m_buffer;
       
  1400                         state.setTagState(SearchAttribute);
       
  1401                         tquote = NoQuote;
       
  1402                         if (inViewSourceMode())
       
  1403                             m_currentToken.addViewSourceChar(curchar);
       
  1404                         src.advancePastNonNewline();
       
  1405                         break;
       
  1406                     }
       
  1407                 }
       
  1408 
       
  1409                 *m_dest++ = curchar;
       
  1410                 src.advance(m_lineNumber);
       
  1411             }
       
  1412             break;
       
  1413         case Value:
       
  1414             while (!src.isEmpty()) {
       
  1415                 checkBuffer();
       
  1416                 UChar curchar = *src;
       
  1417                 if (curchar <= '>' && !src.escaped()) {
       
  1418                     // parse Entities
       
  1419                     if (curchar == '&') {
       
  1420                         src.advancePastNonNewline();
       
  1421                         state = parseEntity(src, m_dest, state, cBufferPos, true, true);
       
  1422                         break;
       
  1423                     }
       
  1424                     // no quotes. Every space means end of value
       
  1425                     // '/' does not delimit in IE!
       
  1426                     if (isASCIISpace(curchar) || curchar == '>') {
       
  1427                         AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);
       
  1428 
       
  1429                         if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeBuilder->skipMode() && m_attrName == srcAttr) {
       
  1430                             String context(m_rawAttributeBeforeValue.data(), m_rawAttributeBeforeValue.size());
       
  1431                             if (xssAuditor() && !xssAuditor()->canLoadExternalScriptFromSrc(attributeValue))
       
  1432                                 attributeValue = blankURL().string();
       
  1433                         }
       
  1434 
       
  1435                         m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());
       
  1436                         if (inViewSourceMode())
       
  1437                             m_currentToken.addViewSourceChar('v');
       
  1438                         m_dest = m_buffer;
       
  1439                         state.setTagState(SearchAttribute);
       
  1440                         break;
       
  1441                     }
       
  1442                 }
       
  1443 
       
  1444                 *m_dest++ = curchar;
       
  1445                 src.advance(m_lineNumber);
       
  1446             }
       
  1447             break;
       
  1448         case SearchEnd:
       
  1449         {
       
  1450             while (!src.isEmpty()) {
       
  1451                 UChar ch = *src;
       
  1452                 if (ch == '>' || ch == '<')
       
  1453                     break;
       
  1454                 if (ch == '/')
       
  1455                     m_currentToken.selfClosingTag = true;
       
  1456                 if (inViewSourceMode())
       
  1457                     m_currentToken.addViewSourceChar(ch);
       
  1458                 src.advance(m_lineNumber);
       
  1459             }
       
  1460             if (src.isEmpty())
       
  1461                 break;
       
  1462 
       
  1463             searchCount = 0; // Stop looking for '<!--' sequence
       
  1464             state.setTagState(NoTag);
       
  1465             tquote = NoQuote;
       
  1466 
       
  1467             if (*src != '<')
       
  1468                 src.advance(m_lineNumber);
       
  1469 
       
  1470             if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown
       
  1471                 m_cBufferPos = cBufferPos;
       
  1472                 return state;
       
  1473             }
       
  1474 
       
  1475             AtomicString tagName = m_currentToken.tagName;
       
  1476 
       
  1477             // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard
       
  1478             // compatibility.
       
  1479             bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag;
       
  1480             bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag;
       
  1481             if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeBuilder->skipMode()) {
       
  1482                 Attribute* a = 0;
       
  1483                 m_scriptTagSrcAttrValue = String();
       
  1484                 m_scriptTagCharsetAttrValue = String();
       
  1485                 if (m_currentToken.attrs && !m_fragment) {
       
  1486                     if (document()->frame() && document()->frame()->script()->canExecuteScripts(NotAboutToExecuteScript)) {
       
  1487                         if ((a = m_currentToken.attrs->getAttributeItem(srcAttr)))
       
  1488                             m_scriptTagSrcAttrValue = document()->completeURL(deprecatedParseURL(a->value())).string();
       
  1489                     }
       
  1490                 }
       
  1491             }
       
  1492 
       
  1493             RefPtr<Node> n = processToken();
       
  1494             m_cBufferPos = cBufferPos;
       
  1495             if (n || inViewSourceMode()) {
       
  1496                 State savedState = state;
       
  1497                 SegmentedString savedSrc = src;
       
  1498                 long savedLineno = m_lineNumber;
       
  1499                 if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) {
       
  1500                     if (beginTag)
       
  1501                         state.setDiscardLF(true); // Discard the first LF after we open a pre.
       
  1502                 } else if (tagName == scriptTag) {
       
  1503                     ASSERT(!m_scriptNode);
       
  1504                     m_scriptNode = static_pointer_cast<HTMLScriptElement>(n);
       
  1505                     if (m_scriptNode)
       
  1506                         m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset();
       
  1507                     if (beginTag) {
       
  1508                         m_searchStopper = scriptEnd;
       
  1509                         m_searchStopperLength = 8;
       
  1510                         state.setInScript(true);
       
  1511                         state = parseNonHTMLText(src, state);
       
  1512                     } else if (isSelfClosingScript) { // Handle <script src="foo"/>
       
  1513                         state.setInScript(true);
       
  1514                         state = scriptHandler(state);
       
  1515                     }
       
  1516                 } else if (tagName == styleTag) {
       
  1517                     if (beginTag) {
       
  1518                         m_searchStopper = styleEnd;
       
  1519                         m_searchStopperLength = 7;
       
  1520                         state.setInStyle(true);
       
  1521                         state = parseNonHTMLText(src, state);
       
  1522                     }
       
  1523                 } else if (tagName == textareaTag) {
       
  1524                     if (beginTag) {
       
  1525                         m_searchStopper = textareaEnd;
       
  1526                         m_searchStopperLength = 10;
       
  1527                         state.setInTextArea(true);
       
  1528                         state = parseNonHTMLText(src, state);
       
  1529                     }
       
  1530                 } else if (tagName == titleTag) {
       
  1531                     if (beginTag) {
       
  1532                         m_searchStopper = titleEnd;
       
  1533                         m_searchStopperLength = 7;
       
  1534                         state.setInTitle(true);
       
  1535                         state = parseNonHTMLText(src, state);
       
  1536                     }
       
  1537                 } else if (tagName == xmpTag) {
       
  1538                     if (beginTag) {
       
  1539                         m_searchStopper = xmpEnd;
       
  1540                         m_searchStopperLength = 5;
       
  1541                         state.setInXmp(true);
       
  1542                         state = parseNonHTMLText(src, state);
       
  1543                     }
       
  1544                 } else if (tagName == iframeTag) {
       
  1545                     if (beginTag) {
       
  1546                         m_searchStopper = iframeEnd;
       
  1547                         m_searchStopperLength = 8;
       
  1548                         state.setInIFrame(true);
       
  1549                         state = parseNonHTMLText(src, state);
       
  1550                     }
       
  1551                 }
       
  1552                 if (src.isEmpty() && (state.inTitle() || inViewSourceMode()) && !state.inComment() && !(state.inScript() && m_currentScriptTagStartLineNumber)) {
       
  1553                     // We just ate the rest of the document as the #text node under the special tag!
       
  1554                     // Reset the state then retokenize without special handling.
       
  1555                     // Let the parser clean up the missing close tag.
       
  1556                     // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're
       
  1557                     // at the end of the document unless m_noMoreData is also true. We need
       
  1558                     // to detect this case elsewhere, and save the state somewhere other
       
  1559                     // than a local variable.
       
  1560                     state = savedState;
       
  1561                     src = savedSrc;
       
  1562                     m_lineNumber = savedLineno;
       
  1563                     m_scriptCodeSize = 0;
       
  1564                 }
       
  1565             }
       
  1566             if (tagName == plaintextTag)
       
  1567                 state.setInPlainText(beginTag);
       
  1568             return state; // Finished parsing tag!
       
  1569         }
       
  1570         } // end switch
       
  1571     }
       
  1572     m_cBufferPos = cBufferPos;
       
  1573     return state;
       
  1574 }
       
  1575 
       
  1576 inline bool LegacyHTMLDocumentParser::continueProcessing(int& processedCount, double startTime, State &state)
       
  1577 {
       
  1578     // We don't want to be checking elapsed time with every character, so we only check after we've
       
  1579     // processed a certain number of characters.
       
  1580     bool allowedYield = state.allowYield();
       
  1581     state.setAllowYield(false);
       
  1582     if (!state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript && (processedCount > m_tokenizerChunkSize || allowedYield)) {
       
  1583         processedCount = 0;
       
  1584         if (currentTime() - startTime > m_tokenizerTimeDelay) {
       
  1585             /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to
       
  1586                load, but this hurts overall performance on slower machines.  For now turn this
       
  1587                off.
       
  1588             || (!document()->haveStylesheetsLoaded() &&
       
  1589                 (document()->documentElement()->id() != ID_HTML || document()->body()))) {*/
       
  1590             // Schedule the timer to keep processing as soon as possible.
       
  1591             m_timer.startOneShot(0);
       
  1592             return false;
       
  1593         }
       
  1594     }
       
  1595 
       
  1596     processedCount++;
       
  1597     return true;
       
  1598 }
       
  1599 
       
  1600 // Turns the state machine one crank using the passed-in State object.
       
  1601 // This does not modify m_state directly in order to be reentrant.
       
  1602 ALWAYS_INLINE void LegacyHTMLDocumentParser::advance(State& state)
       
  1603 {
       
  1604     // do we need to enlarge the buffer?
       
  1605     checkBuffer();
       
  1606 
       
  1607     UChar cc = *m_src;
       
  1608 
       
  1609     bool wasSkipLF = state.skipLF();
       
  1610     if (wasSkipLF)
       
  1611         state.setSkipLF(false);
       
  1612 
       
  1613     if (wasSkipLF && (cc == '\n'))
       
  1614         m_src.advance();
       
  1615     else if (state.needsSpecialWriteHandling()) {
       
  1616         // it's important to keep needsSpecialWriteHandling with the flags this block tests
       
  1617         if (state.hasEntityState())
       
  1618             state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState());
       
  1619         else if (state.inPlainText())
       
  1620             state = parseText(m_src, state);
       
  1621         else if (state.inAnyNonHTMLText())
       
  1622             state = parseNonHTMLText(m_src, state);
       
  1623         else if (state.inComment())
       
  1624             state = parseComment(m_src, state);
       
  1625         else if (state.inDoctype())
       
  1626             state = parseDoctype(m_src, state);
       
  1627         else if (state.inServer())
       
  1628             state = parseServer(m_src, state);
       
  1629         else if (state.inProcessingInstruction())
       
  1630             state = parseProcessingInstruction(m_src, state);
       
  1631         else if (state.hasTagState())
       
  1632             state = parseTag(m_src, state);
       
  1633         else if (state.startTag()) {
       
  1634             state.setStartTag(false);
       
  1635 
       
  1636             switch (cc) {
       
  1637             case '/':
       
  1638                 break;
       
  1639             case '!': {
       
  1640                 // <!-- comment --> or <!DOCTYPE ...>
       
  1641                 searchCount = 1; // Look for '<!--' sequence to start comment or '<!DOCTYPE' sequence to start doctype
       
  1642                 m_doctypeSearchCount = 1;
       
  1643                 break;
       
  1644             }
       
  1645             case '?': {
       
  1646                 // xml processing instruction
       
  1647                 state.setInProcessingInstruction(true);
       
  1648                 tquote = NoQuote;
       
  1649                 state = parseProcessingInstruction(m_src, state);
       
  1650                 return;
       
  1651             }
       
  1652             case '%':
       
  1653                 if (!m_brokenServer) {
       
  1654                     // <% server stuff, handle as comment %>
       
  1655                     state.setInServer(true);
       
  1656                     tquote = NoQuote;
       
  1657                     state = parseServer(m_src, state);
       
  1658                     return;
       
  1659                 }
       
  1660                 // else fall through
       
  1661             default: {
       
  1662                 if (((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) {
       
  1663                     // Start of a Start-Tag
       
  1664                 } else {
       
  1665                     // Invalid tag
       
  1666                     // Add as is
       
  1667                     *m_dest = '<';
       
  1668                     m_dest++;
       
  1669                     return;
       
  1670                 }
       
  1671             }
       
  1672             }; // end case
       
  1673 
       
  1674             processToken();
       
  1675 
       
  1676             m_cBufferPos = 0;
       
  1677             state.setTagState(TagName);
       
  1678             state = parseTag(m_src, state);
       
  1679         }
       
  1680     } else if (cc == '&' && !m_src.escaped()) {
       
  1681         m_src.advancePastNonNewline();
       
  1682         state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState());
       
  1683     } else if (cc == '<' && !m_src.escaped()) {
       
  1684         m_currentTagStartLineNumber = m_lineNumber;
       
  1685         m_src.advancePastNonNewline();
       
  1686         state.setStartTag(true);
       
  1687         state.setDiscardLF(false);
       
  1688     } else if (cc == '\n' || cc == '\r') {
       
  1689         if (state.discardLF())
       
  1690             // Ignore this LF
       
  1691             state.setDiscardLF(false); // We have discarded 1 LF
       
  1692         else {
       
  1693             // Process this LF
       
  1694             *m_dest++ = '\n';
       
  1695             if (cc == '\r' && !m_src.excludeLineNumbers())
       
  1696                 m_lineNumber++;
       
  1697         }
       
  1698 
       
  1699         /* Check for MS-DOS CRLF sequence */
       
  1700         if (cc == '\r')
       
  1701             state.setSkipLF(true);
       
  1702         m_src.advance(m_lineNumber);
       
  1703     } else {
       
  1704         state.setDiscardLF(false);
       
  1705         *m_dest++ = cc;
       
  1706         m_src.advancePastNonNewline();
       
  1707     }
       
  1708 }
       
  1709 
       
  1710 void LegacyHTMLDocumentParser::willWriteHTML(const SegmentedString& source)
       
  1711 {
       
  1712     #if ENABLE(INSPECTOR)
       
  1713         if (InspectorTimelineAgent* timelineAgent = document()->inspectorTimelineAgent())
       
  1714             timelineAgent->willWriteHTML(source.length(), m_lineNumber);
       
  1715     #endif
       
  1716 }
       
  1717 
       
  1718 void LegacyHTMLDocumentParser::didWriteHTML()
       
  1719 {
       
  1720     #if ENABLE(INSPECTOR)
       
  1721         if (InspectorTimelineAgent* timelineAgent = document()->inspectorTimelineAgent())
       
  1722             timelineAgent->didWriteHTML(m_lineNumber);
       
  1723     #endif
       
  1724 }
       
  1725 
       
  1726 void LegacyHTMLDocumentParser::write(const SegmentedString& str, bool appendData)
       
  1727 {
       
  1728     if (!m_buffer)
       
  1729         return;
       
  1730 
       
  1731     if (m_parserStopped)
       
  1732         return;
       
  1733 
       
  1734     SegmentedString source(str);
       
  1735     if (m_executingScript)
       
  1736         source.setExcludeLineNumbers();
       
  1737 
       
  1738     if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) {
       
  1739         // don't parse; we will do this later
       
  1740         if (m_currentPrependingSrc)
       
  1741             m_currentPrependingSrc->append(source);
       
  1742         else {
       
  1743             m_pendingSrc.append(source);
       
  1744 #if PRELOAD_SCANNER_ENABLED
       
  1745             if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
       
  1746                 m_preloadScanner->write(source);
       
  1747 #endif
       
  1748         }
       
  1749         return;
       
  1750     }
       
  1751 
       
  1752 #if PRELOAD_SCANNER_ENABLED
       
  1753     if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)
       
  1754         m_preloadScanner->end();
       
  1755 #endif
       
  1756 
       
  1757     if (!m_src.isEmpty())
       
  1758         m_src.append(source);
       
  1759     else
       
  1760         setSrc(source);
       
  1761 
       
  1762     // Once a timer is set, it has control of when the parser continues.
       
  1763     if (m_timer.isActive())
       
  1764         return;
       
  1765 
       
  1766     bool wasInWrite = m_inWrite;
       
  1767     m_inWrite = true;
       
  1768 
       
  1769     willWriteHTML(source);
       
  1770 
       
  1771     Frame* frame = document()->frame();
       
  1772     State state = m_state;
       
  1773     int processedCount = 0;
       
  1774     double startTime = currentTime();
       
  1775 
       
  1776     while (!m_src.isEmpty() && (!frame || !frame->redirectScheduler()->locationChangePending())) {
       
  1777         if (!continueProcessing(processedCount, startTime, state))
       
  1778             break;
       
  1779         advance(state);
       
  1780     }
       
  1781 
       
  1782     didWriteHTML();
       
  1783 
       
  1784     m_inWrite = wasInWrite;
       
  1785     m_state = state;
       
  1786 
       
  1787     if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
       
  1788         end(); // this actually causes us to be deleted
       
  1789 
       
  1790     // After parsing, go ahead and dispatch image beforeload events, but only if we're doing
       
  1791     // document parsing.  For document fragments we wait, since they'll likely end up in the document by the time
       
  1792     // the beforeload events fire.
       
  1793     if (!m_fragment)
       
  1794         ImageLoader::dispatchPendingBeforeLoadEvents();
       
  1795 }
       
  1796 
       
  1797 void LegacyHTMLDocumentParser::insert(const SegmentedString& source)
       
  1798 {
       
  1799     // FIXME: forceSynchronous should always be the same as the bool passed to
       
  1800     // write().  However LegacyHTMLDocumentParser uses write("", false) to pump
       
  1801     // the parser (after running external scripts, etc.) thus necessitating a
       
  1802     // separate state for forceSynchronous.
       
  1803     bool wasForcedSynchronous = forceSynchronous();
       
  1804     setForceSynchronous(true);
       
  1805     write(source, false);
       
  1806     setForceSynchronous(wasForcedSynchronous);
       
  1807 }
       
  1808 
       
  1809 void LegacyHTMLDocumentParser::append(const SegmentedString& source)
       
  1810 {
       
  1811     write(source, true);
       
  1812 }
       
  1813 
       
  1814 void LegacyHTMLDocumentParser::stopParsing()
       
  1815 {
       
  1816     DocumentParser::stopParsing();
       
  1817     m_timer.stop();
       
  1818 
       
  1819     // FIXME: Why is LegacyHTMLDocumentParser the only DocumentParser which calls checkCompleted?
       
  1820     // The FrameLoader needs to know that the parser has finished with its data,
       
  1821     // regardless of whether it happened naturally or due to manual intervention.
       
  1822     if (!m_fragment && document()->frame())
       
  1823         document()->frame()->loader()->checkCompleted();
       
  1824 }
       
  1825 
       
  1826 bool LegacyHTMLDocumentParser::processingData() const
       
  1827 {
       
  1828     return m_timer.isActive() || m_inWrite;
       
  1829 }
       
  1830 
       
  1831 void LegacyHTMLDocumentParser::timerFired(Timer<LegacyHTMLDocumentParser>*)
       
  1832 {
       
  1833     if (document()->view() && document()->view()->layoutPending() && !document()->minimumLayoutDelay()) {
       
  1834         // Restart the timer and let layout win.  This is basically a way of ensuring that the layout
       
  1835         // timer has higher priority than our timer.
       
  1836         m_timer.startOneShot(0);
       
  1837         return;
       
  1838     }
       
  1839 
       
  1840     // Invoke write() as though more data came in. This might cause us to get deleted.
       
  1841     write(SegmentedString(), true);
       
  1842 }
       
  1843 
       
  1844 void LegacyHTMLDocumentParser::end()
       
  1845 {
       
  1846     ASSERT(!m_timer.isActive());
       
  1847     m_timer.stop(); // Only helps if assertion above fires, but do it anyway.
       
  1848 
       
  1849     if (m_buffer) {
       
  1850         // parseTag is using the buffer for different matters
       
  1851         if (!m_state.hasTagState())
       
  1852             processToken();
       
  1853 
       
  1854         fastFree(m_scriptCode);
       
  1855         m_scriptCode = 0;
       
  1856         m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
       
  1857 
       
  1858         fastFree(m_buffer);
       
  1859         m_buffer = 0;
       
  1860     }
       
  1861 
       
  1862     if (!inViewSourceMode())
       
  1863         m_treeBuilder->finished();
       
  1864     else
       
  1865         document()->finishedParsing();
       
  1866 }
       
  1867 
       
  1868 void LegacyHTMLDocumentParser::finish()
       
  1869 {
       
  1870     // do this as long as we don't find matching comment ends
       
  1871     while ((m_state.inComment() || m_state.inServer()) && m_scriptCode && m_scriptCodeSize) {
       
  1872         // we've found an unmatched comment start
       
  1873         if (m_state.inComment())
       
  1874             m_brokenComments = true;
       
  1875         else
       
  1876             m_brokenServer = true;
       
  1877         checkScriptBuffer();
       
  1878         m_scriptCode[m_scriptCodeSize] = 0;
       
  1879         m_scriptCode[m_scriptCodeSize + 1] = 0;
       
  1880         int pos;
       
  1881         String food;
       
  1882         if (m_state.inScript() || m_state.inStyle() || m_state.inTextArea())
       
  1883             food = String(m_scriptCode, m_scriptCodeSize);
       
  1884         else if (m_state.inServer()) {
       
  1885             food = "<";
       
  1886             food.append(m_scriptCode, m_scriptCodeSize);
       
  1887         } else {
       
  1888             pos = find(m_scriptCode, m_scriptCodeSize, '>');
       
  1889             food = String(m_scriptCode + pos + 1, m_scriptCodeSize - pos - 1);
       
  1890         }
       
  1891         fastFree(m_scriptCode);
       
  1892         m_scriptCode = 0;
       
  1893         m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;
       
  1894         m_state.setInComment(false);
       
  1895         m_state.setInServer(false);
       
  1896         if (!food.isEmpty())
       
  1897             write(food, true);
       
  1898     }
       
  1899     // this indicates we will not receive any more data... but if we are waiting on
       
  1900     // an external script to load, we can't finish parsing until that is done
       
  1901     m_noMoreData = true;
       
  1902     if (!m_inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive())
       
  1903         end(); // this actually causes us to be deleted
       
  1904 }
       
  1905 
       
  1906 bool LegacyHTMLDocumentParser::finishWasCalled()
       
  1907 {
       
  1908     return m_noMoreData;
       
  1909 }
       
  1910 
       
  1911 PassRefPtr<Node> LegacyHTMLDocumentParser::processToken()
       
  1912 {
       
  1913     if (m_dest > m_buffer) {
       
  1914         m_currentToken.text = StringImpl::createStrippingNullCharacters(m_buffer, m_dest - m_buffer);
       
  1915         if (m_currentToken.tagName != commentAtom)
       
  1916             m_currentToken.tagName = textAtom;
       
  1917     } else if (m_currentToken.tagName == nullAtom) {
       
  1918         m_currentToken.reset();
       
  1919         return 0;
       
  1920     }
       
  1921 
       
  1922     m_dest = m_buffer;
       
  1923 
       
  1924     RefPtr<Node> n;
       
  1925 
       
  1926     if (!m_parserStopped) {
       
  1927         if (NamedNodeMap* map = m_currentToken.attrs.get())
       
  1928             map->shrinkToLength();
       
  1929         if (inViewSourceMode())
       
  1930             static_cast<HTMLViewSourceDocument*>(document())->addViewSourceToken(&m_currentToken);
       
  1931         else
       
  1932             // pass the token over to the parser, the parser DOES NOT delete the token
       
  1933             n = m_treeBuilder->parseToken(&m_currentToken);
       
  1934     }
       
  1935     m_currentToken.reset();
       
  1936 
       
  1937     return n.release();
       
  1938 }
       
  1939 
       
  1940 void LegacyHTMLDocumentParser::processDoctypeToken()
       
  1941 {
       
  1942     if (inViewSourceMode())
       
  1943         static_cast<HTMLViewSourceDocument*>(document())->addViewSourceDoctypeToken(&m_doctypeToken);
       
  1944     else
       
  1945         m_treeBuilder->parseDoctypeToken(&m_doctypeToken);
       
  1946 }
       
  1947 
       
  1948 LegacyHTMLDocumentParser::~LegacyHTMLDocumentParser()
       
  1949 {
       
  1950     ASSERT(!m_inWrite);
       
  1951     reset();
       
  1952 }
       
  1953 
       
  1954 
       
  1955 void LegacyHTMLDocumentParser::enlargeBuffer(int len)
       
  1956 {
       
  1957     // Resize policy: Always at least double the size of the buffer each time.
       
  1958     int delta = max(len, m_bufferSize);
       
  1959 
       
  1960     // Check for overflow.
       
  1961     // For now, handle overflow the same way we handle fastRealloc failure, with CRASH.
       
  1962     static const int maxSize = INT_MAX / sizeof(UChar);
       
  1963     if (delta > maxSize - m_bufferSize)
       
  1964         CRASH();
       
  1965 
       
  1966     int newSize = m_bufferSize + delta;
       
  1967     int oldOffset = m_dest - m_buffer;
       
  1968     m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar)));
       
  1969     m_dest = m_buffer + oldOffset;
       
  1970     m_bufferSize = newSize;
       
  1971 }
       
  1972 
       
  1973 void LegacyHTMLDocumentParser::enlargeScriptBuffer(int len)
       
  1974 {
       
  1975     // Resize policy: Always at least double the size of the buffer each time.
       
  1976     int delta = max(len, m_scriptCodeCapacity);
       
  1977 
       
  1978     // Check for overflow.
       
  1979     // For now, handle overflow the same way we handle fastRealloc failure, with CRASH.
       
  1980     static const int maxSize = INT_MAX / sizeof(UChar);
       
  1981     if (delta > maxSize - m_scriptCodeCapacity)
       
  1982         CRASH();
       
  1983 
       
  1984     int newSize = m_scriptCodeCapacity + delta;
       
  1985     // If we allow fastRealloc(ptr, 0), it will call CRASH(). We run into this
       
  1986     // case if the HTML being parsed begins with "<!--" and there's more data
       
  1987     // coming.
       
  1988     if (!newSize) {
       
  1989         ASSERT(!m_scriptCode);
       
  1990         return;
       
  1991     }
       
  1992 
       
  1993     m_scriptCode = static_cast<UChar*>(fastRealloc(m_scriptCode, newSize * sizeof(UChar)));
       
  1994     m_scriptCodeCapacity = newSize;
       
  1995 }
       
  1996 
       
  1997 void LegacyHTMLDocumentParser::executeScriptsWaitingForStylesheets()
       
  1998 {
       
  1999     ASSERT(document()->haveStylesheetsLoaded());
       
  2000 
       
  2001     if (m_hasScriptsWaitingForStylesheets)
       
  2002         notifyFinished(0);
       
  2003 }
       
  2004 
       
  2005 void LegacyHTMLDocumentParser::notifyFinished(CachedResource*)
       
  2006 {
       
  2007     executeExternalScriptsIfReady();
       
  2008 }
       
  2009 
       
  2010 void LegacyHTMLDocumentParser::executeExternalScriptsIfReady()
       
  2011 {
       
  2012     ASSERT(!m_pendingScripts.isEmpty());
       
  2013 
       
  2014     // Make external scripts wait for external stylesheets.
       
  2015     // FIXME: This needs to be done for inline scripts too.
       
  2016     m_hasScriptsWaitingForStylesheets = !document()->haveStylesheetsLoaded();
       
  2017     if (m_hasScriptsWaitingForStylesheets)
       
  2018         return;
       
  2019 
       
  2020     bool finished = false;
       
  2021 
       
  2022     double startTime = currentTime();
       
  2023     while (!finished && m_pendingScripts.first()->isLoaded()) {
       
  2024         if (!continueExecutingExternalScripts(startTime))
       
  2025             break;
       
  2026 
       
  2027         CachedResourceHandle<CachedScript> cs = m_pendingScripts.takeFirst();
       
  2028         ASSERT(cache()->disabled() || cs->accessCount() > 0);
       
  2029 
       
  2030         setSrc(SegmentedString());
       
  2031 
       
  2032         // make sure we forget about the script before we execute the new one
       
  2033         // infinite recursion might happen otherwise
       
  2034         ScriptSourceCode sourceCode(cs.get());
       
  2035         bool errorOccurred = cs->errorOccurred();
       
  2036         cs->removeClient(this);
       
  2037 
       
  2038         RefPtr<Node> n = m_scriptNode.release();
       
  2039 
       
  2040         if (errorOccurred)
       
  2041             n->dispatchEvent(Event::create(eventNames().errorEvent, true, false));
       
  2042         else {
       
  2043             if (static_cast<HTMLScriptElement*>(n.get())->shouldExecuteAsJavaScript())
       
  2044                 m_state = scriptExecution(sourceCode, m_state);
       
  2045 #if ENABLE(XHTMLMP)
       
  2046             else
       
  2047                 document()->setShouldProcessNoscriptElement(true);
       
  2048 #endif
       
  2049             n->dispatchEvent(Event::create(eventNames().loadEvent, false, false));
       
  2050         }
       
  2051 
       
  2052         // The state of m_pendingScripts.isEmpty() can change inside the scriptExecution()
       
  2053         // call above, so test afterwards.
       
  2054         finished = m_pendingScripts.isEmpty();
       
  2055         if (finished) {
       
  2056             ASSERT(!m_hasScriptsWaitingForStylesheets);
       
  2057             m_state.setLoadingExtScript(false);
       
  2058         } else if (m_hasScriptsWaitingForStylesheets) {
       
  2059             // m_hasScriptsWaitingForStylesheets flag might have changed during the script execution.
       
  2060             // If it did we are now blocked waiting for stylesheets and should not execute more scripts until they arrive.
       
  2061             finished = true;
       
  2062         }
       
  2063 
       
  2064         // 'm_requestingScript' is true when we are called synchronously from
       
  2065         // scriptHandler(). In that case scriptHandler() will take care
       
  2066         // of m_pendingSrc.
       
  2067         if (!m_requestingScript) {
       
  2068             SegmentedString rest = m_pendingSrc;
       
  2069             m_pendingSrc.clear();
       
  2070             write(rest, false);
       
  2071             // we might be deleted at this point, do not access any members.
       
  2072         }
       
  2073     }
       
  2074 }
       
  2075 
       
  2076 void LegacyHTMLDocumentParser::executeExternalScriptsTimerFired(Timer<LegacyHTMLDocumentParser>*)
       
  2077 {
       
  2078     if (document()->view() && document()->view()->layoutPending() && !document()->minimumLayoutDelay()) {
       
  2079         // Restart the timer and do layout first.
       
  2080         m_externalScriptsTimer.startOneShot(0);
       
  2081         return;
       
  2082     }
       
  2083 
       
  2084     // Continue executing external scripts.
       
  2085     executeExternalScriptsIfReady();
       
  2086 }
       
  2087 
       
  2088 bool LegacyHTMLDocumentParser::continueExecutingExternalScripts(double startTime)
       
  2089 {
       
  2090     if (m_externalScriptsTimer.isActive())
       
  2091         return false;
       
  2092 
       
  2093     if (currentTime() - startTime > m_tokenizerTimeDelay) {
       
  2094         // Schedule the timer to keep processing as soon as possible.
       
  2095         m_externalScriptsTimer.startOneShot(0);
       
  2096         return false;
       
  2097     }
       
  2098     return true;
       
  2099 }
       
  2100 
       
  2101 bool LegacyHTMLDocumentParser::isWaitingForScripts() const
       
  2102 {
       
  2103     return m_state.loadingExtScript();
       
  2104 }
       
  2105 
       
  2106 void LegacyHTMLDocumentParser::setSrc(const SegmentedString& source)
       
  2107 {
       
  2108     m_src = source;
       
  2109 }
       
  2110 
       
  2111 void LegacyHTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
       
  2112 {
       
  2113     LegacyHTMLDocumentParser parser(fragment, scriptingPermission);
       
  2114     parser.setForceSynchronous(true);
       
  2115     parser.write(source, true);
       
  2116     parser.finish();
       
  2117     ASSERT(!parser.processingData()); // make sure we're done (see 3963151)
       
  2118 }
       
  2119 
       
  2120 UChar decodeNamedEntity(const char* name)
       
  2121 {
       
  2122     const Entity* e = findEntity(name, strlen(name));
       
  2123     return e ? e->code : 0;
       
  2124 }
       
  2125 
       
  2126 }