WebCore/html/HTMLDocumentParser.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
       
     3  *
       
     4  * Redistribution and use in source and binary forms, with or without
       
     5  * modification, are permitted provided that the following conditions
       
     6  * are met:
       
     7  * 1. Redistributions of source code must retain the above copyright
       
     8  *    notice, this list of conditions and the following disclaimer.
       
     9  * 2. Redistributions in binary form must reproduce the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer in the
       
    11  *    documentation and/or other materials provided with the distribution.
       
    12  *
       
    13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
       
    14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
       
    17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
       
    18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
       
    19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
       
    20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
       
    21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    24  */
       
    25 
       
    26 #include "config.h"
       
    27 #include "HTMLDocumentParser.h"
       
    28 
       
    29 #include "DocumentFragment.h"
       
    30 #include "Element.h"
       
    31 #include "Frame.h"
       
    32 #include "HTMLParserScheduler.h"
       
    33 #include "HTMLTokenizer.h"
       
    34 #include "HTMLPreloadScanner.h"
       
    35 #include "HTMLScriptRunner.h"
       
    36 #include "HTMLTreeBuilder.h"
       
    37 #include "HTMLDocument.h"
       
    38 #include "XSSAuditor.h"
       
    39 #include <wtf/CurrentTime.h>
       
    40 
       
    41 #if ENABLE(INSPECTOR)
       
    42 #include "InspectorTimelineAgent.h"
       
    43 #endif
       
    44 
       
    45 namespace WebCore {
       
    46 
       
    47 namespace {
       
    48 
       
    49 class NestingLevelIncrementer : public Noncopyable {
       
    50 public:
       
    51     explicit NestingLevelIncrementer(int& counter)
       
    52         : m_counter(&counter)
       
    53     {
       
    54         ++(*m_counter);
       
    55     }
       
    56 
       
    57     ~NestingLevelIncrementer()
       
    58     {
       
    59         --(*m_counter);
       
    60     }
       
    61 
       
    62 private:
       
    63     int* m_counter;
       
    64 };
       
    65 
       
    66 } // namespace
       
    67 
       
    68 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors)
       
    69     : ScriptableDocumentParser(document)
       
    70     , m_tokenizer(new HTMLTokenizer)
       
    71     , m_scriptRunner(new HTMLScriptRunner(document, this))
       
    72     , m_treeBuilder(new HTMLTreeBuilder(m_tokenizer.get(), document, reportErrors))
       
    73     , m_parserScheduler(new HTMLParserScheduler(this))
       
    74     , m_endWasDelayed(false)
       
    75     , m_writeNestingLevel(0)
       
    76 {
       
    77     begin();
       
    78 }
       
    79 
       
    80 // FIXME: Member variables should be grouped into self-initializing structs to
       
    81 // minimize code duplication between these constructors.
       
    82 HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
       
    83     : ScriptableDocumentParser(fragment->document())
       
    84     , m_tokenizer(new HTMLTokenizer)
       
    85     , m_treeBuilder(new HTMLTreeBuilder(m_tokenizer.get(), fragment, scriptingPermission))
       
    86     , m_endWasDelayed(false)
       
    87     , m_writeNestingLevel(0)
       
    88 {
       
    89     begin();
       
    90 }
       
    91 
       
    92 HTMLDocumentParser::~HTMLDocumentParser()
       
    93 {
       
    94     // FIXME: We'd like to ASSERT that normal operation of this class clears
       
    95     // out any delayed actions, but we can't because we're unceremoniously
       
    96     // deleted.  If there were a required call to some sort of cancel function,
       
    97     // then we could ASSERT some invariants here.
       
    98 }
       
    99 
       
   100 void HTMLDocumentParser::begin()
       
   101 {
       
   102     // FIXME: Should we reset the tokenizer?
       
   103 }
       
   104 
       
   105 void HTMLDocumentParser::stopParsing()
       
   106 {
       
   107     DocumentParser::stopParsing();
       
   108     m_parserScheduler.clear(); // Deleting the scheduler will clear any timers.
       
   109 }
       
   110 
       
   111 bool HTMLDocumentParser::processingData() const
       
   112 {
       
   113     return isScheduledForResume() || inWrite();
       
   114 }
       
   115 
       
   116 void HTMLDocumentParser::pumpTokenizerIfPossible(SynchronousMode mode)
       
   117 {
       
   118     if (m_parserStopped || m_treeBuilder->isPaused())
       
   119         return;
       
   120 
       
   121     // Once a resume is scheduled, HTMLParserScheduler controls when we next pump.
       
   122     if (isScheduledForResume()) {
       
   123         ASSERT(mode == AllowYield);
       
   124         return;
       
   125     }
       
   126 
       
   127     pumpTokenizer(mode);
       
   128 }
       
   129 
       
   130 bool HTMLDocumentParser::isScheduledForResume() const
       
   131 {
       
   132     return m_parserScheduler && m_parserScheduler->isScheduledForResume();
       
   133 }
       
   134 
       
   135 // Used by HTMLParserScheduler
       
   136 void HTMLDocumentParser::resumeParsingAfterYield()
       
   137 {
       
   138     // We should never be here unless we can pump immediately.  Call pumpTokenizer()
       
   139     // directly so that ASSERTS will fire if we're wrong.
       
   140     pumpTokenizer(AllowYield);
       
   141 }
       
   142 
       
   143 bool HTMLDocumentParser::runScriptsForPausedTreeBuilder()
       
   144 {
       
   145     ASSERT(m_treeBuilder->isPaused());
       
   146 
       
   147     int scriptStartLine = 0;
       
   148     RefPtr<Element> scriptElement = m_treeBuilder->takeScriptToProcess(scriptStartLine);
       
   149     // We will not have a scriptRunner when parsing a DocumentFragment.
       
   150     if (!m_scriptRunner)
       
   151         return true;
       
   152     return m_scriptRunner->execute(scriptElement.release(), scriptStartLine);
       
   153 }
       
   154 
       
   155 void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
       
   156 {
       
   157     ASSERT(!m_parserStopped);
       
   158     ASSERT(!m_treeBuilder->isPaused());
       
   159     ASSERT(!isScheduledForResume());
       
   160 
       
   161     // We tell the InspectorTimelineAgent about every pump, even if we
       
   162     // end up pumping nothing.  It can filter out empty pumps itself.
       
   163     willPumpLexer();
       
   164 
       
   165     HTMLParserScheduler::PumpSession session;
       
   166     // FIXME: This loop body has is now too long and needs cleanup.
       
   167     while (mode == ForceSynchronous || (!m_parserStopped && m_parserScheduler->shouldContinueParsing(session))) {
       
   168         if (!m_tokenizer->nextToken(m_input.current(), m_token))
       
   169             break;
       
   170 
       
   171         m_treeBuilder->constructTreeFromToken(m_token);
       
   172         m_token.clear();
       
   173 
       
   174         // The parser will pause itself when waiting on a script to load or run.
       
   175         if (!m_treeBuilder->isPaused())
       
   176             continue;
       
   177 
       
   178         // If we're paused waiting for a script, we try to execute scripts before continuing.
       
   179         bool shouldContinueParsing = runScriptsForPausedTreeBuilder();
       
   180         m_treeBuilder->setPaused(!shouldContinueParsing);
       
   181         if (!shouldContinueParsing)
       
   182             break;
       
   183     }
       
   184 
       
   185     if (isWaitingForScripts()) {
       
   186         ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState);
       
   187         if (!m_preloadScanner) {
       
   188             m_preloadScanner.set(new HTMLPreloadScanner(m_document));
       
   189             m_preloadScanner->appendToEnd(m_input.current());
       
   190         }
       
   191         m_preloadScanner->scan();
       
   192     }
       
   193 
       
   194     didPumpLexer();
       
   195 }
       
   196 
       
   197 void HTMLDocumentParser::willPumpLexer()
       
   198 {
       
   199 #if ENABLE(INSPECTOR)
       
   200     // FIXME: m_input.current().length() is only accurate if we
       
   201     // end up parsing the whole buffer in this pump.  We should pass how
       
   202     // much we parsed as part of didWriteHTML instead of willWriteHTML.
       
   203     if (InspectorTimelineAgent* timelineAgent = m_document->inspectorTimelineAgent())
       
   204         timelineAgent->willWriteHTML(m_input.current().length(), m_tokenizer->lineNumber());
       
   205 #endif
       
   206 }
       
   207 
       
   208 void HTMLDocumentParser::didPumpLexer()
       
   209 {
       
   210 #if ENABLE(INSPECTOR)
       
   211     if (InspectorTimelineAgent* timelineAgent = m_document->inspectorTimelineAgent())
       
   212         timelineAgent->didWriteHTML(m_tokenizer->lineNumber());
       
   213 #endif
       
   214 }
       
   215 
       
   216 void HTMLDocumentParser::insert(const SegmentedString& source)
       
   217 {
       
   218     if (m_parserStopped)
       
   219         return;
       
   220 
       
   221     if (m_scriptRunner && !m_scriptRunner->inScriptExecution() && m_input.haveSeenEndOfFile()) {
       
   222         // document.write was called without a current insertion point.
       
   223         // According to the spec, we're supposed to implicitly open the
       
   224         // document.  Unfortunately, that behavior isn't sufficiently compatible
       
   225         // with the web.  The working group is mulling over what exactly to
       
   226         // do.  In the meantime, we're going to try one of the potential
       
   227         // solutions, which is to ignore the write.
       
   228         // http://www.w3.org/Bugs/Public/show_bug.cgi?id=9767
       
   229         return;
       
   230     }
       
   231 
       
   232     {
       
   233         NestingLevelIncrementer nestingLevelIncrementer(m_writeNestingLevel);
       
   234 
       
   235         SegmentedString excludedLineNumberSource(source);
       
   236         excludedLineNumberSource.setExcludeLineNumbers();
       
   237         m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
       
   238         pumpTokenizerIfPossible(ForceSynchronous);
       
   239     }
       
   240 
       
   241     endIfDelayed();
       
   242 }
       
   243 
       
   244 void HTMLDocumentParser::append(const SegmentedString& source)
       
   245 {
       
   246     if (m_parserStopped)
       
   247         return;
       
   248 
       
   249     {
       
   250         NestingLevelIncrementer nestingLevelIncrementer(m_writeNestingLevel);
       
   251 
       
   252         m_input.appendToEnd(source);
       
   253         if (m_preloadScanner)
       
   254             m_preloadScanner->appendToEnd(source);
       
   255 
       
   256         if (m_writeNestingLevel > 1) {
       
   257             // We've gotten data off the network in a nested write.
       
   258             // We don't want to consume any more of the input stream now.  Do
       
   259             // not worry.  We'll consume this data in a less-nested write().
       
   260             return;
       
   261         }
       
   262 
       
   263         pumpTokenizerIfPossible(AllowYield);
       
   264     }
       
   265 
       
   266     endIfDelayed();
       
   267 }
       
   268 
       
   269 void HTMLDocumentParser::end()
       
   270 {
       
   271     ASSERT(!isScheduledForResume());
       
   272     // NOTE: This pump should only ever emit buffered character tokens,
       
   273     // so ForceSynchronous vs. AllowYield should be meaningless.
       
   274     pumpTokenizerIfPossible(ForceSynchronous);
       
   275 
       
   276     // Informs the the rest of WebCore that parsing is really finished (and deletes this).
       
   277     m_treeBuilder->finished();
       
   278 }
       
   279 
       
   280 void HTMLDocumentParser::attemptToEnd()
       
   281 {
       
   282     // finish() indicates we will not receive any more data. If we are waiting on
       
   283     // an external script to load, we can't finish parsing quite yet.
       
   284 
       
   285     if (shouldDelayEnd()) {
       
   286         m_endWasDelayed = true;
       
   287         return;
       
   288     }
       
   289     end();
       
   290 }
       
   291 
       
   292 void HTMLDocumentParser::endIfDelayed()
       
   293 {
       
   294     if (!m_endWasDelayed || shouldDelayEnd())
       
   295         return;
       
   296 
       
   297     m_endWasDelayed = false;
       
   298     end();
       
   299 }
       
   300 
       
   301 void HTMLDocumentParser::finish()
       
   302 {
       
   303     // We're not going to get any more data off the network, so we tell the
       
   304     // input stream we've reached the end of file.  finish() can be called more
       
   305     // than once, if the first time does not call end().
       
   306     if (!m_input.haveSeenEndOfFile())
       
   307         m_input.markEndOfFile();
       
   308     attemptToEnd();
       
   309 }
       
   310 
       
   311 bool HTMLDocumentParser::finishWasCalled()
       
   312 {
       
   313     return m_input.haveSeenEndOfFile();
       
   314 }
       
   315 
       
   316 // This function is virtual and just for the DocumentParser interface.
       
   317 bool HTMLDocumentParser::isExecutingScript() const
       
   318 {
       
   319     return inScriptExecution();
       
   320 }
       
   321 
       
   322 // This function is non-virtual and used throughout the implementation.
       
   323 bool HTMLDocumentParser::inScriptExecution() const
       
   324 {
       
   325     if (!m_scriptRunner)
       
   326         return false;
       
   327     return m_scriptRunner->inScriptExecution();
       
   328 }
       
   329 
       
   330 int HTMLDocumentParser::lineNumber() const
       
   331 {
       
   332     return m_tokenizer->lineNumber();
       
   333 }
       
   334 
       
   335 int HTMLDocumentParser::columnNumber() const
       
   336 {
       
   337     return m_tokenizer->columnNumber();
       
   338 }
       
   339 
       
   340 LegacyHTMLTreeBuilder* HTMLDocumentParser::htmlTreeBuilder() const
       
   341 {
       
   342     return m_treeBuilder->legacyTreeBuilder();
       
   343 }
       
   344 
       
   345 bool HTMLDocumentParser::isWaitingForScripts() const
       
   346 {
       
   347     return m_treeBuilder->isPaused();
       
   348 }
       
   349 
       
   350 void HTMLDocumentParser::resumeParsingAfterScriptExecution()
       
   351 {
       
   352     ASSERT(!inScriptExecution());
       
   353     ASSERT(!m_treeBuilder->isPaused());
       
   354 
       
   355     m_preloadScanner.clear();
       
   356     pumpTokenizerIfPossible(AllowYield);
       
   357     endIfDelayed();
       
   358 }
       
   359 
       
   360 void HTMLDocumentParser::watchForLoad(CachedResource* cachedScript)
       
   361 {
       
   362     ASSERT(!cachedScript->isLoaded());
       
   363     // addClient would call notifyFinished if the load were complete.
       
   364     // Callers do not expect to be re-entered from this call, so they should
       
   365     // not an already-loaded CachedResource.
       
   366     cachedScript->addClient(this);
       
   367 }
       
   368 
       
   369 void HTMLDocumentParser::stopWatchingForLoad(CachedResource* cachedScript)
       
   370 {
       
   371     cachedScript->removeClient(this);
       
   372 }
       
   373 
       
   374 bool HTMLDocumentParser::shouldLoadExternalScriptFromSrc(const AtomicString& srcValue)
       
   375 {
       
   376     if (!xssAuditor())
       
   377         return true;
       
   378     return xssAuditor()->canLoadExternalScriptFromSrc(srcValue);
       
   379 }
       
   380 
       
   381 void HTMLDocumentParser::notifyFinished(CachedResource* cachedResource)
       
   382 {
       
   383     ASSERT(m_scriptRunner);
       
   384     ASSERT(!inScriptExecution());
       
   385     ASSERT(m_treeBuilder->isPaused());
       
   386     // Note: We only ever wait on one script at a time, so we always know this
       
   387     // is the one we were waiting on and can un-pause the tree builder.
       
   388     m_treeBuilder->setPaused(false);
       
   389     bool shouldContinueParsing = m_scriptRunner->executeScriptsWaitingForLoad(cachedResource);
       
   390     m_treeBuilder->setPaused(!shouldContinueParsing);
       
   391     if (shouldContinueParsing)
       
   392         resumeParsingAfterScriptExecution();
       
   393 }
       
   394 
       
   395 void HTMLDocumentParser::executeScriptsWaitingForStylesheets()
       
   396 {
       
   397     // Document only calls this when the Document owns the DocumentParser
       
   398     // so this will not be called in the DocumentFragment case.
       
   399     ASSERT(m_scriptRunner);
       
   400     // Ignore calls unless we have a script blocking the parser waiting on a
       
   401     // stylesheet load.  Otherwise we are currently parsing and this
       
   402     // is a re-entrant call from encountering a </ style> tag.
       
   403     if (!m_scriptRunner->hasScriptsWaitingForStylesheets())
       
   404         return;
       
   405     ASSERT(!m_scriptRunner->inScriptExecution());
       
   406     ASSERT(m_treeBuilder->isPaused());
       
   407     // Note: We only ever wait on one script at a time, so we always know this
       
   408     // is the one we were waiting on and can un-pause the tree builder.
       
   409     m_treeBuilder->setPaused(false);
       
   410     bool shouldContinueParsing = m_scriptRunner->executeScriptsWaitingForStylesheets();
       
   411     m_treeBuilder->setPaused(!shouldContinueParsing);
       
   412     if (shouldContinueParsing)
       
   413         resumeParsingAfterScriptExecution();
       
   414 }
       
   415 
       
   416 ScriptController* HTMLDocumentParser::script() const
       
   417 {
       
   418     return m_document->frame() ? m_document->frame()->script() : 0;
       
   419 }
       
   420 
       
   421 void HTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
       
   422 {
       
   423     HTMLDocumentParser parser(fragment, scriptingPermission);
       
   424     parser.insert(source); // Use insert() so that the parser will not yield.
       
   425     parser.finish();
       
   426     ASSERT(!parser.processingData()); // Make sure we're done. <rdar://problem/3963151>
       
   427 }
       
   428 
       
   429 }