WebCore/html/HTMLTokenizer.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
       
     3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
       
     4  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
       
     5  *
       
     6  * Redistribution and use in source and binary forms, with or without
       
     7  * modification, are permitted provided that the following conditions
       
     8  * are met:
       
     9  * 1. Redistributions of source code must retain the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer.
       
    11  * 2. Redistributions in binary form must reproduce the above copyright
       
    12  *    notice, this list of conditions and the following disclaimer in the
       
    13  *    documentation and/or other materials provided with the distribution.
       
    14  *
       
    15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
       
    16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
       
    19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
       
    20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
       
    21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
       
    22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
       
    23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
       
    26  */
       
    27 
       
    28 #include "config.h"
       
    29 #include "HTMLTokenizer.h"
       
    30 
       
    31 #include "AtomicString.h"
       
    32 #include "HTMLEntityParser.h"
       
    33 #include "HTMLToken.h"
       
    34 #include "HTMLNames.h"
       
    35 #include "NotImplemented.h"
       
    36 #include <wtf/ASCIICType.h>
       
    37 #include <wtf/CurrentTime.h>
       
    38 #include <wtf/UnusedParam.h>
       
    39 #include <wtf/text/CString.h>
       
    40 #include <wtf/unicode/Unicode.h>
       
    41 
       
    42 using namespace WTF;
       
    43 
       
    44 namespace WebCore {
       
    45 
       
    46 using namespace HTMLNames;
       
    47 
       
     // Sentinel character value. The tokenizer states in nextToken() compare the
     // current input character against this marker and emit the end-of-file
     // token (or report a parse error) when it matches.
     const UChar HTMLTokenizer::InputStreamPreprocessor::endOfFileMarker = 0;
       
    49 
       
    50 namespace {
       
    51 
       
    52 inline UChar toLowerCase(UChar cc)
       
    53 {
       
    54     ASSERT(isASCIIUpper(cc));
       
    55     const int lowerCaseOffset = 0x20;
       
    56     return cc + lowerCaseOffset;
       
    57 }
       
    58 
       
    59 inline bool isTokenizerWhitespace(UChar cc)
       
    60 {
       
    61     return cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ';
       
    62 }
       
    63 
       
    64 inline void advanceStringAndASSERTIgnoringCase(SegmentedString& source, const char* expectedCharacters)
       
    65 {
       
    66     while (*expectedCharacters)
       
    67         source.advanceAndASSERTIgnoringCase(*expectedCharacters++);
       
    68 }
       
    69 
       
    70 inline bool vectorEqualsString(const Vector<UChar, 32>& vector, const String& string)
       
    71 {
       
    72     if (vector.size() != string.length())
       
    73         return false;
       
    74     const UChar* stringData = string.characters();
       
    75     const UChar* vectorData = vector.data();
       
    76     // FIXME: Is there a higher-level function we should be calling here?
       
    77     return !memcmp(stringData, vectorData, vector.size() * sizeof(UChar));
       
    78 }
       
    79 
       
    80 inline bool isEndTagBufferingState(HTMLTokenizer::State state)
       
    81 {
       
    82     switch (state) {
       
    83     case HTMLTokenizer::RCDATAEndTagOpenState:
       
    84     case HTMLTokenizer::RCDATAEndTagNameState:
       
    85     case HTMLTokenizer::RAWTEXTEndTagOpenState:
       
    86     case HTMLTokenizer::RAWTEXTEndTagNameState:
       
    87     case HTMLTokenizer::ScriptDataEndTagOpenState:
       
    88     case HTMLTokenizer::ScriptDataEndTagNameState:
       
    89     case HTMLTokenizer::ScriptDataEscapedEndTagOpenState:
       
    90     case HTMLTokenizer::ScriptDataEscapedEndTagNameState:
       
    91         return true;
       
    92     default:
       
    93         return false;
       
    94     }
       
    95 }
       
    96 
       
    97 }
       
    98 
       
    99 HTMLTokenizer::HTMLTokenizer()
       
   100 {
       
   101     reset();
       
   102 }
       
   103 
       
    // The destructor body is empty: no explicit cleanup is performed here.
    HTMLTokenizer::~HTMLTokenizer()
    {
    }
       
   107 
       
   108 void HTMLTokenizer::reset()
       
   109 {
       
   110     m_state = DataState;
       
   111     m_token = 0;
       
   112     m_lineNumber = 0;
       
   113     m_skipLeadingNewLineForListing = false;
       
   114     m_additionalAllowedCharacter = '\0';
       
   115 }
       
   116 
       
   117 inline bool HTMLTokenizer::processEntity(SegmentedString& source)
       
   118 {
       
   119     bool notEnoughCharacters = false;
       
   120     unsigned value = consumeHTMLEntity(source, notEnoughCharacters);
       
   121     if (notEnoughCharacters)
       
   122         return false;
       
   123     if (!value)
       
   124         bufferCharacter('&');
       
   125     else
       
   126         bufferCodePoint(value);
       
   127     return true;
       
   128 }
       
   129 
       
   130 #if COMPILER(MSVC)
       
   131 // We need to disable the "unreachable code" warning because we want to assert
       
   132 // that some code points aren't reached in the state machine.
       
   133 #pragma warning(disable: 4702)
       
   134 #endif
       
   135 
       
    // BEGIN_STATE opens a state inside the switch in nextToken(). It emits both
    // a case label (so the switch on m_state can dispatch to the state) and a
    // goto label of the same name (so the transition macros below can jump to it).
    #define BEGIN_STATE(stateName) case stateName: stateName:
    // END_STATE closes a state. Every path through a state is expected to leave
    // via a transition macro or a return, so reaching this point trips
    // ASSERT_NOT_REACHED().
    #define END_STATE() ASSERT_NOT_REACHED(); break;
       
   138 
       
    // We use this macro when the HTML5 spec says "reconsume the current input
    // character in the <mumble> state."
    //
    // It records the new state in m_state and jumps straight to that state's
    // label without touching the input source or the current character.
    // Must be expanded inside nextToken(), where the state labels live.
    #define RECONSUME_IN(stateName)                                            \
        do {                                                                   \
            m_state = stateName;                                               \
            goto stateName;                                                    \
        } while (false)
       
   146 
       
    // We use this macro when the HTML5 spec says "consume the next input
    // character ... and switch to the <mumble> state."
    //
    // m_state is assigned before the input is advanced, so if advance() fails
    // and we return early, the next nextToken() call dispatches on the new
    // state. On that early exit the enclosing function returns whatever
    // shouldEmitBufferedCharacterToken(source) reports. Otherwise |cc| is
    // refreshed from the preprocessor and control jumps to the state's label.
    // Relies on |source| and |cc| being in scope at the expansion site.
    #define ADVANCE_TO(stateName)                                              \
        do {                                                                   \
            m_state = stateName;                                               \
            if (!m_inputStreamPreprocessor.advance(source, m_lineNumber))      \
                return shouldEmitBufferedCharacterToken(source);               \
            cc = m_inputStreamPreprocessor.nextInputCharacter();               \
            goto stateName;                                                    \
        } while (false)
       
   157 
       
    // Sometimes there's more complicated logic in the spec that separates when
    // we consume the next input character and when we switch to a particular
    // state.  We handle those cases by advancing the source directly and using
    // this macro to switch to the indicated state.
    //
    // Unlike ADVANCE_TO, this macro only peeks at the source rather than
    // advancing it. If the source is empty or the peek fails, the enclosing
    // function returns shouldEmitBufferedCharacterToken(source) with m_state
    // already set to the new state. Otherwise |cc| is reloaded and control
    // jumps to the state's label.
    #define SWITCH_TO(stateName)                                               \
        do {                                                                   \
            m_state = stateName;                                               \
            if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) \
                return shouldEmitBufferedCharacterToken(source);               \
            cc = m_inputStreamPreprocessor.nextInputCharacter();               \
            goto stateName;                                                    \
        } while (false)
       
   170 
       
   171 
       
   172 inline void HTMLTokenizer::saveEndTagNameIfNeeded()
       
   173 {
       
   174     ASSERT(m_token->type() != HTMLToken::Uninitialized);
       
   175     if (m_token->type() == HTMLToken::StartTag)
       
   176         m_appropriateEndTagName = m_token->name();
       
   177 }
       
   178 
       
   179 // We use this function when the HTML5 spec says "Emit the current <mumble>
       
   180 // token. Switch to the <mumble> state."  We use the word "resume" instead of
       
   181 // switch to indicate that this macro actually returns and that we'll end up
       
   182 // in the state when we "resume" (i.e., are called again).
       
   183 bool HTMLTokenizer::emitAndResumeIn(SegmentedString& source, State state)
       
   184 {
       
   185     m_state = state;
       
   186     source.advance(m_lineNumber);
       
   187     saveEndTagNameIfNeeded();
       
   188     return true;
       
   189 }
       
   190 
       
   191 // Identical to emitAndResumeIn, except does not advance.
       
   192 bool HTMLTokenizer::emitAndReconsumeIn(SegmentedString&, State state)
       
   193 {
       
   194     m_state = state;
       
   195     saveEndTagNameIfNeeded();
       
   196     return true;
       
   197 }
       
   198 
       
   199 // Used to emit the EndOfFile token.
       
   200 // Check if we have buffered characters to emit first before emitting the EOF.
       
   201 bool HTMLTokenizer::emitEndOfFile(SegmentedString& source)
       
   202 {
       
   203     if (shouldEmitBufferedCharacterToken(source))
       
   204         return true;
       
   205     m_state = DataState;
       
   206     source.advance(m_lineNumber);
       
   207     m_token->clear();
       
   208     m_token->makeEndOfFile();
       
   209     return true;
       
   210 }
       
   211 
       
   212 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
       
   213 {
       
   214     ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
       
   215     source.advance(m_lineNumber);
       
   216     if (m_token->type() == HTMLToken::Character)
       
   217         return true;
       
   218     m_token->beginEndTag(m_bufferedEndTagName);
       
   219     m_bufferedEndTagName.clear();
       
   220     return false;
       
   221 }
       
   222 
       
    // Transition macro used when a buffered end tag has been recognized.
    // It records the new state, then calls flushBufferedEndTag(), which
    // advances the source. If that call returns true (the current token is a
    // Character token), the enclosing function returns true immediately.
    // Otherwise it peeks at the source; if the source is empty or the peek
    // fails, the enclosing function returns
    // shouldEmitBufferedCharacterToken(source). If input is available, |cc| is
    // reloaded and control jumps to the state's label.
    // Relies on |source| and |cc| being in scope at the expansion site.
    #define FLUSH_AND_ADVANCE_TO(stateName)                                    \
        do {                                                                   \
            m_state = stateName;                                               \
            if (flushBufferedEndTag(source))                                   \
                return true;                                                   \
            if (source.isEmpty()                                               \
                || !m_inputStreamPreprocessor.peek(source, m_lineNumber))      \
                return shouldEmitBufferedCharacterToken(source);               \
            cc = m_inputStreamPreprocessor.nextInputCharacter();               \
            goto stateName;                                                    \
        } while (false)
       
   234 
       
   235 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, State state)
       
   236 {
       
   237     m_state = state;
       
   238     flushBufferedEndTag(source);
       
   239     return true;
       
   240 }
       
   241 
       
   242 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
       
   243 {
       
   244     // If we have a token in progress, then we're supposed to be called back
       
   245     // with the same token so we can finish it.
       
   246     ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
       
   247     m_token = &token;
       
   248 
       
   249     if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) {
       
   250         // FIXME: This should call flushBufferedEndTag().
       
   251         // We started an end tag during our last iteration.
       
   252         m_token->beginEndTag(m_bufferedEndTagName);
       
   253         m_bufferedEndTagName.clear();
       
   254         if (m_state == DataState) {
       
   255             // We're back in the data state, so we must be done with the tag.
       
   256             return true;
       
   257         }
       
   258     }
       
   259 
       
   260     if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber))
       
   261         return shouldEmitBufferedCharacterToken(source);
       
   262     UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
       
   263 
       
   264     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
       
   265     // Note that this logic is different than the generic \r\n collapsing
       
   266     // handled in the input stream preprocessor.  This logic is here as an
       
   267     // "authoring convenience" so folks can write:
       
   268     //
       
   269     // <pre>
       
   270     // lorem ipsum
       
   271     // lorem ipsum
       
   272     // </pre>
       
   273     //
       
   274     // without getting an extra newline at the start of their <pre> element.
       
   275     if (m_skipLeadingNewLineForListing) {
       
   276         m_skipLeadingNewLineForListing = false;
       
   277         if (cc == '\n') {
       
   278             if (m_state == DataState)
       
   279                 ADVANCE_TO(DataState);
       
   280             if (m_state == RCDATAState)
       
   281                 ADVANCE_TO(RCDATAState);
       
   282             ASSERT_NOT_REACHED();
       
   283         }
       
   284     }
       
   285 
       
   286     // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
       
   287     switch (m_state) {
       
   288     BEGIN_STATE(DataState) {
       
   289         if (cc == '&')
       
   290             ADVANCE_TO(CharacterReferenceInDataState);
       
   291         else if (cc == '<') {
       
   292             if (m_token->type() == HTMLToken::Character) {
       
   293                 // We have a bunch of character tokens queued up that we
       
   294                 // are emitting lazily here.
       
   295                 return true;
       
   296             }
       
   297             ADVANCE_TO(TagOpenState);
       
   298         } else if (cc == InputStreamPreprocessor::endOfFileMarker)
       
   299             return emitEndOfFile(source);
       
   300         else {
       
   301             bufferCharacter(cc);
       
   302             ADVANCE_TO(DataState);
       
   303         }
       
   304     }
       
   305     END_STATE()
       
   306 
       
   307     BEGIN_STATE(CharacterReferenceInDataState) {
       
   308         if (!processEntity(source))
       
   309             return shouldEmitBufferedCharacterToken(source);
       
   310         SWITCH_TO(DataState);
       
   311     }
       
   312     END_STATE()
       
   313 
       
   314     BEGIN_STATE(RCDATAState) {
       
   315         if (cc == '&')
       
   316             ADVANCE_TO(CharacterReferenceInRCDATAState);
       
   317         else if (cc == '<')
       
   318             ADVANCE_TO(RCDATALessThanSignState);
       
   319         else if (cc == InputStreamPreprocessor::endOfFileMarker)
       
   320             return emitEndOfFile(source);
       
   321         else {
       
   322             bufferCharacter(cc);
       
   323             ADVANCE_TO(RCDATAState);
       
   324         }
       
   325     }
       
   326     END_STATE()
       
   327 
       
   328     BEGIN_STATE(CharacterReferenceInRCDATAState) {
       
   329         if (!processEntity(source))
       
   330             return shouldEmitBufferedCharacterToken(source);
       
   331         SWITCH_TO(RCDATAState);
       
   332     }
       
   333     END_STATE()
       
   334 
       
   335     BEGIN_STATE(RAWTEXTState) {
       
   336         if (cc == '<')
       
   337             ADVANCE_TO(RAWTEXTLessThanSignState);
       
   338         else if (cc == InputStreamPreprocessor::endOfFileMarker)
       
   339             return emitEndOfFile(source);
       
   340         else {
       
   341             bufferCharacter(cc);
       
   342             ADVANCE_TO(RAWTEXTState);
       
   343         }
       
   344     }
       
   345     END_STATE()
       
   346 
       
   347     BEGIN_STATE(ScriptDataState) {
       
   348         if (cc == '<')
       
   349             ADVANCE_TO(ScriptDataLessThanSignState);
       
   350         else if (cc == InputStreamPreprocessor::endOfFileMarker)
       
   351             return emitEndOfFile(source);
       
   352         else {
       
   353             bufferCharacter(cc);
       
   354             ADVANCE_TO(ScriptDataState);
       
   355         }
       
   356     }
       
   357     END_STATE()
       
   358 
       
   359     BEGIN_STATE(PLAINTEXTState) {
       
   360         if (cc == InputStreamPreprocessor::endOfFileMarker)
       
   361             return emitEndOfFile(source);
       
   362         else
       
   363             bufferCharacter(cc);
       
   364         ADVANCE_TO(PLAINTEXTState);
       
   365     }
       
   366     END_STATE()
       
   367 
       
   368     BEGIN_STATE(TagOpenState) {
       
   369         if (cc == '!')
       
   370             ADVANCE_TO(MarkupDeclarationOpenState);
       
   371         else if (cc == '/')
       
   372             ADVANCE_TO(EndTagOpenState);
       
   373         else if (isASCIIUpper(cc)) {
       
   374             m_token->beginStartTag(toLowerCase(cc));
       
   375             ADVANCE_TO(TagNameState);
       
   376         } else if (isASCIILower(cc)) {
       
   377             m_token->beginStartTag(cc);
       
   378             ADVANCE_TO(TagNameState);
       
   379         } else if (cc == '?') {
       
   380             parseError();
       
   381             // The spec consumes the current character before switching
       
   382             // to the bogus comment state, but it's easier to implement
       
   383             // if we reconsume the current character.
       
   384             RECONSUME_IN(BogusCommentState);
       
   385         } else {
       
   386             parseError();
       
   387             bufferCharacter('<');
       
   388             RECONSUME_IN(DataState);
       
   389         }
       
   390     }
       
   391     END_STATE()
       
   392 
       
   393     BEGIN_STATE(EndTagOpenState) {
       
   394         if (isASCIIUpper(cc)) {
       
   395             m_token->beginEndTag(toLowerCase(cc));
       
   396             ADVANCE_TO(TagNameState);
       
   397         } else if (isASCIILower(cc)) {
       
   398             m_token->beginEndTag(cc);
       
   399             ADVANCE_TO(TagNameState);
       
   400         } else if (cc == '>') {
       
   401             parseError();
       
   402             ADVANCE_TO(DataState);
       
   403         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   404             parseError();
       
   405             bufferCharacter('<');
       
   406             bufferCharacter('/');
       
   407             RECONSUME_IN(DataState);
       
   408         } else {
       
   409             parseError();
       
   410             RECONSUME_IN(BogusCommentState);
       
   411         }
       
   412     }
       
   413     END_STATE()
       
   414 
       
   415     BEGIN_STATE(TagNameState) {
       
   416         if (isTokenizerWhitespace(cc))
       
   417             ADVANCE_TO(BeforeAttributeNameState);
       
   418         else if (cc == '/')
       
   419             ADVANCE_TO(SelfClosingStartTagState);
       
   420         else if (cc == '>')
       
   421             return emitAndResumeIn(source, DataState);
       
   422         else if (isASCIIUpper(cc)) {
       
   423             m_token->appendToName(toLowerCase(cc));
       
   424             ADVANCE_TO(TagNameState);
       
   425         } if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   426             parseError();
       
   427             RECONSUME_IN(DataState);
       
   428         } else {
       
   429             m_token->appendToName(cc);
       
   430             ADVANCE_TO(TagNameState);
       
   431         }
       
   432     }
       
   433     END_STATE()
       
   434 
       
   435     BEGIN_STATE(RCDATALessThanSignState) {
       
   436         if (cc == '/') {
       
   437             m_temporaryBuffer.clear();
       
   438             ASSERT(m_bufferedEndTagName.isEmpty());
       
   439             ADVANCE_TO(RCDATAEndTagOpenState);
       
   440         } else {
       
   441             bufferCharacter('<');
       
   442             RECONSUME_IN(RCDATAState);
       
   443         }
       
   444     }
       
   445     END_STATE()
       
   446 
       
   447     BEGIN_STATE(RCDATAEndTagOpenState) {
       
   448         if (isASCIIUpper(cc)) {
       
   449             m_temporaryBuffer.append(cc);
       
   450             addToPossibleEndTag(toLowerCase(cc));
       
   451             ADVANCE_TO(RCDATAEndTagNameState);
       
   452         } else if (isASCIILower(cc)) {
       
   453             m_temporaryBuffer.append(cc);
       
   454             addToPossibleEndTag(cc);
       
   455             ADVANCE_TO(RCDATAEndTagNameState);
       
   456         } else {
       
   457             bufferCharacter('<');
       
   458             bufferCharacter('/');
       
   459             RECONSUME_IN(RCDATAState);
       
   460         }
       
   461     }
       
   462     END_STATE()
       
   463 
       
   464     BEGIN_STATE(RCDATAEndTagNameState) {
       
   465         if (isASCIIUpper(cc)) {
       
   466             m_temporaryBuffer.append(cc);
       
   467             addToPossibleEndTag(toLowerCase(cc));
       
   468             ADVANCE_TO(RCDATAEndTagNameState);
       
   469         } else if (isASCIILower(cc)) {
       
   470             m_temporaryBuffer.append(cc);
       
   471             addToPossibleEndTag(cc);
       
   472             ADVANCE_TO(RCDATAEndTagNameState);
       
   473         } else {
       
   474             if (isTokenizerWhitespace(cc)) {
       
   475                 if (isAppropriateEndTag())
       
   476                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
       
   477             } else if (cc == '/') {
       
   478                 if (isAppropriateEndTag())
       
   479                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
       
   480             } else if (cc == '>') {
       
   481                 if (isAppropriateEndTag())
       
   482                     return flushEmitAndResumeIn(source, DataState);
       
   483             }
       
   484             bufferCharacter('<');
       
   485             bufferCharacter('/');
       
   486             m_token->appendToCharacter(m_temporaryBuffer);
       
   487             m_bufferedEndTagName.clear();
       
   488             RECONSUME_IN(RCDATAState);
       
   489         }
       
   490     }
       
   491     END_STATE()
       
   492 
       
   493     BEGIN_STATE(RAWTEXTLessThanSignState) {
       
   494         if (cc == '/') {
       
   495             m_temporaryBuffer.clear();
       
   496             ASSERT(m_bufferedEndTagName.isEmpty());
       
   497             ADVANCE_TO(RAWTEXTEndTagOpenState);
       
   498         } else {
       
   499             bufferCharacter('<');
       
   500             RECONSUME_IN(RAWTEXTState);
       
   501         }
       
   502     }
       
   503     END_STATE()
       
   504 
       
   505     BEGIN_STATE(RAWTEXTEndTagOpenState) {
       
   506         if (isASCIIUpper(cc)) {
       
   507             m_temporaryBuffer.append(cc);
       
   508             addToPossibleEndTag(toLowerCase(cc));
       
   509             ADVANCE_TO(RAWTEXTEndTagNameState);
       
   510         } else if (isASCIILower(cc)) {
       
   511             m_temporaryBuffer.append(cc);
       
   512             addToPossibleEndTag(cc);
       
   513             ADVANCE_TO(RAWTEXTEndTagNameState);
       
   514         } else {
       
   515             bufferCharacter('<');
       
   516             bufferCharacter('/');
       
   517             RECONSUME_IN(RAWTEXTState);
       
   518         }
       
   519     }
       
   520     END_STATE()
       
   521 
       
   522     BEGIN_STATE(RAWTEXTEndTagNameState) {
       
   523         if (isASCIIUpper(cc)) {
       
   524             m_temporaryBuffer.append(cc);
       
   525             addToPossibleEndTag(toLowerCase(cc));
       
   526             ADVANCE_TO(RAWTEXTEndTagNameState);
       
   527         } else if (isASCIILower(cc)) {
       
   528             m_temporaryBuffer.append(cc);
       
   529             addToPossibleEndTag(cc);
       
   530             ADVANCE_TO(RAWTEXTEndTagNameState);
       
   531         } else {
       
   532             if (isTokenizerWhitespace(cc)) {
       
   533                 if (isAppropriateEndTag())
       
   534                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
       
   535             } else if (cc == '/') {
       
   536                 if (isAppropriateEndTag())
       
   537                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
       
   538             } else if (cc == '>') {
       
   539                 if (isAppropriateEndTag())
       
   540                     return flushEmitAndResumeIn(source, DataState);
       
   541             }
       
   542             bufferCharacter('<');
       
   543             bufferCharacter('/');
       
   544             m_token->appendToCharacter(m_temporaryBuffer);
       
   545             m_bufferedEndTagName.clear();
       
   546             RECONSUME_IN(RAWTEXTState);
       
   547         }
       
   548     }
       
   549     END_STATE()
       
   550 
       
   551     BEGIN_STATE(ScriptDataLessThanSignState) {
       
   552         if (cc == '/') {
       
   553             m_temporaryBuffer.clear();
       
   554             ASSERT(m_bufferedEndTagName.isEmpty());
       
   555             ADVANCE_TO(ScriptDataEndTagOpenState);
       
   556         } else if (cc == '!') {
       
   557             bufferCharacter('<');
       
   558             bufferCharacter('!');
       
   559             ADVANCE_TO(ScriptDataEscapeStartState);
       
   560         } else {
       
   561             bufferCharacter('<');
       
   562             RECONSUME_IN(ScriptDataState);
       
   563         }
       
   564     }
       
   565     END_STATE()
       
   566 
       
   567     BEGIN_STATE(ScriptDataEndTagOpenState) {
       
   568         if (isASCIIUpper(cc)) {
       
   569             m_temporaryBuffer.append(cc);
       
   570             addToPossibleEndTag(toLowerCase(cc));
       
   571             ADVANCE_TO(ScriptDataEndTagNameState);
       
   572         } else if (isASCIILower(cc)) {
       
   573             m_temporaryBuffer.append(cc);
       
   574             addToPossibleEndTag(cc);
       
   575             ADVANCE_TO(ScriptDataEndTagNameState);
       
   576         } else {
       
   577             bufferCharacter('<');
       
   578             bufferCharacter('/');
       
   579             RECONSUME_IN(ScriptDataState);
       
   580         }
       
   581     }
       
   582     END_STATE()
       
   583 
       
   584     BEGIN_STATE(ScriptDataEndTagNameState) {
       
   585         if (isASCIIUpper(cc)) {
       
   586             m_temporaryBuffer.append(cc);
       
   587             addToPossibleEndTag(toLowerCase(cc));
       
   588             ADVANCE_TO(ScriptDataEndTagNameState);
       
   589         } else if (isASCIILower(cc)) {
       
   590             m_temporaryBuffer.append(cc);
       
   591             addToPossibleEndTag(cc);
       
   592             ADVANCE_TO(ScriptDataEndTagNameState);
       
   593         } else {
       
   594             if (isTokenizerWhitespace(cc)) {
       
   595                 if (isAppropriateEndTag())
       
   596                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
       
   597             } else if (cc == '/') {
       
   598                 if (isAppropriateEndTag())
       
   599                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
       
   600             } else if (cc == '>') {
       
   601                 if (isAppropriateEndTag())
       
   602                     return flushEmitAndResumeIn(source, DataState);
       
   603             }
       
   604             bufferCharacter('<');
       
   605             bufferCharacter('/');
       
   606             m_token->appendToCharacter(m_temporaryBuffer);
       
   607             m_bufferedEndTagName.clear();
       
   608             RECONSUME_IN(ScriptDataState);
       
   609         }
       
   610     }
       
   611     END_STATE()
       
   612 
       
   613     BEGIN_STATE(ScriptDataEscapeStartState) {
       
   614         if (cc == '-') {
       
   615             bufferCharacter(cc);
       
   616             ADVANCE_TO(ScriptDataEscapeStartDashState);
       
   617         } else
       
   618             RECONSUME_IN(ScriptDataState);
       
   619     }
       
   620     END_STATE()
       
   621 
       
   622     BEGIN_STATE(ScriptDataEscapeStartDashState) {
       
   623         if (cc == '-') {
       
   624             bufferCharacter(cc);
       
   625             ADVANCE_TO(ScriptDataEscapedDashDashState);
       
   626         } else
       
   627             RECONSUME_IN(ScriptDataState);
       
   628     }
       
   629     END_STATE()
       
   630 
       
   631     BEGIN_STATE(ScriptDataEscapedState) {
       
   632         if (cc == '-') {
       
   633             bufferCharacter(cc);
       
   634             ADVANCE_TO(ScriptDataEscapedDashState);
       
   635         } else if (cc == '<')
       
   636             ADVANCE_TO(ScriptDataEscapedLessThanSignState);
       
   637         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   638             parseError();
       
   639             RECONSUME_IN(DataState);
       
   640         } else {
       
   641             bufferCharacter(cc);
       
   642             ADVANCE_TO(ScriptDataEscapedState);
       
   643         }
       
   644     }
       
   645     END_STATE()
       
   646 
       
   647     BEGIN_STATE(ScriptDataEscapedDashState) {
       
   648         if (cc == '-') {
       
   649             bufferCharacter(cc);
       
   650             ADVANCE_TO(ScriptDataEscapedDashDashState);
       
   651         } else if (cc == '<')
       
   652             ADVANCE_TO(ScriptDataEscapedLessThanSignState);
       
   653         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   654             parseError();
       
   655             RECONSUME_IN(DataState);
       
   656         } else {
       
   657             bufferCharacter(cc);
       
   658             ADVANCE_TO(ScriptDataEscapedState);
       
   659         }
       
   660     }
       
   661     END_STATE()
       
   662 
       
   663     BEGIN_STATE(ScriptDataEscapedDashDashState) {
       
   664         if (cc == '-') {
       
   665             bufferCharacter(cc);
       
   666             ADVANCE_TO(ScriptDataEscapedDashDashState);
       
   667         } else if (cc == '<')
       
   668             ADVANCE_TO(ScriptDataEscapedLessThanSignState);
       
   669         else if (cc == '>') {
       
   670             bufferCharacter(cc);
       
   671             ADVANCE_TO(ScriptDataState);
       
   672         } if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   673             parseError();
       
   674             RECONSUME_IN(DataState);
       
   675         } else {
       
   676             bufferCharacter(cc);
       
   677             ADVANCE_TO(ScriptDataEscapedState);
       
   678         }
       
   679     }
       
   680     END_STATE()
       
   681 
       
   682     BEGIN_STATE(ScriptDataEscapedLessThanSignState) {
       
   683         if (cc == '/') {
       
   684             m_temporaryBuffer.clear();
       
   685             ASSERT(m_bufferedEndTagName.isEmpty());
       
   686             ADVANCE_TO(ScriptDataEscapedEndTagOpenState);
       
   687         } else if (isASCIIUpper(cc)) {
       
   688             bufferCharacter('<');
       
   689             bufferCharacter(cc);
       
   690             m_temporaryBuffer.clear();
       
   691             m_temporaryBuffer.append(toLowerCase(cc));
       
   692             ADVANCE_TO(ScriptDataDoubleEscapeStartState);
       
   693         } else if (isASCIILower(cc)) {
       
   694             bufferCharacter('<');
       
   695             bufferCharacter(cc);
       
   696             m_temporaryBuffer.clear();
       
   697             m_temporaryBuffer.append(cc);
       
   698             ADVANCE_TO(ScriptDataDoubleEscapeStartState);
       
   699         } else {
       
   700             bufferCharacter('<');
       
   701             RECONSUME_IN(ScriptDataEscapedState);
       
   702         }
       
   703     }
       
   704     END_STATE()
       
   705 
       
   706     BEGIN_STATE(ScriptDataEscapedEndTagOpenState) {
       
   707         if (isASCIIUpper(cc)) {
       
   708             m_temporaryBuffer.append(cc);
       
   709             addToPossibleEndTag(toLowerCase(cc));
       
   710             ADVANCE_TO(ScriptDataEscapedEndTagNameState);
       
   711         } else if (isASCIILower(cc)) {
       
   712             m_temporaryBuffer.append(cc);
       
   713             addToPossibleEndTag(cc);
       
   714             ADVANCE_TO(ScriptDataEscapedEndTagNameState);
       
   715         } else {
       
   716             bufferCharacter('<');
       
   717             bufferCharacter('/');
       
   718             RECONSUME_IN(ScriptDataEscapedState);
       
   719         }
       
   720     }
       
   721     END_STATE()
       
   722 
       
   723     BEGIN_STATE(ScriptDataEscapedEndTagNameState) {
       
   724         if (isASCIIUpper(cc)) {
       
   725             m_temporaryBuffer.append(cc);
       
   726             addToPossibleEndTag(toLowerCase(cc));
       
   727             ADVANCE_TO(ScriptDataEscapedEndTagNameState);
       
   728         } else if (isASCIILower(cc)) {
       
   729             m_temporaryBuffer.append(cc);
       
   730             addToPossibleEndTag(cc);
       
   731             ADVANCE_TO(ScriptDataEscapedEndTagNameState);
       
   732         } else {
       
   733             if (isTokenizerWhitespace(cc)) {
       
   734                 if (isAppropriateEndTag())
       
   735                     FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
       
   736             } else if (cc == '/') {
       
   737                 if (isAppropriateEndTag())
       
   738                     FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
       
   739             } else if (cc == '>') {
       
   740                 if (isAppropriateEndTag())
       
   741                     return flushEmitAndResumeIn(source, DataState);
       
   742             }
       
   743             bufferCharacter('<');
       
   744             bufferCharacter('/');
       
   745             m_token->appendToCharacter(m_temporaryBuffer);
       
   746             m_bufferedEndTagName.clear();
       
   747             RECONSUME_IN(ScriptDataEscapedState);
       
   748         }
       
   749     }
       
   750     END_STATE()
       
   751 
       
   752     BEGIN_STATE(ScriptDataDoubleEscapeStartState) {
       
   753         if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
       
   754             bufferCharacter(cc);
       
   755             if (temporaryBufferIs(scriptTag.localName()))
       
   756                 ADVANCE_TO(ScriptDataDoubleEscapedState);
       
   757             else
       
   758                 ADVANCE_TO(ScriptDataEscapedState);
       
   759         } else if (isASCIIUpper(cc)) {
       
   760             bufferCharacter(cc);
       
   761             m_temporaryBuffer.append(toLowerCase(cc));
       
   762             ADVANCE_TO(ScriptDataDoubleEscapeStartState);
       
   763         } else if (isASCIILower(cc)) {
       
   764             bufferCharacter(cc);
       
   765             m_temporaryBuffer.append(cc);
       
   766             ADVANCE_TO(ScriptDataDoubleEscapeStartState);
       
   767         } else
       
   768             RECONSUME_IN(ScriptDataEscapedState);
       
   769     }
       
   770     END_STATE()
       
   771 
       
   772     BEGIN_STATE(ScriptDataDoubleEscapedState) {
       
   773         if (cc == '-') {
       
   774             bufferCharacter(cc);
       
   775             ADVANCE_TO(ScriptDataDoubleEscapedDashState);
       
   776         } else if (cc == '<') {
       
   777             bufferCharacter(cc);
       
   778             ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
       
   779         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   780             parseError();
       
   781             RECONSUME_IN(DataState);
       
   782         } else {
       
   783             bufferCharacter(cc);
       
   784             ADVANCE_TO(ScriptDataDoubleEscapedState);
       
   785         }
       
   786     }
       
   787     END_STATE()
       
   788 
       
   789     BEGIN_STATE(ScriptDataDoubleEscapedDashState) {
       
   790         if (cc == '-') {
       
   791             bufferCharacter(cc);
       
   792             ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
       
   793         } else if (cc == '<') {
       
   794             bufferCharacter(cc);
       
   795             ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
       
   796         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   797             parseError();
       
   798             RECONSUME_IN(DataState);
       
   799         } else {
       
   800             bufferCharacter(cc);
       
   801             ADVANCE_TO(ScriptDataDoubleEscapedState);
       
   802         }
       
   803     }
       
   804     END_STATE()
       
   805 
       
   806     BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) {
       
   807         if (cc == '-') {
       
   808             bufferCharacter(cc);
       
   809             ADVANCE_TO(ScriptDataDoubleEscapedDashDashState);
       
   810         } else if (cc == '<') {
       
   811             bufferCharacter(cc);
       
   812             ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState);
       
   813         } else if (cc == '>') {
       
   814             bufferCharacter(cc);
       
   815             ADVANCE_TO(ScriptDataState);
       
   816         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   817             parseError();
       
   818             RECONSUME_IN(DataState);
       
   819         } else {
       
   820             bufferCharacter(cc);
       
   821             ADVANCE_TO(ScriptDataDoubleEscapedState);
       
   822         }
       
   823     }
       
   824     END_STATE()
       
   825 
       
   826     BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) {
       
   827         if (cc == '/') {
       
   828             bufferCharacter(cc);
       
   829             m_temporaryBuffer.clear();
       
   830             ADVANCE_TO(ScriptDataDoubleEscapeEndState);
       
   831         } else
       
   832             RECONSUME_IN(ScriptDataDoubleEscapedState);
       
   833     }
       
   834     END_STATE()
       
   835 
       
   836     BEGIN_STATE(ScriptDataDoubleEscapeEndState) {
       
   837         if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
       
   838             bufferCharacter(cc);
       
   839             if (temporaryBufferIs(scriptTag.localName()))
       
   840                 ADVANCE_TO(ScriptDataEscapedState);
       
   841             else
       
   842                 ADVANCE_TO(ScriptDataDoubleEscapedState);
       
   843         } else if (isASCIIUpper(cc)) {
       
   844             bufferCharacter(cc);
       
   845             m_temporaryBuffer.append(toLowerCase(cc));
       
   846             ADVANCE_TO(ScriptDataDoubleEscapeEndState);
       
   847         } else if (isASCIILower(cc)) {
       
   848             bufferCharacter(cc);
       
   849             m_temporaryBuffer.append(cc);
       
   850             ADVANCE_TO(ScriptDataDoubleEscapeEndState);
       
   851         } else
       
   852             RECONSUME_IN(ScriptDataDoubleEscapedState);
       
   853     }
       
   854     END_STATE()
       
   855 
       
   856     BEGIN_STATE(BeforeAttributeNameState) {
       
   857         if (isTokenizerWhitespace(cc))
       
   858             ADVANCE_TO(BeforeAttributeNameState);
       
   859         else if (cc == '/')
       
   860             ADVANCE_TO(SelfClosingStartTagState);
       
   861         else if (cc == '>')
       
   862             return emitAndResumeIn(source, DataState);
       
   863         else if (isASCIIUpper(cc)) {
       
   864             m_token->addNewAttribute();
       
   865             m_token->appendToAttributeName(toLowerCase(cc));
       
   866             ADVANCE_TO(AttributeNameState);
       
   867         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   868             parseError();
       
   869             RECONSUME_IN(DataState);
       
   870         } else {
       
   871             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
       
   872                 parseError();
       
   873             m_token->addNewAttribute();
       
   874             m_token->appendToAttributeName(cc);
       
   875             ADVANCE_TO(AttributeNameState);
       
   876         }
       
   877     }
       
   878     END_STATE()
       
   879 
       
   880     BEGIN_STATE(AttributeNameState) {
       
   881         if (isTokenizerWhitespace(cc))
       
   882             ADVANCE_TO(AfterAttributeNameState);
       
   883         else if (cc == '/')
       
   884             ADVANCE_TO(SelfClosingStartTagState);
       
   885         else if (cc == '=')
       
   886             ADVANCE_TO(BeforeAttributeValueState);
       
   887         else if (cc == '>')
       
   888             return emitAndResumeIn(source, DataState);
       
   889         else if (isASCIIUpper(cc)) {
       
   890             m_token->appendToAttributeName(toLowerCase(cc));
       
   891             ADVANCE_TO(AttributeNameState);
       
   892         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   893             parseError();
       
   894             RECONSUME_IN(DataState);
       
   895         } else {
       
   896             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
       
   897                 parseError();
       
   898             m_token->appendToAttributeName(cc);
       
   899             ADVANCE_TO(AttributeNameState);
       
   900         }
       
   901     }
       
   902     END_STATE()
       
   903 
       
   904     BEGIN_STATE(AfterAttributeNameState) {
       
   905         if (isTokenizerWhitespace(cc))
       
   906             ADVANCE_TO(AfterAttributeNameState);
       
   907         else if (cc == '/')
       
   908             ADVANCE_TO(SelfClosingStartTagState);
       
   909         else if (cc == '=')
       
   910             ADVANCE_TO(BeforeAttributeValueState);
       
   911         else if (cc == '>')
       
   912             return emitAndResumeIn(source, DataState);
       
   913         else if (isASCIIUpper(cc)) {
       
   914             m_token->addNewAttribute();
       
   915             m_token->appendToAttributeName(toLowerCase(cc));
       
   916             ADVANCE_TO(AttributeNameState);
       
   917         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   918             parseError();
       
   919             RECONSUME_IN(DataState);
       
   920         } else {
       
   921             if (cc == '"' || cc == '\'' || cc == '<')
       
   922                 parseError();
       
   923             m_token->addNewAttribute();
       
   924             m_token->appendToAttributeName(cc);
       
   925             ADVANCE_TO(AttributeNameState);
       
   926         }
       
   927     }
       
   928     END_STATE()
       
   929 
       
   930     BEGIN_STATE(BeforeAttributeValueState) {
       
   931         if (isTokenizerWhitespace(cc))
       
   932             ADVANCE_TO(BeforeAttributeValueState);
       
   933         else if (cc == '"')
       
   934             ADVANCE_TO(AttributeValueDoubleQuotedState);
       
   935         else if (cc == '&')
       
   936             RECONSUME_IN(AttributeValueUnquotedState);
       
   937         else if (cc == '\'')
       
   938             ADVANCE_TO(AttributeValueSingleQuotedState);
       
   939         else if (cc == '>') {
       
   940             parseError();
       
   941             return emitAndResumeIn(source, DataState);
       
   942         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   943             parseError();
       
   944             RECONSUME_IN(DataState);
       
   945         } else {
       
   946             if (cc == '<' || cc == '=' || cc == '`')
       
   947                 parseError();
       
   948             m_token->appendToAttributeValue(cc);
       
   949             ADVANCE_TO(AttributeValueUnquotedState);
       
   950         }
       
   951     }
       
   952     END_STATE()
       
   953 
       
   954     BEGIN_STATE(AttributeValueDoubleQuotedState) {
       
   955         if (cc == '"')
       
   956             ADVANCE_TO(AfterAttributeValueQuotedState);
       
   957         else if (cc == '&') {
       
   958             m_additionalAllowedCharacter = '"';
       
   959             ADVANCE_TO(CharacterReferenceInAttributeValueState);
       
   960         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   961             parseError();
       
   962             RECONSUME_IN(DataState);
       
   963         } else {
       
   964             m_token->appendToAttributeValue(cc);
       
   965             ADVANCE_TO(AttributeValueDoubleQuotedState);
       
   966         }
       
   967     }
       
   968     END_STATE()
       
   969 
       
   970     BEGIN_STATE(AttributeValueSingleQuotedState) {
       
   971         if (cc == '\'')
       
   972             ADVANCE_TO(AfterAttributeValueQuotedState);
       
   973         else if (cc == '&') {
       
   974             m_additionalAllowedCharacter = '\'';
       
   975             ADVANCE_TO(CharacterReferenceInAttributeValueState);
       
   976         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   977             parseError();
       
   978             RECONSUME_IN(DataState);
       
   979         } else {
       
   980             m_token->appendToAttributeValue(cc);
       
   981             ADVANCE_TO(AttributeValueSingleQuotedState);
       
   982         }
       
   983     }
       
   984     END_STATE()
       
   985 
       
   986     BEGIN_STATE(AttributeValueUnquotedState) {
       
   987         if (isTokenizerWhitespace(cc))
       
   988             ADVANCE_TO(BeforeAttributeNameState);
       
   989         else if (cc == '&') {
       
   990             m_additionalAllowedCharacter = '>';
       
   991             ADVANCE_TO(CharacterReferenceInAttributeValueState);
       
   992         } else if (cc == '>')
       
   993             return emitAndResumeIn(source, DataState);
       
   994         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
   995             parseError();
       
   996             RECONSUME_IN(DataState);
       
   997         } else {
       
   998             if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
       
   999                 parseError();
       
  1000             m_token->appendToAttributeValue(cc);
       
  1001             ADVANCE_TO(AttributeValueUnquotedState);
       
  1002         }
       
  1003     }
       
  1004     END_STATE()
       
  1005 
       
  1006     BEGIN_STATE(CharacterReferenceInAttributeValueState) {
       
  1007         bool notEnoughCharacters = false;
       
  1008         unsigned value = consumeHTMLEntity(source, notEnoughCharacters, m_additionalAllowedCharacter);
       
  1009         if (notEnoughCharacters)
       
  1010             return shouldEmitBufferedCharacterToken(source);
       
  1011         if (!value)
       
  1012             m_token->appendToAttributeValue('&');
       
  1013         else if (value < 0xFFFF)
       
  1014             m_token->appendToAttributeValue(value);
       
  1015         else {
       
  1016             m_token->appendToAttributeValue(U16_LEAD(value));
       
  1017             m_token->appendToAttributeValue(U16_TRAIL(value));
       
  1018         }
       
  1019         // We're supposed to switch back to the attribute value state that
       
  1020         // we were in when we were switched into this state.  Rather than
       
  1021         // keeping track of this explictly, we observe that the previous
       
  1022         // state can be determined by m_additionalAllowedCharacter.
       
  1023         if (m_additionalAllowedCharacter == '"')
       
  1024             SWITCH_TO(AttributeValueDoubleQuotedState);
       
  1025         else if (m_additionalAllowedCharacter == '\'')
       
  1026             SWITCH_TO(AttributeValueSingleQuotedState);
       
  1027         else if (m_additionalAllowedCharacter == '>')
       
  1028             SWITCH_TO(AttributeValueUnquotedState);
       
  1029         else
       
  1030             ASSERT_NOT_REACHED();
       
  1031     }
       
  1032     END_STATE()
       
  1033 
       
  1034     BEGIN_STATE(AfterAttributeValueQuotedState) {
       
  1035         if (isTokenizerWhitespace(cc))
       
  1036             ADVANCE_TO(BeforeAttributeNameState);
       
  1037         else if (cc == '/')
       
  1038             ADVANCE_TO(SelfClosingStartTagState);
       
  1039         else if (cc == '>')
       
  1040             return emitAndResumeIn(source, DataState);
       
  1041         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1042             parseError();
       
  1043             RECONSUME_IN(DataState);
       
  1044         } else {
       
  1045             parseError();
       
  1046             RECONSUME_IN(BeforeAttributeNameState);
       
  1047         }
       
  1048     }
       
  1049     END_STATE()
       
  1050 
       
  1051     BEGIN_STATE(SelfClosingStartTagState) {
       
  1052         if (cc == '>') {
       
  1053             m_token->setSelfClosing();
       
  1054             return emitAndResumeIn(source, DataState);
       
  1055         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1056             parseError();
       
  1057             RECONSUME_IN(DataState);
       
  1058         } else {
       
  1059             parseError();
       
  1060             RECONSUME_IN(BeforeAttributeNameState);
       
  1061         }
       
  1062     }
       
  1063     END_STATE()
       
  1064 
       
  1065     BEGIN_STATE(BogusCommentState) {
       
  1066         m_token->beginComment();
       
  1067         RECONSUME_IN(ContinueBogusCommentState);
       
  1068     }
       
  1069     END_STATE()
       
  1070 
       
  1071     BEGIN_STATE(ContinueBogusCommentState) {
       
  1072         if (cc == '>')
       
  1073             return emitAndResumeIn(source, DataState);
       
  1074         else if (cc == InputStreamPreprocessor::endOfFileMarker)
       
  1075             return emitAndReconsumeIn(source, DataState);
       
  1076         else {
       
  1077             m_token->appendToComment(cc);
       
  1078             ADVANCE_TO(ContinueBogusCommentState);
       
  1079         }
       
  1080     }
       
  1081     END_STATE()
       
  1082 
       
  1083     BEGIN_STATE(MarkupDeclarationOpenState) {
       
  1084         DEFINE_STATIC_LOCAL(String, dashDashString, ("--"));
       
  1085         DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype"));
       
  1086         if (cc == '-') {
       
  1087             SegmentedString::LookAheadResult result = source.lookAhead(dashDashString);
       
  1088             if (result == SegmentedString::DidMatch) {
       
  1089                 source.advanceAndASSERT('-');
       
  1090                 source.advanceAndASSERT('-');
       
  1091                 m_token->beginComment();
       
  1092                 SWITCH_TO(CommentStartState);
       
  1093             } else if (result == SegmentedString::NotEnoughCharacters)
       
  1094                 return shouldEmitBufferedCharacterToken(source);
       
  1095         } else if (cc == 'D' || cc == 'd') {
       
  1096             SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
       
  1097             if (result == SegmentedString::DidMatch) {
       
  1098                 advanceStringAndASSERTIgnoringCase(source, "doctype");
       
  1099                 SWITCH_TO(DOCTYPEState);
       
  1100             } else if (result == SegmentedString::NotEnoughCharacters)
       
  1101                 return shouldEmitBufferedCharacterToken(source);
       
  1102         }
       
  1103         notImplemented();
       
  1104         // FIXME: We're still missing the bits about the insertion mode being in foreign content:
       
  1105         // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state
       
  1106         parseError();
       
  1107         RECONSUME_IN(BogusCommentState);
       
  1108     }
       
  1109     END_STATE()
       
  1110 
       
  1111     BEGIN_STATE(CommentStartState) {
       
  1112         if (cc == '-')
       
  1113             ADVANCE_TO(CommentStartDashState);
       
  1114         else if (cc == '>') {
       
  1115             parseError();
       
  1116             return emitAndResumeIn(source, DataState);
       
  1117         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1118             parseError();
       
  1119             return emitAndReconsumeIn(source, DataState);
       
  1120         } else {
       
  1121             m_token->appendToComment(cc);
       
  1122             ADVANCE_TO(CommentState);
       
  1123         }
       
  1124     }
       
  1125     END_STATE()
       
  1126 
       
  1127     BEGIN_STATE(CommentStartDashState) {
       
  1128         if (cc == '-')
       
  1129             ADVANCE_TO(CommentEndState);
       
  1130         else if (cc == '>') {
       
  1131             parseError();
       
  1132             return emitAndResumeIn(source, DataState);
       
  1133         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1134             parseError();
       
  1135             return emitAndReconsumeIn(source, DataState);
       
  1136         } else {
       
  1137             m_token->appendToComment('-');
       
  1138             m_token->appendToComment(cc);
       
  1139             ADVANCE_TO(CommentState);
       
  1140         }
       
  1141     }
       
  1142     END_STATE()
       
  1143 
       
  1144     BEGIN_STATE(CommentState) {
       
  1145         if (cc == '-')
       
  1146             ADVANCE_TO(CommentEndDashState);
       
  1147         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1148             parseError();
       
  1149             return emitAndReconsumeIn(source, DataState);
       
  1150         } else {
       
  1151             m_token->appendToComment(cc);
       
  1152             ADVANCE_TO(CommentState);
       
  1153         }
       
  1154     }
       
  1155     END_STATE()
       
  1156 
       
  1157     BEGIN_STATE(CommentEndDashState) {
       
  1158         if (cc == '-')
       
  1159             ADVANCE_TO(CommentEndState);
       
  1160         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1161             parseError();
       
  1162             return emitAndReconsumeIn(source, DataState);
       
  1163         } else {
       
  1164             m_token->appendToComment('-');
       
  1165             m_token->appendToComment(cc);
       
  1166             ADVANCE_TO(CommentState);
       
  1167         }
       
  1168     }
       
  1169     END_STATE()
       
  1170 
       
  1171     BEGIN_STATE(CommentEndState) {
       
  1172         if (cc == '>')
       
  1173             return emitAndResumeIn(source, DataState);
       
  1174         else if (isTokenizerWhitespace(cc)) {
       
  1175             parseError();
       
  1176             m_token->appendToComment('-');
       
  1177             m_token->appendToComment('-');
       
  1178             m_token->appendToComment(cc);
       
  1179             ADVANCE_TO(CommentEndSpaceState);
       
  1180         } else if (cc == '!') {
       
  1181             parseError();
       
  1182             ADVANCE_TO(CommentEndBangState);
       
  1183         } else if (cc == '-') {
       
  1184             parseError();
       
  1185             m_token->appendToComment('-');
       
  1186             ADVANCE_TO(CommentEndState);
       
  1187         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1188             parseError();
       
  1189             return emitAndReconsumeIn(source, DataState);
       
  1190         } else {
       
  1191             parseError();
       
  1192             m_token->appendToComment('-');
       
  1193             m_token->appendToComment('-');
       
  1194             m_token->appendToComment(cc);
       
  1195             ADVANCE_TO(CommentState);
       
  1196         }
       
  1197     }
       
  1198     END_STATE()
       
  1199 
       
  1200     BEGIN_STATE(CommentEndBangState) {
       
  1201         if (cc == '-') {
       
  1202             m_token->appendToComment('-');
       
  1203             m_token->appendToComment('-');
       
  1204             m_token->appendToComment('!');
       
  1205             ADVANCE_TO(CommentEndDashState);
       
  1206         } else if (cc == '>')
       
  1207             return emitAndResumeIn(source, DataState);
       
  1208         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1209             parseError();
       
  1210             return emitAndReconsumeIn(source, DataState);
       
  1211         } else {
       
  1212             m_token->appendToComment('-');
       
  1213             m_token->appendToComment('-');
       
  1214             m_token->appendToComment('!');
       
  1215             m_token->appendToComment(cc);
       
  1216             ADVANCE_TO(CommentState);
       
  1217         }
       
  1218     }
       
  1219     END_STATE()
       
  1220 
       
  1221     BEGIN_STATE(CommentEndSpaceState) {
       
  1222         if (isTokenizerWhitespace(cc)) {
       
  1223             m_token->appendToComment(cc);
       
  1224             ADVANCE_TO(CommentEndSpaceState);
       
  1225         } else if (cc == '-')
       
  1226             ADVANCE_TO(CommentEndDashState);
       
  1227         else if (cc == '>')
       
  1228             return emitAndResumeIn(source, DataState);
       
  1229         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1230             parseError();
       
  1231             return emitAndReconsumeIn(source, DataState);
       
  1232         } else {
       
  1233             m_token->appendToComment(cc);
       
  1234             ADVANCE_TO(CommentState);
       
  1235         }
       
  1236     }
       
  1237     END_STATE()
       
  1238 
       
  1239     BEGIN_STATE(DOCTYPEState) {
       
  1240         if (isTokenizerWhitespace(cc))
       
  1241             ADVANCE_TO(BeforeDOCTYPENameState);
       
  1242         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1243             parseError();
       
  1244             m_token->beginDOCTYPE();
       
  1245             m_token->setForceQuirks();
       
  1246             return emitAndReconsumeIn(source, DataState);
       
  1247         } else {
       
  1248             parseError();
       
  1249             RECONSUME_IN(BeforeDOCTYPENameState);
       
  1250         }
       
  1251     }
       
  1252     END_STATE()
       
  1253 
       
  1254     BEGIN_STATE(BeforeDOCTYPENameState) {
       
  1255         if (isTokenizerWhitespace(cc))
       
  1256             ADVANCE_TO(BeforeDOCTYPENameState);
       
  1257         else if (isASCIIUpper(cc)) {
       
  1258             m_token->beginDOCTYPE(toLowerCase(cc));
       
  1259             ADVANCE_TO(DOCTYPENameState);
       
  1260         } else if (cc == '>') {
       
  1261             parseError();
       
  1262             m_token->beginDOCTYPE();
       
  1263             m_token->setForceQuirks();
       
  1264             return emitAndResumeIn(source, DataState);
       
  1265         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1266             parseError();
       
  1267             m_token->beginDOCTYPE();
       
  1268             m_token->setForceQuirks();
       
  1269             return emitAndReconsumeIn(source, DataState);
       
  1270         } else {
       
  1271             m_token->beginDOCTYPE(cc);
       
  1272             ADVANCE_TO(DOCTYPENameState);
       
  1273         }
       
  1274     }
       
  1275     END_STATE()
       
  1276 
       
  1277     BEGIN_STATE(DOCTYPENameState) {
       
  1278         if (isTokenizerWhitespace(cc))
       
  1279             ADVANCE_TO(AfterDOCTYPENameState);
       
  1280         else if (cc == '>')
       
  1281             return emitAndResumeIn(source, DataState);
       
  1282         else if (isASCIIUpper(cc)) {
       
  1283             m_token->appendToName(toLowerCase(cc));
       
  1284             ADVANCE_TO(DOCTYPENameState);
       
  1285         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1286             parseError();
       
  1287             m_token->setForceQuirks();
       
  1288             return emitAndReconsumeIn(source, DataState);
       
  1289         } else {
       
  1290             m_token->appendToName(cc);
       
  1291             ADVANCE_TO(DOCTYPENameState);
       
  1292         }
       
  1293     }
       
  1294     END_STATE()
       
  1295 
       
  1296     BEGIN_STATE(AfterDOCTYPENameState) {
       
  1297         if (isTokenizerWhitespace(cc))
       
  1298             ADVANCE_TO(AfterDOCTYPENameState);
       
  1299         if (cc == '>')
       
  1300             return emitAndResumeIn(source, DataState);
       
  1301         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1302             parseError();
       
  1303             m_token->setForceQuirks();
       
  1304             return emitAndReconsumeIn(source, DataState);
       
  1305         } else {
       
  1306             DEFINE_STATIC_LOCAL(String, publicString, ("public"));
       
  1307             DEFINE_STATIC_LOCAL(String, systemString, ("system"));
       
  1308             if (cc == 'P' || cc == 'p') {
       
  1309                 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
       
  1310                 if (result == SegmentedString::DidMatch) {
       
  1311                     advanceStringAndASSERTIgnoringCase(source, "public");
       
  1312                     SWITCH_TO(AfterDOCTYPEPublicKeywordState);
       
  1313                 } else if (result == SegmentedString::NotEnoughCharacters)
       
  1314                     return shouldEmitBufferedCharacterToken(source);
       
  1315             } else if (cc == 'S' || cc == 's') {
       
  1316                 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
       
  1317                 if (result == SegmentedString::DidMatch) {
       
  1318                     advanceStringAndASSERTIgnoringCase(source, "system");
       
  1319                     SWITCH_TO(AfterDOCTYPESystemKeywordState);
       
  1320                 } else if (result == SegmentedString::NotEnoughCharacters)
       
  1321                     return shouldEmitBufferedCharacterToken(source);
       
  1322             }
       
  1323             parseError();
       
  1324             m_token->setForceQuirks();
       
  1325             ADVANCE_TO(BogusDOCTYPEState);
       
  1326         }
       
  1327     }
       
  1328     END_STATE()
       
  1329 
       
  1330     BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
       
  1331         if (isTokenizerWhitespace(cc))
       
  1332             ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
       
  1333         else if (cc == '"') {
       
  1334             parseError();
       
  1335             m_token->setPublicIdentifierToEmptyString();
       
  1336             ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
       
  1337         } else if (cc == '\'') {
       
  1338             parseError();
       
  1339             m_token->setPublicIdentifierToEmptyString();
       
  1340             ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
       
  1341         } else if (cc == '>') {
       
  1342             parseError();
       
  1343             m_token->setForceQuirks();
       
  1344             return emitAndResumeIn(source, DataState);
       
  1345         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1346             parseError();
       
  1347             m_token->setForceQuirks();
       
  1348             return emitAndReconsumeIn(source, DataState);
       
  1349         } else {
       
  1350             parseError();
       
  1351             m_token->setForceQuirks();
       
  1352             ADVANCE_TO(BogusDOCTYPEState);
       
  1353         }
       
  1354     }
       
  1355     END_STATE()
       
  1356 
       
  1357     BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
       
  1358         if (isTokenizerWhitespace(cc))
       
  1359             ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
       
  1360         else if (cc == '"') {
       
  1361             m_token->setPublicIdentifierToEmptyString();
       
  1362             ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
       
  1363         } else if (cc == '\'') {
       
  1364             m_token->setPublicIdentifierToEmptyString();
       
  1365             ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
       
  1366         } else if (cc == '>') {
       
  1367             parseError();
       
  1368             m_token->setForceQuirks();
       
  1369             return emitAndResumeIn(source, DataState);
       
  1370         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1371             parseError();
       
  1372             m_token->setForceQuirks();
       
  1373             return emitAndReconsumeIn(source, DataState);
       
  1374         } else {
       
  1375             parseError();
       
  1376             m_token->setForceQuirks();
       
  1377             ADVANCE_TO(BogusDOCTYPEState);
       
  1378         }
       
  1379     }
       
  1380     END_STATE()
       
  1381 
       
  1382     BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
       
  1383         if (cc == '"')
       
  1384             ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
       
  1385         else if (cc == '>') {
       
  1386             parseError();
       
  1387             m_token->setForceQuirks();
       
  1388             return emitAndResumeIn(source, DataState);
       
  1389         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1390             parseError();
       
  1391             m_token->setForceQuirks();
       
  1392             return emitAndReconsumeIn(source, DataState);
       
  1393         } else {
       
  1394             m_token->appendToPublicIdentifier(cc);
       
  1395             ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
       
  1396         }
       
  1397     }
       
  1398     END_STATE()
       
  1399 
       
  1400     BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
       
  1401         if (cc == '\'')
       
  1402             ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
       
  1403         else if (cc == '>') {
       
  1404             parseError();
       
  1405             m_token->setForceQuirks();
       
  1406             return emitAndResumeIn(source, DataState);
       
  1407         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1408             parseError();
       
  1409             m_token->setForceQuirks();
       
  1410             return emitAndReconsumeIn(source, DataState);
       
  1411         } else {
       
  1412             m_token->appendToPublicIdentifier(cc);
       
  1413             ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
       
  1414         }
       
  1415     }
       
  1416     END_STATE()
       
  1417 
       
  1418     BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
       
  1419         if (isTokenizerWhitespace(cc))
       
  1420             ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
       
  1421         else if (cc == '>')
       
  1422             return emitAndResumeIn(source, DataState);
       
  1423         else if (cc == '"') {
       
  1424             parseError();
       
  1425             m_token->setSystemIdentifierToEmptyString();
       
  1426             ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
       
  1427         } else if (cc == '\'') {
       
  1428             parseError();
       
  1429             m_token->setSystemIdentifierToEmptyString();
       
  1430             ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
       
  1431         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1432             parseError();
       
  1433             m_token->setForceQuirks();
       
  1434             return emitAndReconsumeIn(source, DataState);
       
  1435         } else {
       
  1436             parseError();
       
  1437             m_token->setForceQuirks();
       
  1438             ADVANCE_TO(BogusDOCTYPEState);
       
  1439         }
       
  1440     }
       
  1441     END_STATE()
       
  1442 
       
  1443     BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
       
  1444         if (isTokenizerWhitespace(cc))
       
  1445             ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
       
  1446         else if (cc == '>')
       
  1447             return emitAndResumeIn(source, DataState);
       
  1448         else if (cc == '"') {
       
  1449             m_token->setSystemIdentifierToEmptyString();
       
  1450             ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
       
  1451         } else if (cc == '\'') {
       
  1452             m_token->setSystemIdentifierToEmptyString();
       
  1453             ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
       
  1454         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1455             parseError();
       
  1456             m_token->setForceQuirks();
       
  1457             return emitAndReconsumeIn(source, DataState);
       
  1458         } else {
       
  1459             parseError();
       
  1460             m_token->setForceQuirks();
       
  1461             ADVANCE_TO(BogusDOCTYPEState);
       
  1462         }
       
  1463     }
       
  1464     END_STATE()
       
  1465 
       
  1466     BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
       
  1467         if (isTokenizerWhitespace(cc))
       
  1468             ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
       
  1469         else if (cc == '"') {
       
  1470             parseError();
       
  1471             m_token->setSystemIdentifierToEmptyString();
       
  1472             ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
       
  1473         } else if (cc == '\'') {
       
  1474             parseError();
       
  1475             m_token->setSystemIdentifierToEmptyString();
       
  1476             ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
       
  1477         } else if (cc == '>') {
       
  1478             parseError();
       
  1479             m_token->setForceQuirks();
       
  1480             return emitAndResumeIn(source, DataState);
       
  1481         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1482             parseError();
       
  1483             m_token->setForceQuirks();
       
  1484             return emitAndReconsumeIn(source, DataState);
       
  1485         } else {
       
  1486             parseError();
       
  1487             m_token->setForceQuirks();
       
  1488             ADVANCE_TO(BogusDOCTYPEState);
       
  1489         }
       
  1490     }
       
  1491     END_STATE()
       
  1492 
       
  1493     BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
       
  1494         if (isTokenizerWhitespace(cc))
       
  1495             ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
       
  1496         if (cc == '"') {
       
  1497             m_token->setSystemIdentifierToEmptyString();
       
  1498             ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
       
  1499         } else if (cc == '\'') {
       
  1500             m_token->setSystemIdentifierToEmptyString();
       
  1501             ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
       
  1502         } else if (cc == '>') {
       
  1503             parseError();
       
  1504             m_token->setForceQuirks();
       
  1505             return emitAndResumeIn(source, DataState);
       
  1506         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1507             parseError();
       
  1508             m_token->setForceQuirks();
       
  1509             return emitAndReconsumeIn(source, DataState);
       
  1510         } else {
       
  1511             parseError();
       
  1512             m_token->setForceQuirks();
       
  1513             ADVANCE_TO(BogusDOCTYPEState);
       
  1514         }
       
  1515     }
       
  1516     END_STATE()
       
  1517 
       
  1518     BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
       
  1519         if (cc == '"')
       
  1520             ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
       
  1521         else if (cc == '>') {
       
  1522             parseError();
       
  1523             m_token->setForceQuirks();
       
  1524             return emitAndResumeIn(source, DataState);
       
  1525         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1526             parseError();
       
  1527             m_token->setForceQuirks();
       
  1528             return emitAndReconsumeIn(source, DataState);
       
  1529         } else {
       
  1530             m_token->appendToSystemIdentifier(cc);
       
  1531             ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
       
  1532         }
       
  1533     }
       
  1534     END_STATE()
       
  1535 
       
  1536     BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
       
  1537         if (cc == '\'')
       
  1538             ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
       
  1539         else if (cc == '>') {
       
  1540             parseError();
       
  1541             m_token->setForceQuirks();
       
  1542             return emitAndResumeIn(source, DataState);
       
  1543         } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1544             parseError();
       
  1545             m_token->setForceQuirks();
       
  1546             return emitAndReconsumeIn(source, DataState);
       
  1547         } else {
       
  1548             m_token->appendToSystemIdentifier(cc);
       
  1549             ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
       
  1550         }
       
  1551     }
       
  1552     END_STATE()
       
  1553 
       
  1554     BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
       
  1555         if (isTokenizerWhitespace(cc))
       
  1556             ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
       
  1557         else if (cc == '>')
       
  1558             return emitAndResumeIn(source, DataState);
       
  1559         else if (cc == InputStreamPreprocessor::endOfFileMarker) {
       
  1560             parseError();
       
  1561             m_token->setForceQuirks();
       
  1562             return emitAndReconsumeIn(source, DataState);
       
  1563         } else {
       
  1564             parseError();
       
  1565             ADVANCE_TO(BogusDOCTYPEState);
       
  1566         }
       
  1567     }
       
  1568     END_STATE()
       
  1569 
       
  1570     BEGIN_STATE(BogusDOCTYPEState) {
       
  1571         if (cc == '>')
       
  1572             return emitAndResumeIn(source, DataState);
       
  1573         else if (cc == InputStreamPreprocessor::endOfFileMarker)
       
  1574             return emitAndReconsumeIn(source, DataState);
       
  1575         ADVANCE_TO(BogusDOCTYPEState);
       
  1576     }
       
  1577     END_STATE()
       
  1578 
       
  1579     BEGIN_STATE(CDATASectionState) {
       
  1580         notImplemented();
       
  1581         ADVANCE_TO(CDATASectionState);
       
  1582         // FIXME: Handle EOF properly.
       
  1583     }
       
  1584     END_STATE()
       
  1585 
       
  1586     }
       
  1587 
       
  1588     ASSERT_NOT_REACHED();
       
  1589     return false;
       
  1590 }
       
  1591 
       
  1592 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
       
  1593 {
       
  1594     return vectorEqualsString(m_temporaryBuffer, expectedString);
       
  1595 }
       
  1596 
       
  1597 inline void HTMLTokenizer::addToPossibleEndTag(UChar cc)
       
  1598 {
       
  1599     ASSERT(isEndTagBufferingState(m_state));
       
  1600     m_bufferedEndTagName.append(cc);
       
  1601 }
       
  1602 
       
  1603 inline bool HTMLTokenizer::isAppropriateEndTag()
       
  1604 {
       
  1605     return m_bufferedEndTagName == m_appropriateEndTagName;
       
  1606 }
       
  1607 
       
  1608 inline void HTMLTokenizer::bufferCharacter(UChar character)
       
  1609 {
       
  1610     ASSERT(character != InputStreamPreprocessor::endOfFileMarker);
       
  1611     if (m_token->type() != HTMLToken::Character) {
       
  1612         m_token->beginCharacter(character);
       
  1613         return;
       
  1614     }
       
  1615     m_token->appendToCharacter(character);
       
  1616 }
       
  1617 
       
  1618 inline void HTMLTokenizer::bufferCodePoint(unsigned value)
       
  1619 {
       
  1620     if (value < 0xFFFF) {
       
  1621         bufferCharacter(value);
       
  1622         return;
       
  1623     }
       
  1624     bufferCharacter(U16_LEAD(value));
       
  1625     bufferCharacter(U16_TRAIL(value));
       
  1626 }
       
  1627 
       
  1628 inline void HTMLTokenizer::parseError()
       
  1629 {
       
  1630     notImplemented();
       
  1631 }
       
  1632 
       
  1633 inline bool HTMLTokenizer::shouldEmitBufferedCharacterToken(const SegmentedString& source)
       
  1634 {
       
  1635     return source.isClosed() && m_token->type() == HTMLToken::Character;
       
  1636 }
       
  1637 
       
  1638 }
       
  1639