|
1 /* |
|
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
|
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
|
4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
|
5 * |
|
6 * Redistribution and use in source and binary forms, with or without |
|
7 * modification, are permitted provided that the following conditions |
|
8 * are met: |
|
9 * 1. Redistributions of source code must retain the above copyright |
|
10 * notice, this list of conditions and the following disclaimer. |
|
11 * 2. Redistributions in binary form must reproduce the above copyright |
|
12 * notice, this list of conditions and the following disclaimer in the |
|
13 * documentation and/or other materials provided with the distribution. |
|
14 * |
|
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
|
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
|
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
26 */ |
|
27 |
|
28 #include "config.h" |
|
29 #include "HTMLTokenizer.h" |
|
30 |
|
31 #include "AtomicString.h" |
|
32 #include "HTMLEntityParser.h" |
|
33 #include "HTMLToken.h" |
|
34 #include "HTMLNames.h" |
|
35 #include "NotImplemented.h" |
|
36 #include <wtf/ASCIICType.h> |
|
37 #include <wtf/CurrentTime.h> |
|
38 #include <wtf/UnusedParam.h> |
|
39 #include <wtf/text/CString.h> |
|
40 #include <wtf/unicode/Unicode.h> |
|
41 |
|
42 using namespace WTF; |
|
43 |
|
44 namespace WebCore { |
|
45 |
|
46 using namespace HTMLNames; |
|
47 |
|
48 const UChar HTMLTokenizer::InputStreamPreprocessor::endOfFileMarker = 0; |
|
49 |
|
50 namespace { |
|
51 |
|
52 inline UChar toLowerCase(UChar cc) |
|
53 { |
|
54 ASSERT(isASCIIUpper(cc)); |
|
55 const int lowerCaseOffset = 0x20; |
|
56 return cc + lowerCaseOffset; |
|
57 } |
|
58 |
|
59 inline bool isTokenizerWhitespace(UChar cc) |
|
60 { |
|
61 return cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' '; |
|
62 } |
|
63 |
|
64 inline void advanceStringAndASSERTIgnoringCase(SegmentedString& source, const char* expectedCharacters) |
|
65 { |
|
66 while (*expectedCharacters) |
|
67 source.advanceAndASSERTIgnoringCase(*expectedCharacters++); |
|
68 } |
|
69 |
|
70 inline bool vectorEqualsString(const Vector<UChar, 32>& vector, const String& string) |
|
71 { |
|
72 if (vector.size() != string.length()) |
|
73 return false; |
|
74 const UChar* stringData = string.characters(); |
|
75 const UChar* vectorData = vector.data(); |
|
76 // FIXME: Is there a higher-level function we should be calling here? |
|
77 return !memcmp(stringData, vectorData, vector.size() * sizeof(UChar)); |
|
78 } |
|
79 |
|
80 inline bool isEndTagBufferingState(HTMLTokenizer::State state) |
|
81 { |
|
82 switch (state) { |
|
83 case HTMLTokenizer::RCDATAEndTagOpenState: |
|
84 case HTMLTokenizer::RCDATAEndTagNameState: |
|
85 case HTMLTokenizer::RAWTEXTEndTagOpenState: |
|
86 case HTMLTokenizer::RAWTEXTEndTagNameState: |
|
87 case HTMLTokenizer::ScriptDataEndTagOpenState: |
|
88 case HTMLTokenizer::ScriptDataEndTagNameState: |
|
89 case HTMLTokenizer::ScriptDataEscapedEndTagOpenState: |
|
90 case HTMLTokenizer::ScriptDataEscapedEndTagNameState: |
|
91 return true; |
|
92 default: |
|
93 return false; |
|
94 } |
|
95 } |
|
96 |
|
97 } |
|
98 |
|
99 HTMLTokenizer::HTMLTokenizer() |
|
100 { |
|
101 reset(); |
|
102 } |
|
103 |
|
104 HTMLTokenizer::~HTMLTokenizer() |
|
105 { |
|
106 } |
|
107 |
|
108 void HTMLTokenizer::reset() |
|
109 { |
|
110 m_state = DataState; |
|
111 m_token = 0; |
|
112 m_lineNumber = 0; |
|
113 m_skipLeadingNewLineForListing = false; |
|
114 m_additionalAllowedCharacter = '\0'; |
|
115 } |
|
116 |
|
117 inline bool HTMLTokenizer::processEntity(SegmentedString& source) |
|
118 { |
|
119 bool notEnoughCharacters = false; |
|
120 unsigned value = consumeHTMLEntity(source, notEnoughCharacters); |
|
121 if (notEnoughCharacters) |
|
122 return false; |
|
123 if (!value) |
|
124 bufferCharacter('&'); |
|
125 else |
|
126 bufferCodePoint(value); |
|
127 return true; |
|
128 } |
|
129 |
|
130 #if COMPILER(MSVC) |
|
131 // We need to disable the "unreachable code" warning because we want to assert |
|
132 // that some code points aren't reached in the state machine. |
|
133 #pragma warning(disable: 4702) |
|
134 #endif |
|
135 |
|
// Opens the handler for one tokenizer state inside the switch in nextToken().
// It emits both a `case` label (so the switch on m_state can dispatch to it)
// and a goto label of the same name (the jump target used by RECONSUME_IN,
// ADVANCE_TO and the other state-transition macros).
#define BEGIN_STATE(stateName) \
    case stateName: \
    stateName:
|
// Closes a state handler. Every path through a handler is expected to leave
// via a goto or a return, so reaching this point trips ASSERT_NOT_REACHED();
// the `break` keeps control from falling into the next state's handler.
#define END_STATE() \
    ASSERT_NOT_REACHED(); \
    break;
|
138 |
|
// Implements the HTML5 spec instruction "reconsume the current input
// character in the <mumble> state": the new state is recorded in m_state and
// control jumps straight to that state's label. The input is not advanced,
// so the same character |cc| is examined again by the target state.
#define RECONSUME_IN(stateName) \
    do { \
        m_state = stateName; \
        goto stateName; \
    } while (false)
|
146 |
|
// Implements the HTML5 spec instruction "consume the next input character
// ... and switch to the <mumble> state."
//
// The new state is stored in m_state first. If the preprocessor can advance,
// |cc| is reloaded and control jumps to the state's label. Otherwise the
// enclosing function returns shouldEmitBufferedCharacterToken(source); since
// m_state is already updated, a later call dispatches to |stateName|.
// Requires |source| and |cc| to be in scope at the point of use.
#define ADVANCE_TO(stateName) \
    do { \
        m_state = stateName; \
        if (m_inputStreamPreprocessor.advance(source, m_lineNumber)) { \
            cc = m_inputStreamPreprocessor.nextInputCharacter(); \
            goto stateName; \
        } \
        return shouldEmitBufferedCharacterToken(source); \
    } while (false)
|
157 |
|
// In some places the spec separates the point where the next input character
// is consumed from the point where the state changes. Those call sites
// advance |source| themselves and then use this macro, which only switches
// state: it peeks at the current character instead of advancing.
//
// If |source| is empty or the peek fails, the enclosing function returns
// shouldEmitBufferedCharacterToken(source) with m_state already updated.
// Requires |source| and |cc| to be in scope at the point of use.
#define SWITCH_TO(stateName) \
    do { \
        m_state = stateName; \
        if (!source.isEmpty() && m_inputStreamPreprocessor.peek(source, m_lineNumber)) { \
            cc = m_inputStreamPreprocessor.nextInputCharacter(); \
            goto stateName; \
        } \
        return shouldEmitBufferedCharacterToken(source); \
    } while (false)
|
170 |
|
171 |
|
172 inline void HTMLTokenizer::saveEndTagNameIfNeeded() |
|
173 { |
|
174 ASSERT(m_token->type() != HTMLToken::Uninitialized); |
|
175 if (m_token->type() == HTMLToken::StartTag) |
|
176 m_appropriateEndTagName = m_token->name(); |
|
177 } |
|
178 |
|
179 // We use this function when the HTML5 spec says "Emit the current <mumble> |
|
180 // token. Switch to the <mumble> state." We use the word "resume" instead of |
|
181 // switch to indicate that this macro actually returns and that we'll end up |
|
182 // in the state when we "resume" (i.e., are called again). |
|
183 bool HTMLTokenizer::emitAndResumeIn(SegmentedString& source, State state) |
|
184 { |
|
185 m_state = state; |
|
186 source.advance(m_lineNumber); |
|
187 saveEndTagNameIfNeeded(); |
|
188 return true; |
|
189 } |
|
190 |
|
191 // Identical to emitAndResumeIn, except does not advance. |
|
192 bool HTMLTokenizer::emitAndReconsumeIn(SegmentedString&, State state) |
|
193 { |
|
194 m_state = state; |
|
195 saveEndTagNameIfNeeded(); |
|
196 return true; |
|
197 } |
|
198 |
|
199 // Used to emit the EndOfFile token. |
|
200 // Check if we have buffered characters to emit first before emitting the EOF. |
|
201 bool HTMLTokenizer::emitEndOfFile(SegmentedString& source) |
|
202 { |
|
203 if (shouldEmitBufferedCharacterToken(source)) |
|
204 return true; |
|
205 m_state = DataState; |
|
206 source.advance(m_lineNumber); |
|
207 m_token->clear(); |
|
208 m_token->makeEndOfFile(); |
|
209 return true; |
|
210 } |
|
211 |
|
212 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) |
|
213 { |
|
214 ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized); |
|
215 source.advance(m_lineNumber); |
|
216 if (m_token->type() == HTMLToken::Character) |
|
217 return true; |
|
218 m_token->beginEndTag(m_bufferedEndTagName); |
|
219 m_bufferedEndTagName.clear(); |
|
220 return false; |
|
221 } |
|
222 |
|
// Switches to |stateName| after flushing the buffered end tag name.
//
// m_state is updated first. flushBufferedEndTag() consumes the current
// character; if it reports that a Character token is pending, the enclosing
// function returns true. Otherwise, as in SWITCH_TO, the current character
// is peeked and control jumps to the state's label, or the enclosing
// function returns shouldEmitBufferedCharacterToken(source) when no input is
// available. Requires |source| and |cc| to be in scope at the point of use.
#define FLUSH_AND_ADVANCE_TO(stateName) \
    do { \
        m_state = stateName; \
        if (flushBufferedEndTag(source)) \
            return true; \
        if (!source.isEmpty() && m_inputStreamPreprocessor.peek(source, m_lineNumber)) { \
            cc = m_inputStreamPreprocessor.nextInputCharacter(); \
            goto stateName; \
        } \
        return shouldEmitBufferedCharacterToken(source); \
    } while (false)
|
234 |
|
235 bool HTMLTokenizer::flushEmitAndResumeIn(SegmentedString& source, State state) |
|
236 { |
|
237 m_state = state; |
|
238 flushBufferedEndTag(source); |
|
239 return true; |
|
240 } |
|
241 |
|
242 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) |
|
243 { |
|
244 // If we have a token in progress, then we're supposed to be called back |
|
245 // with the same token so we can finish it. |
|
246 ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized); |
|
247 m_token = &token; |
|
248 |
|
249 if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) { |
|
250 // FIXME: This should call flushBufferedEndTag(). |
|
251 // We started an end tag during our last iteration. |
|
252 m_token->beginEndTag(m_bufferedEndTagName); |
|
253 m_bufferedEndTagName.clear(); |
|
254 if (m_state == DataState) { |
|
255 // We're back in the data state, so we must be done with the tag. |
|
256 return true; |
|
257 } |
|
258 } |
|
259 |
|
260 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source, m_lineNumber)) |
|
261 return shouldEmitBufferedCharacterToken(source); |
|
262 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
|
263 |
|
264 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody |
|
265 // Note that this logic is different than the generic \r\n collapsing |
|
266 // handled in the input stream preprocessor. This logic is here as an |
|
267 // "authoring convenience" so folks can write: |
|
268 // |
|
269 // <pre> |
|
270 // lorem ipsum |
|
271 // lorem ipsum |
|
272 // </pre> |
|
273 // |
|
274 // without getting an extra newline at the start of their <pre> element. |
|
275 if (m_skipLeadingNewLineForListing) { |
|
276 m_skipLeadingNewLineForListing = false; |
|
277 if (cc == '\n') { |
|
278 if (m_state == DataState) |
|
279 ADVANCE_TO(DataState); |
|
280 if (m_state == RCDATAState) |
|
281 ADVANCE_TO(RCDATAState); |
|
282 ASSERT_NOT_REACHED(); |
|
283 } |
|
284 } |
|
285 |
|
286 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 |
|
287 switch (m_state) { |
|
288 BEGIN_STATE(DataState) { |
|
289 if (cc == '&') |
|
290 ADVANCE_TO(CharacterReferenceInDataState); |
|
291 else if (cc == '<') { |
|
292 if (m_token->type() == HTMLToken::Character) { |
|
293 // We have a bunch of character tokens queued up that we |
|
294 // are emitting lazily here. |
|
295 return true; |
|
296 } |
|
297 ADVANCE_TO(TagOpenState); |
|
298 } else if (cc == InputStreamPreprocessor::endOfFileMarker) |
|
299 return emitEndOfFile(source); |
|
300 else { |
|
301 bufferCharacter(cc); |
|
302 ADVANCE_TO(DataState); |
|
303 } |
|
304 } |
|
305 END_STATE() |
|
306 |
|
307 BEGIN_STATE(CharacterReferenceInDataState) { |
|
308 if (!processEntity(source)) |
|
309 return shouldEmitBufferedCharacterToken(source); |
|
310 SWITCH_TO(DataState); |
|
311 } |
|
312 END_STATE() |
|
313 |
|
314 BEGIN_STATE(RCDATAState) { |
|
315 if (cc == '&') |
|
316 ADVANCE_TO(CharacterReferenceInRCDATAState); |
|
317 else if (cc == '<') |
|
318 ADVANCE_TO(RCDATALessThanSignState); |
|
319 else if (cc == InputStreamPreprocessor::endOfFileMarker) |
|
320 return emitEndOfFile(source); |
|
321 else { |
|
322 bufferCharacter(cc); |
|
323 ADVANCE_TO(RCDATAState); |
|
324 } |
|
325 } |
|
326 END_STATE() |
|
327 |
|
328 BEGIN_STATE(CharacterReferenceInRCDATAState) { |
|
329 if (!processEntity(source)) |
|
330 return shouldEmitBufferedCharacterToken(source); |
|
331 SWITCH_TO(RCDATAState); |
|
332 } |
|
333 END_STATE() |
|
334 |
|
335 BEGIN_STATE(RAWTEXTState) { |
|
336 if (cc == '<') |
|
337 ADVANCE_TO(RAWTEXTLessThanSignState); |
|
338 else if (cc == InputStreamPreprocessor::endOfFileMarker) |
|
339 return emitEndOfFile(source); |
|
340 else { |
|
341 bufferCharacter(cc); |
|
342 ADVANCE_TO(RAWTEXTState); |
|
343 } |
|
344 } |
|
345 END_STATE() |
|
346 |
|
347 BEGIN_STATE(ScriptDataState) { |
|
348 if (cc == '<') |
|
349 ADVANCE_TO(ScriptDataLessThanSignState); |
|
350 else if (cc == InputStreamPreprocessor::endOfFileMarker) |
|
351 return emitEndOfFile(source); |
|
352 else { |
|
353 bufferCharacter(cc); |
|
354 ADVANCE_TO(ScriptDataState); |
|
355 } |
|
356 } |
|
357 END_STATE() |
|
358 |
|
359 BEGIN_STATE(PLAINTEXTState) { |
|
360 if (cc == InputStreamPreprocessor::endOfFileMarker) |
|
361 return emitEndOfFile(source); |
|
362 else |
|
363 bufferCharacter(cc); |
|
364 ADVANCE_TO(PLAINTEXTState); |
|
365 } |
|
366 END_STATE() |
|
367 |
|
368 BEGIN_STATE(TagOpenState) { |
|
369 if (cc == '!') |
|
370 ADVANCE_TO(MarkupDeclarationOpenState); |
|
371 else if (cc == '/') |
|
372 ADVANCE_TO(EndTagOpenState); |
|
373 else if (isASCIIUpper(cc)) { |
|
374 m_token->beginStartTag(toLowerCase(cc)); |
|
375 ADVANCE_TO(TagNameState); |
|
376 } else if (isASCIILower(cc)) { |
|
377 m_token->beginStartTag(cc); |
|
378 ADVANCE_TO(TagNameState); |
|
379 } else if (cc == '?') { |
|
380 parseError(); |
|
381 // The spec consumes the current character before switching |
|
382 // to the bogus comment state, but it's easier to implement |
|
383 // if we reconsume the current character. |
|
384 RECONSUME_IN(BogusCommentState); |
|
385 } else { |
|
386 parseError(); |
|
387 bufferCharacter('<'); |
|
388 RECONSUME_IN(DataState); |
|
389 } |
|
390 } |
|
391 END_STATE() |
|
392 |
|
393 BEGIN_STATE(EndTagOpenState) { |
|
394 if (isASCIIUpper(cc)) { |
|
395 m_token->beginEndTag(toLowerCase(cc)); |
|
396 ADVANCE_TO(TagNameState); |
|
397 } else if (isASCIILower(cc)) { |
|
398 m_token->beginEndTag(cc); |
|
399 ADVANCE_TO(TagNameState); |
|
400 } else if (cc == '>') { |
|
401 parseError(); |
|
402 ADVANCE_TO(DataState); |
|
403 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
404 parseError(); |
|
405 bufferCharacter('<'); |
|
406 bufferCharacter('/'); |
|
407 RECONSUME_IN(DataState); |
|
408 } else { |
|
409 parseError(); |
|
410 RECONSUME_IN(BogusCommentState); |
|
411 } |
|
412 } |
|
413 END_STATE() |
|
414 |
|
415 BEGIN_STATE(TagNameState) { |
|
416 if (isTokenizerWhitespace(cc)) |
|
417 ADVANCE_TO(BeforeAttributeNameState); |
|
418 else if (cc == '/') |
|
419 ADVANCE_TO(SelfClosingStartTagState); |
|
420 else if (cc == '>') |
|
421 return emitAndResumeIn(source, DataState); |
|
422 else if (isASCIIUpper(cc)) { |
|
423 m_token->appendToName(toLowerCase(cc)); |
|
424 ADVANCE_TO(TagNameState); |
|
425 } if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
426 parseError(); |
|
427 RECONSUME_IN(DataState); |
|
428 } else { |
|
429 m_token->appendToName(cc); |
|
430 ADVANCE_TO(TagNameState); |
|
431 } |
|
432 } |
|
433 END_STATE() |
|
434 |
|
435 BEGIN_STATE(RCDATALessThanSignState) { |
|
436 if (cc == '/') { |
|
437 m_temporaryBuffer.clear(); |
|
438 ASSERT(m_bufferedEndTagName.isEmpty()); |
|
439 ADVANCE_TO(RCDATAEndTagOpenState); |
|
440 } else { |
|
441 bufferCharacter('<'); |
|
442 RECONSUME_IN(RCDATAState); |
|
443 } |
|
444 } |
|
445 END_STATE() |
|
446 |
|
447 BEGIN_STATE(RCDATAEndTagOpenState) { |
|
448 if (isASCIIUpper(cc)) { |
|
449 m_temporaryBuffer.append(cc); |
|
450 addToPossibleEndTag(toLowerCase(cc)); |
|
451 ADVANCE_TO(RCDATAEndTagNameState); |
|
452 } else if (isASCIILower(cc)) { |
|
453 m_temporaryBuffer.append(cc); |
|
454 addToPossibleEndTag(cc); |
|
455 ADVANCE_TO(RCDATAEndTagNameState); |
|
456 } else { |
|
457 bufferCharacter('<'); |
|
458 bufferCharacter('/'); |
|
459 RECONSUME_IN(RCDATAState); |
|
460 } |
|
461 } |
|
462 END_STATE() |
|
463 |
|
464 BEGIN_STATE(RCDATAEndTagNameState) { |
|
465 if (isASCIIUpper(cc)) { |
|
466 m_temporaryBuffer.append(cc); |
|
467 addToPossibleEndTag(toLowerCase(cc)); |
|
468 ADVANCE_TO(RCDATAEndTagNameState); |
|
469 } else if (isASCIILower(cc)) { |
|
470 m_temporaryBuffer.append(cc); |
|
471 addToPossibleEndTag(cc); |
|
472 ADVANCE_TO(RCDATAEndTagNameState); |
|
473 } else { |
|
474 if (isTokenizerWhitespace(cc)) { |
|
475 if (isAppropriateEndTag()) |
|
476 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); |
|
477 } else if (cc == '/') { |
|
478 if (isAppropriateEndTag()) |
|
479 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); |
|
480 } else if (cc == '>') { |
|
481 if (isAppropriateEndTag()) |
|
482 return flushEmitAndResumeIn(source, DataState); |
|
483 } |
|
484 bufferCharacter('<'); |
|
485 bufferCharacter('/'); |
|
486 m_token->appendToCharacter(m_temporaryBuffer); |
|
487 m_bufferedEndTagName.clear(); |
|
488 RECONSUME_IN(RCDATAState); |
|
489 } |
|
490 } |
|
491 END_STATE() |
|
492 |
|
493 BEGIN_STATE(RAWTEXTLessThanSignState) { |
|
494 if (cc == '/') { |
|
495 m_temporaryBuffer.clear(); |
|
496 ASSERT(m_bufferedEndTagName.isEmpty()); |
|
497 ADVANCE_TO(RAWTEXTEndTagOpenState); |
|
498 } else { |
|
499 bufferCharacter('<'); |
|
500 RECONSUME_IN(RAWTEXTState); |
|
501 } |
|
502 } |
|
503 END_STATE() |
|
504 |
|
505 BEGIN_STATE(RAWTEXTEndTagOpenState) { |
|
506 if (isASCIIUpper(cc)) { |
|
507 m_temporaryBuffer.append(cc); |
|
508 addToPossibleEndTag(toLowerCase(cc)); |
|
509 ADVANCE_TO(RAWTEXTEndTagNameState); |
|
510 } else if (isASCIILower(cc)) { |
|
511 m_temporaryBuffer.append(cc); |
|
512 addToPossibleEndTag(cc); |
|
513 ADVANCE_TO(RAWTEXTEndTagNameState); |
|
514 } else { |
|
515 bufferCharacter('<'); |
|
516 bufferCharacter('/'); |
|
517 RECONSUME_IN(RAWTEXTState); |
|
518 } |
|
519 } |
|
520 END_STATE() |
|
521 |
|
522 BEGIN_STATE(RAWTEXTEndTagNameState) { |
|
523 if (isASCIIUpper(cc)) { |
|
524 m_temporaryBuffer.append(cc); |
|
525 addToPossibleEndTag(toLowerCase(cc)); |
|
526 ADVANCE_TO(RAWTEXTEndTagNameState); |
|
527 } else if (isASCIILower(cc)) { |
|
528 m_temporaryBuffer.append(cc); |
|
529 addToPossibleEndTag(cc); |
|
530 ADVANCE_TO(RAWTEXTEndTagNameState); |
|
531 } else { |
|
532 if (isTokenizerWhitespace(cc)) { |
|
533 if (isAppropriateEndTag()) |
|
534 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); |
|
535 } else if (cc == '/') { |
|
536 if (isAppropriateEndTag()) |
|
537 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); |
|
538 } else if (cc == '>') { |
|
539 if (isAppropriateEndTag()) |
|
540 return flushEmitAndResumeIn(source, DataState); |
|
541 } |
|
542 bufferCharacter('<'); |
|
543 bufferCharacter('/'); |
|
544 m_token->appendToCharacter(m_temporaryBuffer); |
|
545 m_bufferedEndTagName.clear(); |
|
546 RECONSUME_IN(RAWTEXTState); |
|
547 } |
|
548 } |
|
549 END_STATE() |
|
550 |
|
551 BEGIN_STATE(ScriptDataLessThanSignState) { |
|
552 if (cc == '/') { |
|
553 m_temporaryBuffer.clear(); |
|
554 ASSERT(m_bufferedEndTagName.isEmpty()); |
|
555 ADVANCE_TO(ScriptDataEndTagOpenState); |
|
556 } else if (cc == '!') { |
|
557 bufferCharacter('<'); |
|
558 bufferCharacter('!'); |
|
559 ADVANCE_TO(ScriptDataEscapeStartState); |
|
560 } else { |
|
561 bufferCharacter('<'); |
|
562 RECONSUME_IN(ScriptDataState); |
|
563 } |
|
564 } |
|
565 END_STATE() |
|
566 |
|
567 BEGIN_STATE(ScriptDataEndTagOpenState) { |
|
568 if (isASCIIUpper(cc)) { |
|
569 m_temporaryBuffer.append(cc); |
|
570 addToPossibleEndTag(toLowerCase(cc)); |
|
571 ADVANCE_TO(ScriptDataEndTagNameState); |
|
572 } else if (isASCIILower(cc)) { |
|
573 m_temporaryBuffer.append(cc); |
|
574 addToPossibleEndTag(cc); |
|
575 ADVANCE_TO(ScriptDataEndTagNameState); |
|
576 } else { |
|
577 bufferCharacter('<'); |
|
578 bufferCharacter('/'); |
|
579 RECONSUME_IN(ScriptDataState); |
|
580 } |
|
581 } |
|
582 END_STATE() |
|
583 |
|
584 BEGIN_STATE(ScriptDataEndTagNameState) { |
|
585 if (isASCIIUpper(cc)) { |
|
586 m_temporaryBuffer.append(cc); |
|
587 addToPossibleEndTag(toLowerCase(cc)); |
|
588 ADVANCE_TO(ScriptDataEndTagNameState); |
|
589 } else if (isASCIILower(cc)) { |
|
590 m_temporaryBuffer.append(cc); |
|
591 addToPossibleEndTag(cc); |
|
592 ADVANCE_TO(ScriptDataEndTagNameState); |
|
593 } else { |
|
594 if (isTokenizerWhitespace(cc)) { |
|
595 if (isAppropriateEndTag()) |
|
596 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); |
|
597 } else if (cc == '/') { |
|
598 if (isAppropriateEndTag()) |
|
599 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); |
|
600 } else if (cc == '>') { |
|
601 if (isAppropriateEndTag()) |
|
602 return flushEmitAndResumeIn(source, DataState); |
|
603 } |
|
604 bufferCharacter('<'); |
|
605 bufferCharacter('/'); |
|
606 m_token->appendToCharacter(m_temporaryBuffer); |
|
607 m_bufferedEndTagName.clear(); |
|
608 RECONSUME_IN(ScriptDataState); |
|
609 } |
|
610 } |
|
611 END_STATE() |
|
612 |
|
613 BEGIN_STATE(ScriptDataEscapeStartState) { |
|
614 if (cc == '-') { |
|
615 bufferCharacter(cc); |
|
616 ADVANCE_TO(ScriptDataEscapeStartDashState); |
|
617 } else |
|
618 RECONSUME_IN(ScriptDataState); |
|
619 } |
|
620 END_STATE() |
|
621 |
|
622 BEGIN_STATE(ScriptDataEscapeStartDashState) { |
|
623 if (cc == '-') { |
|
624 bufferCharacter(cc); |
|
625 ADVANCE_TO(ScriptDataEscapedDashDashState); |
|
626 } else |
|
627 RECONSUME_IN(ScriptDataState); |
|
628 } |
|
629 END_STATE() |
|
630 |
|
631 BEGIN_STATE(ScriptDataEscapedState) { |
|
632 if (cc == '-') { |
|
633 bufferCharacter(cc); |
|
634 ADVANCE_TO(ScriptDataEscapedDashState); |
|
635 } else if (cc == '<') |
|
636 ADVANCE_TO(ScriptDataEscapedLessThanSignState); |
|
637 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
638 parseError(); |
|
639 RECONSUME_IN(DataState); |
|
640 } else { |
|
641 bufferCharacter(cc); |
|
642 ADVANCE_TO(ScriptDataEscapedState); |
|
643 } |
|
644 } |
|
645 END_STATE() |
|
646 |
|
647 BEGIN_STATE(ScriptDataEscapedDashState) { |
|
648 if (cc == '-') { |
|
649 bufferCharacter(cc); |
|
650 ADVANCE_TO(ScriptDataEscapedDashDashState); |
|
651 } else if (cc == '<') |
|
652 ADVANCE_TO(ScriptDataEscapedLessThanSignState); |
|
653 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
654 parseError(); |
|
655 RECONSUME_IN(DataState); |
|
656 } else { |
|
657 bufferCharacter(cc); |
|
658 ADVANCE_TO(ScriptDataEscapedState); |
|
659 } |
|
660 } |
|
661 END_STATE() |
|
662 |
|
663 BEGIN_STATE(ScriptDataEscapedDashDashState) { |
|
664 if (cc == '-') { |
|
665 bufferCharacter(cc); |
|
666 ADVANCE_TO(ScriptDataEscapedDashDashState); |
|
667 } else if (cc == '<') |
|
668 ADVANCE_TO(ScriptDataEscapedLessThanSignState); |
|
669 else if (cc == '>') { |
|
670 bufferCharacter(cc); |
|
671 ADVANCE_TO(ScriptDataState); |
|
672 } if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
673 parseError(); |
|
674 RECONSUME_IN(DataState); |
|
675 } else { |
|
676 bufferCharacter(cc); |
|
677 ADVANCE_TO(ScriptDataEscapedState); |
|
678 } |
|
679 } |
|
680 END_STATE() |
|
681 |
|
682 BEGIN_STATE(ScriptDataEscapedLessThanSignState) { |
|
683 if (cc == '/') { |
|
684 m_temporaryBuffer.clear(); |
|
685 ASSERT(m_bufferedEndTagName.isEmpty()); |
|
686 ADVANCE_TO(ScriptDataEscapedEndTagOpenState); |
|
687 } else if (isASCIIUpper(cc)) { |
|
688 bufferCharacter('<'); |
|
689 bufferCharacter(cc); |
|
690 m_temporaryBuffer.clear(); |
|
691 m_temporaryBuffer.append(toLowerCase(cc)); |
|
692 ADVANCE_TO(ScriptDataDoubleEscapeStartState); |
|
693 } else if (isASCIILower(cc)) { |
|
694 bufferCharacter('<'); |
|
695 bufferCharacter(cc); |
|
696 m_temporaryBuffer.clear(); |
|
697 m_temporaryBuffer.append(cc); |
|
698 ADVANCE_TO(ScriptDataDoubleEscapeStartState); |
|
699 } else { |
|
700 bufferCharacter('<'); |
|
701 RECONSUME_IN(ScriptDataEscapedState); |
|
702 } |
|
703 } |
|
704 END_STATE() |
|
705 |
|
706 BEGIN_STATE(ScriptDataEscapedEndTagOpenState) { |
|
707 if (isASCIIUpper(cc)) { |
|
708 m_temporaryBuffer.append(cc); |
|
709 addToPossibleEndTag(toLowerCase(cc)); |
|
710 ADVANCE_TO(ScriptDataEscapedEndTagNameState); |
|
711 } else if (isASCIILower(cc)) { |
|
712 m_temporaryBuffer.append(cc); |
|
713 addToPossibleEndTag(cc); |
|
714 ADVANCE_TO(ScriptDataEscapedEndTagNameState); |
|
715 } else { |
|
716 bufferCharacter('<'); |
|
717 bufferCharacter('/'); |
|
718 RECONSUME_IN(ScriptDataEscapedState); |
|
719 } |
|
720 } |
|
721 END_STATE() |
|
722 |
|
723 BEGIN_STATE(ScriptDataEscapedEndTagNameState) { |
|
724 if (isASCIIUpper(cc)) { |
|
725 m_temporaryBuffer.append(cc); |
|
726 addToPossibleEndTag(toLowerCase(cc)); |
|
727 ADVANCE_TO(ScriptDataEscapedEndTagNameState); |
|
728 } else if (isASCIILower(cc)) { |
|
729 m_temporaryBuffer.append(cc); |
|
730 addToPossibleEndTag(cc); |
|
731 ADVANCE_TO(ScriptDataEscapedEndTagNameState); |
|
732 } else { |
|
733 if (isTokenizerWhitespace(cc)) { |
|
734 if (isAppropriateEndTag()) |
|
735 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); |
|
736 } else if (cc == '/') { |
|
737 if (isAppropriateEndTag()) |
|
738 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); |
|
739 } else if (cc == '>') { |
|
740 if (isAppropriateEndTag()) |
|
741 return flushEmitAndResumeIn(source, DataState); |
|
742 } |
|
743 bufferCharacter('<'); |
|
744 bufferCharacter('/'); |
|
745 m_token->appendToCharacter(m_temporaryBuffer); |
|
746 m_bufferedEndTagName.clear(); |
|
747 RECONSUME_IN(ScriptDataEscapedState); |
|
748 } |
|
749 } |
|
750 END_STATE() |
|
751 |
|
752 BEGIN_STATE(ScriptDataDoubleEscapeStartState) { |
|
753 if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') { |
|
754 bufferCharacter(cc); |
|
755 if (temporaryBufferIs(scriptTag.localName())) |
|
756 ADVANCE_TO(ScriptDataDoubleEscapedState); |
|
757 else |
|
758 ADVANCE_TO(ScriptDataEscapedState); |
|
759 } else if (isASCIIUpper(cc)) { |
|
760 bufferCharacter(cc); |
|
761 m_temporaryBuffer.append(toLowerCase(cc)); |
|
762 ADVANCE_TO(ScriptDataDoubleEscapeStartState); |
|
763 } else if (isASCIILower(cc)) { |
|
764 bufferCharacter(cc); |
|
765 m_temporaryBuffer.append(cc); |
|
766 ADVANCE_TO(ScriptDataDoubleEscapeStartState); |
|
767 } else |
|
768 RECONSUME_IN(ScriptDataEscapedState); |
|
769 } |
|
770 END_STATE() |
|
771 |
|
772 BEGIN_STATE(ScriptDataDoubleEscapedState) { |
|
773 if (cc == '-') { |
|
774 bufferCharacter(cc); |
|
775 ADVANCE_TO(ScriptDataDoubleEscapedDashState); |
|
776 } else if (cc == '<') { |
|
777 bufferCharacter(cc); |
|
778 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); |
|
779 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
780 parseError(); |
|
781 RECONSUME_IN(DataState); |
|
782 } else { |
|
783 bufferCharacter(cc); |
|
784 ADVANCE_TO(ScriptDataDoubleEscapedState); |
|
785 } |
|
786 } |
|
787 END_STATE() |
|
788 |
|
789 BEGIN_STATE(ScriptDataDoubleEscapedDashState) { |
|
790 if (cc == '-') { |
|
791 bufferCharacter(cc); |
|
792 ADVANCE_TO(ScriptDataDoubleEscapedDashDashState); |
|
793 } else if (cc == '<') { |
|
794 bufferCharacter(cc); |
|
795 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); |
|
796 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
797 parseError(); |
|
798 RECONSUME_IN(DataState); |
|
799 } else { |
|
800 bufferCharacter(cc); |
|
801 ADVANCE_TO(ScriptDataDoubleEscapedState); |
|
802 } |
|
803 } |
|
804 END_STATE() |
|
805 |
|
806 BEGIN_STATE(ScriptDataDoubleEscapedDashDashState) { |
|
807 if (cc == '-') { |
|
808 bufferCharacter(cc); |
|
809 ADVANCE_TO(ScriptDataDoubleEscapedDashDashState); |
|
810 } else if (cc == '<') { |
|
811 bufferCharacter(cc); |
|
812 ADVANCE_TO(ScriptDataDoubleEscapedLessThanSignState); |
|
813 } else if (cc == '>') { |
|
814 bufferCharacter(cc); |
|
815 ADVANCE_TO(ScriptDataState); |
|
816 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
817 parseError(); |
|
818 RECONSUME_IN(DataState); |
|
819 } else { |
|
820 bufferCharacter(cc); |
|
821 ADVANCE_TO(ScriptDataDoubleEscapedState); |
|
822 } |
|
823 } |
|
824 END_STATE() |
|
825 |
|
826 BEGIN_STATE(ScriptDataDoubleEscapedLessThanSignState) { |
|
827 if (cc == '/') { |
|
828 bufferCharacter(cc); |
|
829 m_temporaryBuffer.clear(); |
|
830 ADVANCE_TO(ScriptDataDoubleEscapeEndState); |
|
831 } else |
|
832 RECONSUME_IN(ScriptDataDoubleEscapedState); |
|
833 } |
|
834 END_STATE() |
|
835 |
|
836 BEGIN_STATE(ScriptDataDoubleEscapeEndState) { |
|
837 if (isTokenizerWhitespace(cc) || cc == '/' || cc == '>') { |
|
838 bufferCharacter(cc); |
|
839 if (temporaryBufferIs(scriptTag.localName())) |
|
840 ADVANCE_TO(ScriptDataEscapedState); |
|
841 else |
|
842 ADVANCE_TO(ScriptDataDoubleEscapedState); |
|
843 } else if (isASCIIUpper(cc)) { |
|
844 bufferCharacter(cc); |
|
845 m_temporaryBuffer.append(toLowerCase(cc)); |
|
846 ADVANCE_TO(ScriptDataDoubleEscapeEndState); |
|
847 } else if (isASCIILower(cc)) { |
|
848 bufferCharacter(cc); |
|
849 m_temporaryBuffer.append(cc); |
|
850 ADVANCE_TO(ScriptDataDoubleEscapeEndState); |
|
851 } else |
|
852 RECONSUME_IN(ScriptDataDoubleEscapedState); |
|
853 } |
|
854 END_STATE() |
|
855 |
|
856 BEGIN_STATE(BeforeAttributeNameState) { |
|
857 if (isTokenizerWhitespace(cc)) |
|
858 ADVANCE_TO(BeforeAttributeNameState); |
|
859 else if (cc == '/') |
|
860 ADVANCE_TO(SelfClosingStartTagState); |
|
861 else if (cc == '>') |
|
862 return emitAndResumeIn(source, DataState); |
|
863 else if (isASCIIUpper(cc)) { |
|
864 m_token->addNewAttribute(); |
|
865 m_token->appendToAttributeName(toLowerCase(cc)); |
|
866 ADVANCE_TO(AttributeNameState); |
|
867 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
868 parseError(); |
|
869 RECONSUME_IN(DataState); |
|
870 } else { |
|
871 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') |
|
872 parseError(); |
|
873 m_token->addNewAttribute(); |
|
874 m_token->appendToAttributeName(cc); |
|
875 ADVANCE_TO(AttributeNameState); |
|
876 } |
|
877 } |
|
878 END_STATE() |
|
879 |
|
880 BEGIN_STATE(AttributeNameState) { |
|
881 if (isTokenizerWhitespace(cc)) |
|
882 ADVANCE_TO(AfterAttributeNameState); |
|
883 else if (cc == '/') |
|
884 ADVANCE_TO(SelfClosingStartTagState); |
|
885 else if (cc == '=') |
|
886 ADVANCE_TO(BeforeAttributeValueState); |
|
887 else if (cc == '>') |
|
888 return emitAndResumeIn(source, DataState); |
|
889 else if (isASCIIUpper(cc)) { |
|
890 m_token->appendToAttributeName(toLowerCase(cc)); |
|
891 ADVANCE_TO(AttributeNameState); |
|
892 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
893 parseError(); |
|
894 RECONSUME_IN(DataState); |
|
895 } else { |
|
896 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') |
|
897 parseError(); |
|
898 m_token->appendToAttributeName(cc); |
|
899 ADVANCE_TO(AttributeNameState); |
|
900 } |
|
901 } |
|
902 END_STATE() |
|
903 |
|
904 BEGIN_STATE(AfterAttributeNameState) { |
|
905 if (isTokenizerWhitespace(cc)) |
|
906 ADVANCE_TO(AfterAttributeNameState); |
|
907 else if (cc == '/') |
|
908 ADVANCE_TO(SelfClosingStartTagState); |
|
909 else if (cc == '=') |
|
910 ADVANCE_TO(BeforeAttributeValueState); |
|
911 else if (cc == '>') |
|
912 return emitAndResumeIn(source, DataState); |
|
913 else if (isASCIIUpper(cc)) { |
|
914 m_token->addNewAttribute(); |
|
915 m_token->appendToAttributeName(toLowerCase(cc)); |
|
916 ADVANCE_TO(AttributeNameState); |
|
917 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
918 parseError(); |
|
919 RECONSUME_IN(DataState); |
|
920 } else { |
|
921 if (cc == '"' || cc == '\'' || cc == '<') |
|
922 parseError(); |
|
923 m_token->addNewAttribute(); |
|
924 m_token->appendToAttributeName(cc); |
|
925 ADVANCE_TO(AttributeNameState); |
|
926 } |
|
927 } |
|
928 END_STATE() |
|
929 |
|
930 BEGIN_STATE(BeforeAttributeValueState) { |
|
931 if (isTokenizerWhitespace(cc)) |
|
932 ADVANCE_TO(BeforeAttributeValueState); |
|
933 else if (cc == '"') |
|
934 ADVANCE_TO(AttributeValueDoubleQuotedState); |
|
935 else if (cc == '&') |
|
936 RECONSUME_IN(AttributeValueUnquotedState); |
|
937 else if (cc == '\'') |
|
938 ADVANCE_TO(AttributeValueSingleQuotedState); |
|
939 else if (cc == '>') { |
|
940 parseError(); |
|
941 return emitAndResumeIn(source, DataState); |
|
942 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
943 parseError(); |
|
944 RECONSUME_IN(DataState); |
|
945 } else { |
|
946 if (cc == '<' || cc == '=' || cc == '`') |
|
947 parseError(); |
|
948 m_token->appendToAttributeValue(cc); |
|
949 ADVANCE_TO(AttributeValueUnquotedState); |
|
950 } |
|
951 } |
|
952 END_STATE() |
|
953 |
|
954 BEGIN_STATE(AttributeValueDoubleQuotedState) { |
|
955 if (cc == '"') |
|
956 ADVANCE_TO(AfterAttributeValueQuotedState); |
|
957 else if (cc == '&') { |
|
958 m_additionalAllowedCharacter = '"'; |
|
959 ADVANCE_TO(CharacterReferenceInAttributeValueState); |
|
960 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
961 parseError(); |
|
962 RECONSUME_IN(DataState); |
|
963 } else { |
|
964 m_token->appendToAttributeValue(cc); |
|
965 ADVANCE_TO(AttributeValueDoubleQuotedState); |
|
966 } |
|
967 } |
|
968 END_STATE() |
|
969 |
|
970 BEGIN_STATE(AttributeValueSingleQuotedState) { |
|
971 if (cc == '\'') |
|
972 ADVANCE_TO(AfterAttributeValueQuotedState); |
|
973 else if (cc == '&') { |
|
974 m_additionalAllowedCharacter = '\''; |
|
975 ADVANCE_TO(CharacterReferenceInAttributeValueState); |
|
976 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
977 parseError(); |
|
978 RECONSUME_IN(DataState); |
|
979 } else { |
|
980 m_token->appendToAttributeValue(cc); |
|
981 ADVANCE_TO(AttributeValueSingleQuotedState); |
|
982 } |
|
983 } |
|
984 END_STATE() |
|
985 |
|
986 BEGIN_STATE(AttributeValueUnquotedState) { |
|
987 if (isTokenizerWhitespace(cc)) |
|
988 ADVANCE_TO(BeforeAttributeNameState); |
|
989 else if (cc == '&') { |
|
990 m_additionalAllowedCharacter = '>'; |
|
991 ADVANCE_TO(CharacterReferenceInAttributeValueState); |
|
992 } else if (cc == '>') |
|
993 return emitAndResumeIn(source, DataState); |
|
994 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
995 parseError(); |
|
996 RECONSUME_IN(DataState); |
|
997 } else { |
|
998 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') |
|
999 parseError(); |
|
1000 m_token->appendToAttributeValue(cc); |
|
1001 ADVANCE_TO(AttributeValueUnquotedState); |
|
1002 } |
|
1003 } |
|
1004 END_STATE() |
|
1005 |
|
1006 BEGIN_STATE(CharacterReferenceInAttributeValueState) { |
|
1007 bool notEnoughCharacters = false; |
|
1008 unsigned value = consumeHTMLEntity(source, notEnoughCharacters, m_additionalAllowedCharacter); |
|
1009 if (notEnoughCharacters) |
|
1010 return shouldEmitBufferedCharacterToken(source); |
|
1011 if (!value) |
|
1012 m_token->appendToAttributeValue('&'); |
|
1013 else if (value < 0xFFFF) |
|
1014 m_token->appendToAttributeValue(value); |
|
1015 else { |
|
1016 m_token->appendToAttributeValue(U16_LEAD(value)); |
|
1017 m_token->appendToAttributeValue(U16_TRAIL(value)); |
|
1018 } |
|
1019 // We're supposed to switch back to the attribute value state that |
|
1020 // we were in when we were switched into this state. Rather than |
|
1021 // keeping track of this explicitly, we observe that the previous |
|
1022 // state can be determined by m_additionalAllowedCharacter. |
|
1023 if (m_additionalAllowedCharacter == '"') |
|
1024 SWITCH_TO(AttributeValueDoubleQuotedState); |
|
1025 else if (m_additionalAllowedCharacter == '\'') |
|
1026 SWITCH_TO(AttributeValueSingleQuotedState); |
|
1027 else if (m_additionalAllowedCharacter == '>') |
|
1028 SWITCH_TO(AttributeValueUnquotedState); |
|
1029 else |
|
1030 ASSERT_NOT_REACHED(); |
|
1031 } |
|
1032 END_STATE() |
|
1033 |
|
1034 BEGIN_STATE(AfterAttributeValueQuotedState) { |
|
1035 if (isTokenizerWhitespace(cc)) |
|
1036 ADVANCE_TO(BeforeAttributeNameState); |
|
1037 else if (cc == '/') |
|
1038 ADVANCE_TO(SelfClosingStartTagState); |
|
1039 else if (cc == '>') |
|
1040 return emitAndResumeIn(source, DataState); |
|
1041 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1042 parseError(); |
|
1043 RECONSUME_IN(DataState); |
|
1044 } else { |
|
1045 parseError(); |
|
1046 RECONSUME_IN(BeforeAttributeNameState); |
|
1047 } |
|
1048 } |
|
1049 END_STATE() |
|
1050 |
|
1051 BEGIN_STATE(SelfClosingStartTagState) { |
|
1052 if (cc == '>') { |
|
1053 m_token->setSelfClosing(); |
|
1054 return emitAndResumeIn(source, DataState); |
|
1055 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1056 parseError(); |
|
1057 RECONSUME_IN(DataState); |
|
1058 } else { |
|
1059 parseError(); |
|
1060 RECONSUME_IN(BeforeAttributeNameState); |
|
1061 } |
|
1062 } |
|
1063 END_STATE() |
|
1064 |
|
1065 BEGIN_STATE(BogusCommentState) { |
|
1066 m_token->beginComment(); |
|
1067 RECONSUME_IN(ContinueBogusCommentState); |
|
1068 } |
|
1069 END_STATE() |
|
1070 |
|
1071 BEGIN_STATE(ContinueBogusCommentState) { |
|
1072 if (cc == '>') |
|
1073 return emitAndResumeIn(source, DataState); |
|
1074 else if (cc == InputStreamPreprocessor::endOfFileMarker) |
|
1075 return emitAndReconsumeIn(source, DataState); |
|
1076 else { |
|
1077 m_token->appendToComment(cc); |
|
1078 ADVANCE_TO(ContinueBogusCommentState); |
|
1079 } |
|
1080 } |
|
1081 END_STATE() |
|
1082 |
|
1083 BEGIN_STATE(MarkupDeclarationOpenState) { |
|
1084 DEFINE_STATIC_LOCAL(String, dashDashString, ("--")); |
|
1085 DEFINE_STATIC_LOCAL(String, doctypeString, ("doctype")); |
|
1086 if (cc == '-') { |
|
1087 SegmentedString::LookAheadResult result = source.lookAhead(dashDashString); |
|
1088 if (result == SegmentedString::DidMatch) { |
|
1089 source.advanceAndASSERT('-'); |
|
1090 source.advanceAndASSERT('-'); |
|
1091 m_token->beginComment(); |
|
1092 SWITCH_TO(CommentStartState); |
|
1093 } else if (result == SegmentedString::NotEnoughCharacters) |
|
1094 return shouldEmitBufferedCharacterToken(source); |
|
1095 } else if (cc == 'D' || cc == 'd') { |
|
1096 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString); |
|
1097 if (result == SegmentedString::DidMatch) { |
|
1098 advanceStringAndASSERTIgnoringCase(source, "doctype"); |
|
1099 SWITCH_TO(DOCTYPEState); |
|
1100 } else if (result == SegmentedString::NotEnoughCharacters) |
|
1101 return shouldEmitBufferedCharacterToken(source); |
|
1102 } |
|
1103 notImplemented(); |
|
1104 // FIXME: We're still missing the bits about the insertion mode being in foreign content: |
|
1105 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state |
|
1106 parseError(); |
|
1107 RECONSUME_IN(BogusCommentState); |
|
1108 } |
|
1109 END_STATE() |
|
1110 |
|
1111 BEGIN_STATE(CommentStartState) { |
|
1112 if (cc == '-') |
|
1113 ADVANCE_TO(CommentStartDashState); |
|
1114 else if (cc == '>') { |
|
1115 parseError(); |
|
1116 return emitAndResumeIn(source, DataState); |
|
1117 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1118 parseError(); |
|
1119 return emitAndReconsumeIn(source, DataState); |
|
1120 } else { |
|
1121 m_token->appendToComment(cc); |
|
1122 ADVANCE_TO(CommentState); |
|
1123 } |
|
1124 } |
|
1125 END_STATE() |
|
1126 |
|
1127 BEGIN_STATE(CommentStartDashState) { |
|
1128 if (cc == '-') |
|
1129 ADVANCE_TO(CommentEndState); |
|
1130 else if (cc == '>') { |
|
1131 parseError(); |
|
1132 return emitAndResumeIn(source, DataState); |
|
1133 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1134 parseError(); |
|
1135 return emitAndReconsumeIn(source, DataState); |
|
1136 } else { |
|
1137 m_token->appendToComment('-'); |
|
1138 m_token->appendToComment(cc); |
|
1139 ADVANCE_TO(CommentState); |
|
1140 } |
|
1141 } |
|
1142 END_STATE() |
|
1143 |
|
1144 BEGIN_STATE(CommentState) { |
|
1145 if (cc == '-') |
|
1146 ADVANCE_TO(CommentEndDashState); |
|
1147 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1148 parseError(); |
|
1149 return emitAndReconsumeIn(source, DataState); |
|
1150 } else { |
|
1151 m_token->appendToComment(cc); |
|
1152 ADVANCE_TO(CommentState); |
|
1153 } |
|
1154 } |
|
1155 END_STATE() |
|
1156 |
|
1157 BEGIN_STATE(CommentEndDashState) { |
|
1158 if (cc == '-') |
|
1159 ADVANCE_TO(CommentEndState); |
|
1160 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1161 parseError(); |
|
1162 return emitAndReconsumeIn(source, DataState); |
|
1163 } else { |
|
1164 m_token->appendToComment('-'); |
|
1165 m_token->appendToComment(cc); |
|
1166 ADVANCE_TO(CommentState); |
|
1167 } |
|
1168 } |
|
1169 END_STATE() |
|
1170 |
|
1171 BEGIN_STATE(CommentEndState) { |
|
1172 if (cc == '>') |
|
1173 return emitAndResumeIn(source, DataState); |
|
1174 else if (isTokenizerWhitespace(cc)) { |
|
1175 parseError(); |
|
1176 m_token->appendToComment('-'); |
|
1177 m_token->appendToComment('-'); |
|
1178 m_token->appendToComment(cc); |
|
1179 ADVANCE_TO(CommentEndSpaceState); |
|
1180 } else if (cc == '!') { |
|
1181 parseError(); |
|
1182 ADVANCE_TO(CommentEndBangState); |
|
1183 } else if (cc == '-') { |
|
1184 parseError(); |
|
1185 m_token->appendToComment('-'); |
|
1186 ADVANCE_TO(CommentEndState); |
|
1187 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1188 parseError(); |
|
1189 return emitAndReconsumeIn(source, DataState); |
|
1190 } else { |
|
1191 parseError(); |
|
1192 m_token->appendToComment('-'); |
|
1193 m_token->appendToComment('-'); |
|
1194 m_token->appendToComment(cc); |
|
1195 ADVANCE_TO(CommentState); |
|
1196 } |
|
1197 } |
|
1198 END_STATE() |
|
1199 |
|
1200 BEGIN_STATE(CommentEndBangState) { |
|
1201 if (cc == '-') { |
|
1202 m_token->appendToComment('-'); |
|
1203 m_token->appendToComment('-'); |
|
1204 m_token->appendToComment('!'); |
|
1205 ADVANCE_TO(CommentEndDashState); |
|
1206 } else if (cc == '>') |
|
1207 return emitAndResumeIn(source, DataState); |
|
1208 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1209 parseError(); |
|
1210 return emitAndReconsumeIn(source, DataState); |
|
1211 } else { |
|
1212 m_token->appendToComment('-'); |
|
1213 m_token->appendToComment('-'); |
|
1214 m_token->appendToComment('!'); |
|
1215 m_token->appendToComment(cc); |
|
1216 ADVANCE_TO(CommentState); |
|
1217 } |
|
1218 } |
|
1219 END_STATE() |
|
1220 |
|
1221 BEGIN_STATE(CommentEndSpaceState) { |
|
1222 if (isTokenizerWhitespace(cc)) { |
|
1223 m_token->appendToComment(cc); |
|
1224 ADVANCE_TO(CommentEndSpaceState); |
|
1225 } else if (cc == '-') |
|
1226 ADVANCE_TO(CommentEndDashState); |
|
1227 else if (cc == '>') |
|
1228 return emitAndResumeIn(source, DataState); |
|
1229 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1230 parseError(); |
|
1231 return emitAndReconsumeIn(source, DataState); |
|
1232 } else { |
|
1233 m_token->appendToComment(cc); |
|
1234 ADVANCE_TO(CommentState); |
|
1235 } |
|
1236 } |
|
1237 END_STATE() |
|
1238 |
|
1239 BEGIN_STATE(DOCTYPEState) { |
|
1240 if (isTokenizerWhitespace(cc)) |
|
1241 ADVANCE_TO(BeforeDOCTYPENameState); |
|
1242 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1243 parseError(); |
|
1244 m_token->beginDOCTYPE(); |
|
1245 m_token->setForceQuirks(); |
|
1246 return emitAndReconsumeIn(source, DataState); |
|
1247 } else { |
|
1248 parseError(); |
|
1249 RECONSUME_IN(BeforeDOCTYPENameState); |
|
1250 } |
|
1251 } |
|
1252 END_STATE() |
|
1253 |
|
1254 BEGIN_STATE(BeforeDOCTYPENameState) { |
|
1255 if (isTokenizerWhitespace(cc)) |
|
1256 ADVANCE_TO(BeforeDOCTYPENameState); |
|
1257 else if (isASCIIUpper(cc)) { |
|
1258 m_token->beginDOCTYPE(toLowerCase(cc)); |
|
1259 ADVANCE_TO(DOCTYPENameState); |
|
1260 } else if (cc == '>') { |
|
1261 parseError(); |
|
1262 m_token->beginDOCTYPE(); |
|
1263 m_token->setForceQuirks(); |
|
1264 return emitAndResumeIn(source, DataState); |
|
1265 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1266 parseError(); |
|
1267 m_token->beginDOCTYPE(); |
|
1268 m_token->setForceQuirks(); |
|
1269 return emitAndReconsumeIn(source, DataState); |
|
1270 } else { |
|
1271 m_token->beginDOCTYPE(cc); |
|
1272 ADVANCE_TO(DOCTYPENameState); |
|
1273 } |
|
1274 } |
|
1275 END_STATE() |
|
1276 |
|
1277 BEGIN_STATE(DOCTYPENameState) { |
|
1278 if (isTokenizerWhitespace(cc)) |
|
1279 ADVANCE_TO(AfterDOCTYPENameState); |
|
1280 else if (cc == '>') |
|
1281 return emitAndResumeIn(source, DataState); |
|
1282 else if (isASCIIUpper(cc)) { |
|
1283 m_token->appendToName(toLowerCase(cc)); |
|
1284 ADVANCE_TO(DOCTYPENameState); |
|
1285 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1286 parseError(); |
|
1287 m_token->setForceQuirks(); |
|
1288 return emitAndReconsumeIn(source, DataState); |
|
1289 } else { |
|
1290 m_token->appendToName(cc); |
|
1291 ADVANCE_TO(DOCTYPENameState); |
|
1292 } |
|
1293 } |
|
1294 END_STATE() |
|
1295 |
|
1296 BEGIN_STATE(AfterDOCTYPENameState) { |
|
1297 if (isTokenizerWhitespace(cc)) |
|
1298 ADVANCE_TO(AfterDOCTYPENameState); |
|
1299 if (cc == '>') |
|
1300 return emitAndResumeIn(source, DataState); |
|
1301 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1302 parseError(); |
|
1303 m_token->setForceQuirks(); |
|
1304 return emitAndReconsumeIn(source, DataState); |
|
1305 } else { |
|
1306 DEFINE_STATIC_LOCAL(String, publicString, ("public")); |
|
1307 DEFINE_STATIC_LOCAL(String, systemString, ("system")); |
|
1308 if (cc == 'P' || cc == 'p') { |
|
1309 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString); |
|
1310 if (result == SegmentedString::DidMatch) { |
|
1311 advanceStringAndASSERTIgnoringCase(source, "public"); |
|
1312 SWITCH_TO(AfterDOCTYPEPublicKeywordState); |
|
1313 } else if (result == SegmentedString::NotEnoughCharacters) |
|
1314 return shouldEmitBufferedCharacterToken(source); |
|
1315 } else if (cc == 'S' || cc == 's') { |
|
1316 SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString); |
|
1317 if (result == SegmentedString::DidMatch) { |
|
1318 advanceStringAndASSERTIgnoringCase(source, "system"); |
|
1319 SWITCH_TO(AfterDOCTYPESystemKeywordState); |
|
1320 } else if (result == SegmentedString::NotEnoughCharacters) |
|
1321 return shouldEmitBufferedCharacterToken(source); |
|
1322 } |
|
1323 parseError(); |
|
1324 m_token->setForceQuirks(); |
|
1325 ADVANCE_TO(BogusDOCTYPEState); |
|
1326 } |
|
1327 } |
|
1328 END_STATE() |
|
1329 |
|
1330 BEGIN_STATE(AfterDOCTYPEPublicKeywordState) { |
|
1331 if (isTokenizerWhitespace(cc)) |
|
1332 ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); |
|
1333 else if (cc == '"') { |
|
1334 parseError(); |
|
1335 m_token->setPublicIdentifierToEmptyString(); |
|
1336 ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); |
|
1337 } else if (cc == '\'') { |
|
1338 parseError(); |
|
1339 m_token->setPublicIdentifierToEmptyString(); |
|
1340 ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); |
|
1341 } else if (cc == '>') { |
|
1342 parseError(); |
|
1343 m_token->setForceQuirks(); |
|
1344 return emitAndResumeIn(source, DataState); |
|
1345 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1346 parseError(); |
|
1347 m_token->setForceQuirks(); |
|
1348 return emitAndReconsumeIn(source, DataState); |
|
1349 } else { |
|
1350 parseError(); |
|
1351 m_token->setForceQuirks(); |
|
1352 ADVANCE_TO(BogusDOCTYPEState); |
|
1353 } |
|
1354 } |
|
1355 END_STATE() |
|
1356 |
|
1357 BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) { |
|
1358 if (isTokenizerWhitespace(cc)) |
|
1359 ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); |
|
1360 else if (cc == '"') { |
|
1361 m_token->setPublicIdentifierToEmptyString(); |
|
1362 ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); |
|
1363 } else if (cc == '\'') { |
|
1364 m_token->setPublicIdentifierToEmptyString(); |
|
1365 ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); |
|
1366 } else if (cc == '>') { |
|
1367 parseError(); |
|
1368 m_token->setForceQuirks(); |
|
1369 return emitAndResumeIn(source, DataState); |
|
1370 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1371 parseError(); |
|
1372 m_token->setForceQuirks(); |
|
1373 return emitAndReconsumeIn(source, DataState); |
|
1374 } else { |
|
1375 parseError(); |
|
1376 m_token->setForceQuirks(); |
|
1377 ADVANCE_TO(BogusDOCTYPEState); |
|
1378 } |
|
1379 } |
|
1380 END_STATE() |
|
1381 |
|
1382 BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) { |
|
1383 if (cc == '"') |
|
1384 ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); |
|
1385 else if (cc == '>') { |
|
1386 parseError(); |
|
1387 m_token->setForceQuirks(); |
|
1388 return emitAndResumeIn(source, DataState); |
|
1389 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1390 parseError(); |
|
1391 m_token->setForceQuirks(); |
|
1392 return emitAndReconsumeIn(source, DataState); |
|
1393 } else { |
|
1394 m_token->appendToPublicIdentifier(cc); |
|
1395 ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); |
|
1396 } |
|
1397 } |
|
1398 END_STATE() |
|
1399 |
|
1400 BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) { |
|
1401 if (cc == '\'') |
|
1402 ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); |
|
1403 else if (cc == '>') { |
|
1404 parseError(); |
|
1405 m_token->setForceQuirks(); |
|
1406 return emitAndResumeIn(source, DataState); |
|
1407 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1408 parseError(); |
|
1409 m_token->setForceQuirks(); |
|
1410 return emitAndReconsumeIn(source, DataState); |
|
1411 } else { |
|
1412 m_token->appendToPublicIdentifier(cc); |
|
1413 ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); |
|
1414 } |
|
1415 } |
|
1416 END_STATE() |
|
1417 |
|
1418 BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) { |
|
1419 if (isTokenizerWhitespace(cc)) |
|
1420 ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); |
|
1421 else if (cc == '>') |
|
1422 return emitAndResumeIn(source, DataState); |
|
1423 else if (cc == '"') { |
|
1424 parseError(); |
|
1425 m_token->setSystemIdentifierToEmptyString(); |
|
1426 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
|
1427 } else if (cc == '\'') { |
|
1428 parseError(); |
|
1429 m_token->setSystemIdentifierToEmptyString(); |
|
1430 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
|
1431 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1432 parseError(); |
|
1433 m_token->setForceQuirks(); |
|
1434 return emitAndReconsumeIn(source, DataState); |
|
1435 } else { |
|
1436 parseError(); |
|
1437 m_token->setForceQuirks(); |
|
1438 ADVANCE_TO(BogusDOCTYPEState); |
|
1439 } |
|
1440 } |
|
1441 END_STATE() |
|
1442 |
|
1443 BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) { |
|
1444 if (isTokenizerWhitespace(cc)) |
|
1445 ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); |
|
1446 else if (cc == '>') |
|
1447 return emitAndResumeIn(source, DataState); |
|
1448 else if (cc == '"') { |
|
1449 m_token->setSystemIdentifierToEmptyString(); |
|
1450 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
|
1451 } else if (cc == '\'') { |
|
1452 m_token->setSystemIdentifierToEmptyString(); |
|
1453 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
|
1454 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1455 parseError(); |
|
1456 m_token->setForceQuirks(); |
|
1457 return emitAndReconsumeIn(source, DataState); |
|
1458 } else { |
|
1459 parseError(); |
|
1460 m_token->setForceQuirks(); |
|
1461 ADVANCE_TO(BogusDOCTYPEState); |
|
1462 } |
|
1463 } |
|
1464 END_STATE() |
|
1465 |
|
1466 BEGIN_STATE(AfterDOCTYPESystemKeywordState) { |
|
1467 if (isTokenizerWhitespace(cc)) |
|
1468 ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); |
|
1469 else if (cc == '"') { |
|
1470 parseError(); |
|
1471 m_token->setSystemIdentifierToEmptyString(); |
|
1472 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
|
1473 } else if (cc == '\'') { |
|
1474 parseError(); |
|
1475 m_token->setSystemIdentifierToEmptyString(); |
|
1476 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
|
1477 } else if (cc == '>') { |
|
1478 parseError(); |
|
1479 m_token->setForceQuirks(); |
|
1480 return emitAndResumeIn(source, DataState); |
|
1481 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1482 parseError(); |
|
1483 m_token->setForceQuirks(); |
|
1484 return emitAndReconsumeIn(source, DataState); |
|
1485 } else { |
|
1486 parseError(); |
|
1487 m_token->setForceQuirks(); |
|
1488 ADVANCE_TO(BogusDOCTYPEState); |
|
1489 } |
|
1490 } |
|
1491 END_STATE() |
|
1492 |
|
1493 BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) { |
|
1494 if (isTokenizerWhitespace(cc)) |
|
1495 ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); |
|
1496 if (cc == '"') { |
|
1497 m_token->setSystemIdentifierToEmptyString(); |
|
1498 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
|
1499 } else if (cc == '\'') { |
|
1500 m_token->setSystemIdentifierToEmptyString(); |
|
1501 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
|
1502 } else if (cc == '>') { |
|
1503 parseError(); |
|
1504 m_token->setForceQuirks(); |
|
1505 return emitAndResumeIn(source, DataState); |
|
1506 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1507 parseError(); |
|
1508 m_token->setForceQuirks(); |
|
1509 return emitAndReconsumeIn(source, DataState); |
|
1510 } else { |
|
1511 parseError(); |
|
1512 m_token->setForceQuirks(); |
|
1513 ADVANCE_TO(BogusDOCTYPEState); |
|
1514 } |
|
1515 } |
|
1516 END_STATE() |
|
1517 |
|
1518 BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) { |
|
1519 if (cc == '"') |
|
1520 ADVANCE_TO(AfterDOCTYPESystemIdentifierState); |
|
1521 else if (cc == '>') { |
|
1522 parseError(); |
|
1523 m_token->setForceQuirks(); |
|
1524 return emitAndResumeIn(source, DataState); |
|
1525 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1526 parseError(); |
|
1527 m_token->setForceQuirks(); |
|
1528 return emitAndReconsumeIn(source, DataState); |
|
1529 } else { |
|
1530 m_token->appendToSystemIdentifier(cc); |
|
1531 ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
|
1532 } |
|
1533 } |
|
1534 END_STATE() |
|
1535 |
|
1536 BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) { |
|
1537 if (cc == '\'') |
|
1538 ADVANCE_TO(AfterDOCTYPESystemIdentifierState); |
|
1539 else if (cc == '>') { |
|
1540 parseError(); |
|
1541 m_token->setForceQuirks(); |
|
1542 return emitAndResumeIn(source, DataState); |
|
1543 } else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1544 parseError(); |
|
1545 m_token->setForceQuirks(); |
|
1546 return emitAndReconsumeIn(source, DataState); |
|
1547 } else { |
|
1548 m_token->appendToSystemIdentifier(cc); |
|
1549 ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
|
1550 } |
|
1551 } |
|
1552 END_STATE() |
|
1553 |
|
1554 BEGIN_STATE(AfterDOCTYPESystemIdentifierState) { |
|
1555 if (isTokenizerWhitespace(cc)) |
|
1556 ADVANCE_TO(AfterDOCTYPESystemIdentifierState); |
|
1557 else if (cc == '>') |
|
1558 return emitAndResumeIn(source, DataState); |
|
1559 else if (cc == InputStreamPreprocessor::endOfFileMarker) { |
|
1560 parseError(); |
|
1561 m_token->setForceQuirks(); |
|
1562 return emitAndReconsumeIn(source, DataState); |
|
1563 } else { |
|
1564 parseError(); |
|
1565 ADVANCE_TO(BogusDOCTYPEState); |
|
1566 } |
|
1567 } |
|
1568 END_STATE() |
|
1569 |
|
1570 BEGIN_STATE(BogusDOCTYPEState) { |
|
1571 if (cc == '>') |
|
1572 return emitAndResumeIn(source, DataState); |
|
1573 else if (cc == InputStreamPreprocessor::endOfFileMarker) |
|
1574 return emitAndReconsumeIn(source, DataState); |
|
1575 ADVANCE_TO(BogusDOCTYPEState); |
|
1576 } |
|
1577 END_STATE() |
|
1578 |
|
1579 BEGIN_STATE(CDATASectionState) { |
|
1580 notImplemented(); |
|
1581 ADVANCE_TO(CDATASectionState); |
|
1582 // FIXME: Handle EOF properly. |
|
1583 } |
|
1584 END_STATE() |
|
1585 |
|
1586 } |
|
1587 |
|
1588 ASSERT_NOT_REACHED(); |
|
1589 return false; |
|
1590 } |
|
1591 |
|
1592 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) |
|
1593 { |
|
1594 return vectorEqualsString(m_temporaryBuffer, expectedString); |
|
1595 } |
|
1596 |
|
1597 inline void HTMLTokenizer::addToPossibleEndTag(UChar cc) |
|
1598 { |
|
1599 ASSERT(isEndTagBufferingState(m_state)); |
|
1600 m_bufferedEndTagName.append(cc); |
|
1601 } |
|
1602 |
|
1603 inline bool HTMLTokenizer::isAppropriateEndTag() |
|
1604 { |
|
1605 return m_bufferedEndTagName == m_appropriateEndTagName; |
|
1606 } |
|
1607 |
|
1608 inline void HTMLTokenizer::bufferCharacter(UChar character) |
|
1609 { |
|
1610 ASSERT(character != InputStreamPreprocessor::endOfFileMarker); |
|
1611 if (m_token->type() != HTMLToken::Character) { |
|
1612 m_token->beginCharacter(character); |
|
1613 return; |
|
1614 } |
|
1615 m_token->appendToCharacter(character); |
|
1616 } |
|
1617 |
|
1618 inline void HTMLTokenizer::bufferCodePoint(unsigned value) |
|
1619 { |
|
1620 if (value < 0xFFFF) { |
|
1621 bufferCharacter(value); |
|
1622 return; |
|
1623 } |
|
1624 bufferCharacter(U16_LEAD(value)); |
|
1625 bufferCharacter(U16_TRAIL(value)); |
|
1626 } |
|
1627 |
|
1628 inline void HTMLTokenizer::parseError() |
|
1629 { |
|
1630 notImplemented(); |
|
1631 } |
|
1632 |
|
1633 inline bool HTMLTokenizer::shouldEmitBufferedCharacterToken(const SegmentedString& source) |
|
1634 { |
|
1635 return source.isClosed() && m_token->type() == HTMLToken::Character; |
|
1636 } |
|
1637 |
|
1638 } |
|
1639 |