|
1 /* |
|
2 Copyright (C) 1997 Martin Jones (mjones@kde.org) |
|
3 (C) 1997 Torben Weis (weis@kde.org) |
|
4 (C) 1998 Waldo Bastian (bastian@kde.org) |
|
5 (C) 1999 Lars Knoll (knoll@kde.org) |
|
6 (C) 1999 Antti Koivisto (koivisto@kde.org) |
|
7 (C) 2001 Dirk Mueller (mueller@kde.org) |
|
8 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
|
9 Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com) |
|
10 Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) |
|
11 |
|
12 This library is free software; you can redistribute it and/or |
|
13 modify it under the terms of the GNU Library General Public |
|
14 License as published by the Free Software Foundation; either |
|
15 version 2 of the License, or (at your option) any later version. |
|
16 |
|
17 This library is distributed in the hope that it will be useful, |
|
18 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
20 Library General Public License for more details. |
|
21 |
|
22 You should have received a copy of the GNU Library General Public License |
|
23 along with this library; see the file COPYING.LIB. If not, write to |
|
24 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
|
25 Boston, MA 02110-1301, USA. |
|
26 */ |
|
27 |
|
28 #include "config.h" |
|
29 #include "LegacyHTMLDocumentParser.h" |
|
30 |
|
31 #include "Attribute.h" |
|
32 #include "CSSHelper.h" |
|
33 #include "Cache.h" |
|
34 #include "CachedScript.h" |
|
35 #include "DocLoader.h" |
|
36 #include "DocumentFragment.h" |
|
37 #include "Event.h" |
|
38 #include "EventNames.h" |
|
39 #include "Frame.h" |
|
40 #include "FrameLoader.h" |
|
41 #include "FrameView.h" |
|
42 #include "HTMLElement.h" |
|
43 #include "HTMLNames.h" |
|
44 #include "LegacyHTMLTreeBuilder.h" |
|
45 #include "HTMLScriptElement.h" |
|
46 #include "HTMLViewSourceDocument.h" |
|
47 #include "ImageLoader.h" |
|
48 #include "InspectorTimelineAgent.h" |
|
49 #include "Page.h" |
|
50 #include "LegacyPreloadScanner.h" |
|
51 #include "ScriptSourceCode.h" |
|
52 #include "ScriptValue.h" |
|
53 #include "XSSAuditor.h" |
|
54 #include <wtf/ASCIICType.h> |
|
55 #include <wtf/CurrentTime.h> |
|
56 |
|
57 #include "HTMLEntityNames.cpp" |
|
58 |
|
59 #define PRELOAD_SCANNER_ENABLED 1 |
|
60 |
|
61 using namespace WTF; |
|
62 using namespace std; |
|
63 |
|
64 namespace WebCore { |
|
65 |
|
66 using namespace HTMLNames; |
|
67 |
|
// Number of loop iterations (roughly one per token) the parser performs
// before it checks whether it should yield. Lower both the chunk size and
// the time delay constants to make the parser more responsive.
static const int defaultTokenizerChunkSize = 4096;

// Maximum time, in seconds, the parser runs before yielding. Inline script
// execution can cause the parser to exceed this limit.
// FIXME: We would like this constant to be 200ms. Yielding more aggressively
// improves responsiveness and incremental rendering, but it slows down the
// overall page load on slower machines, so for now the value is 500ms.
static const double defaultTokenizerTimeDelay = 0.500;

// Markup prefixes and closing-tag prefixes the tokenizer searches for.
static const char commentStart[] = "<!--";
static const char doctypeStart[] = "<!doctype";
static const char publicStart[] = "public";
static const char systemStart[] = "system";
static const char scriptEnd[] = "</script";
static const char xmpEnd[] = "</xmp";
static const char styleEnd[] = "</style";
static const char textareaEnd[] = "</textarea";
static const char titleEnd[] = "</title";
static const char iframeEnd[] = "</iframe";
|
90 |
|
91 // Full support for MS Windows extensions to Latin-1. |
|
92 // Technically these extensions should only be activated for pages |
|
93 // marked "windows-1252" or "cp1252", but |
|
94 // in the standard Microsoft way, these extensions infect hundreds of thousands |
|
95 // of web pages. Note that people with non-latin-1 Microsoft extensions |
|
96 // are SOL. |
|
97 // |
|
98 // See: http://www.microsoft.com/globaldev/reference/WinCP.asp |
|
99 // http://www.bbsinc.com/iso8859.html |
|
100 // http://www.obviously.com/ |
|
101 // |
|
102 // There may be better equivalents |
|
103 |
|
104 // We only need this for entities. For non-entity text, we handle this in the text encoding. |
|
105 |
|
106 static const UChar windowsLatin1ExtensionArray[32] = { |
|
107 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 |
|
108 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F |
|
109 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 |
|
110 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F |
|
111 }; |
|
112 |
|
113 static inline UChar fixUpChar(UChar c) |
|
114 { |
|
115 if ((c & ~0x1F) != 0x0080) |
|
116 return c; |
|
117 return windowsLatin1ExtensionArray[c - 0x80]; |
|
118 } |
|
119 |
|
120 static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length) |
|
121 { |
|
122 for (unsigned i = 0; i != length; ++i) { |
|
123 unsigned char c1 = s1[i]; |
|
124 unsigned char uc1 = toASCIIUpper(static_cast<char>(c1)); |
|
125 UChar c2 = s2[i]; |
|
126 if (c1 != c2 && uc1 != c2) |
|
127 return false; |
|
128 } |
|
129 return true; |
|
130 } |
|
131 |
|
132 inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode) |
|
133 { |
|
134 if (!attrName.isEmpty()) { |
|
135 ASSERT(!attrName.contains('/')); |
|
136 RefPtr<Attribute> a = Attribute::createMapped(attrName, attributeValue); |
|
137 if (!attrs) { |
|
138 attrs = NamedNodeMap::create(); |
|
139 attrs->reserveInitialCapacity(10); |
|
140 } |
|
141 attrs->insertAttribute(a.release(), viewSourceMode); |
|
142 } |
|
143 |
|
144 attrName = emptyAtom; |
|
145 } |
|
146 |
|
147 // ---------------------------------------------------------------------------- |
|
148 |
|
149 LegacyHTMLDocumentParser::LegacyHTMLDocumentParser(HTMLDocument* document, bool reportErrors) |
|
150 : ScriptableDocumentParser(document) |
|
151 , m_buffer(0) |
|
152 , m_scriptCode(0) |
|
153 , m_scriptCodeSize(0) |
|
154 , m_scriptCodeCapacity(0) |
|
155 , m_scriptCodeResync(0) |
|
156 , m_executingScript(0) |
|
157 , m_requestingScript(false) |
|
158 , m_hasScriptsWaitingForStylesheets(false) |
|
159 , m_timer(this, &LegacyHTMLDocumentParser::timerFired) |
|
160 , m_externalScriptsTimer(this, &LegacyHTMLDocumentParser::executeExternalScriptsTimerFired) |
|
161 , m_treeBuilder(new LegacyHTMLTreeBuilder(document, reportErrors)) |
|
162 , m_inWrite(false) |
|
163 , m_fragment(false) |
|
164 , m_scriptingPermission(FragmentScriptingAllowed) |
|
165 { |
|
166 begin(); |
|
167 } |
|
168 |
|
169 LegacyHTMLDocumentParser::LegacyHTMLDocumentParser(HTMLViewSourceDocument* document) |
|
170 : ScriptableDocumentParser(document, true) |
|
171 , m_buffer(0) |
|
172 , m_scriptCode(0) |
|
173 , m_scriptCodeSize(0) |
|
174 , m_scriptCodeCapacity(0) |
|
175 , m_scriptCodeResync(0) |
|
176 , m_executingScript(0) |
|
177 , m_requestingScript(false) |
|
178 , m_hasScriptsWaitingForStylesheets(false) |
|
179 , m_timer(this, &LegacyHTMLDocumentParser::timerFired) |
|
180 , m_externalScriptsTimer(this, &LegacyHTMLDocumentParser::executeExternalScriptsTimerFired) |
|
181 , m_inWrite(false) |
|
182 , m_fragment(false) |
|
183 , m_scriptingPermission(FragmentScriptingAllowed) |
|
184 { |
|
185 begin(); |
|
186 } |
|
187 |
|
188 LegacyHTMLDocumentParser::LegacyHTMLDocumentParser(DocumentFragment* frag, FragmentScriptingPermission scriptingPermission) |
|
189 : ScriptableDocumentParser(frag->document()) |
|
190 , m_buffer(0) |
|
191 , m_scriptCode(0) |
|
192 , m_scriptCodeSize(0) |
|
193 , m_scriptCodeCapacity(0) |
|
194 , m_scriptCodeResync(0) |
|
195 , m_executingScript(0) |
|
196 , m_requestingScript(false) |
|
197 , m_hasScriptsWaitingForStylesheets(false) |
|
198 , m_timer(this, &LegacyHTMLDocumentParser::timerFired) |
|
199 , m_externalScriptsTimer(this, &LegacyHTMLDocumentParser::executeExternalScriptsTimerFired) |
|
200 , m_treeBuilder(new LegacyHTMLTreeBuilder(frag, scriptingPermission)) |
|
201 , m_inWrite(false) |
|
202 , m_fragment(true) |
|
203 , m_scriptingPermission(scriptingPermission) |
|
204 { |
|
205 begin(); |
|
206 } |
|
207 |
|
208 void LegacyHTMLDocumentParser::reset() |
|
209 { |
|
210 ASSERT(m_executingScript == 0); |
|
211 |
|
212 while (!m_pendingScripts.isEmpty()) { |
|
213 CachedResourceHandle<CachedScript> cs = m_pendingScripts.takeFirst(); |
|
214 ASSERT(cache()->disabled() || cs->accessCount() > 0); |
|
215 cs->removeClient(this); |
|
216 } |
|
217 |
|
218 fastFree(m_buffer); |
|
219 m_buffer = m_dest = 0; |
|
220 m_bufferSize = 0; |
|
221 |
|
222 fastFree(m_scriptCode); |
|
223 m_scriptCode = 0; |
|
224 m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0; |
|
225 |
|
226 m_timer.stop(); |
|
227 m_externalScriptsTimer.stop(); |
|
228 |
|
229 m_state.setAllowYield(false); |
|
230 m_state.setForceSynchronous(false); |
|
231 |
|
232 m_currentToken.reset(); |
|
233 m_doctypeToken.reset(); |
|
234 m_doctypeSearchCount = 0; |
|
235 m_doctypeSecondarySearchCount = 0; |
|
236 m_hasScriptsWaitingForStylesheets = false; |
|
237 } |
|
238 |
|
239 void LegacyHTMLDocumentParser::begin() |
|
240 { |
|
241 m_executingScript = 0; |
|
242 m_requestingScript = false; |
|
243 m_hasScriptsWaitingForStylesheets = false; |
|
244 m_state.setLoadingExtScript(false); |
|
245 reset(); |
|
246 m_bufferSize = 254; |
|
247 m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * 254)); |
|
248 m_dest = m_buffer; |
|
249 tquote = NoQuote; |
|
250 searchCount = 0; |
|
251 m_state.setEntityState(NoEntity); |
|
252 m_scriptTagSrcAttrValue = String(); |
|
253 m_pendingSrc.clear(); |
|
254 m_currentPrependingSrc = 0; |
|
255 m_noMoreData = false; |
|
256 m_brokenComments = false; |
|
257 m_brokenServer = false; |
|
258 m_lineNumber = 0; |
|
259 m_currentScriptTagStartLineNumber = 0; |
|
260 m_currentTagStartLineNumber = 0; |
|
261 m_state.setForceSynchronous(false); |
|
262 |
|
263 Page* page = document()->page(); |
|
264 if (page && page->hasCustomHTMLTokenizerTimeDelay()) |
|
265 m_tokenizerTimeDelay = page->customHTMLTokenizerTimeDelay(); |
|
266 else |
|
267 m_tokenizerTimeDelay = defaultTokenizerTimeDelay; |
|
268 |
|
269 if (page && page->hasCustomHTMLTokenizerChunkSize()) |
|
270 m_tokenizerChunkSize = page->customHTMLTokenizerChunkSize(); |
|
271 else |
|
272 m_tokenizerChunkSize = defaultTokenizerChunkSize; |
|
273 } |
|
274 |
|
275 void LegacyHTMLDocumentParser::setForceSynchronous(bool force) |
|
276 { |
|
277 m_state.setForceSynchronous(force); |
|
278 } |
|
279 |
|
280 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::processListing(SegmentedString list, State state) |
|
281 { |
|
282 // This function adds the listing 'list' as |
|
283 // preformatted text-tokens to the token-collection |
|
284 while (!list.isEmpty()) { |
|
285 if (state.skipLF()) { |
|
286 state.setSkipLF(false); |
|
287 if (*list == '\n') { |
|
288 list.advance(); |
|
289 continue; |
|
290 } |
|
291 } |
|
292 |
|
293 checkBuffer(); |
|
294 |
|
295 if (*list == '\n' || *list == '\r') { |
|
296 if (state.discardLF()) |
|
297 // Ignore this LF |
|
298 state.setDiscardLF(false); // We have discarded 1 LF |
|
299 else |
|
300 *m_dest++ = '\n'; |
|
301 |
|
302 /* Check for MS-DOS CRLF sequence */ |
|
303 if (*list == '\r') |
|
304 state.setSkipLF(true); |
|
305 |
|
306 list.advance(); |
|
307 } else { |
|
308 state.setDiscardLF(false); |
|
309 *m_dest++ = *list; |
|
310 list.advance(); |
|
311 } |
|
312 } |
|
313 |
|
314 return state; |
|
315 } |
|
316 |
|
317 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseNonHTMLText(SegmentedString& src, State state) |
|
318 { |
|
319 ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState()); |
|
320 ASSERT(!state.hasTagState()); |
|
321 ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1); |
|
322 if (state.inScript() && !m_currentScriptTagStartLineNumber) |
|
323 m_currentScriptTagStartLineNumber = m_lineNumber; |
|
324 |
|
325 if (state.inComment()) |
|
326 state = parseComment(src, state); |
|
327 |
|
328 int lastDecodedEntityPosition = -1; |
|
329 while (!src.isEmpty()) { |
|
330 checkScriptBuffer(); |
|
331 UChar ch = *src; |
|
332 |
|
333 if (!m_scriptCodeResync && !m_brokenComments && |
|
334 !state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() && |
|
335 m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' && |
|
336 (lastDecodedEntityPosition < m_scriptCodeSize - 3)) { |
|
337 state.setInComment(true); |
|
338 state = parseComment(src, state); |
|
339 continue; |
|
340 } |
|
341 if (m_scriptCodeResync && !tquote && ch == '>') { |
|
342 src.advancePastNonNewline(); |
|
343 m_scriptCodeSize = m_scriptCodeResync - 1; |
|
344 m_scriptCodeResync = 0; |
|
345 m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0; |
|
346 if (state.inScript()) |
|
347 state = scriptHandler(state); |
|
348 else { |
|
349 state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state); |
|
350 processToken(); |
|
351 if (state.inStyle()) { |
|
352 m_currentToken.tagName = styleTag.localName(); |
|
353 m_currentToken.beginTag = false; |
|
354 } else if (state.inTextArea()) { |
|
355 m_currentToken.tagName = textareaTag.localName(); |
|
356 m_currentToken.beginTag = false; |
|
357 } else if (state.inTitle()) { |
|
358 m_currentToken.tagName = titleTag.localName(); |
|
359 m_currentToken.beginTag = false; |
|
360 } else if (state.inXmp()) { |
|
361 m_currentToken.tagName = xmpTag.localName(); |
|
362 m_currentToken.beginTag = false; |
|
363 } else if (state.inIFrame()) { |
|
364 m_currentToken.tagName = iframeTag.localName(); |
|
365 m_currentToken.beginTag = false; |
|
366 } |
|
367 processToken(); |
|
368 state.setInStyle(false); |
|
369 state.setInScript(false); |
|
370 state.setInTextArea(false); |
|
371 state.setInTitle(false); |
|
372 state.setInXmp(false); |
|
373 state.setInIFrame(false); |
|
374 tquote = NoQuote; |
|
375 m_scriptCodeSize = m_scriptCodeResync = 0; |
|
376 } |
|
377 return state; |
|
378 } |
|
379 // possible end of tagname, lets check. |
|
380 if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) && |
|
381 m_scriptCodeSize >= m_searchStopperLength && |
|
382 tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) && |
|
383 (lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) { |
|
384 m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1; |
|
385 tquote = NoQuote; |
|
386 continue; |
|
387 } |
|
388 if (m_scriptCodeResync && !state.escaped()) { |
|
389 if (ch == '\"') |
|
390 tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote); |
|
391 else if (ch == '\'') |
|
392 tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote; |
|
393 else if (tquote != NoQuote && (ch == '\r' || ch == '\n')) |
|
394 tquote = NoQuote; |
|
395 } |
|
396 state.setEscaped(!state.escaped() && ch == '\\'); |
|
397 if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') { |
|
398 UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize; |
|
399 src.advancePastNonNewline(); |
|
400 state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false); |
|
401 if (scriptCodeDest == m_scriptCode + m_scriptCodeSize) |
|
402 lastDecodedEntityPosition = m_scriptCodeSize; |
|
403 else |
|
404 m_scriptCodeSize = scriptCodeDest - m_scriptCode; |
|
405 } else { |
|
406 m_scriptCode[m_scriptCodeSize++] = ch; |
|
407 src.advance(m_lineNumber); |
|
408 } |
|
409 } |
|
410 |
|
411 return state; |
|
412 } |
|
413 |
|
414 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::scriptHandler(State state) |
|
415 { |
|
416 // We are inside a <script> |
|
417 bool doScriptExec = false; |
|
418 int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenzier line numbers are 0 based |
|
419 |
|
420 // Reset m_currentScriptTagStartLineNumber to indicate that we've finished parsing the current script element |
|
421 m_currentScriptTagStartLineNumber = 0; |
|
422 |
|
423 // (Bugzilla 3837) Scripts following a frameset element should not execute or, |
|
424 // in the case of extern scripts, even load. |
|
425 bool followingFrameset = (document()->body() && document()->body()->hasTagName(framesetTag)); |
|
426 |
|
427 CachedScript* cs = 0; |
|
428 // don't load external scripts for standalone documents (for now) |
|
429 if (!inViewSourceMode()) { |
|
430 if (!m_scriptTagSrcAttrValue.isEmpty() && document()->frame()) { |
|
431 // forget what we just got; load from src url instead |
|
432 if (!m_treeBuilder->skipMode() && !followingFrameset) { |
|
433 // The parser might have been stopped by for example a window.close call in an earlier script. |
|
434 // If so, we don't want to load scripts. |
|
435 if (!m_parserStopped && m_scriptNode->dispatchBeforeLoadEvent(m_scriptTagSrcAttrValue) && |
|
436 (cs = document()->docLoader()->requestScript(m_scriptTagSrcAttrValue, m_scriptTagCharsetAttrValue))) |
|
437 m_pendingScripts.append(cs); |
|
438 else |
|
439 m_scriptNode = 0; |
|
440 } else |
|
441 m_scriptNode = 0; |
|
442 m_scriptTagSrcAttrValue = String(); |
|
443 } else { |
|
444 // Parse m_scriptCode containing <script> info |
|
445 doScriptExec = m_scriptNode->shouldExecuteAsJavaScript(); |
|
446 #if ENABLE(XHTMLMP) |
|
447 if (!doScriptExec) |
|
448 document()->setShouldProcessNoscriptElement(true); |
|
449 #endif |
|
450 m_scriptNode = 0; |
|
451 } |
|
452 } |
|
453 |
|
454 state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state); |
|
455 RefPtr<Node> node = processToken(); |
|
456 |
|
457 if (node && m_scriptingPermission == FragmentScriptingNotAllowed) { |
|
458 ExceptionCode ec; |
|
459 node->remove(ec); |
|
460 node = 0; |
|
461 } |
|
462 |
|
463 String scriptString = node ? node->textContent() : ""; |
|
464 m_currentToken.tagName = scriptTag.localName(); |
|
465 m_currentToken.beginTag = false; |
|
466 processToken(); |
|
467 |
|
468 state.setInScript(false); |
|
469 m_scriptCodeSize = m_scriptCodeResync = 0; |
|
470 |
|
471 // FIXME: The script should be syntax highlighted. |
|
472 if (inViewSourceMode()) |
|
473 return state; |
|
474 |
|
475 SegmentedString* savedPrependingSrc = m_currentPrependingSrc; |
|
476 SegmentedString prependingSrc; |
|
477 m_currentPrependingSrc = &prependingSrc; |
|
478 |
|
479 if (!m_treeBuilder->skipMode() && !followingFrameset) { |
|
480 if (cs) { |
|
481 if (savedPrependingSrc) |
|
482 savedPrependingSrc->append(m_src); |
|
483 else |
|
484 m_pendingSrc.prepend(m_src); |
|
485 setSrc(SegmentedString()); |
|
486 |
|
487 // the ref() call below may call notifyFinished if the script is already in cache, |
|
488 // and that mucks with the state directly, so we must write it back to the object. |
|
489 m_state = state; |
|
490 bool savedRequestingScript = m_requestingScript; |
|
491 m_requestingScript = true; |
|
492 cs->addClient(this); |
|
493 m_requestingScript = savedRequestingScript; |
|
494 state = m_state; |
|
495 // will be 0 if script was already loaded and ref() executed it |
|
496 if (!m_pendingScripts.isEmpty()) |
|
497 state.setLoadingExtScript(true); |
|
498 } else if (!m_fragment && doScriptExec) { |
|
499 if (!m_executingScript) |
|
500 m_pendingSrc.prepend(m_src); |
|
501 else |
|
502 prependingSrc = m_src; |
|
503 setSrc(SegmentedString()); |
|
504 state = scriptExecution(ScriptSourceCode(scriptString, document()->frame() ? document()->frame()->document()->url() : KURL(), startLine), state); |
|
505 } |
|
506 } |
|
507 |
|
508 if (!m_executingScript && !state.loadingExtScript()) { |
|
509 m_src.append(m_pendingSrc); |
|
510 m_pendingSrc.clear(); |
|
511 } else if (!prependingSrc.isEmpty()) { |
|
512 // restore first so that the write appends in the right place |
|
513 // (does not hurt to do it again below) |
|
514 m_currentPrependingSrc = savedPrependingSrc; |
|
515 |
|
516 // we need to do this slightly modified bit of one of the write() cases |
|
517 // because we want to prepend to m_pendingSrc rather than appending |
|
518 // if there's no previous prependingSrc |
|
519 if (!m_pendingScripts.isEmpty()) { |
|
520 if (m_currentPrependingSrc) |
|
521 m_currentPrependingSrc->append(prependingSrc); |
|
522 else |
|
523 m_pendingSrc.prepend(prependingSrc); |
|
524 } else { |
|
525 m_state = state; |
|
526 write(prependingSrc, false); |
|
527 state = m_state; |
|
528 } |
|
529 } |
|
530 |
|
531 #if PRELOAD_SCANNER_ENABLED |
|
532 if (!m_pendingScripts.isEmpty() && !m_executingScript) { |
|
533 if (!m_preloadScanner) |
|
534 m_preloadScanner.set(new LegacyPreloadScanner(document())); |
|
535 if (!m_preloadScanner->inProgress()) { |
|
536 m_preloadScanner->begin(); |
|
537 m_preloadScanner->write(m_pendingSrc); |
|
538 } |
|
539 } |
|
540 #endif |
|
541 m_currentPrependingSrc = savedPrependingSrc; |
|
542 |
|
543 return state; |
|
544 } |
|
545 |
|
546 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::scriptExecution(const ScriptSourceCode& sourceCode, State state) |
|
547 { |
|
548 if (m_fragment || !document()->frame()) |
|
549 return state; |
|
550 m_executingScript++; |
|
551 |
|
552 SegmentedString* savedPrependingSrc = m_currentPrependingSrc; |
|
553 SegmentedString prependingSrc; |
|
554 m_currentPrependingSrc = &prependingSrc; |
|
555 |
|
556 m_state = state; |
|
557 document()->frame()->script()->executeScript(sourceCode); |
|
558 state = m_state; |
|
559 |
|
560 state.setAllowYield(true); |
|
561 |
|
562 m_executingScript--; |
|
563 |
|
564 if (!m_executingScript && !state.loadingExtScript()) { |
|
565 m_pendingSrc.prepend(prependingSrc); |
|
566 m_src.append(m_pendingSrc); |
|
567 m_pendingSrc.clear(); |
|
568 } else if (!prependingSrc.isEmpty()) { |
|
569 // restore first so that the write appends in the right place |
|
570 // (does not hurt to do it again below) |
|
571 m_currentPrependingSrc = savedPrependingSrc; |
|
572 |
|
573 // we need to do this slightly modified bit of one of the write() cases |
|
574 // because we want to prepend to m_pendingSrc rather than appending |
|
575 // if there's no previous prependingSrc |
|
576 if (!m_pendingScripts.isEmpty()) { |
|
577 if (m_currentPrependingSrc) |
|
578 m_currentPrependingSrc->append(prependingSrc); |
|
579 else |
|
580 m_pendingSrc.prepend(prependingSrc); |
|
581 |
|
582 #if PRELOAD_SCANNER_ENABLED |
|
583 // We are stuck waiting for another script. Lets check the source that |
|
584 // was just document.write()n for anything to load. |
|
585 LegacyPreloadScanner documentWritePreloadScanner(document()); |
|
586 documentWritePreloadScanner.begin(); |
|
587 documentWritePreloadScanner.write(prependingSrc); |
|
588 documentWritePreloadScanner.end(); |
|
589 #endif |
|
590 } else { |
|
591 m_state = state; |
|
592 write(prependingSrc, false); |
|
593 state = m_state; |
|
594 } |
|
595 } |
|
596 |
|
597 m_currentPrependingSrc = savedPrependingSrc; |
|
598 |
|
599 return state; |
|
600 } |
|
601 |
|
602 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseComment(SegmentedString& src, State state) |
|
603 { |
|
604 // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus. |
|
605 checkScriptBuffer(src.length()); |
|
606 while (!src.isEmpty()) { |
|
607 UChar ch = *src; |
|
608 m_scriptCode[m_scriptCodeSize++] = ch; |
|
609 if (ch == '>') { |
|
610 bool handleBrokenComments = m_brokenComments && !(state.inScript() || state.inStyle()); |
|
611 int endCharsCount = 1; // start off with one for the '>' character |
|
612 if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize-3] == '-' && m_scriptCode[m_scriptCodeSize-2] == '-') { |
|
613 endCharsCount = 3; |
|
614 } else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize-4] == '-' && m_scriptCode[m_scriptCodeSize-3] == '-' && |
|
615 m_scriptCode[m_scriptCodeSize-2] == '!') { |
|
616 // Other browsers will accept --!> as a close comment, even though it's |
|
617 // not technically valid. |
|
618 endCharsCount = 4; |
|
619 } |
|
620 if (handleBrokenComments || endCharsCount > 1) { |
|
621 src.advancePastNonNewline(); |
|
622 if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle() || state.inIFrame())) { |
|
623 checkScriptBuffer(); |
|
624 m_scriptCode[m_scriptCodeSize] = 0; |
|
625 m_scriptCode[m_scriptCodeSize + 1] = 0; |
|
626 m_currentToken.tagName = commentAtom; |
|
627 m_currentToken.beginTag = true; |
|
628 state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - endCharsCount), state); |
|
629 processToken(); |
|
630 m_currentToken.tagName = commentAtom; |
|
631 m_currentToken.beginTag = false; |
|
632 processToken(); |
|
633 m_scriptCodeSize = 0; |
|
634 } |
|
635 state.setInComment(false); |
|
636 return state; // Finished parsing comment |
|
637 } |
|
638 } |
|
639 src.advance(m_lineNumber); |
|
640 } |
|
641 |
|
642 return state; |
|
643 } |
|
644 |
|
645 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseServer(SegmentedString& src, State state) |
|
646 { |
|
647 checkScriptBuffer(src.length()); |
|
648 while (!src.isEmpty()) { |
|
649 UChar ch = *src; |
|
650 m_scriptCode[m_scriptCodeSize++] = ch; |
|
651 if (ch == '>' && m_scriptCodeSize > 1 && m_scriptCode[m_scriptCodeSize - 2] == '%') { |
|
652 src.advancePastNonNewline(); |
|
653 state.setInServer(false); |
|
654 m_scriptCodeSize = 0; |
|
655 return state; // Finished parsing server include |
|
656 } |
|
657 src.advance(m_lineNumber); |
|
658 } |
|
659 return state; |
|
660 } |
|
661 |
|
662 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseProcessingInstruction(SegmentedString& src, State state) |
|
663 { |
|
664 UChar oldchar = 0; |
|
665 while (!src.isEmpty()) { |
|
666 UChar chbegin = *src; |
|
667 if (chbegin == '\'') |
|
668 tquote = tquote == SingleQuote ? NoQuote : SingleQuote; |
|
669 else if (chbegin == '\"') |
|
670 tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote; |
|
671 // Look for '?>' |
|
672 // Some crappy sites omit the "?" before it, so |
|
673 // we look for an unquoted '>' instead. (IE compatible) |
|
674 else if (chbegin == '>' && (!tquote || oldchar == '?')) { |
|
675 // We got a '?>' sequence |
|
676 state.setInProcessingInstruction(false); |
|
677 src.advancePastNonNewline(); |
|
678 state.setDiscardLF(true); |
|
679 return state; // Finished parsing comment! |
|
680 } |
|
681 src.advance(m_lineNumber); |
|
682 oldchar = chbegin; |
|
683 } |
|
684 |
|
685 return state; |
|
686 } |
|
687 |
|
688 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseText(SegmentedString& src, State state) |
|
689 { |
|
690 while (!src.isEmpty()) { |
|
691 UChar cc = *src; |
|
692 |
|
693 if (state.skipLF()) { |
|
694 state.setSkipLF(false); |
|
695 if (cc == '\n') { |
|
696 src.advancePastNewline(m_lineNumber); |
|
697 continue; |
|
698 } |
|
699 } |
|
700 |
|
701 // do we need to enlarge the buffer? |
|
702 checkBuffer(); |
|
703 |
|
704 if (cc == '\r') { |
|
705 state.setSkipLF(true); |
|
706 *m_dest++ = '\n'; |
|
707 } else |
|
708 *m_dest++ = cc; |
|
709 src.advance(m_lineNumber); |
|
710 } |
|
711 |
|
712 return state; |
|
713 } |
|
714 |
|
715 |
|
716 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseEntity(SegmentedString& src, UChar*& dest, State state, unsigned& cBufferPos, bool start, bool parsingTag) |
|
717 { |
|
718 if (start) { |
|
719 cBufferPos = 0; |
|
720 state.setEntityState(SearchEntity); |
|
721 EntityUnicodeValue = 0; |
|
722 } |
|
723 |
|
724 while (!src.isEmpty()) { |
|
725 UChar cc = *src; |
|
726 switch (state.entityState()) { |
|
727 case NoEntity: |
|
728 ASSERT(state.entityState() != NoEntity); |
|
729 return state; |
|
730 |
|
731 case SearchEntity: |
|
732 if (cc == '#') { |
|
733 m_cBuffer[cBufferPos++] = cc; |
|
734 src.advancePastNonNewline(); |
|
735 state.setEntityState(NumericSearch); |
|
736 } else |
|
737 state.setEntityState(EntityName); |
|
738 break; |
|
739 |
|
740 case NumericSearch: |
|
741 if (cc == 'x' || cc == 'X') { |
|
742 m_cBuffer[cBufferPos++] = cc; |
|
743 src.advancePastNonNewline(); |
|
744 state.setEntityState(Hexadecimal); |
|
745 } else if (cc >= '0' && cc <= '9') |
|
746 state.setEntityState(Decimal); |
|
747 else |
|
748 state.setEntityState(SearchSemicolon); |
|
749 break; |
|
750 |
|
751 case Hexadecimal: { |
|
752 int ll = min(src.length(), 10 - cBufferPos); |
|
753 while (ll--) { |
|
754 cc = *src; |
|
755 if (!((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'))) { |
|
756 state.setEntityState(SearchSemicolon); |
|
757 break; |
|
758 } |
|
759 int digit; |
|
760 if (cc < 'A') |
|
761 digit = cc - '0'; |
|
762 else |
|
763 digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch |
|
764 EntityUnicodeValue = EntityUnicodeValue * 16 + digit; |
|
765 m_cBuffer[cBufferPos++] = cc; |
|
766 src.advancePastNonNewline(); |
|
767 } |
|
768 if (cBufferPos == 10) |
|
769 state.setEntityState(SearchSemicolon); |
|
770 break; |
|
771 } |
|
772 case Decimal: |
|
773 { |
|
774 int ll = min(src.length(), 9-cBufferPos); |
|
775 while (ll--) { |
|
776 cc = *src; |
|
777 |
|
778 if (!(cc >= '0' && cc <= '9')) { |
|
779 state.setEntityState(SearchSemicolon); |
|
780 break; |
|
781 } |
|
782 |
|
783 EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0'); |
|
784 m_cBuffer[cBufferPos++] = cc; |
|
785 src.advancePastNonNewline(); |
|
786 } |
|
787 if (cBufferPos == 9) |
|
788 state.setEntityState(SearchSemicolon); |
|
789 break; |
|
790 } |
|
791 case EntityName: |
|
792 { |
|
793 int ll = min(src.length(), 9-cBufferPos); |
|
794 while (ll--) { |
|
795 cc = *src; |
|
796 |
|
797 if (!((cc >= 'a' && cc <= 'z') || (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) { |
|
798 state.setEntityState(SearchSemicolon); |
|
799 break; |
|
800 } |
|
801 |
|
802 m_cBuffer[cBufferPos++] = cc; |
|
803 src.advancePastNonNewline(); |
|
804 } |
|
805 if (cBufferPos == 9) |
|
806 state.setEntityState(SearchSemicolon); |
|
807 if (state.entityState() == SearchSemicolon) { |
|
808 if (cBufferPos > 1) { |
|
809 // Since the maximum length of entity name is 9, |
|
810 // so a single char array which is allocated on |
|
811 // the stack, its length is 10, should be OK. |
|
812 // Also if we have an illegal character, we treat it |
|
813 // as illegal entity name. |
|
814 unsigned testedEntityNameLen = 0; |
|
815 char tmpEntityNameBuffer[10]; |
|
816 |
|
817 ASSERT(cBufferPos < 10); |
|
818 for (; testedEntityNameLen < cBufferPos; ++testedEntityNameLen) { |
|
819 if (m_cBuffer[testedEntityNameLen] > 0x7e) |
|
820 break; |
|
821 tmpEntityNameBuffer[testedEntityNameLen] = m_cBuffer[testedEntityNameLen]; |
|
822 } |
|
823 |
|
824 const Entity *e; |
|
825 |
|
826 if (testedEntityNameLen == cBufferPos) |
|
827 e = findEntity(tmpEntityNameBuffer, cBufferPos); |
|
828 else |
|
829 e = 0; |
|
830 |
|
831 if (e) |
|
832 EntityUnicodeValue = e->code; |
|
833 |
|
834 // be IE compatible |
|
835 if (parsingTag && EntityUnicodeValue > 255 && *src != ';') |
|
836 EntityUnicodeValue = 0; |
|
837 } |
|
838 } |
|
839 else |
|
840 break; |
|
841 } |
|
842 case SearchSemicolon: |
|
843 // Don't allow values that are more than 21 bits. |
|
844 if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) { |
|
845 if (!inViewSourceMode()) { |
|
846 if (*src == ';') |
|
847 src.advancePastNonNewline(); |
|
848 if (EntityUnicodeValue <= 0xFFFF) { |
|
849 checkBuffer(); |
|
850 src.push(fixUpChar(EntityUnicodeValue)); |
|
851 } else { |
|
852 // Convert to UTF-16, using surrogate code points. |
|
853 checkBuffer(2); |
|
854 src.push(U16_LEAD(EntityUnicodeValue)); |
|
855 src.push(U16_TRAIL(EntityUnicodeValue)); |
|
856 } |
|
857 } else { |
|
858 // FIXME: We should eventually colorize entities by sending them as a special token. |
|
859 // 12 bytes required: up to 10 bytes in m_cBuffer plus the |
|
860 // leading '&' and trailing ';' |
|
861 checkBuffer(12); |
|
862 *dest++ = '&'; |
|
863 for (unsigned i = 0; i < cBufferPos; i++) |
|
864 dest[i] = m_cBuffer[i]; |
|
865 dest += cBufferPos; |
|
866 if (*src == ';') { |
|
867 *dest++ = ';'; |
|
868 src.advancePastNonNewline(); |
|
869 } |
|
870 } |
|
871 } else { |
|
872 // 11 bytes required: up to 10 bytes in m_cBuffer plus the |
|
873 // leading '&' |
|
874 checkBuffer(11); |
|
875 // ignore the sequence, add it to the buffer as plaintext |
|
876 *dest++ = '&'; |
|
877 for (unsigned i = 0; i < cBufferPos; i++) |
|
878 dest[i] = m_cBuffer[i]; |
|
879 dest += cBufferPos; |
|
880 } |
|
881 |
|
882 state.setEntityState(NoEntity); |
|
883 return state; |
|
884 } |
|
885 } |
|
886 |
|
887 return state; |
|
888 } |
|
889 |
|
890 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseDoctype(SegmentedString& src, State state) |
|
891 { |
|
892 ASSERT(state.inDoctype()); |
|
893 while (!src.isEmpty() && state.inDoctype()) { |
|
894 UChar c = *src; |
|
895 bool isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' '; |
|
896 switch (m_doctypeToken.state()) { |
|
897 case DoctypeBegin: { |
|
898 m_doctypeToken.setState(DoctypeBeforeName); |
|
899 if (isWhitespace) { |
|
900 src.advance(m_lineNumber); |
|
901 if (inViewSourceMode()) |
|
902 m_doctypeToken.m_source.append(c); |
|
903 } |
|
904 break; |
|
905 } |
|
906 case DoctypeBeforeName: { |
|
907 if (c == '>') { |
|
908 // Malformed. Just exit. |
|
909 src.advancePastNonNewline(); |
|
910 state.setInDoctype(false); |
|
911 if (inViewSourceMode()) |
|
912 processDoctypeToken(); |
|
913 } else if (isWhitespace) { |
|
914 src.advance(m_lineNumber); |
|
915 if (inViewSourceMode()) |
|
916 m_doctypeToken.m_source.append(c); |
|
917 } else |
|
918 m_doctypeToken.setState(DoctypeName); |
|
919 break; |
|
920 } |
|
921 case DoctypeName: { |
|
922 if (c == '>') { |
|
923 // Valid doctype. Emit it. |
|
924 src.advancePastNonNewline(); |
|
925 state.setInDoctype(false); |
|
926 processDoctypeToken(); |
|
927 } else if (isWhitespace) { |
|
928 m_doctypeSearchCount = 0; // Used now to scan for PUBLIC |
|
929 m_doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM |
|
930 m_doctypeToken.setState(DoctypeAfterName); |
|
931 src.advance(m_lineNumber); |
|
932 if (inViewSourceMode()) |
|
933 m_doctypeToken.m_source.append(c); |
|
934 } else { |
|
935 src.advancePastNonNewline(); |
|
936 m_doctypeToken.m_name.append(c); |
|
937 if (inViewSourceMode()) |
|
938 m_doctypeToken.m_source.append(c); |
|
939 } |
|
940 break; |
|
941 } |
|
942 case DoctypeAfterName: { |
|
943 if (c == '>') { |
|
944 // Valid doctype. Emit it. |
|
945 src.advancePastNonNewline(); |
|
946 state.setInDoctype(false); |
|
947 processDoctypeToken(); |
|
948 } else if (!isWhitespace) { |
|
949 src.advancePastNonNewline(); |
|
950 if (toASCIILower(c) == publicStart[m_doctypeSearchCount]) { |
|
951 m_doctypeSearchCount++; |
|
952 if (m_doctypeSearchCount == 6) |
|
953 // Found 'PUBLIC' sequence |
|
954 m_doctypeToken.setState(DoctypeBeforePublicID); |
|
955 } else if (m_doctypeSearchCount > 0) { |
|
956 m_doctypeSearchCount = 0; |
|
957 m_doctypeToken.setState(DoctypeBogus); |
|
958 } else if (toASCIILower(c) == systemStart[m_doctypeSecondarySearchCount]) { |
|
959 m_doctypeSecondarySearchCount++; |
|
960 if (m_doctypeSecondarySearchCount == 6) |
|
961 // Found 'SYSTEM' sequence |
|
962 m_doctypeToken.setState(DoctypeBeforeSystemID); |
|
963 } else { |
|
964 m_doctypeSecondarySearchCount = 0; |
|
965 m_doctypeToken.setState(DoctypeBogus); |
|
966 } |
|
967 if (inViewSourceMode()) |
|
968 m_doctypeToken.m_source.append(c); |
|
969 } else { |
|
970 src.advance(m_lineNumber); // Whitespace keeps us in the after name state. |
|
971 if (inViewSourceMode()) |
|
972 m_doctypeToken.m_source.append(c); |
|
973 } |
|
974 break; |
|
975 } |
|
976 case DoctypeBeforePublicID: { |
|
977 if (c == '\"' || c == '\'') { |
|
978 tquote = c == '\"' ? DoubleQuote : SingleQuote; |
|
979 m_doctypeToken.setState(DoctypePublicID); |
|
980 src.advancePastNonNewline(); |
|
981 if (inViewSourceMode()) |
|
982 m_doctypeToken.m_source.append(c); |
|
983 } else if (c == '>') { |
|
984 // Considered bogus. Don't process the doctype. |
|
985 src.advancePastNonNewline(); |
|
986 state.setInDoctype(false); |
|
987 if (inViewSourceMode()) |
|
988 processDoctypeToken(); |
|
989 } else if (isWhitespace) { |
|
990 src.advance(m_lineNumber); |
|
991 if (inViewSourceMode()) |
|
992 m_doctypeToken.m_source.append(c); |
|
993 } else |
|
994 m_doctypeToken.setState(DoctypeBogus); |
|
995 break; |
|
996 } |
|
997 case DoctypePublicID: { |
|
998 if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) { |
|
999 src.advancePastNonNewline(); |
|
1000 m_doctypeToken.setState(DoctypeAfterPublicID); |
|
1001 if (inViewSourceMode()) |
|
1002 m_doctypeToken.m_source.append(c); |
|
1003 } else if (c == '>') { |
|
1004 // Considered bogus. Don't process the doctype. |
|
1005 src.advancePastNonNewline(); |
|
1006 state.setInDoctype(false); |
|
1007 if (inViewSourceMode()) |
|
1008 processDoctypeToken(); |
|
1009 } else { |
|
1010 m_doctypeToken.m_publicID.append(c); |
|
1011 src.advance(m_lineNumber); |
|
1012 if (inViewSourceMode()) |
|
1013 m_doctypeToken.m_source.append(c); |
|
1014 } |
|
1015 break; |
|
1016 } |
|
1017 case DoctypeAfterPublicID: |
|
1018 if (c == '\"' || c == '\'') { |
|
1019 tquote = c == '\"' ? DoubleQuote : SingleQuote; |
|
1020 m_doctypeToken.setState(DoctypeSystemID); |
|
1021 src.advancePastNonNewline(); |
|
1022 if (inViewSourceMode()) |
|
1023 m_doctypeToken.m_source.append(c); |
|
1024 } else if (c == '>') { |
|
1025 // Valid doctype. Emit it now. |
|
1026 src.advancePastNonNewline(); |
|
1027 state.setInDoctype(false); |
|
1028 processDoctypeToken(); |
|
1029 } else if (isWhitespace) { |
|
1030 src.advance(m_lineNumber); |
|
1031 if (inViewSourceMode()) |
|
1032 m_doctypeToken.m_source.append(c); |
|
1033 } else |
|
1034 m_doctypeToken.setState(DoctypeBogus); |
|
1035 break; |
|
1036 case DoctypeBeforeSystemID: |
|
1037 if (c == '\"' || c == '\'') { |
|
1038 tquote = c == '\"' ? DoubleQuote : SingleQuote; |
|
1039 m_doctypeToken.setState(DoctypeSystemID); |
|
1040 src.advancePastNonNewline(); |
|
1041 if (inViewSourceMode()) |
|
1042 m_doctypeToken.m_source.append(c); |
|
1043 } else if (c == '>') { |
|
1044 // Considered bogus. Don't process the doctype. |
|
1045 src.advancePastNonNewline(); |
|
1046 state.setInDoctype(false); |
|
1047 } else if (isWhitespace) { |
|
1048 src.advance(m_lineNumber); |
|
1049 if (inViewSourceMode()) |
|
1050 m_doctypeToken.m_source.append(c); |
|
1051 } else |
|
1052 m_doctypeToken.setState(DoctypeBogus); |
|
1053 break; |
|
1054 case DoctypeSystemID: |
|
1055 if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) { |
|
1056 src.advancePastNonNewline(); |
|
1057 m_doctypeToken.setState(DoctypeAfterSystemID); |
|
1058 if (inViewSourceMode()) |
|
1059 m_doctypeToken.m_source.append(c); |
|
1060 } else if (c == '>') { |
|
1061 // Considered bogus. Don't process the doctype. |
|
1062 src.advancePastNonNewline(); |
|
1063 state.setInDoctype(false); |
|
1064 if (inViewSourceMode()) |
|
1065 processDoctypeToken(); |
|
1066 } else { |
|
1067 m_doctypeToken.m_systemID.append(c); |
|
1068 src.advance(m_lineNumber); |
|
1069 if (inViewSourceMode()) |
|
1070 m_doctypeToken.m_source.append(c); |
|
1071 } |
|
1072 break; |
|
1073 case DoctypeAfterSystemID: |
|
1074 if (c == '>') { |
|
1075 // Valid doctype. Emit it now. |
|
1076 src.advancePastNonNewline(); |
|
1077 state.setInDoctype(false); |
|
1078 processDoctypeToken(); |
|
1079 } else if (isWhitespace) { |
|
1080 src.advance(m_lineNumber); |
|
1081 if (inViewSourceMode()) |
|
1082 m_doctypeToken.m_source.append(c); |
|
1083 } else |
|
1084 m_doctypeToken.setState(DoctypeBogus); |
|
1085 break; |
|
1086 case DoctypeBogus: |
|
1087 if (c == '>') { |
|
1088 // Done with the bogus doctype. |
|
1089 src.advancePastNonNewline(); |
|
1090 state.setInDoctype(false); |
|
1091 if (inViewSourceMode()) |
|
1092 processDoctypeToken(); |
|
1093 } else { |
|
1094 src.advance(m_lineNumber); // Just keep scanning for '>' |
|
1095 if (inViewSourceMode()) |
|
1096 m_doctypeToken.m_source.append(c); |
|
1097 } |
|
1098 break; |
|
1099 default: |
|
1100 break; |
|
1101 } |
|
1102 } |
|
1103 return state; |
|
1104 } |
|
1105 |
|
1106 LegacyHTMLDocumentParser::State LegacyHTMLDocumentParser::parseTag(SegmentedString& src, State state) |
|
1107 { |
|
1108 ASSERT(!state.hasEntityState()); |
|
1109 |
|
1110 unsigned cBufferPos = m_cBufferPos; |
|
1111 |
|
1112 bool lastIsSlash = false; |
|
1113 |
|
1114 while (!src.isEmpty()) { |
|
1115 checkBuffer(); |
|
1116 switch (state.tagState()) { |
|
1117 case NoTag: |
|
1118 { |
|
1119 m_cBufferPos = cBufferPos; |
|
1120 return state; |
|
1121 } |
|
1122 case TagName: |
|
1123 { |
|
1124 if (searchCount > 0) { |
|
1125 if (*src == commentStart[searchCount]) { |
|
1126 searchCount++; |
|
1127 if (searchCount == 2) |
|
1128 m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well. |
|
1129 else |
|
1130 m_doctypeSearchCount = 0; |
|
1131 if (searchCount == 4) { |
|
1132 // Found '<!--' sequence |
|
1133 src.advancePastNonNewline(); |
|
1134 m_dest = m_buffer; // ignore the previous part of this tag |
|
1135 state.setInComment(true); |
|
1136 state.setTagState(NoTag); |
|
1137 |
|
1138 // Fix bug 34302 at kde.bugs.org. Go ahead and treat |
|
1139 // <!--> as a valid comment, since both mozilla and IE on windows |
|
1140 // can handle this case. Only do this in quirks mode. -dwh |
|
1141 if (!src.isEmpty() && *src == '>' && document()->inCompatMode()) { |
|
1142 state.setInComment(false); |
|
1143 src.advancePastNonNewline(); |
|
1144 if (!src.isEmpty()) |
|
1145 m_cBuffer[cBufferPos++] = *src; |
|
1146 } else |
|
1147 state = parseComment(src, state); |
|
1148 |
|
1149 m_cBufferPos = cBufferPos; |
|
1150 return state; // Finished parsing tag! |
|
1151 } |
|
1152 m_cBuffer[cBufferPos++] = *src; |
|
1153 src.advancePastNonNewline(); |
|
1154 break; |
|
1155 } else |
|
1156 searchCount = 0; // Stop looking for '<!--' sequence |
|
1157 } |
|
1158 |
|
1159 if (m_doctypeSearchCount > 0) { |
|
1160 if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) { |
|
1161 m_doctypeSearchCount++; |
|
1162 m_cBuffer[cBufferPos++] = *src; |
|
1163 src.advancePastNonNewline(); |
|
1164 if (m_doctypeSearchCount == 9) { |
|
1165 // Found '<!DOCTYPE' sequence |
|
1166 state.setInDoctype(true); |
|
1167 state.setTagState(NoTag); |
|
1168 m_doctypeToken.reset(); |
|
1169 if (inViewSourceMode()) |
|
1170 m_doctypeToken.m_source.append(m_cBuffer, cBufferPos); |
|
1171 state = parseDoctype(src, state); |
|
1172 m_cBufferPos = cBufferPos; |
|
1173 return state; |
|
1174 } |
|
1175 break; |
|
1176 } else |
|
1177 m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence |
|
1178 } |
|
1179 |
|
1180 bool finish = false; |
|
1181 unsigned int ll = min(src.length(), CBUFLEN - cBufferPos); |
|
1182 while (ll--) { |
|
1183 UChar curchar = *src; |
|
1184 if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') { |
|
1185 finish = true; |
|
1186 break; |
|
1187 } |
|
1188 |
|
1189 // tolower() shows up on profiles. This is faster! |
|
1190 if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode()) |
|
1191 m_cBuffer[cBufferPos++] = curchar + ('a' - 'A'); |
|
1192 else |
|
1193 m_cBuffer[cBufferPos++] = curchar; |
|
1194 src.advancePastNonNewline(); |
|
1195 } |
|
1196 |
|
1197 // Disadvantage: we add the possible rest of the tag |
|
1198 // as attribute names. ### judge if this causes problems |
|
1199 if (finish || CBUFLEN == cBufferPos) { |
|
1200 bool beginTag; |
|
1201 UChar* ptr = m_cBuffer; |
|
1202 unsigned int len = cBufferPos; |
|
1203 m_cBuffer[cBufferPos] = '\0'; |
|
1204 if ((cBufferPos > 0) && (*ptr == '/')) { |
|
1205 // End Tag |
|
1206 beginTag = false; |
|
1207 ptr++; |
|
1208 len--; |
|
1209 } |
|
1210 else |
|
1211 // Start Tag |
|
1212 beginTag = true; |
|
1213 |
|
1214 // Ignore the / in fake xml tags like <br/>. We trim off the "/" so that we'll get "br" as the tag name and not "br/". |
|
1215 if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode()) |
|
1216 ptr[--len] = '\0'; |
|
1217 |
|
1218 // Now that we've shaved off any invalid / that might have followed the name), make the tag. |
|
1219 // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html) |
|
1220 if (ptr[0] != '!' || inViewSourceMode()) { |
|
1221 m_currentToken.tagName = AtomicString(ptr); |
|
1222 m_currentToken.beginTag = beginTag; |
|
1223 } |
|
1224 m_dest = m_buffer; |
|
1225 state.setTagState(SearchAttribute); |
|
1226 cBufferPos = 0; |
|
1227 } |
|
1228 break; |
|
1229 } |
|
1230 case SearchAttribute: |
|
1231 while (!src.isEmpty()) { |
|
1232 UChar curchar = *src; |
|
1233 // In this mode just ignore any quotes we encounter and treat them like spaces. |
|
1234 if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') { |
|
1235 if (curchar == '<' || curchar == '>') |
|
1236 state.setTagState(SearchEnd); |
|
1237 else |
|
1238 state.setTagState(AttributeName); |
|
1239 |
|
1240 cBufferPos = 0; |
|
1241 break; |
|
1242 } |
|
1243 if (inViewSourceMode()) |
|
1244 m_currentToken.addViewSourceChar(curchar); |
|
1245 src.advance(m_lineNumber); |
|
1246 } |
|
1247 break; |
|
1248 case AttributeName: |
|
1249 { |
|
1250 m_rawAttributeBeforeValue.clear(); |
|
1251 int ll = min(src.length(), CBUFLEN - cBufferPos); |
|
1252 while (ll--) { |
|
1253 UChar curchar = *src; |
|
1254 // If we encounter a "/" when scanning an attribute name, treat it as a delimiter. This allows the |
|
1255 // cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5). |
|
1256 if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) { |
|
1257 m_cBuffer[cBufferPos] = '\0'; |
|
1258 m_attrName = AtomicString(m_cBuffer); |
|
1259 m_dest = m_buffer; |
|
1260 *m_dest++ = 0; |
|
1261 state.setTagState(SearchEqual); |
|
1262 if (inViewSourceMode()) |
|
1263 m_currentToken.addViewSourceChar('a'); |
|
1264 break; |
|
1265 } |
|
1266 |
|
1267 // tolower() shows up on profiles. This is faster! |
|
1268 if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode()) |
|
1269 m_cBuffer[cBufferPos++] = curchar + ('a' - 'A'); |
|
1270 else |
|
1271 m_cBuffer[cBufferPos++] = curchar; |
|
1272 |
|
1273 m_rawAttributeBeforeValue.append(curchar); |
|
1274 src.advance(m_lineNumber); |
|
1275 } |
|
1276 if (cBufferPos == CBUFLEN) { |
|
1277 m_cBuffer[cBufferPos] = '\0'; |
|
1278 m_attrName = AtomicString(m_cBuffer); |
|
1279 m_dest = m_buffer; |
|
1280 *m_dest++ = 0; |
|
1281 state.setTagState(SearchEqual); |
|
1282 if (inViewSourceMode()) |
|
1283 m_currentToken.addViewSourceChar('a'); |
|
1284 } |
|
1285 break; |
|
1286 } |
|
1287 case SearchEqual: |
|
1288 while (!src.isEmpty()) { |
|
1289 UChar curchar = *src; |
|
1290 |
|
1291 if (lastIsSlash && curchar == '>') { |
|
1292 // This is a quirk (with a long sad history). We have to do this |
|
1293 // since widgets do <script src="foo.js"/> and expect the tag to close. |
|
1294 if (m_currentToken.tagName == scriptTag) |
|
1295 m_currentToken.selfClosingTag = true; |
|
1296 m_currentToken.brokenXMLStyle = true; |
|
1297 } |
|
1298 |
|
1299 // In this mode just ignore any quotes or slashes we encounter and treat them like spaces. |
|
1300 if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') { |
|
1301 if (curchar == '=') { |
|
1302 state.setTagState(SearchValue); |
|
1303 if (inViewSourceMode()) |
|
1304 m_currentToken.addViewSourceChar(curchar); |
|
1305 m_rawAttributeBeforeValue.append(curchar); |
|
1306 src.advancePastNonNewline(); |
|
1307 } else { |
|
1308 m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode()); |
|
1309 m_dest = m_buffer; |
|
1310 state.setTagState(SearchAttribute); |
|
1311 lastIsSlash = false; |
|
1312 } |
|
1313 break; |
|
1314 } |
|
1315 |
|
1316 lastIsSlash = curchar == '/'; |
|
1317 |
|
1318 if (inViewSourceMode()) |
|
1319 m_currentToken.addViewSourceChar(curchar); |
|
1320 m_rawAttributeBeforeValue.append(curchar); |
|
1321 src.advance(m_lineNumber); |
|
1322 } |
|
1323 break; |
|
1324 case SearchValue: |
|
1325 while (!src.isEmpty()) { |
|
1326 UChar curchar = *src; |
|
1327 if (!isASCIISpace(curchar)) { |
|
1328 if (curchar == '\'' || curchar == '\"') { |
|
1329 tquote = curchar == '\"' ? DoubleQuote : SingleQuote; |
|
1330 state.setTagState(QuotedValue); |
|
1331 if (inViewSourceMode()) |
|
1332 m_currentToken.addViewSourceChar(curchar); |
|
1333 m_rawAttributeBeforeValue.append(curchar); |
|
1334 src.advancePastNonNewline(); |
|
1335 } else |
|
1336 state.setTagState(Value); |
|
1337 |
|
1338 break; |
|
1339 } |
|
1340 if (inViewSourceMode()) |
|
1341 m_currentToken.addViewSourceChar(curchar); |
|
1342 m_rawAttributeBeforeValue.append(curchar); |
|
1343 src.advance(m_lineNumber); |
|
1344 } |
|
1345 break; |
|
1346 case QuotedValue: |
|
1347 while (!src.isEmpty()) { |
|
1348 checkBuffer(); |
|
1349 |
|
1350 UChar curchar = *src; |
|
1351 if (curchar <= '>' && !src.escaped()) { |
|
1352 if (curchar == '>' && m_attrName.isEmpty()) { |
|
1353 // Handle a case like <img '>. Just go ahead and be willing |
|
1354 // to close the whole tag. Don't consume the character and |
|
1355 // just go back into SearchEnd while ignoring the whole |
|
1356 // value. |
|
1357 // FIXME: Note that this is actually not a very good solution. |
|
1358 // It doesn't handle the general case of |
|
1359 // unmatched quotes among attributes that have names. -dwh |
|
1360 while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r')) |
|
1361 m_dest--; // remove trailing newlines |
|
1362 AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1); |
|
1363 if (!attributeValue.contains('/')) |
|
1364 m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?) |
|
1365 m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode()); |
|
1366 if (inViewSourceMode()) |
|
1367 m_currentToken.addViewSourceChar('x'); |
|
1368 state.setTagState(SearchAttribute); |
|
1369 m_dest = m_buffer; |
|
1370 tquote = NoQuote; |
|
1371 break; |
|
1372 } |
|
1373 |
|
1374 if (curchar == '&') { |
|
1375 src.advancePastNonNewline(); |
|
1376 state = parseEntity(src, m_dest, state, cBufferPos, true, true); |
|
1377 break; |
|
1378 } |
|
1379 |
|
1380 if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) { |
|
1381 // some <input type=hidden> rely on trailing spaces. argh |
|
1382 while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r')) |
|
1383 m_dest--; // remove trailing newlines |
|
1384 AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1); |
|
1385 if (m_attrName.isEmpty() && !attributeValue.contains('/')) { |
|
1386 m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?) |
|
1387 if (inViewSourceMode()) |
|
1388 m_currentToken.addViewSourceChar('x'); |
|
1389 } else if (inViewSourceMode()) |
|
1390 m_currentToken.addViewSourceChar('v'); |
|
1391 |
|
1392 if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeBuilder->skipMode() && m_attrName == srcAttr) { |
|
1393 String context(m_rawAttributeBeforeValue.data(), m_rawAttributeBeforeValue.size()); |
|
1394 if (xssAuditor() && !xssAuditor()->canLoadExternalScriptFromSrc(attributeValue)) |
|
1395 attributeValue = blankURL().string(); |
|
1396 } |
|
1397 |
|
1398 m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode()); |
|
1399 m_dest = m_buffer; |
|
1400 state.setTagState(SearchAttribute); |
|
1401 tquote = NoQuote; |
|
1402 if (inViewSourceMode()) |
|
1403 m_currentToken.addViewSourceChar(curchar); |
|
1404 src.advancePastNonNewline(); |
|
1405 break; |
|
1406 } |
|
1407 } |
|
1408 |
|
1409 *m_dest++ = curchar; |
|
1410 src.advance(m_lineNumber); |
|
1411 } |
|
1412 break; |
|
1413 case Value: |
|
1414 while (!src.isEmpty()) { |
|
1415 checkBuffer(); |
|
1416 UChar curchar = *src; |
|
1417 if (curchar <= '>' && !src.escaped()) { |
|
1418 // parse Entities |
|
1419 if (curchar == '&') { |
|
1420 src.advancePastNonNewline(); |
|
1421 state = parseEntity(src, m_dest, state, cBufferPos, true, true); |
|
1422 break; |
|
1423 } |
|
1424 // no quotes. Every space means end of value |
|
1425 // '/' does not delimit in IE! |
|
1426 if (isASCIISpace(curchar) || curchar == '>') { |
|
1427 AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1); |
|
1428 |
|
1429 if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeBuilder->skipMode() && m_attrName == srcAttr) { |
|
1430 String context(m_rawAttributeBeforeValue.data(), m_rawAttributeBeforeValue.size()); |
|
1431 if (xssAuditor() && !xssAuditor()->canLoadExternalScriptFromSrc(attributeValue)) |
|
1432 attributeValue = blankURL().string(); |
|
1433 } |
|
1434 |
|
1435 m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode()); |
|
1436 if (inViewSourceMode()) |
|
1437 m_currentToken.addViewSourceChar('v'); |
|
1438 m_dest = m_buffer; |
|
1439 state.setTagState(SearchAttribute); |
|
1440 break; |
|
1441 } |
|
1442 } |
|
1443 |
|
1444 *m_dest++ = curchar; |
|
1445 src.advance(m_lineNumber); |
|
1446 } |
|
1447 break; |
|
1448 case SearchEnd: |
|
1449 { |
|
1450 while (!src.isEmpty()) { |
|
1451 UChar ch = *src; |
|
1452 if (ch == '>' || ch == '<') |
|
1453 break; |
|
1454 if (ch == '/') |
|
1455 m_currentToken.selfClosingTag = true; |
|
1456 if (inViewSourceMode()) |
|
1457 m_currentToken.addViewSourceChar(ch); |
|
1458 src.advance(m_lineNumber); |
|
1459 } |
|
1460 if (src.isEmpty()) |
|
1461 break; |
|
1462 |
|
1463 searchCount = 0; // Stop looking for '<!--' sequence |
|
1464 state.setTagState(NoTag); |
|
1465 tquote = NoQuote; |
|
1466 |
|
1467 if (*src != '<') |
|
1468 src.advance(m_lineNumber); |
|
1469 |
|
1470 if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown |
|
1471 m_cBufferPos = cBufferPos; |
|
1472 return state; |
|
1473 } |
|
1474 |
|
1475 AtomicString tagName = m_currentToken.tagName; |
|
1476 |
|
1477 // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard |
|
1478 // compatibility. |
|
1479 bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag; |
|
1480 bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag; |
|
1481 if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_treeBuilder->skipMode()) { |
|
1482 Attribute* a = 0; |
|
1483 m_scriptTagSrcAttrValue = String(); |
|
1484 m_scriptTagCharsetAttrValue = String(); |
|
1485 if (m_currentToken.attrs && !m_fragment) { |
|
1486 if (document()->frame() && document()->frame()->script()->canExecuteScripts(NotAboutToExecuteScript)) { |
|
1487 if ((a = m_currentToken.attrs->getAttributeItem(srcAttr))) |
|
1488 m_scriptTagSrcAttrValue = document()->completeURL(deprecatedParseURL(a->value())).string(); |
|
1489 } |
|
1490 } |
|
1491 } |
|
1492 |
|
1493 RefPtr<Node> n = processToken(); |
|
1494 m_cBufferPos = cBufferPos; |
|
1495 if (n || inViewSourceMode()) { |
|
1496 State savedState = state; |
|
1497 SegmentedString savedSrc = src; |
|
1498 long savedLineno = m_lineNumber; |
|
1499 if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) { |
|
1500 if (beginTag) |
|
1501 state.setDiscardLF(true); // Discard the first LF after we open a pre. |
|
1502 } else if (tagName == scriptTag) { |
|
1503 ASSERT(!m_scriptNode); |
|
1504 m_scriptNode = static_pointer_cast<HTMLScriptElement>(n); |
|
1505 if (m_scriptNode) |
|
1506 m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset(); |
|
1507 if (beginTag) { |
|
1508 m_searchStopper = scriptEnd; |
|
1509 m_searchStopperLength = 8; |
|
1510 state.setInScript(true); |
|
1511 state = parseNonHTMLText(src, state); |
|
1512 } else if (isSelfClosingScript) { // Handle <script src="foo"/> |
|
1513 state.setInScript(true); |
|
1514 state = scriptHandler(state); |
|
1515 } |
|
1516 } else if (tagName == styleTag) { |
|
1517 if (beginTag) { |
|
1518 m_searchStopper = styleEnd; |
|
1519 m_searchStopperLength = 7; |
|
1520 state.setInStyle(true); |
|
1521 state = parseNonHTMLText(src, state); |
|
1522 } |
|
1523 } else if (tagName == textareaTag) { |
|
1524 if (beginTag) { |
|
1525 m_searchStopper = textareaEnd; |
|
1526 m_searchStopperLength = 10; |
|
1527 state.setInTextArea(true); |
|
1528 state = parseNonHTMLText(src, state); |
|
1529 } |
|
1530 } else if (tagName == titleTag) { |
|
1531 if (beginTag) { |
|
1532 m_searchStopper = titleEnd; |
|
1533 m_searchStopperLength = 7; |
|
1534 state.setInTitle(true); |
|
1535 state = parseNonHTMLText(src, state); |
|
1536 } |
|
1537 } else if (tagName == xmpTag) { |
|
1538 if (beginTag) { |
|
1539 m_searchStopper = xmpEnd; |
|
1540 m_searchStopperLength = 5; |
|
1541 state.setInXmp(true); |
|
1542 state = parseNonHTMLText(src, state); |
|
1543 } |
|
1544 } else if (tagName == iframeTag) { |
|
1545 if (beginTag) { |
|
1546 m_searchStopper = iframeEnd; |
|
1547 m_searchStopperLength = 8; |
|
1548 state.setInIFrame(true); |
|
1549 state = parseNonHTMLText(src, state); |
|
1550 } |
|
1551 } |
|
1552 if (src.isEmpty() && (state.inTitle() || inViewSourceMode()) && !state.inComment() && !(state.inScript() && m_currentScriptTagStartLineNumber)) { |
|
1553 // We just ate the rest of the document as the #text node under the special tag! |
|
1554 // Reset the state then retokenize without special handling. |
|
1555 // Let the parser clean up the missing close tag. |
|
1556 // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're |
|
1557 // at the end of the document unless m_noMoreData is also true. We need |
|
1558 // to detect this case elsewhere, and save the state somewhere other |
|
1559 // than a local variable. |
|
1560 state = savedState; |
|
1561 src = savedSrc; |
|
1562 m_lineNumber = savedLineno; |
|
1563 m_scriptCodeSize = 0; |
|
1564 } |
|
1565 } |
|
1566 if (tagName == plaintextTag) |
|
1567 state.setInPlainText(beginTag); |
|
1568 return state; // Finished parsing tag! |
|
1569 } |
|
1570 } // end switch |
|
1571 } |
|
1572 m_cBufferPos = cBufferPos; |
|
1573 return state; |
|
1574 } |
|
1575 |
|
1576 inline bool LegacyHTMLDocumentParser::continueProcessing(int& processedCount, double startTime, State &state) |
|
1577 { |
|
1578 // We don't want to be checking elapsed time with every character, so we only check after we've |
|
1579 // processed a certain number of characters. |
|
1580 bool allowedYield = state.allowYield(); |
|
1581 state.setAllowYield(false); |
|
1582 if (!state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript && (processedCount > m_tokenizerChunkSize || allowedYield)) { |
|
1583 processedCount = 0; |
|
1584 if (currentTime() - startTime > m_tokenizerTimeDelay) { |
|
1585 /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to |
|
1586 load, but this hurts overall performance on slower machines. For now turn this |
|
1587 off. |
|
1588 || (!document()->haveStylesheetsLoaded() && |
|
1589 (document()->documentElement()->id() != ID_HTML || document()->body()))) {*/ |
|
1590 // Schedule the timer to keep processing as soon as possible. |
|
1591 m_timer.startOneShot(0); |
|
1592 return false; |
|
1593 } |
|
1594 } |
|
1595 |
|
1596 processedCount++; |
|
1597 return true; |
|
1598 } |
|
1599 |
|
1600 // Turns the statemachine one crank using the passed in State object. |
|
1601 // This does not modify m_state directly in order to be reentrant. |
|
1602 ALWAYS_INLINE void LegacyHTMLDocumentParser::advance(State& state) |
|
1603 { |
|
1604 // do we need to enlarge the buffer? |
|
1605 checkBuffer(); |
|
1606 |
|
1607 UChar cc = *m_src; |
|
1608 |
|
1609 bool wasSkipLF = state.skipLF(); |
|
1610 if (wasSkipLF) |
|
1611 state.setSkipLF(false); |
|
1612 |
|
1613 if (wasSkipLF && (cc == '\n')) |
|
1614 m_src.advance(); |
|
1615 else if (state.needsSpecialWriteHandling()) { |
|
1616 // it's important to keep needsSpecialWriteHandling with the flags this block tests |
|
1617 if (state.hasEntityState()) |
|
1618 state = parseEntity(m_src, m_dest, state, m_cBufferPos, false, state.hasTagState()); |
|
1619 else if (state.inPlainText()) |
|
1620 state = parseText(m_src, state); |
|
1621 else if (state.inAnyNonHTMLText()) |
|
1622 state = parseNonHTMLText(m_src, state); |
|
1623 else if (state.inComment()) |
|
1624 state = parseComment(m_src, state); |
|
1625 else if (state.inDoctype()) |
|
1626 state = parseDoctype(m_src, state); |
|
1627 else if (state.inServer()) |
|
1628 state = parseServer(m_src, state); |
|
1629 else if (state.inProcessingInstruction()) |
|
1630 state = parseProcessingInstruction(m_src, state); |
|
1631 else if (state.hasTagState()) |
|
1632 state = parseTag(m_src, state); |
|
1633 else if (state.startTag()) { |
|
1634 state.setStartTag(false); |
|
1635 |
|
1636 switch (cc) { |
|
1637 case '/': |
|
1638 break; |
|
1639 case '!': { |
|
1640 // <!-- comment --> or <!DOCTYPE ...> |
|
1641 searchCount = 1; // Look for '<!--' sequence to start comment or '<!DOCTYPE' sequence to start doctype |
|
1642 m_doctypeSearchCount = 1; |
|
1643 break; |
|
1644 } |
|
1645 case '?': { |
|
1646 // xml processing instruction |
|
1647 state.setInProcessingInstruction(true); |
|
1648 tquote = NoQuote; |
|
1649 state = parseProcessingInstruction(m_src, state); |
|
1650 return; |
|
1651 } |
|
1652 case '%': |
|
1653 if (!m_brokenServer) { |
|
1654 // <% server stuff, handle as comment %> |
|
1655 state.setInServer(true); |
|
1656 tquote = NoQuote; |
|
1657 state = parseServer(m_src, state); |
|
1658 return; |
|
1659 } |
|
1660 // else fall through |
|
1661 default: { |
|
1662 if (((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) { |
|
1663 // Start of a Start-Tag |
|
1664 } else { |
|
1665 // Invalid tag |
|
1666 // Add as is |
|
1667 *m_dest = '<'; |
|
1668 m_dest++; |
|
1669 return; |
|
1670 } |
|
1671 } |
|
1672 }; // end case |
|
1673 |
|
1674 processToken(); |
|
1675 |
|
1676 m_cBufferPos = 0; |
|
1677 state.setTagState(TagName); |
|
1678 state = parseTag(m_src, state); |
|
1679 } |
|
1680 } else if (cc == '&' && !m_src.escaped()) { |
|
1681 m_src.advancePastNonNewline(); |
|
1682 state = parseEntity(m_src, m_dest, state, m_cBufferPos, true, state.hasTagState()); |
|
1683 } else if (cc == '<' && !m_src.escaped()) { |
|
1684 m_currentTagStartLineNumber = m_lineNumber; |
|
1685 m_src.advancePastNonNewline(); |
|
1686 state.setStartTag(true); |
|
1687 state.setDiscardLF(false); |
|
1688 } else if (cc == '\n' || cc == '\r') { |
|
1689 if (state.discardLF()) |
|
1690 // Ignore this LF |
|
1691 state.setDiscardLF(false); // We have discarded 1 LF |
|
1692 else { |
|
1693 // Process this LF |
|
1694 *m_dest++ = '\n'; |
|
1695 if (cc == '\r' && !m_src.excludeLineNumbers()) |
|
1696 m_lineNumber++; |
|
1697 } |
|
1698 |
|
1699 /* Check for MS-DOS CRLF sequence */ |
|
1700 if (cc == '\r') |
|
1701 state.setSkipLF(true); |
|
1702 m_src.advance(m_lineNumber); |
|
1703 } else { |
|
1704 state.setDiscardLF(false); |
|
1705 *m_dest++ = cc; |
|
1706 m_src.advancePastNonNewline(); |
|
1707 } |
|
1708 } |
|
1709 |
|
1710 void LegacyHTMLDocumentParser::willWriteHTML(const SegmentedString& source) |
|
1711 { |
|
1712 #if ENABLE(INSPECTOR) |
|
1713 if (InspectorTimelineAgent* timelineAgent = document()->inspectorTimelineAgent()) |
|
1714 timelineAgent->willWriteHTML(source.length(), m_lineNumber); |
|
1715 #endif |
|
1716 } |
|
1717 |
|
1718 void LegacyHTMLDocumentParser::didWriteHTML() |
|
1719 { |
|
1720 #if ENABLE(INSPECTOR) |
|
1721 if (InspectorTimelineAgent* timelineAgent = document()->inspectorTimelineAgent()) |
|
1722 timelineAgent->didWriteHTML(m_lineNumber); |
|
1723 #endif |
|
1724 } |
|
1725 |
|
1726 void LegacyHTMLDocumentParser::write(const SegmentedString& str, bool appendData) |
|
1727 { |
|
1728 if (!m_buffer) |
|
1729 return; |
|
1730 |
|
1731 if (m_parserStopped) |
|
1732 return; |
|
1733 |
|
1734 SegmentedString source(str); |
|
1735 if (m_executingScript) |
|
1736 source.setExcludeLineNumbers(); |
|
1737 |
|
1738 if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) { |
|
1739 // don't parse; we will do this later |
|
1740 if (m_currentPrependingSrc) |
|
1741 m_currentPrependingSrc->append(source); |
|
1742 else { |
|
1743 m_pendingSrc.append(source); |
|
1744 #if PRELOAD_SCANNER_ENABLED |
|
1745 if (m_preloadScanner && m_preloadScanner->inProgress() && appendData) |
|
1746 m_preloadScanner->write(source); |
|
1747 #endif |
|
1748 } |
|
1749 return; |
|
1750 } |
|
1751 |
|
1752 #if PRELOAD_SCANNER_ENABLED |
|
1753 if (m_preloadScanner && m_preloadScanner->inProgress() && appendData) |
|
1754 m_preloadScanner->end(); |
|
1755 #endif |
|
1756 |
|
1757 if (!m_src.isEmpty()) |
|
1758 m_src.append(source); |
|
1759 else |
|
1760 setSrc(source); |
|
1761 |
|
1762 // Once a timer is set, it has control of when the parser continues. |
|
1763 if (m_timer.isActive()) |
|
1764 return; |
|
1765 |
|
1766 bool wasInWrite = m_inWrite; |
|
1767 m_inWrite = true; |
|
1768 |
|
1769 willWriteHTML(source); |
|
1770 |
|
1771 Frame* frame = document()->frame(); |
|
1772 State state = m_state; |
|
1773 int processedCount = 0; |
|
1774 double startTime = currentTime(); |
|
1775 |
|
1776 while (!m_src.isEmpty() && (!frame || !frame->redirectScheduler()->locationChangePending())) { |
|
1777 if (!continueProcessing(processedCount, startTime, state)) |
|
1778 break; |
|
1779 advance(state); |
|
1780 } |
|
1781 |
|
1782 didWriteHTML(); |
|
1783 |
|
1784 m_inWrite = wasInWrite; |
|
1785 m_state = state; |
|
1786 |
|
1787 if (m_noMoreData && !m_inWrite && !state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) |
|
1788 end(); // this actually causes us to be deleted |
|
1789 |
|
1790 // After parsing, go ahead and dispatch image beforeload events, but only if we're doing |
|
1791 // document parsing. For document fragments we wait, since they'll likely end up in the document by the time |
|
1792 // the beforeload events fire. |
|
1793 if (!m_fragment) |
|
1794 ImageLoader::dispatchPendingBeforeLoadEvents(); |
|
1795 } |
|
1796 |
|
1797 void LegacyHTMLDocumentParser::insert(const SegmentedString& source) |
|
1798 { |
|
1799 // FIXME: forceSynchronous should always be the same as the bool passed to |
|
1800 // write(). However LegacyHTMLDocumentParser uses write("", false) to pump |
|
1801 // the parser (after running external scripts, etc.) thus necessitating a |
|
1802 // separate state for forceSynchronous. |
|
1803 bool wasForcedSynchronous = forceSynchronous(); |
|
1804 setForceSynchronous(true); |
|
1805 write(source, false); |
|
1806 setForceSynchronous(wasForcedSynchronous); |
|
1807 } |
|
1808 |
|
1809 void LegacyHTMLDocumentParser::append(const SegmentedString& source) |
|
1810 { |
|
1811 write(source, true); |
|
1812 } |
|
1813 |
|
1814 void LegacyHTMLDocumentParser::stopParsing() |
|
1815 { |
|
1816 DocumentParser::stopParsing(); |
|
1817 m_timer.stop(); |
|
1818 |
|
1819 // FIXME: Why is LegacyHTMLDocumentParser the only DocumentParser which calls checkCompleted? |
|
1820 // The FrameLoader needs to know that the parser has finished with its data, |
|
1821 // regardless of whether it happened naturally or due to manual intervention. |
|
1822 if (!m_fragment && document()->frame()) |
|
1823 document()->frame()->loader()->checkCompleted(); |
|
1824 } |
|
1825 |
|
1826 bool LegacyHTMLDocumentParser::processingData() const |
|
1827 { |
|
1828 return m_timer.isActive() || m_inWrite; |
|
1829 } |
|
1830 |
|
1831 void LegacyHTMLDocumentParser::timerFired(Timer<LegacyHTMLDocumentParser>*) |
|
1832 { |
|
1833 if (document()->view() && document()->view()->layoutPending() && !document()->minimumLayoutDelay()) { |
|
1834 // Restart the timer and let layout win. This is basically a way of ensuring that the layout |
|
1835 // timer has higher priority than our timer. |
|
1836 m_timer.startOneShot(0); |
|
1837 return; |
|
1838 } |
|
1839 |
|
1840 // Invoke write() as though more data came in. This might cause us to get deleted. |
|
1841 write(SegmentedString(), true); |
|
1842 } |
|
1843 |
|
1844 void LegacyHTMLDocumentParser::end() |
|
1845 { |
|
1846 ASSERT(!m_timer.isActive()); |
|
1847 m_timer.stop(); // Only helps if assertion above fires, but do it anyway. |
|
1848 |
|
1849 if (m_buffer) { |
|
1850 // parseTag is using the buffer for different matters |
|
1851 if (!m_state.hasTagState()) |
|
1852 processToken(); |
|
1853 |
|
1854 fastFree(m_scriptCode); |
|
1855 m_scriptCode = 0; |
|
1856 m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0; |
|
1857 |
|
1858 fastFree(m_buffer); |
|
1859 m_buffer = 0; |
|
1860 } |
|
1861 |
|
1862 if (!inViewSourceMode()) |
|
1863 m_treeBuilder->finished(); |
|
1864 else |
|
1865 document()->finishedParsing(); |
|
1866 } |
|
1867 |
|
1868 void LegacyHTMLDocumentParser::finish() |
|
1869 { |
|
1870 // do this as long as we don't find matching comment ends |
|
1871 while ((m_state.inComment() || m_state.inServer()) && m_scriptCode && m_scriptCodeSize) { |
|
1872 // we've found an unmatched comment start |
|
1873 if (m_state.inComment()) |
|
1874 m_brokenComments = true; |
|
1875 else |
|
1876 m_brokenServer = true; |
|
1877 checkScriptBuffer(); |
|
1878 m_scriptCode[m_scriptCodeSize] = 0; |
|
1879 m_scriptCode[m_scriptCodeSize + 1] = 0; |
|
1880 int pos; |
|
1881 String food; |
|
1882 if (m_state.inScript() || m_state.inStyle() || m_state.inTextArea()) |
|
1883 food = String(m_scriptCode, m_scriptCodeSize); |
|
1884 else if (m_state.inServer()) { |
|
1885 food = "<"; |
|
1886 food.append(m_scriptCode, m_scriptCodeSize); |
|
1887 } else { |
|
1888 pos = find(m_scriptCode, m_scriptCodeSize, '>'); |
|
1889 food = String(m_scriptCode + pos + 1, m_scriptCodeSize - pos - 1); |
|
1890 } |
|
1891 fastFree(m_scriptCode); |
|
1892 m_scriptCode = 0; |
|
1893 m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0; |
|
1894 m_state.setInComment(false); |
|
1895 m_state.setInServer(false); |
|
1896 if (!food.isEmpty()) |
|
1897 write(food, true); |
|
1898 } |
|
1899 // this indicates we will not receive any more data... but if we are waiting on |
|
1900 // an external script to load, we can't finish parsing until that is done |
|
1901 m_noMoreData = true; |
|
1902 if (!m_inWrite && !m_state.loadingExtScript() && !m_executingScript && !m_timer.isActive()) |
|
1903 end(); // this actually causes us to be deleted |
|
1904 } |
|
1905 |
|
1906 bool LegacyHTMLDocumentParser::finishWasCalled() |
|
1907 { |
|
1908 return m_noMoreData; |
|
1909 } |
|
1910 |
|
1911 PassRefPtr<Node> LegacyHTMLDocumentParser::processToken() |
|
1912 { |
|
1913 if (m_dest > m_buffer) { |
|
1914 m_currentToken.text = StringImpl::createStrippingNullCharacters(m_buffer, m_dest - m_buffer); |
|
1915 if (m_currentToken.tagName != commentAtom) |
|
1916 m_currentToken.tagName = textAtom; |
|
1917 } else if (m_currentToken.tagName == nullAtom) { |
|
1918 m_currentToken.reset(); |
|
1919 return 0; |
|
1920 } |
|
1921 |
|
1922 m_dest = m_buffer; |
|
1923 |
|
1924 RefPtr<Node> n; |
|
1925 |
|
1926 if (!m_parserStopped) { |
|
1927 if (NamedNodeMap* map = m_currentToken.attrs.get()) |
|
1928 map->shrinkToLength(); |
|
1929 if (inViewSourceMode()) |
|
1930 static_cast<HTMLViewSourceDocument*>(document())->addViewSourceToken(&m_currentToken); |
|
1931 else |
|
1932 // pass the token over to the parser, the parser DOES NOT delete the token |
|
1933 n = m_treeBuilder->parseToken(&m_currentToken); |
|
1934 } |
|
1935 m_currentToken.reset(); |
|
1936 |
|
1937 return n.release(); |
|
1938 } |
|
1939 |
|
1940 void LegacyHTMLDocumentParser::processDoctypeToken() |
|
1941 { |
|
1942 if (inViewSourceMode()) |
|
1943 static_cast<HTMLViewSourceDocument*>(document())->addViewSourceDoctypeToken(&m_doctypeToken); |
|
1944 else |
|
1945 m_treeBuilder->parseDoctypeToken(&m_doctypeToken); |
|
1946 } |
|
1947 |
|
1948 LegacyHTMLDocumentParser::~LegacyHTMLDocumentParser() |
|
1949 { |
|
1950 ASSERT(!m_inWrite); |
|
1951 reset(); |
|
1952 } |
|
1953 |
|
1954 |
|
1955 void LegacyHTMLDocumentParser::enlargeBuffer(int len) |
|
1956 { |
|
1957 // Resize policy: Always at least double the size of the buffer each time. |
|
1958 int delta = max(len, m_bufferSize); |
|
1959 |
|
1960 // Check for overflow. |
|
1961 // For now, handle overflow the same way we handle fastRealloc failure, with CRASH. |
|
1962 static const int maxSize = INT_MAX / sizeof(UChar); |
|
1963 if (delta > maxSize - m_bufferSize) |
|
1964 CRASH(); |
|
1965 |
|
1966 int newSize = m_bufferSize + delta; |
|
1967 int oldOffset = m_dest - m_buffer; |
|
1968 m_buffer = static_cast<UChar*>(fastRealloc(m_buffer, newSize * sizeof(UChar))); |
|
1969 m_dest = m_buffer + oldOffset; |
|
1970 m_bufferSize = newSize; |
|
1971 } |
|
1972 |
|
1973 void LegacyHTMLDocumentParser::enlargeScriptBuffer(int len) |
|
1974 { |
|
1975 // Resize policy: Always at least double the size of the buffer each time. |
|
1976 int delta = max(len, m_scriptCodeCapacity); |
|
1977 |
|
1978 // Check for overflow. |
|
1979 // For now, handle overflow the same way we handle fastRealloc failure, with CRASH. |
|
1980 static const int maxSize = INT_MAX / sizeof(UChar); |
|
1981 if (delta > maxSize - m_scriptCodeCapacity) |
|
1982 CRASH(); |
|
1983 |
|
1984 int newSize = m_scriptCodeCapacity + delta; |
|
1985 // If we allow fastRealloc(ptr, 0), it will call CRASH(). We run into this |
|
1986 // case if the HTML being parsed begins with "<!--" and there's more data |
|
1987 // coming. |
|
1988 if (!newSize) { |
|
1989 ASSERT(!m_scriptCode); |
|
1990 return; |
|
1991 } |
|
1992 |
|
1993 m_scriptCode = static_cast<UChar*>(fastRealloc(m_scriptCode, newSize * sizeof(UChar))); |
|
1994 m_scriptCodeCapacity = newSize; |
|
1995 } |
|
1996 |
|
1997 void LegacyHTMLDocumentParser::executeScriptsWaitingForStylesheets() |
|
1998 { |
|
1999 ASSERT(document()->haveStylesheetsLoaded()); |
|
2000 |
|
2001 if (m_hasScriptsWaitingForStylesheets) |
|
2002 notifyFinished(0); |
|
2003 } |
|
2004 |
|
2005 void LegacyHTMLDocumentParser::notifyFinished(CachedResource*) |
|
2006 { |
|
2007 executeExternalScriptsIfReady(); |
|
2008 } |
|
2009 |
|
2010 void LegacyHTMLDocumentParser::executeExternalScriptsIfReady() |
|
2011 { |
|
2012 ASSERT(!m_pendingScripts.isEmpty()); |
|
2013 |
|
2014 // Make external scripts wait for external stylesheets. |
|
2015 // FIXME: This needs to be done for inline scripts too. |
|
2016 m_hasScriptsWaitingForStylesheets = !document()->haveStylesheetsLoaded(); |
|
2017 if (m_hasScriptsWaitingForStylesheets) |
|
2018 return; |
|
2019 |
|
2020 bool finished = false; |
|
2021 |
|
2022 double startTime = currentTime(); |
|
2023 while (!finished && m_pendingScripts.first()->isLoaded()) { |
|
2024 if (!continueExecutingExternalScripts(startTime)) |
|
2025 break; |
|
2026 |
|
2027 CachedResourceHandle<CachedScript> cs = m_pendingScripts.takeFirst(); |
|
2028 ASSERT(cache()->disabled() || cs->accessCount() > 0); |
|
2029 |
|
2030 setSrc(SegmentedString()); |
|
2031 |
|
2032 // make sure we forget about the script before we execute the new one |
|
2033 // infinite recursion might happen otherwise |
|
2034 ScriptSourceCode sourceCode(cs.get()); |
|
2035 bool errorOccurred = cs->errorOccurred(); |
|
2036 cs->removeClient(this); |
|
2037 |
|
2038 RefPtr<Node> n = m_scriptNode.release(); |
|
2039 |
|
2040 if (errorOccurred) |
|
2041 n->dispatchEvent(Event::create(eventNames().errorEvent, true, false)); |
|
2042 else { |
|
2043 if (static_cast<HTMLScriptElement*>(n.get())->shouldExecuteAsJavaScript()) |
|
2044 m_state = scriptExecution(sourceCode, m_state); |
|
2045 #if ENABLE(XHTMLMP) |
|
2046 else |
|
2047 document()->setShouldProcessNoscriptElement(true); |
|
2048 #endif |
|
2049 n->dispatchEvent(Event::create(eventNames().loadEvent, false, false)); |
|
2050 } |
|
2051 |
|
2052 // The state of m_pendingScripts.isEmpty() can change inside the scriptExecution() |
|
2053 // call above, so test afterwards. |
|
2054 finished = m_pendingScripts.isEmpty(); |
|
2055 if (finished) { |
|
2056 ASSERT(!m_hasScriptsWaitingForStylesheets); |
|
2057 m_state.setLoadingExtScript(false); |
|
2058 } else if (m_hasScriptsWaitingForStylesheets) { |
|
2059 // m_hasScriptsWaitingForStylesheets flag might have changed during the script execution. |
|
2060 // If it did we are now blocked waiting for stylesheets and should not execute more scripts until they arrive. |
|
2061 finished = true; |
|
2062 } |
|
2063 |
|
2064 // 'm_requestingScript' is true when we are called synchronously from |
|
2065 // scriptHandler(). In that case scriptHandler() will take care |
|
2066 // of m_pendingSrc. |
|
2067 if (!m_requestingScript) { |
|
2068 SegmentedString rest = m_pendingSrc; |
|
2069 m_pendingSrc.clear(); |
|
2070 write(rest, false); |
|
2071 // we might be deleted at this point, do not access any members. |
|
2072 } |
|
2073 } |
|
2074 } |
|
2075 |
|
2076 void LegacyHTMLDocumentParser::executeExternalScriptsTimerFired(Timer<LegacyHTMLDocumentParser>*) |
|
2077 { |
|
2078 if (document()->view() && document()->view()->layoutPending() && !document()->minimumLayoutDelay()) { |
|
2079 // Restart the timer and do layout first. |
|
2080 m_externalScriptsTimer.startOneShot(0); |
|
2081 return; |
|
2082 } |
|
2083 |
|
2084 // Continue executing external scripts. |
|
2085 executeExternalScriptsIfReady(); |
|
2086 } |
|
2087 |
|
2088 bool LegacyHTMLDocumentParser::continueExecutingExternalScripts(double startTime) |
|
2089 { |
|
2090 if (m_externalScriptsTimer.isActive()) |
|
2091 return false; |
|
2092 |
|
2093 if (currentTime() - startTime > m_tokenizerTimeDelay) { |
|
2094 // Schedule the timer to keep processing as soon as possible. |
|
2095 m_externalScriptsTimer.startOneShot(0); |
|
2096 return false; |
|
2097 } |
|
2098 return true; |
|
2099 } |
|
2100 |
|
2101 bool LegacyHTMLDocumentParser::isWaitingForScripts() const |
|
2102 { |
|
2103 return m_state.loadingExtScript(); |
|
2104 } |
|
2105 |
|
2106 void LegacyHTMLDocumentParser::setSrc(const SegmentedString& source) |
|
2107 { |
|
2108 m_src = source; |
|
2109 } |
|
2110 |
|
2111 void LegacyHTMLDocumentParser::parseDocumentFragment(const String& source, DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission) |
|
2112 { |
|
2113 LegacyHTMLDocumentParser parser(fragment, scriptingPermission); |
|
2114 parser.setForceSynchronous(true); |
|
2115 parser.write(source, true); |
|
2116 parser.finish(); |
|
2117 ASSERT(!parser.processingData()); // make sure we're done (see 3963151) |
|
2118 } |
|
2119 |
|
2120 UChar decodeNamedEntity(const char* name) |
|
2121 { |
|
2122 const Entity* e = findEntity(name, strlen(name)); |
|
2123 return e ? e->code : 0; |
|
2124 } |
|
2125 |
|
2126 } |