|
1 /* |
|
2 Copyright (C) 1997 Martin Jones (mjones@kde.org) |
|
3 (C) 1997 Torben Weis (weis@kde.org) |
|
4 (C) 1999,2001 Lars Knoll (knoll@kde.org) |
|
5 (C) 2000,2001 Dirk Mueller (mueller@kde.org) |
|
6 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
|
7 Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) |
|
8 |
|
9 This library is free software; you can redistribute it and/or |
|
10 modify it under the terms of the GNU Library General Public |
|
11 License as published by the Free Software Foundation; either |
|
12 version 2 of the License, or (at your option) any later version. |
|
13 |
|
14 This library is distributed in the hope that it will be useful, |
|
15 but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
17 Library General Public License for more details. |
|
18 |
|
19 You should have received a copy of the GNU Library General Public License |
|
20 along with this library; see the file COPYING.LIB. If not, write to |
|
21 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
|
22 Boston, MA 02110-1301, USA. |
|
23 */ |
|
24 |
|
25 #include "config.h" |
|
26 #include "LegacyHTMLTreeBuilder.h" |
|
27 |
|
28 #include "CharacterNames.h" |
|
29 #include "CSSPropertyNames.h" |
|
30 #include "CSSValueKeywords.h" |
|
31 #include "Chrome.h" |
|
32 #include "ChromeClient.h" |
|
33 #include "Comment.h" |
|
34 #include "Console.h" |
|
35 #include "DOMWindow.h" |
|
36 #include "DocumentFragment.h" |
|
37 #include "DocumentType.h" |
|
38 #include "Frame.h" |
|
39 #include "HTMLBodyElement.h" |
|
40 #include "HTMLDocument.h" |
|
41 #include "HTMLDivElement.h" |
|
42 #include "HTMLDListElement.h" |
|
43 #include "HTMLElementFactory.h" |
|
44 #include "HTMLFormElement.h" |
|
45 #include "HTMLHeadElement.h" |
|
46 #include "HTMLHRElement.h" |
|
47 #include "HTMLHtmlElement.h" |
|
48 #include "HTMLIsIndexElement.h" |
|
49 #include "HTMLMapElement.h" |
|
50 #include "HTMLNames.h" |
|
51 #include "HTMLParserQuirks.h" |
|
52 #include "HTMLTableCellElement.h" |
|
53 #include "HTMLTableRowElement.h" |
|
54 #include "HTMLTableSectionElement.h" |
|
55 #include "LegacyHTMLDocumentParser.h" |
|
56 #include "LocalizedStrings.h" |
|
57 #include "Page.h" |
|
58 #include "Settings.h" |
|
59 #include "Text.h" |
|
60 #include "TreeDepthLimit.h" |
|
61 #include <wtf/StdLibExtras.h> |
|
62 #include <wtf/dtoa.h> |
|
63 |
|
64 namespace WebCore { |
|
65 |
|
66 using namespace HTMLNames; |
|
67 |
|
68 static const unsigned cMaxRedundantTagDepth = 20; |
|
69 static const unsigned cResidualStyleMaxDepth = 200; |
|
70 static const unsigned cResidualStyleIterationLimit = 10; |
|
71 |
|
72 |
|
73 static const int minBlockLevelTagPriority = 3; |
|
74 |
|
75 // A cap on the number of tags with priority minBlockLevelTagPriority or higher |
|
76 // allowed in m_blockStack. The cap is enforced by adding such new elements as |
|
77 // siblings instead of children once it is reached. |
|
78 static const size_t cMaxBlockDepth = 4096; |
|
79 |
|
80 |
|
81 typedef HashSet<AtomicStringImpl*> TagNameSet; |
|
82 |
|
83 template< size_t ArraySize > |
|
84 static void addTags(TagNameSet& set, QualifiedName (&names)[ArraySize]) |
|
85 { |
|
86 for (size_t x = 0; x < ArraySize; x++) { |
|
87 const QualifiedName& name = names[x]; |
|
88 set.add(name.localName().impl()); |
|
89 } |
|
90 } |
|
91 |
|
92 struct HTMLStackElem : Noncopyable { |
|
93 HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx) |
|
94 : tagName(t) |
|
95 , level(lvl) |
|
96 , strayTableContent(false) |
|
97 , node(n) |
|
98 , didRefNode(r) |
|
99 , next(nx) |
|
100 { |
|
101 } |
|
102 |
|
103 void derefNode() |
|
104 { |
|
105 if (didRefNode) |
|
106 node->deref(); |
|
107 } |
|
108 |
|
109 AtomicString tagName; |
|
110 int level; |
|
111 bool strayTableContent; |
|
112 Node* node; |
|
113 bool didRefNode; |
|
114 HTMLStackElem* next; |
|
115 }; |
|
116 |
|
117 /** |
|
118 * The parser parses tokenized input into the document, building up the |
|
119 * document tree. If the document is well-formed, parsing it is straightforward. |
|
120 * |
|
121 * Unfortunately, we have to handle many HTML documents that are not well-formed, |
|
122 * so the parser has to be tolerant about errors. |
|
123 * |
|
124 * We have to take care of at least the following error conditions: |
|
125 * |
|
126 * 1. The element being added is explicitly forbidden inside some outer tag. |
|
127 * In this case we should close all tags up to the one that forbids |
|
128 * the element, and add it afterwards. |
|
129 * |
|
130 * 2. We are not allowed to add the element directly. It could be that |
|
131 * the person writing the document forgot some tag in between (or that the |
|
132 * tag in between is optional). This could be the case with the following |
|
133 * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?). |
|
134 * |
|
135 * 3. We want to add a block element inside an inline element. Close all |
|
136 * inline elements up to the next higher block element. |
|
137 * |
|
138 * 4. If this doesn't help, close elements until we are allowed to add the |
|
139 * element or ignore the tag. |
|
140 * |
|
141 */ |
|
142 |
|
143 LegacyHTMLTreeBuilder::LegacyHTMLTreeBuilder(HTMLDocument* doc, bool reportErrors) |
|
144 : m_document(doc) |
|
145 , m_current(doc) |
|
146 , m_didRefCurrent(false) |
|
147 , m_blockStack(0) |
|
148 , m_blocksInStack(0) |
|
149 , m_treeDepth(0) |
|
150 , m_hasPElementInScope(NotInScope) |
|
151 , m_inBody(false) |
|
152 , m_haveContent(false) |
|
153 , m_haveFrameSet(false) |
|
154 , m_isParsingFragment(false) |
|
155 , m_reportErrors(reportErrors) |
|
156 , m_handlingResidualStyleAcrossBlocks(false) |
|
157 , m_inStrayTableContent(0) |
|
158 , m_scriptingPermission(FragmentScriptingAllowed) |
|
159 , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) |
|
160 { |
|
161 } |
|
162 |
|
163 LegacyHTMLTreeBuilder::LegacyHTMLTreeBuilder(DocumentFragment* frag, FragmentScriptingPermission scriptingPermission) |
|
164 : m_document(frag->document()) |
|
165 , m_current(frag) |
|
166 , m_didRefCurrent(true) |
|
167 , m_blockStack(0) |
|
168 , m_blocksInStack(0) |
|
169 , m_treeDepth(0) |
|
170 , m_hasPElementInScope(NotInScope) |
|
171 , m_inBody(true) |
|
172 , m_haveContent(false) |
|
173 , m_haveFrameSet(false) |
|
174 , m_isParsingFragment(true) |
|
175 , m_reportErrors(false) |
|
176 , m_handlingResidualStyleAcrossBlocks(false) |
|
177 , m_inStrayTableContent(0) |
|
178 , m_scriptingPermission(scriptingPermission) |
|
179 , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) |
|
180 { |
|
181 if (frag) |
|
182 frag->ref(); |
|
183 } |
|
184 |
|
185 LegacyHTMLTreeBuilder::~LegacyHTMLTreeBuilder() |
|
186 { |
|
187 freeBlock(); |
|
188 if (m_didRefCurrent) |
|
189 m_current->deref(); |
|
190 } |
|
191 |
|
192 void LegacyHTMLTreeBuilder::reset() |
|
193 { |
|
194 ASSERT(!m_isParsingFragment); |
|
195 |
|
196 setCurrent(m_document); |
|
197 |
|
198 freeBlock(); |
|
199 |
|
200 m_treeDepth = 0; |
|
201 m_inBody = false; |
|
202 m_haveFrameSet = false; |
|
203 m_haveContent = false; |
|
204 m_inStrayTableContent = 0; |
|
205 |
|
206 m_currentFormElement = 0; |
|
207 m_currentMapElement = 0; |
|
208 m_head = 0; |
|
209 m_isindexElement = 0; |
|
210 |
|
211 m_skipModeTag = nullAtom; |
|
212 |
|
213 if (m_parserQuirks) |
|
214 m_parserQuirks->reset(); |
|
215 } |
|
216 |
|
217 void LegacyHTMLTreeBuilder::setCurrent(Node* newCurrent) |
|
218 { |
|
219 bool didRefNewCurrent = newCurrent && newCurrent != m_document; |
|
220 if (didRefNewCurrent) |
|
221 newCurrent->ref(); |
|
222 if (m_didRefCurrent) |
|
223 m_current->deref(); |
|
224 m_current = newCurrent; |
|
225 m_didRefCurrent = didRefNewCurrent; |
|
226 } |
|
227 |
|
228 inline static int tagPriorityOfNode(Node* n) |
|
229 { |
|
230 return n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0; |
|
231 } |
|
232 |
|
233 inline void LegacyHTMLTreeBuilder::limitDepth(int tagPriority) |
|
234 { |
|
235 while (m_treeDepth >= maxDOMTreeDepth) |
|
236 popBlock(m_blockStack->tagName); |
|
237 if (tagPriority >= minBlockLevelTagPriority) { |
|
238 while (m_blocksInStack >= cMaxBlockDepth) |
|
239 popBlock(m_blockStack->tagName); |
|
240 } |
|
241 } |
|
242 |
|
243 inline bool LegacyHTMLTreeBuilder::insertNodeAfterLimitDepth(Node* n, bool flat) |
|
244 { |
|
245 limitDepth(tagPriorityOfNode(n)); |
|
246 return insertNode(n, flat); |
|
247 } |
|
248 |
|
249 PassRefPtr<Node> LegacyHTMLTreeBuilder::parseToken(Token* t) |
|
250 { |
|
251 if (!m_skipModeTag.isNull()) { |
|
252 if (!t->beginTag && t->tagName == m_skipModeTag) |
|
253 // Found the end tag for the current skip mode, so we're done skipping. |
|
254 m_skipModeTag = nullAtom; |
|
255 else if (m_current->localName() == t->tagName) |
|
256 // Do not skip </iframe>. |
|
257 // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag? |
|
258 ; |
|
259 else |
|
260 return 0; |
|
261 } |
|
262 |
|
263 // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>. |
|
264 if (t->isCloseTag(brTag) && m_document->inCompatMode()) { |
|
265 reportError(MalformedBRError); |
|
266 t->beginTag = true; |
|
267 } |
|
268 |
|
269 if (!t->beginTag) { |
|
270 processCloseTag(t); |
|
271 return 0; |
|
272 } |
|
273 |
|
274 // Ignore spaces, if we're not inside a paragraph or other inline code. |
|
275 // Do not alter the text if it is part of a scriptTag. |
|
276 if (t->tagName == textAtom && t->text && m_current->localName() != scriptTag) { |
|
277 if (m_inBody && !skipMode() && m_current->localName() != styleTag && |
|
278 m_current->localName() != titleTag && !t->text->containsOnlyWhitespace()) |
|
279 m_haveContent = true; |
|
280 |
|
281 // HTML5 requires text node coalescing. |
|
282 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#insert-a-character |
|
283 Node* previousChild = m_current->lastChild(); |
|
284 if (previousChild && previousChild->isTextNode()) { |
|
285 // Only coalesce text nodes if the text node wouldn't be foster parented. |
|
286 if (!m_current->hasTagName(htmlTag) |
|
287 && !m_current->hasTagName(tableTag) |
|
288 && !m_current->hasTagName(trTag) |
|
289 && !m_current->hasTagName(theadTag) |
|
290 && !m_current->hasTagName(tbodyTag) |
|
291 && !m_current->hasTagName(tfootTag) |
|
292 && !m_current->hasTagName(titleTag)) { |
|
293 // Technically we're only supposed to merge into the previous |
|
294 // text node if it was the last node inserted by the parser. |
|
295 // (This was a spec modification made to make it easier for |
|
296 // mozilla to run their parser in a thread.) |
|
297 // In practice it does not seem to matter. |
|
298 CharacterData* textNode = static_cast<CharacterData*>(previousChild); |
|
299 textNode->parserAppendData(t->text); |
|
300 return textNode; |
|
301 } |
|
302 } |
|
303 |
|
304 RefPtr<Node> n; |
|
305 String text = t->text.get(); |
|
306 unsigned charsLeft = text.length(); |
|
307 while (charsLeft) { |
|
308 // split large blocks of text to nodes of manageable size |
|
309 n = Text::createWithLengthLimit(m_document, text, charsLeft); |
|
310 if (!insertNodeAfterLimitDepth(n.get(), t->selfClosingTag)) |
|
311 return 0; |
|
312 } |
|
313 return n; |
|
314 } |
|
315 |
|
316 RefPtr<Node> n = getNode(t); |
|
317 // just to be sure, and to catch currently unimplemented stuff |
|
318 if (!n) |
|
319 return 0; |
|
320 |
|
321 // set attributes |
|
322 if (n->isHTMLElement()) { |
|
323 HTMLElement* e = static_cast<HTMLElement*>(n.get()); |
|
324 if (m_scriptingPermission == FragmentScriptingAllowed || t->tagName != scriptTag) |
|
325 e->setAttributeMap(t->attrs.get(), m_scriptingPermission); |
|
326 |
|
327 // take care of optional close tags |
|
328 if (e->endTagRequirement() == TagStatusOptional) |
|
329 popBlock(t->tagName); |
|
330 |
|
331 // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing |
|
332 // syntax was used, report an error. |
|
333 if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) { |
|
334 if (t->tagName == scriptTag) |
|
335 reportError(IncorrectXMLCloseScriptWarning); |
|
336 else |
|
337 reportError(IncorrectXMLSelfCloseError, &t->tagName); |
|
338 } |
|
339 } |
|
340 |
|
341 if (!insertNodeAfterLimitDepth(n.get(), t->selfClosingTag)) { |
|
342 // we couldn't insert the node |
|
343 |
|
344 if (n->isElementNode()) { |
|
345 Element* e = static_cast<Element*>(n.get()); |
|
346 e->setAttributeMap(0); |
|
347 } |
|
348 |
|
349 if (m_currentMapElement == n) |
|
350 m_currentMapElement = 0; |
|
351 |
|
352 if (m_currentFormElement == n) |
|
353 m_currentFormElement = 0; |
|
354 |
|
355 if (m_head == n) |
|
356 m_head = 0; |
|
357 |
|
358 return 0; |
|
359 } |
|
360 return n; |
|
361 } |
|
362 |
|
363 void LegacyHTMLTreeBuilder::parseDoctypeToken(DoctypeToken* t) |
|
364 { |
|
365 // Ignore any doctype after the first. Ignore doctypes in fragments. |
|
366 if (m_document->doctype() || m_isParsingFragment || m_current != m_document) |
|
367 return; |
|
368 |
|
369 // Make a new doctype node and set it as our doctype. |
|
370 m_document->legacyParserAddChild(DocumentType::create(m_document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID))); |
|
371 if (t->m_forceQuirks) |
|
372 m_document->setParseMode(Document::Compat); |
|
373 } |
|
374 |
|
375 static bool isTableSection(const Node* n) |
|
376 { |
|
377 return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag); |
|
378 } |
|
379 |
|
380 static bool isTablePart(const Node* n) |
|
381 { |
|
382 return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) |
|
383 || isTableSection(n); |
|
384 } |
|
385 |
|
386 static bool isTableRelated(const Node* n) |
|
387 { |
|
388 return n->hasTagName(tableTag) || isTablePart(n); |
|
389 } |
|
390 |
|
391 static bool isScopingTag(const AtomicString& tagName) |
|
392 { |
|
393 return tagName == appletTag || tagName == captionTag || tagName == tdTag |
|
394 || tagName == thTag || tagName == buttonTag || tagName == marqueeTag |
|
395 || tagName == objectTag || tagName == tableTag || tagName == htmlTag; |
|
396 } |
|
397 |
|
398 bool LegacyHTMLTreeBuilder::insertNode(Node* n, bool flat) |
|
399 { |
|
400 RefPtr<Node> protectNode(n); |
|
401 |
|
402 const AtomicString& localName = n->localName(); |
|
403 |
|
404 // <table> is never allowed inside stray table content. Always pop out of the stray table content |
|
405 // and close up the first table, and then start the second table as a sibling. |
|
406 if (m_inStrayTableContent && localName == tableTag) |
|
407 popBlock(tableTag); |
|
408 |
|
409 if (m_parserQuirks && !m_parserQuirks->shouldInsertNode(m_current, n)) |
|
410 return false; |
|
411 |
|
412 int tagPriority = tagPriorityOfNode(n); |
|
413 |
|
414 // let's be stupid and just try to insert it. |
|
415 // this should work if the document is well-formed |
|
416 Node* newNode = m_current->legacyParserAddChild(n); |
|
417 if (!newNode) |
|
418 return handleError(n, flat, localName, tagPriority); // Try to handle the error. |
|
419 |
|
420 // don't push elements without end tags (e.g., <img>) on the stack |
|
421 bool parentAttached = m_current->attached(); |
|
422 if (tagPriority > 0 && !flat) { |
|
423 if (newNode == m_current) { |
|
424 // This case should only be hit when a demoted <form> is placed inside a table. |
|
425 ASSERT(localName == formTag); |
|
426 reportError(FormInsideTablePartError, &m_current->localName()); |
|
427 HTMLFormElement* form = static_cast<HTMLFormElement*>(n); |
|
428 form->setDemoted(true); |
|
429 } else { |
|
430 // The pushBlock function transfers ownership of current to the block stack |
|
431 // so we're guaranteed that m_didRefCurrent is false. The code below is an |
|
432 // optimized version of setCurrent that takes advantage of that fact and also |
|
433 // assumes that newNode is neither 0 nor a pointer to the document. |
|
434 pushBlock(localName, tagPriority); |
|
435 newNode->beginParsingChildren(); |
|
436 ASSERT(!m_didRefCurrent); |
|
437 newNode->ref(); |
|
438 m_current = newNode; |
|
439 m_didRefCurrent = true; |
|
440 } |
|
441 if (parentAttached && !n->attached() && !m_isParsingFragment) |
|
442 n->attach(); |
|
443 } else { |
|
444 if (parentAttached && !n->attached() && !m_isParsingFragment) |
|
445 n->attach(); |
|
446 n->finishParsingChildren(); |
|
447 } |
|
448 |
|
449 if (localName == htmlTag && m_document->frame() && !m_isParsingFragment) |
|
450 m_document->frame()->loader()->dispatchDocumentElementAvailable(); |
|
451 |
|
452 return true; |
|
453 } |
|
454 |
|
455 bool LegacyHTMLTreeBuilder::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority) |
|
456 { |
|
457 // Error handling code. This is just ad hoc handling of specific parent/child combinations. |
|
458 bool handled = false; |
|
459 |
|
460 // 1. Check out the element's tag name to decide how to deal with errors. |
|
461 if (n->isHTMLElement()) { |
|
462 HTMLElement* h = static_cast<HTMLElement*>(n); |
|
463 if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) { |
|
464 if (m_inStrayTableContent && !isTableRelated(m_current)) { |
|
465 reportError(MisplacedTablePartError, &localName, &m_current->localName()); |
|
466 // pop out to the nearest enclosing table-related tag. |
|
467 while (m_blockStack && !isTableRelated(m_current)) |
|
468 popOneBlock(); |
|
469 return insertNode(n); |
|
470 } |
|
471 } else if (h->hasLocalName(headTag)) { |
|
472 if (!m_current->isDocumentNode() && !m_current->hasTagName(htmlTag)) { |
|
473 reportError(MisplacedHeadError); |
|
474 return false; |
|
475 } |
|
476 } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) { |
|
477 bool createdHead = false; |
|
478 if (!m_head) { |
|
479 createHead(); |
|
480 createdHead = true; |
|
481 } |
|
482 if (m_head) { |
|
483 if (!createdHead) |
|
484 reportError(MisplacedHeadContentError, &localName, &m_current->localName()); |
|
485 if (m_head->legacyParserAddChild(n)) { |
|
486 if (!n->attached() && !m_isParsingFragment) |
|
487 n->attach(); |
|
488 return true; |
|
489 } |
|
490 return false; |
|
491 } |
|
492 } else if (h->hasLocalName(htmlTag)) { |
|
493 if (!m_current->isDocumentNode() ) { |
|
494 if (m_document->documentElement() && m_document->documentElement()->hasTagName(htmlTag) && !m_isParsingFragment) { |
|
495 reportError(RedundantHTMLBodyError, &localName); |
|
496 // we have another <HTML> element.... apply attributes to existing one |
|
497 // make sure we don't overwrite already existing attributes |
|
498 NamedNodeMap* map = static_cast<Element*>(n)->attributes(true); |
|
499 Element* existingHTML = static_cast<Element*>(m_document->documentElement()); |
|
500 NamedNodeMap* bmap = existingHTML->attributes(false); |
|
501 for (unsigned l = 0; map && l < map->length(); ++l) { |
|
502 Attribute* it = map->attributeItem(l); |
|
503 if (!bmap->getAttributeItem(it->name())) |
|
504 existingHTML->setAttribute(it->name(), it->value()); |
|
505 } |
|
506 } |
|
507 return false; |
|
508 } |
|
509 } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag) || h->hasLocalName(scriptTag)) { |
|
510 bool createdHead = false; |
|
511 if (!m_head) { |
|
512 createHead(); |
|
513 createdHead = true; |
|
514 } |
|
515 if (m_head) { |
|
516 Node* newNode = m_head->legacyParserAddChild(n); |
|
517 if (!newNode) { |
|
518 setSkipMode(h->tagQName()); |
|
519 return false; |
|
520 } |
|
521 |
|
522 if (!createdHead) |
|
523 reportError(MisplacedHeadContentError, &localName, &m_current->localName()); |
|
524 |
|
525 pushBlock(localName, tagPriority); |
|
526 newNode->beginParsingChildren(); |
|
527 setCurrent(newNode); |
|
528 if (!n->attached() && !m_isParsingFragment) |
|
529 n->attach(); |
|
530 return true; |
|
531 } |
|
532 if (m_inBody) { |
|
533 setSkipMode(h->tagQName()); |
|
534 return false; |
|
535 } |
|
536 } else if (h->hasLocalName(bodyTag)) { |
|
537 if (m_inBody && m_document->body() && !m_isParsingFragment) { |
|
538 // we have another <BODY> element.... apply attributes to existing one |
|
539 // make sure we don't overwrite already existing attributes |
|
540 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor> |
|
541 reportError(RedundantHTMLBodyError, &localName); |
|
542 NamedNodeMap* map = static_cast<Element*>(n)->attributes(true); |
|
543 Element* existingBody = m_document->body(); |
|
544 NamedNodeMap* bmap = existingBody->attributes(false); |
|
545 for (unsigned l = 0; map && l < map->length(); ++l) { |
|
546 Attribute* it = map->attributeItem(l); |
|
547 if (!bmap->getAttributeItem(it->name())) |
|
548 existingBody->setAttribute(it->name(), it->value()); |
|
549 } |
|
550 return false; |
|
551 } else if (!m_current->isDocumentNode()) |
|
552 return false; |
|
553 } else if (h->hasLocalName(areaTag)) { |
|
554 if (m_currentMapElement) { |
|
555 reportError(MisplacedAreaError, &m_current->localName()); |
|
556 m_currentMapElement->legacyParserAddChild(n); |
|
557 if (!n->attached() && !m_isParsingFragment) |
|
558 n->attach(); |
|
559 handled = true; |
|
560 return true; |
|
561 } |
|
562 return false; |
|
563 } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) { |
|
564 if (isTableRelated(m_current)) { |
|
565 while (m_blockStack && isTablePart(m_current)) |
|
566 popOneBlock(); |
|
567 return insertNode(n); |
|
568 } |
|
569 } |
|
570 } else if (n->isCommentNode() && !m_head) |
|
571 return false; |
|
572 |
|
573 // 2. Next we examine our currently active element to do some further error handling. |
|
574 if (m_current->isHTMLElement()) { |
|
575 HTMLElement* h = static_cast<HTMLElement*>(m_current); |
|
576 const AtomicString& currentTagName = h->localName(); |
|
577 if (h->hasLocalName(htmlTag)) { |
|
578 HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0; |
|
579 if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) || |
|
580 elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) || |
|
581 elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) || |
|
582 elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) || |
|
583 elt->hasLocalName(baseTag))) { |
|
584 if (!m_head) { |
|
585 m_head = HTMLHeadElement::create(m_document); |
|
586 insertNode(m_head.get()); |
|
587 handled = true; |
|
588 } |
|
589 } else { |
|
590 if (n->isTextNode()) { |
|
591 Text* t = static_cast<Text*>(n); |
|
592 if (t->containsOnlyWhitespace()) { |
|
593 if (m_head && !m_inBody) { |
|
594 // We're between </head> and <body>. According to |
|
595 // the HTML5 parsing algorithm, we're supposed to |
|
596 // insert whitespace text nodes into the HTML element. |
|
597 ExceptionCode ec; |
|
598 m_current->appendChild(n, ec); |
|
599 return true; |
|
600 } |
|
601 return false; |
|
602 } |
|
603 } |
|
604 if (!m_haveFrameSet) { |
|
605 // Ensure that head exists. |
|
606 // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795> |
|
607 if (!m_isParsingFragment && shouldCreateImplicitHead(m_document)) |
|
608 createHead(); |
|
609 |
|
610 popBlock(headTag); |
|
611 startBody(); |
|
612 insertNode(HTMLBodyElement::create(m_document).get()); |
|
613 handled = true; |
|
614 } else |
|
615 reportError(MisplacedFramesetContentError, &localName); |
|
616 } |
|
617 } else if (h->hasLocalName(headTag)) { |
|
618 if (n->hasTagName(htmlTag)) |
|
619 return false; |
|
620 else { |
|
621 // This means the body starts here... |
|
622 if (!m_haveFrameSet) { |
|
623 ASSERT(currentTagName == headTag); |
|
624 popBlock(currentTagName); |
|
625 startBody(); |
|
626 insertNode(HTMLBodyElement::create(m_document).get()); |
|
627 handled = true; |
|
628 } else |
|
629 reportError(MisplacedFramesetContentError, &localName); |
|
630 } |
|
631 } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag) |
|
632 || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) { |
|
633 reportError(MisplacedContentRetryError, &localName, ¤tTagName); |
|
634 popBlock(currentTagName); |
|
635 handled = true; |
|
636 } else if (h->hasLocalName(captionTag)) { |
|
637 // Illegal content in a caption. Close the caption and try again. |
|
638 reportError(MisplacedCaptionContentError, &localName); |
|
639 popBlock(currentTagName); |
|
640 if (isTablePart(n)) |
|
641 return insertNode(n, flat); |
|
642 } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) { |
|
643 if (n->hasTagName(tableTag)) { |
|
644 reportError(MisplacedTableError, ¤tTagName); |
|
645 if (m_isParsingFragment && !h->hasLocalName(tableTag)) |
|
646 // fragment may contain table parts without <table> ancestor, pop them one by one |
|
647 popBlock(h->localName()); |
|
648 popBlock(localName); // end the table |
|
649 handled = true; // ...and start a new one |
|
650 } else { |
|
651 ExceptionCode ec = 0; |
|
652 Node* node = m_current; |
|
653 Node* parent = node->parentNode(); |
|
654 // A script may have removed the current node's parent from the DOM |
|
655 // http://bugs.webkit.org/show_bug.cgi?id=7137 |
|
656 // FIXME: we should do real recovery here and re-parent with the correct node. |
|
657 if (!parent) |
|
658 return false; |
|
659 Node* grandparent = parent->parentNode(); |
|
660 |
|
661 if (n->isTextNode() || |
|
662 (h->hasLocalName(trTag) && |
|
663 isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) || |
|
664 ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) && |
|
665 !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) && |
|
666 parent->hasTagName(tableTag))) { |
|
667 node = (node->hasTagName(tableTag)) ? node : |
|
668 ((node->hasTagName(trTag)) ? grandparent : parent); |
|
669 // This can happen with fragments |
|
670 if (!node) |
|
671 return false; |
|
672 Node* parent = node->parentNode(); |
|
673 if (!parent) |
|
674 return false; |
|
675 parent->insertBefore(n, node, ec); |
|
676 if (!ec) { |
|
677 reportError(StrayTableContentError, &localName, ¤tTagName); |
|
678 if (n->isHTMLElement() && tagPriority > 0 && |
|
679 !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden) |
|
680 { |
|
681 pushBlock(localName, tagPriority); |
|
682 n->beginParsingChildren(); |
|
683 setCurrent(n); |
|
684 m_inStrayTableContent++; |
|
685 m_blockStack->strayTableContent = true; |
|
686 } |
|
687 return true; |
|
688 } |
|
689 } |
|
690 |
|
691 if (!ec) { |
|
692 if (m_current->hasTagName(trTag)) { |
|
693 reportError(TablePartRequiredError, &localName, &tdTag.localName()); |
|
694 insertNode(HTMLTableCellElement::create(tdTag, m_document).get()); |
|
695 } else if (m_current->hasTagName(tableTag)) { |
|
696 // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>, |
|
697 // and it isn't really a parse error per se. |
|
698 insertNode(HTMLTableSectionElement::create(tbodyTag, m_document).get()); |
|
699 } else { |
|
700 reportError(TablePartRequiredError, &localName, &trTag.localName()); |
|
701 insertNode(HTMLTableRowElement::create(m_document).get()); |
|
702 } |
|
703 handled = true; |
|
704 } |
|
705 } |
|
706 } else if (h->hasLocalName(objectTag)) { |
|
707 reportError(MisplacedContentRetryError, &localName, ¤tTagName); |
|
708 popBlock(objectTag); |
|
709 handled = true; |
|
710 } else if (h->hasLocalName(pTag) || isHeadingTag(currentTagName)) { |
|
711 if (!isInline(n)) { |
|
712 popBlock(currentTagName); |
|
713 handled = true; |
|
714 } |
|
715 } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) { |
|
716 if (localName == optgroupTag) { |
|
717 popBlock(currentTagName); |
|
718 handled = true; |
|
719 } else if (localName == selectTag) { |
|
720 // IE treats a nested select as </select>. Let's do the same |
|
721 popBlock(localName); |
|
722 } |
|
723 } else if (h->hasLocalName(selectTag)) { |
|
724 if (localName == inputTag || localName == textareaTag) { |
|
725 reportError(MisplacedContentRetryError, &localName, ¤tTagName); |
|
726 popBlock(currentTagName); |
|
727 handled = true; |
|
728 } |
|
729 } else if (h->hasLocalName(colgroupTag)) { |
|
730 popBlock(currentTagName); |
|
731 handled = true; |
|
732 } else if (!h->hasLocalName(bodyTag)) { |
|
733 if (isInline(m_current)) { |
|
734 popInlineBlocks(); |
|
735 handled = true; |
|
736 } |
|
737 } |
|
738 } else if (m_current->isDocumentNode()) { |
|
739 if (n->isTextNode()) { |
|
740 Text* t = static_cast<Text*>(n); |
|
741 if (t->containsOnlyWhitespace()) |
|
742 return false; |
|
743 } |
|
744 |
|
745 if (!m_document->documentElement()) { |
|
746 insertNode(HTMLHtmlElement::create(m_document).get()); |
|
747 handled = true; |
|
748 } |
|
749 } |
|
750 |
|
751 // 3. If we couldn't handle the error, just return false and attempt to error-correct again. |
|
752 if (!handled) { |
|
753 reportError(IgnoredContentError, &localName, &m_current->localName()); |
|
754 return false; |
|
755 } |
|
756 return insertNode(n); |
|
757 } |
|
758 |
|
759 typedef bool (LegacyHTMLTreeBuilder::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&); |
|
760 typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap; |
|
761 |
|
762 bool LegacyHTMLTreeBuilder::textCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
763 { |
|
764 result = Text::create(m_document, t->text.get()); |
|
765 return false; |
|
766 } |
|
767 |
|
768 bool LegacyHTMLTreeBuilder::commentCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
769 { |
|
770 result = Comment::create(m_document, t->text.get()); |
|
771 return false; |
|
772 } |
|
773 |
|
774 bool LegacyHTMLTreeBuilder::headCreateErrorCheck(Token*, RefPtr<Node>& result) |
|
775 { |
|
776 if (!m_head || m_current->localName() == htmlTag) { |
|
777 m_head = HTMLHeadElement::create(m_document); |
|
778 result = m_head; |
|
779 } else |
|
780 reportError(MisplacedHeadError); |
|
781 return false; |
|
782 } |
|
783 |
|
784 bool LegacyHTMLTreeBuilder::bodyCreateErrorCheck(Token*, RefPtr<Node>&) |
|
785 { |
|
786 // body no longer allowed if we have a frameset |
|
787 if (m_haveFrameSet) |
|
788 return false; |
|
789 |
|
790 // Ensure that head exists (unless parsing a fragment). |
|
791 // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795> |
|
792 if (!m_isParsingFragment && shouldCreateImplicitHead(m_document)) |
|
793 createHead(); |
|
794 |
|
795 popBlock(headTag); |
|
796 startBody(); |
|
797 return true; |
|
798 } |
|
799 |
|
800 bool LegacyHTMLTreeBuilder::framesetCreateErrorCheck(Token*, RefPtr<Node>&) |
|
801 { |
|
802 popBlock(headTag); |
|
803 if (m_inBody && !m_haveFrameSet && !m_haveContent) { |
|
804 popBlock(bodyTag); |
|
805 // ### actually for IE document.body returns the now hidden "body" element |
|
806 // we can't implement that behaviour now because it could cause too many |
|
807 // regressions and the headaches are not worth the work as long as there is |
|
808 // no site actually relying on that detail (Dirk) |
|
809 if (m_document->body() && !m_isParsingFragment) |
|
810 m_document->body()->setAttribute(styleAttr, "display:none"); |
|
811 m_inBody = false; |
|
812 } |
|
813 if ((m_haveContent || m_haveFrameSet) && m_current->localName() == htmlTag) |
|
814 return false; |
|
815 m_haveFrameSet = true; |
|
816 startBody(); |
|
817 return true; |
|
818 } |
|
819 |
|
820 bool LegacyHTMLTreeBuilder::formCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
821 { |
|
822 // Only create a new form if we're not already inside one. |
|
823 // This is consistent with other browsers' behavior. |
|
824 if (!m_currentFormElement) { |
|
825 m_currentFormElement = HTMLFormElement::create(m_document); |
|
826 result = m_currentFormElement; |
|
827 pCloserCreateErrorCheck(t, result); |
|
828 } |
|
829 return false; |
|
830 } |
|
831 |
|
832 bool LegacyHTMLTreeBuilder::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
833 { |
|
834 RefPtr<Node> n = handleIsindex(t); |
|
835 if (!m_inBody) |
|
836 m_isindexElement = n.release(); |
|
837 else { |
|
838 t->selfClosingTag = true; |
|
839 result = n.release(); |
|
840 } |
|
841 return false; |
|
842 } |
|
843 |
|
844 bool LegacyHTMLTreeBuilder::selectCreateErrorCheck(Token*, RefPtr<Node>&) |
|
845 { |
|
846 return true; |
|
847 } |
|
848 |
|
849 bool LegacyHTMLTreeBuilder::ddCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
850 { |
|
851 pCloserCreateErrorCheck(t, result); |
|
852 popBlock(dtTag); |
|
853 popBlock(ddTag); |
|
854 return true; |
|
855 } |
|
856 |
|
857 bool LegacyHTMLTreeBuilder::dtCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
858 { |
|
859 pCloserCreateErrorCheck(t, result); |
|
860 popBlock(ddTag); |
|
861 popBlock(dtTag); |
|
862 return true; |
|
863 } |
|
864 |
|
865 bool LegacyHTMLTreeBuilder::rpCreateErrorCheck(Token*, RefPtr<Node>&) |
|
866 { |
|
867 popBlock(rpTag); |
|
868 popBlock(rtTag); |
|
869 return true; |
|
870 } |
|
871 |
|
872 bool LegacyHTMLTreeBuilder::rtCreateErrorCheck(Token*, RefPtr<Node>&) |
|
873 { |
|
874 popBlock(rpTag); |
|
875 popBlock(rtTag); |
|
876 return true; |
|
877 } |
|
878 |
|
879 bool LegacyHTMLTreeBuilder::nestedCreateErrorCheck(Token* t, RefPtr<Node>&) |
|
880 { |
|
881 popBlock(t->tagName); |
|
882 return true; |
|
883 } |
|
884 |
|
885 bool LegacyHTMLTreeBuilder::nestedPCloserCreateErrorCheck(Token* t, RefPtr<Node>& result) |
|
886 { |
|
887 pCloserCreateErrorCheck(t, result); |
|
888 popBlock(t->tagName); |
|
889 return true; |
|
890 } |
|
891 |
|
892 bool LegacyHTMLTreeBuilder::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>&) |
|
893 { |
|
894 return allowNestedRedundantTag(t->tagName); |
|
895 } |
|
896 |
|
897 bool LegacyHTMLTreeBuilder::colCreateErrorCheck(Token*, RefPtr<Node>&) |
|
898 { |
|
899 if (!m_current->hasTagName(tableTag)) |
|
900 return true; |
|
901 RefPtr<Element> implicitColgroup = HTMLElementFactory::createHTMLElement(colgroupTag, m_document, 0, true); |
|
902 insertNode(implicitColgroup.get()); |
|
903 return true; |
|
904 } |
|
905 |
|
906 bool LegacyHTMLTreeBuilder::tableCellCreateErrorCheck(Token*, RefPtr<Node>&) |
|
907 { |
|
908 popBlock(tdTag); |
|
909 popBlock(thTag); |
|
910 return true; |
|
911 } |
|
912 |
|
913 bool LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck(Token*, RefPtr<Node>&) |
|
914 { |
|
915 popBlock(theadTag); |
|
916 popBlock(tbodyTag); |
|
917 popBlock(tfootTag); |
|
918 return true; |
|
919 } |
|
920 |
|
921 bool LegacyHTMLTreeBuilder::noembedCreateErrorCheck(Token*, RefPtr<Node>&) |
|
922 { |
|
923 setSkipMode(noembedTag); |
|
924 return true; |
|
925 } |
|
926 |
|
927 bool LegacyHTMLTreeBuilder::noframesCreateErrorCheck(Token*, RefPtr<Node>&) |
|
928 { |
|
929 setSkipMode(noframesTag); |
|
930 return true; |
|
931 } |
|
932 |
|
933 bool LegacyHTMLTreeBuilder::noscriptCreateErrorCheck(Token*, RefPtr<Node>&) |
|
934 { |
|
935 if (!m_isParsingFragment) { |
|
936 Frame* frame = m_document->frame(); |
|
937 if (frame && frame->script()->canExecuteScripts(NotAboutToExecuteScript)) |
|
938 setSkipMode(noscriptTag); |
|
939 } |
|
940 return true; |
|
941 } |
|
942 |
|
943 bool LegacyHTMLTreeBuilder::pCloserCreateErrorCheck(Token*, RefPtr<Node>&) |
|
944 { |
|
945 if (hasPElementInScope()) |
|
946 popBlock(pTag); |
|
947 return true; |
|
948 } |
|
949 |
|
950 bool LegacyHTMLTreeBuilder::pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&) |
|
951 { |
|
952 if (m_document->inCompatMode()) |
|
953 return true; |
|
954 if (hasPElementInScope()) |
|
955 popBlock(pTag); |
|
956 return true; |
|
957 } |
|
958 |
|
959 bool LegacyHTMLTreeBuilder::mapCreateErrorCheck(Token*, RefPtr<Node>& result) |
|
960 { |
|
961 m_currentMapElement = HTMLMapElement::create(m_document); |
|
962 result = m_currentMapElement; |
|
963 return false; |
|
964 } |
|
965 |
|
966 static void mapTagToFunc(FunctionMap& map, const QualifiedName& tag, CreateErrorCheckFunc func) |
|
967 { |
|
968 map.set(tag.localName().impl(), func); |
|
969 } |
|
970 |
|
971 template< size_t ArraySize > |
|
972 static void mapTagsToFunc(FunctionMap& map, QualifiedName (&names)[ArraySize], CreateErrorCheckFunc func) |
|
973 { |
|
974 for (size_t x = 0; x < ArraySize; x++) { |
|
975 const QualifiedName& name = names[x]; |
|
976 mapTagToFunc(map, name, func); |
|
977 } |
|
978 } |
|
979 |
|
980 PassRefPtr<Node> LegacyHTMLTreeBuilder::getNode(Token* t) |
|
981 { |
|
982 // Init our error handling table. |
|
983 DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ()); |
|
984 if (gFunctionMap.isEmpty()) { |
|
985 QualifiedName nestedCreateErrorTags[] = { aTag, buttonTag, nobrTag, trTag }; |
|
986 mapTagsToFunc(gFunctionMap, nestedCreateErrorTags, &LegacyHTMLTreeBuilder::nestedCreateErrorCheck); |
|
987 |
|
988 QualifiedName nestedStyleCreateErrorTags[] = { bTag, bigTag, iTag, markTag, sTag, smallTag, strikeTag, ttTag, uTag }; |
|
989 mapTagsToFunc(gFunctionMap, nestedStyleCreateErrorTags, &LegacyHTMLTreeBuilder::nestedStyleCreateErrorCheck); |
|
990 |
|
991 QualifiedName pCloserCreateErrorTags[] = { addressTag, articleTag, |
|
992 asideTag, blockquoteTag, centerTag, dirTag, divTag, dlTag, |
|
993 fieldsetTag, footerTag, h1Tag, h2Tag, h3Tag, h4Tag, h5Tag, h6Tag, |
|
994 headerTag, hgroupTag, hrTag, listingTag, menuTag, navTag, olTag, |
|
995 pTag, plaintextTag, preTag, sectionTag, ulTag }; |
|
996 mapTagsToFunc(gFunctionMap, pCloserCreateErrorTags, &LegacyHTMLTreeBuilder::pCloserCreateErrorCheck); |
|
997 |
|
998 mapTagToFunc(gFunctionMap, bodyTag, &LegacyHTMLTreeBuilder::bodyCreateErrorCheck); |
|
999 mapTagToFunc(gFunctionMap, colTag, &LegacyHTMLTreeBuilder::colCreateErrorCheck); |
|
1000 mapTagToFunc(gFunctionMap, ddTag, &LegacyHTMLTreeBuilder::ddCreateErrorCheck); |
|
1001 mapTagToFunc(gFunctionMap, dtTag, &LegacyHTMLTreeBuilder::dtCreateErrorCheck); |
|
1002 mapTagToFunc(gFunctionMap, formTag, &LegacyHTMLTreeBuilder::formCreateErrorCheck); |
|
1003 mapTagToFunc(gFunctionMap, framesetTag, &LegacyHTMLTreeBuilder::framesetCreateErrorCheck); |
|
1004 mapTagToFunc(gFunctionMap, headTag, &LegacyHTMLTreeBuilder::headCreateErrorCheck); |
|
1005 mapTagToFunc(gFunctionMap, isindexTag, &LegacyHTMLTreeBuilder::isindexCreateErrorCheck); |
|
1006 mapTagToFunc(gFunctionMap, mapTag, &LegacyHTMLTreeBuilder::mapCreateErrorCheck); |
|
1007 mapTagToFunc(gFunctionMap, liTag, &LegacyHTMLTreeBuilder::nestedPCloserCreateErrorCheck); |
|
1008 mapTagToFunc(gFunctionMap, noembedTag, &LegacyHTMLTreeBuilder::noembedCreateErrorCheck); |
|
1009 mapTagToFunc(gFunctionMap, noframesTag, &LegacyHTMLTreeBuilder::noframesCreateErrorCheck); |
|
1010 mapTagToFunc(gFunctionMap, noscriptTag, &LegacyHTMLTreeBuilder::noscriptCreateErrorCheck); |
|
1011 mapTagToFunc(gFunctionMap, tableTag, &LegacyHTMLTreeBuilder::pCloserStrictCreateErrorCheck); |
|
1012 mapTagToFunc(gFunctionMap, rpTag, &LegacyHTMLTreeBuilder::rpCreateErrorCheck); |
|
1013 mapTagToFunc(gFunctionMap, rtTag, &LegacyHTMLTreeBuilder::rtCreateErrorCheck); |
|
1014 mapTagToFunc(gFunctionMap, selectTag, &LegacyHTMLTreeBuilder::selectCreateErrorCheck); |
|
1015 mapTagToFunc(gFunctionMap, tdTag, &LegacyHTMLTreeBuilder::tableCellCreateErrorCheck); |
|
1016 mapTagToFunc(gFunctionMap, thTag, &LegacyHTMLTreeBuilder::tableCellCreateErrorCheck); |
|
1017 mapTagToFunc(gFunctionMap, tbodyTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck); |
|
1018 mapTagToFunc(gFunctionMap, tfootTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck); |
|
1019 mapTagToFunc(gFunctionMap, theadTag, &LegacyHTMLTreeBuilder::tableSectionCreateErrorCheck); |
|
1020 |
|
1021 gFunctionMap.set(commentAtom.impl(), &LegacyHTMLTreeBuilder::commentCreateErrorCheck); |
|
1022 gFunctionMap.set(textAtom.impl(), &LegacyHTMLTreeBuilder::textCreateErrorCheck); |
|
1023 } |
|
1024 |
|
1025 bool proceed = true; |
|
1026 RefPtr<Node> result; |
|
1027 if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl())) |
|
1028 proceed = (this->*errorCheckFunc)(t, result); |
|
1029 if (proceed) |
|
1030 result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), m_document, m_currentFormElement.get()); |
|
1031 return result.release(); |
|
1032 } |
|
1033 |
|
1034 bool LegacyHTMLTreeBuilder::allowNestedRedundantTag(const AtomicString& tagName) |
|
1035 { |
|
1036 // www.liceo.edu.mx is an example of a site that achieves a level of nesting of |
|
1037 // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20 |
|
1038 // nested tags of the same type before just ignoring them all together. |
|
1039 unsigned i = 0; |
|
1040 for (HTMLStackElem* curr = m_blockStack; |
|
1041 i < cMaxRedundantTagDepth && curr && curr->tagName == tagName; |
|
1042 curr = curr->next, i++) { } |
|
1043 return i != cMaxRedundantTagDepth; |
|
1044 } |
|
1045 |
|
1046 void LegacyHTMLTreeBuilder::processCloseTag(Token* t) |
|
1047 { |
|
1048 // Support for really broken html. |
|
1049 // we never close the body tag, since some stupid web pages close it before the actual end of the doc. |
|
1050 // let's rely on the end() call to close things. |
|
1051 if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom) |
|
1052 return; |
|
1053 |
|
1054 bool checkForCloseTagErrors = true; |
|
1055 if (t->tagName == formTag && m_currentFormElement) { |
|
1056 m_currentFormElement = 0; |
|
1057 checkForCloseTagErrors = false; |
|
1058 } else if (t->tagName == mapTag) |
|
1059 m_currentMapElement = 0; |
|
1060 else if (t->tagName == pTag) |
|
1061 checkForCloseTagErrors = false; |
|
1062 |
|
1063 HTMLStackElem* oldElem = m_blockStack; |
|
1064 popBlock(t->tagName, checkForCloseTagErrors); |
|
1065 if (oldElem == m_blockStack && t->tagName == pTag) { |
|
1066 // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat |
|
1067 // this as a valid break, i.e., <p></p>. So go ahead and make the empty |
|
1068 // paragraph. |
|
1069 t->beginTag = true; |
|
1070 parseToken(t); |
|
1071 popBlock(t->tagName); |
|
1072 reportError(StrayParagraphCloseError); |
|
1073 } |
|
1074 } |
|
1075 |
|
1076 bool LegacyHTMLTreeBuilder::isHeadingTag(const AtomicString& tagName) |
|
1077 { |
|
1078 DEFINE_STATIC_LOCAL(TagNameSet, headingTags, ()); |
|
1079 if (headingTags.isEmpty()) { |
|
1080 QualifiedName tagNames[] = { h1Tag, h2Tag, h3Tag, h4Tag, h5Tag, h6Tag }; |
|
1081 addTags(headingTags, tagNames); |
|
1082 } |
|
1083 return headingTags.contains(tagName.impl()); |
|
1084 } |
|
1085 |
|
1086 bool LegacyHTMLTreeBuilder::isInline(Node* node) const |
|
1087 { |
|
1088 if (node->isTextNode()) |
|
1089 return true; |
|
1090 |
|
1091 if (node->isHTMLElement()) { |
|
1092 HTMLElement* e = static_cast<HTMLElement*>(node); |
|
1093 if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) || |
|
1094 e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) || |
|
1095 e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) || |
|
1096 e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) || |
|
1097 e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) || |
|
1098 e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) || |
|
1099 e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) || |
|
1100 e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) || |
|
1101 e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) || |
|
1102 e->hasLocalName(noembedTag) || e->hasLocalName(markTag)) |
|
1103 return true; |
|
1104 #if !ENABLE(XHTMLMP) |
|
1105 if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) { |
|
1106 Frame* frame = m_document->frame(); |
|
1107 if (frame && frame->script()->canExecuteScripts(NotAboutToExecuteScript)) |
|
1108 return true; |
|
1109 } |
|
1110 #endif |
|
1111 } |
|
1112 |
|
1113 return false; |
|
1114 } |
|
1115 |
|
1116 bool LegacyHTMLTreeBuilder::isResidualStyleTag(const AtomicString& tagName) |
|
1117 { |
|
1118 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, residualStyleTags, ()); |
|
1119 if (residualStyleTags.isEmpty()) { |
|
1120 QualifiedName tagNames[] = { aTag, fontTag, ttTag, uTag, bTag, iTag, |
|
1121 sTag, strikeTag, bigTag, smallTag, emTag, strongTag, dfnTag, |
|
1122 codeTag, sampTag, kbdTag, varTag, nobrTag, markTag }; |
|
1123 addTags(residualStyleTags, tagNames); |
|
1124 } |
|
1125 return residualStyleTags.contains(tagName.impl()); |
|
1126 } |
|
1127 |
|
1128 bool LegacyHTMLTreeBuilder::isAffectedByResidualStyle(const AtomicString& tagName) |
|
1129 { |
|
1130 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, unaffectedTags, ()); |
|
1131 if (unaffectedTags.isEmpty()) { |
|
1132 QualifiedName tagNames[] = { bodyTag, tableTag, theadTag, tbodyTag, |
|
1133 tfootTag, trTag, thTag, tdTag, captionTag, colgroupTag, colTag, |
|
1134 optionTag, optgroupTag, selectTag, objectTag, datagridTag, datalistTag }; |
|
1135 addTags(unaffectedTags, tagNames); |
|
1136 } |
|
1137 return !unaffectedTags.contains(tagName.impl()); |
|
1138 } |
|
1139 |
|
1140 void LegacyHTMLTreeBuilder::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem) |
|
1141 { |
|
1142 HTMLStackElem* maxElem = 0; |
|
1143 bool finished = false; |
|
1144 bool strayTableContent = elem->strayTableContent; |
|
1145 |
|
1146 unsigned iterationCount = 0; |
|
1147 |
|
1148 m_handlingResidualStyleAcrossBlocks = true; |
|
1149 while (!finished && (iterationCount++ < cResidualStyleIterationLimit)) { |
|
1150 // Find the outermost element that crosses over to a higher level. If there exists another higher-level |
|
1151 // element, we will do another pass, until we have corrected the innermost one. |
|
1152 ExceptionCode ec = 0; |
|
1153 HTMLStackElem* curr = m_blockStack; |
|
1154 HTMLStackElem* prev = 0; |
|
1155 HTMLStackElem* prevMaxElem = 0; |
|
1156 maxElem = 0; |
|
1157 finished = true; |
|
1158 while (curr && curr != elem) { |
|
1159 if (curr->level > elem->level) { |
|
1160 if (!isAffectedByResidualStyle(curr->tagName)) |
|
1161 return; |
|
1162 if (maxElem) |
|
1163 // We will need another pass. |
|
1164 finished = false; |
|
1165 maxElem = curr; |
|
1166 prevMaxElem = prev; |
|
1167 } |
|
1168 |
|
1169 prev = curr; |
|
1170 curr = curr->next; |
|
1171 } |
|
1172 |
|
1173 if (!curr || !maxElem) |
|
1174 return; |
|
1175 |
|
1176 Node* residualElem = prev->node; |
|
1177 Node* blockElem = prevMaxElem ? prevMaxElem->node : m_current; |
|
1178 Node* parentElem = elem->node; |
|
1179 |
|
1180 // Check to see if the reparenting that is going to occur is allowed according to the DOM. |
|
1181 // FIXME: We should either always allow it or perform an additional fixup instead of |
|
1182 // just bailing here. |
|
1183 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now. |
|
1184 if (!parentElem->childAllowed(blockElem)) |
|
1185 return; |
|
1186 |
|
1187 m_hasPElementInScope = Unknown; |
|
1188 |
|
1189 if (maxElem->node->parentNode() != elem->node) { |
|
1190 // Walk the stack and remove any elements that aren't residual style tags. These |
|
1191 // are basically just being closed up. Example: |
|
1192 // <font><span>Moo<p>Goo</font></p>. |
|
1193 // In the above example, the <span> doesn't need to be reopened. It can just close. |
|
1194 HTMLStackElem* currElem = maxElem->next; |
|
1195 HTMLStackElem* prevElem = maxElem; |
|
1196 while (currElem != elem) { |
|
1197 HTMLStackElem* nextElem = currElem->next; |
|
1198 if (!isResidualStyleTag(currElem->tagName)) { |
|
1199 prevElem->next = nextElem; |
|
1200 prevElem->derefNode(); |
|
1201 prevElem->node = currElem->node; |
|
1202 prevElem->didRefNode = currElem->didRefNode; |
|
1203 delete currElem; |
|
1204 m_treeDepth--; |
|
1205 } else |
|
1206 prevElem = currElem; |
|
1207 currElem = nextElem; |
|
1208 } |
|
1209 |
|
1210 // We have to reopen residual tags in between maxElem and elem. An example of this case is: |
|
1211 // <font><i>Moo<p>Foo</font>. |
|
1212 // In this case, we need to transform the part before the <p> into: |
|
1213 // <font><i>Moo</i></font><i> |
|
1214 // so that the <i> will remain open. This involves the modification of elements |
|
1215 // in the block stack. |
|
1216 // This will also affect how we ultimately reparent the block, since we want it to end up |
|
1217 // under the reopened residual tags (e.g., the <i> in the above example.) |
|
1218 RefPtr<Node> prevNode = 0; |
|
1219 currElem = maxElem; |
|
1220 while (currElem->node != residualElem) { |
|
1221 if (isResidualStyleTag(currElem->node->localName())) { |
|
1222 // Create a clone of this element. |
|
1223 // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem. |
|
1224 Node* currNode = currElem->node->cloneNode(false).releaseRef(); |
|
1225 reportError(ResidualStyleError, &currNode->localName()); |
|
1226 |
|
1227 // Change the stack element's node to point to the clone. |
|
1228 // The stack element adopts the reference we obtained above by calling release(). |
|
1229 currElem->derefNode(); |
|
1230 currElem->node = currNode; |
|
1231 currElem->didRefNode = true; |
|
1232 |
|
1233 // Attach the previous node as a child of this new node. |
|
1234 if (prevNode) |
|
1235 currNode->appendChild(prevNode, ec); |
|
1236 else // The new parent for the block element is going to be the innermost clone. |
|
1237 parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though. |
|
1238 |
|
1239 prevNode = currNode; |
|
1240 } |
|
1241 |
|
1242 currElem = currElem->next; |
|
1243 } |
|
1244 |
|
1245 // Now append the chain of new residual style elements if one exists. |
|
1246 if (prevNode) |
|
1247 elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section. |
|
1248 } |
|
1249 |
|
1250 // Check if the block is still in the tree. If it isn't, then we don't |
|
1251 // want to remove it from its parent (that would crash) or insert it into |
|
1252 // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778 |
|
1253 bool isBlockStillInTree = blockElem->parentNode(); |
|
1254 |
|
1255 // We need to make a clone of |residualElem| and place it just inside |blockElem|. |
|
1256 // All content of |blockElem| is reparented to be under this clone. We then |
|
1257 // reparent |blockElem| using real DOM calls so that attachment/detachment will |
|
1258 // be performed to fix up the rendering tree. |
|
1259 // So for this example: <b>...<p>Foo</b>Goo</p> |
|
1260 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p> |
|
1261 // |
|
1262 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids. |
|
1263 if (isBlockStillInTree) |
|
1264 blockElem->parentNode()->removeChild(blockElem, ec); |
|
1265 |
|
1266 Node* newNodePtr = 0; |
|
1267 if (blockElem->firstChild()) { |
|
1268 // Step 2: Clone |residualElem|. |
|
1269 RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids. |
|
1270 newNodePtr = newNode.get(); |
|
1271 reportError(ResidualStyleError, &newNode->localName()); |
|
1272 |
|
1273 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem| |
|
1274 // before we've put |newElem| into the document. That way we'll only do one attachment of all |
|
1275 // the new content (instead of a bunch of individual attachments). |
|
1276 Node* currNode = blockElem->firstChild(); |
|
1277 while (currNode) { |
|
1278 Node* nextNode = currNode->nextSibling(); |
|
1279 newNode->appendChild(currNode, ec); |
|
1280 currNode = nextNode; |
|
1281 } |
|
1282 |
|
1283 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no |
|
1284 // attachment can occur yet. |
|
1285 blockElem->appendChild(newNode.release(), ec); |
|
1286 } else |
|
1287 finished = true; |
|
1288 |
|
1289 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place. |
|
1290 if (isBlockStillInTree) |
|
1291 parentElem->appendChild(blockElem, ec); |
|
1292 |
|
1293 // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update |
|
1294 // the node associated with the previous stack element so that when it gets popped, |
|
1295 // it doesn't make the residual element the next current node. |
|
1296 HTMLStackElem* currElem = maxElem; |
|
1297 HTMLStackElem* prevElem = 0; |
|
1298 while (currElem != elem) { |
|
1299 prevElem = currElem; |
|
1300 currElem = currElem->next; |
|
1301 } |
|
1302 prevElem->next = elem->next; |
|
1303 prevElem->derefNode(); |
|
1304 prevElem->node = elem->node; |
|
1305 prevElem->didRefNode = elem->didRefNode; |
|
1306 m_treeDepth--; |
|
1307 if (!finished) { |
|
1308 // Repurpose |elem| to represent |newNode| and insert it at the appropriate position |
|
1309 // in the stack. We do not do this for the innermost block, because in that case the new |
|
1310 // node is effectively no longer open. |
|
1311 elem->next = maxElem; |
|
1312 elem->node = prevMaxElem->node; |
|
1313 elem->didRefNode = prevMaxElem->didRefNode; |
|
1314 elem->strayTableContent = false; |
|
1315 prevMaxElem->next = elem; |
|
1316 ASSERT(newNodePtr); |
|
1317 prevMaxElem->node = newNodePtr; |
|
1318 newNodePtr->ref(); |
|
1319 prevMaxElem->didRefNode = true; |
|
1320 m_treeDepth++; |
|
1321 } else |
|
1322 delete elem; |
|
1323 } |
|
1324 |
|
1325 // FIXME: If we ever make a case like this work: |
|
1326 // <table><b><i><form></b></form></i></table> |
|
1327 // Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy. |
|
1328 if (strayTableContent) |
|
1329 m_inStrayTableContent--; |
|
1330 |
|
1331 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>. |
|
1332 // In the above example, Goo should stay italic. |
|
1333 // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth. |
|
1334 |
|
1335 HTMLStackElem* curr = m_blockStack; |
|
1336 HTMLStackElem* residualStyleStack = 0; |
|
1337 unsigned stackDepth = 1; |
|
1338 unsigned redundantStyleCount = 0; |
|
1339 while (curr && curr != maxElem) { |
|
1340 // We will actually schedule this tag for reopening |
|
1341 // after we complete the close of this entire block. |
|
1342 if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) { |
|
1343 // We've overloaded the use of stack elements and are just reusing the |
|
1344 // struct with a slightly different meaning to the variables. Instead of chaining |
|
1345 // from innermost to outermost, we build up a list of all the tags we need to reopen |
|
1346 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing |
|
1347 // to the outermost tag we need to reopen. |
|
1348 // We also set curr->node to be the actual element that corresponds to the ID stored in |
|
1349 // curr->id rather than the node that you should pop to when the element gets pulled off |
|
1350 // the stack. |
|
1351 if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) |
|
1352 redundantStyleCount++; |
|
1353 else |
|
1354 redundantStyleCount = 0; |
|
1355 |
|
1356 if (redundantStyleCount < cMaxRedundantTagDepth) |
|
1357 moveOneBlockToStack(residualStyleStack); |
|
1358 else |
|
1359 popOneBlock(); |
|
1360 } else |
|
1361 popOneBlock(); |
|
1362 |
|
1363 curr = m_blockStack; |
|
1364 } |
|
1365 |
|
1366 reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content. |
|
1367 |
|
1368 m_handlingResidualStyleAcrossBlocks = false; |
|
1369 } |
|
1370 |
|
1371 void LegacyHTMLTreeBuilder::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent) |
|
1372 { |
|
1373 // Loop for each tag that needs to be reopened. |
|
1374 while (elem) { |
|
1375 // Create a shallow clone of the DOM node for this element. |
|
1376 RefPtr<Node> newNode = elem->node->cloneNode(false); |
|
1377 reportError(ResidualStyleError, &newNode->localName()); |
|
1378 |
|
1379 // Append the new node. In the malformed table case, we need to insert before the table, |
|
1380 // which will be the last child. |
|
1381 ExceptionCode ec = 0; |
|
1382 if (malformedTableParent) |
|
1383 malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec); |
|
1384 else |
|
1385 m_current->appendChild(newNode, ec); |
|
1386 // FIXME: Is it really OK to ignore the exceptions here? |
|
1387 |
|
1388 // Now push a new stack element for this node we just created. |
|
1389 pushBlock(elem->tagName, elem->level); |
|
1390 newNode->beginParsingChildren(); |
|
1391 |
|
1392 // Set our strayTableContent boolean if needed, so that the reopened tag also knows |
|
1393 // that it is inside a malformed table. |
|
1394 m_blockStack->strayTableContent = malformedTableParent != 0; |
|
1395 if (m_blockStack->strayTableContent) |
|
1396 m_inStrayTableContent++; |
|
1397 |
|
1398 // Clear our malformed table parent variable. |
|
1399 malformedTableParent = 0; |
|
1400 |
|
1401 // Update |current| manually to point to the new node. |
|
1402 setCurrent(newNode.get()); |
|
1403 |
|
1404 // Advance to the next tag that needs to be reopened. |
|
1405 HTMLStackElem* next = elem->next; |
|
1406 elem->derefNode(); |
|
1407 delete elem; |
|
1408 elem = next; |
|
1409 } |
|
1410 } |
|
1411 |
|
1412 void LegacyHTMLTreeBuilder::pushBlock(const AtomicString& tagName, int level) |
|
1413 { |
|
1414 m_blockStack = new HTMLStackElem(tagName, level, m_current, m_didRefCurrent, m_blockStack); |
|
1415 if (level >= minBlockLevelTagPriority) |
|
1416 m_blocksInStack++; |
|
1417 m_treeDepth++; |
|
1418 m_didRefCurrent = false; |
|
1419 if (tagName == pTag) |
|
1420 m_hasPElementInScope = InScope; |
|
1421 else if (isScopingTag(tagName)) |
|
1422 m_hasPElementInScope = NotInScope; |
|
1423 } |
|
1424 |
|
1425 void LegacyHTMLTreeBuilder::popBlock(const AtomicString& tagName, bool reportErrors) |
|
1426 { |
|
1427 HTMLStackElem* elem = m_blockStack; |
|
1428 |
|
1429 if (m_parserQuirks && elem && !m_parserQuirks->shouldPopBlock(elem->tagName, tagName)) |
|
1430 return; |
|
1431 |
|
1432 int maxLevel = 0; |
|
1433 |
|
1434 while (elem && (elem->tagName != tagName)) { |
|
1435 if (maxLevel < elem->level) |
|
1436 maxLevel = elem->level; |
|
1437 elem = elem->next; |
|
1438 } |
|
1439 |
|
1440 if (!elem) { |
|
1441 if (reportErrors) |
|
1442 reportError(StrayCloseTagError, &tagName, 0, true); |
|
1443 return; |
|
1444 } |
|
1445 |
|
1446 if (maxLevel > elem->level) { |
|
1447 // We didn't match because the tag is in a different scope, e.g., |
|
1448 // <b><p>Foo</b>. Try to correct the problem. |
|
1449 if (!isResidualStyleTag(tagName)) |
|
1450 return; |
|
1451 return handleResidualStyleCloseTagAcrossBlocks(elem); |
|
1452 } |
|
1453 |
|
1454 bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName); |
|
1455 HTMLStackElem* residualStyleStack = 0; |
|
1456 Node* malformedTableParent = 0; |
|
1457 |
|
1458 elem = m_blockStack; |
|
1459 unsigned stackDepth = 1; |
|
1460 unsigned redundantStyleCount = 0; |
|
1461 while (elem) { |
|
1462 if (elem->tagName == tagName) { |
|
1463 int strayTable = m_inStrayTableContent; |
|
1464 popOneBlock(); |
|
1465 elem = 0; |
|
1466 |
|
1467 // This element was the root of some malformed content just inside an implicit or |
|
1468 // explicit <tbody> or <tr>. |
|
1469 // If we end up needing to reopen residual style tags, the root of the reopened chain |
|
1470 // must also know that it is the root of malformed content inside a <tbody>/<tr>. |
|
1471 if (strayTable && (m_inStrayTableContent < strayTable) && residualStyleStack) { |
|
1472 Node* curr = m_current; |
|
1473 while (curr && !curr->hasTagName(tableTag)) |
|
1474 curr = curr->parentNode(); |
|
1475 malformedTableParent = curr ? curr->parentNode() : 0; |
|
1476 } |
|
1477 } |
|
1478 else { |
|
1479 if (m_currentFormElement && elem->tagName == formTag) |
|
1480 // A <form> is being closed prematurely (and this is |
|
1481 // malformed HTML). Set an attribute on the form to clear out its |
|
1482 // bottom margin. |
|
1483 m_currentFormElement->setMalformed(true); |
|
1484 |
|
1485 // Schedule this tag for reopening |
|
1486 // after we complete the close of this entire block. |
|
1487 if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) { |
|
1488 // We've overloaded the use of stack elements and are just reusing the |
|
1489 // struct with a slightly different meaning to the variables. Instead of chaining |
|
1490 // from innermost to outermost, we build up a list of all the tags we need to reopen |
|
1491 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing |
|
1492 // to the outermost tag we need to reopen. |
|
1493 // We also set elem->node to be the actual element that corresponds to the ID stored in |
|
1494 // elem->id rather than the node that you should pop to when the element gets pulled off |
|
1495 // the stack. |
|
1496 if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) |
|
1497 redundantStyleCount++; |
|
1498 else |
|
1499 redundantStyleCount = 0; |
|
1500 |
|
1501 if (redundantStyleCount < cMaxRedundantTagDepth) |
|
1502 moveOneBlockToStack(residualStyleStack); |
|
1503 else |
|
1504 popOneBlock(); |
|
1505 } else |
|
1506 popOneBlock(); |
|
1507 elem = m_blockStack; |
|
1508 } |
|
1509 } |
|
1510 |
|
1511 reopenResidualStyleTags(residualStyleStack, malformedTableParent); |
|
1512 } |
|
1513 |
|
1514 inline HTMLStackElem* LegacyHTMLTreeBuilder::popOneBlockCommon() |
|
1515 { |
|
1516 HTMLStackElem* elem = m_blockStack; |
|
1517 |
|
1518 // Form elements restore their state during the parsing process. |
|
1519 // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available. |
|
1520 if (m_current && elem->node != m_current) |
|
1521 m_current->finishParsingChildren(); |
|
1522 |
|
1523 if (m_blockStack->level >= minBlockLevelTagPriority) { |
|
1524 ASSERT(m_blocksInStack > 0); |
|
1525 m_blocksInStack--; |
|
1526 } |
|
1527 m_treeDepth--; |
|
1528 m_blockStack = elem->next; |
|
1529 m_current = elem->node; |
|
1530 m_didRefCurrent = elem->didRefNode; |
|
1531 |
|
1532 if (elem->strayTableContent) |
|
1533 m_inStrayTableContent--; |
|
1534 |
|
1535 if (elem->tagName == pTag) |
|
1536 m_hasPElementInScope = NotInScope; |
|
1537 else if (isScopingTag(elem->tagName)) |
|
1538 m_hasPElementInScope = Unknown; |
|
1539 |
|
1540 return elem; |
|
1541 } |
|
1542 |
|
1543 void LegacyHTMLTreeBuilder::popOneBlock() |
|
1544 { |
|
1545 // Store the current node before popOneBlockCommon overwrites it. |
|
1546 Node* lastCurrent = m_current; |
|
1547 bool didRefLastCurrent = m_didRefCurrent; |
|
1548 |
|
1549 delete popOneBlockCommon(); |
|
1550 |
|
1551 if (didRefLastCurrent) |
|
1552 lastCurrent->deref(); |
|
1553 } |
|
1554 |
|
1555 void LegacyHTMLTreeBuilder::moveOneBlockToStack(HTMLStackElem*& head) |
|
1556 { |
|
1557 // We'll be using the stack element we're popping, but for the current node. |
|
1558 // See the two callers for details. |
|
1559 |
|
1560 // Store the current node before popOneBlockCommon overwrites it. |
|
1561 Node* lastCurrent = m_current; |
|
1562 bool didRefLastCurrent = m_didRefCurrent; |
|
1563 |
|
1564 // Pop the block, but don't deref the current node as popOneBlock does because |
|
1565 // we'll be using the pointer in the new stack element. |
|
1566 HTMLStackElem* elem = popOneBlockCommon(); |
|
1567 |
|
1568 // Transfer the current node into the stack element. |
|
1569 // No need to deref the old elem->node because popOneBlockCommon transferred |
|
1570 // it into the m_current/m_didRefCurrent fields. |
|
1571 elem->node = lastCurrent; |
|
1572 elem->didRefNode = didRefLastCurrent; |
|
1573 elem->next = head; |
|
1574 head = elem; |
|
1575 } |
|
1576 |
|
1577 void LegacyHTMLTreeBuilder::checkIfHasPElementInScope() |
|
1578 { |
|
1579 m_hasPElementInScope = NotInScope; |
|
1580 HTMLStackElem* elem = m_blockStack; |
|
1581 while (elem) { |
|
1582 const AtomicString& tagName = elem->tagName; |
|
1583 if (tagName == pTag) { |
|
1584 m_hasPElementInScope = InScope; |
|
1585 return; |
|
1586 } else if (isScopingTag(tagName)) |
|
1587 return; |
|
1588 elem = elem->next; |
|
1589 } |
|
1590 } |
|
1591 |
|
1592 void LegacyHTMLTreeBuilder::popInlineBlocks() |
|
1593 { |
|
1594 while (m_blockStack && isInline(m_current)) |
|
1595 popOneBlock(); |
|
1596 } |
|
1597 |
|
1598 void LegacyHTMLTreeBuilder::freeBlock() |
|
1599 { |
|
1600 while (m_blockStack) |
|
1601 popOneBlock(); |
|
1602 ASSERT(!m_blocksInStack); |
|
1603 ASSERT(!m_treeDepth); |
|
1604 } |
|
1605 |
|
1606 void LegacyHTMLTreeBuilder::createHead() |
|
1607 { |
|
1608 if (m_head) |
|
1609 return; |
|
1610 |
|
1611 if (!m_document->documentElement() && !m_isParsingFragment) { |
|
1612 insertNode(HTMLHtmlElement::create(m_document).get()); |
|
1613 ASSERT(m_document->documentElement() || m_isParsingFragment); |
|
1614 } |
|
1615 |
|
1616 m_head = HTMLHeadElement::create(m_document); |
|
1617 |
|
1618 if (m_isParsingFragment) |
|
1619 return; |
|
1620 |
|
1621 HTMLElement* body = m_document->body(); |
|
1622 ExceptionCode ec = 0; |
|
1623 m_document->documentElement()->insertBefore(m_head.get(), body, ec); |
|
1624 if (ec) |
|
1625 m_head = 0; |
|
1626 |
|
1627 // If the body does not exist yet, then the <head> should be pushed as the current block. |
|
1628 if (m_head && !body) { |
|
1629 pushBlock(m_head->localName(), m_head->tagPriority()); |
|
1630 setCurrent(m_head.get()); |
|
1631 } |
|
1632 } |
|
1633 |
|
1634 PassRefPtr<Node> LegacyHTMLTreeBuilder::handleIsindex(Token* t) |
|
1635 { |
|
1636 RefPtr<Node> n = HTMLDivElement::create(m_document); |
|
1637 |
|
1638 NamedNodeMap* attrs = t->attrs.get(); |
|
1639 |
|
1640 RefPtr<HTMLIsIndexElement> isIndex = HTMLIsIndexElement::create(m_document, m_currentFormElement.get()); |
|
1641 isIndex->setAttributeMap(attrs); |
|
1642 isIndex->setAttribute(typeAttr, "khtml_isindex"); |
|
1643 |
|
1644 String text = searchableIndexIntroduction(); |
|
1645 if (attrs) { |
|
1646 if (Attribute* a = attrs->getAttributeItem(promptAttr)) |
|
1647 text = a->value().string() + " "; |
|
1648 t->attrs = 0; |
|
1649 } |
|
1650 |
|
1651 n->legacyParserAddChild(HTMLHRElement::create(m_document)); |
|
1652 n->legacyParserAddChild(Text::create(m_document, text)); |
|
1653 n->legacyParserAddChild(isIndex.release()); |
|
1654 n->legacyParserAddChild(HTMLHRElement::create(m_document)); |
|
1655 |
|
1656 return n.release(); |
|
1657 } |
|
1658 |
|
1659 void LegacyHTMLTreeBuilder::startBody() |
|
1660 { |
|
1661 if (m_inBody) |
|
1662 return; |
|
1663 |
|
1664 m_inBody = true; |
|
1665 |
|
1666 if (m_isindexElement) { |
|
1667 insertNode(m_isindexElement.get(), true /* don't descend into this node */); |
|
1668 m_isindexElement = 0; |
|
1669 } |
|
1670 } |
|
1671 |
|
1672 void LegacyHTMLTreeBuilder::finished() |
|
1673 { |
|
1674 // In the case of a completely empty document, here's the place to create the HTML element. |
|
1675 if (m_current && m_current->isDocumentNode() && !m_document->documentElement()) |
|
1676 insertNode(HTMLHtmlElement::create(m_document).get()); |
|
1677 |
|
1678 // This ensures that "current" is not left pointing to a node when the document is destroyed. |
|
1679 freeBlock(); |
|
1680 setCurrent(0); |
|
1681 |
|
1682 // Warning, this may delete the parser, so don't try to do anything else after this. |
|
1683 if (!m_isParsingFragment) |
|
1684 m_document->finishedParsing(); |
|
1685 } |
|
1686 |
|
1687 void LegacyHTMLTreeBuilder::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags) |
|
1688 { |
|
1689 Frame* frame = m_document->frame(); |
|
1690 if (!frame) |
|
1691 return; |
|
1692 |
|
1693 ScriptableDocumentParser* parser = m_document->scriptableDocumentParser(); |
|
1694 int lineNumber = parser->lineNumber() + 1; |
|
1695 |
|
1696 AtomicString tag1; |
|
1697 AtomicString tag2; |
|
1698 if (tagName1) { |
|
1699 if (*tagName1 == "#text") |
|
1700 tag1 = "Text"; |
|
1701 else if (*tagName1 == "#comment") |
|
1702 tag1 = "<!-- comment -->"; |
|
1703 else |
|
1704 tag1 = (closeTags ? "</" : "<") + *tagName1 + ">"; |
|
1705 } |
|
1706 if (tagName2) { |
|
1707 if (*tagName2 == "#text") |
|
1708 tag2 = "Text"; |
|
1709 else if (*tagName2 == "#comment") |
|
1710 tag2 = "<!-- comment -->"; |
|
1711 else |
|
1712 tag2 = (closeTags ? "</" : "<") + *tagName2 + ">"; |
|
1713 } |
|
1714 |
|
1715 const char* errorMsg = htmlParserErrorMessageTemplate(errorCode); |
|
1716 if (!errorMsg) |
|
1717 return; |
|
1718 |
|
1719 String message; |
|
1720 if (parser->processingContentWrittenByScript()) |
|
1721 message += htmlParserDocumentWriteMessage(); |
|
1722 message += errorMsg; |
|
1723 message.replace("%tag1", tag1); |
|
1724 message.replace("%tag2", tag2); |
|
1725 |
|
1726 frame->domWindow()->console()->addMessage(HTMLMessageSource, LogMessageType, |
|
1727 isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel, |
|
1728 message, lineNumber, m_document->url().string()); |
|
1729 } |
|
1730 |
|
#ifdef BUILDING_ON_LEOPARD
// Returns false only when the document's page has settings that request the
// Leopard Mail quirks; with no page or no settings the answer is true.
bool shouldCreateImplicitHead(Document* document)
{
    ASSERT(document);

    Settings* settings = 0;
    if (document->page())
        settings = document->page()->settings();

    if (!settings)
        return true;
    return !settings->needsLeopardMailQuirks();
}
#elif defined(BUILDING_ON_TIGER)
// Returns false only when the document's page has settings that request the
// Tiger Mail quirks; with no page or no settings the answer is true.
bool shouldCreateImplicitHead(Document* document)
{
    ASSERT(document);

    Settings* settings = 0;
    if (document->page())
        settings = document->page()->settings();

    if (!settings)
        return true;
    return !settings->needsTigerMailQuirks();
}
#endif
|
1748 |
|
1749 |
|
1750 String serializeForNumberType(double number) |
|
1751 { |
|
1752 // According to HTML5, "the best representation of the number n as a floating |
|
1753 // point number" is a string produced by applying ToString() to n. |
|
1754 DtoaBuffer buffer; |
|
1755 unsigned length; |
|
1756 doubleToStringInJavaScriptFormat(number, buffer, &length); |
|
1757 return String(buffer, length); |
|
1758 } |
|
1759 |
|
1760 bool parseToDoubleForNumberType(const String& src, double* out) |
|
1761 { |
|
1762 // See HTML5 2.4.4.3 `Real numbers.' |
|
1763 |
|
1764 if (src.isEmpty()) |
|
1765 return false; |
|
1766 // String::toDouble() accepts leading + \t \n \v \f \r and SPACE, which are invalid in HTML5. |
|
1767 // So, check the first character. |
|
1768 if (src[0] != '-' && (src[0] < '0' || src[0] > '9')) |
|
1769 return false; |
|
1770 |
|
1771 bool valid = false; |
|
1772 double value = src.toDouble(&valid); |
|
1773 if (!valid) |
|
1774 return false; |
|
1775 // NaN and Infinity are not valid numbers according to the standard. |
|
1776 if (!isfinite(value)) |
|
1777 return false; |
|
1778 // -0 -> 0 |
|
1779 if (!value) |
|
1780 value = 0; |
|
1781 if (out) |
|
1782 *out = value; |
|
1783 return true; |
|
1784 } |
|
1785 |
|
1786 } |