diff -r 000000000000 -r 4f2f89ce4247 WebCore/html/LegacyHTMLDocumentParser.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/WebCore/html/LegacyHTMLDocumentParser.h Fri Sep 17 09:02:29 2010 +0300 @@ -0,0 +1,452 @@ +/* + Copyright (C) 1997 Martin Jones (mjones@kde.org) + (C) 1997 Torben Weis (weis@kde.org) + (C) 1998 Waldo Bastian (bastian@kde.org) + (C) 2001 Dirk Mueller (mueller@kde.org) + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +#ifndef LegacyHTMLDocumentParser_h +#define LegacyHTMLDocumentParser_h + +#include "CachedResourceClient.h" +#include "CachedResourceHandle.h" +#include "FragmentScriptingPermission.h" +#include "NamedNodeMap.h" +#include "ScriptableDocumentParser.h" +#include "SegmentedString.h" +#include "Timer.h" +#include +#include +#include + +namespace WebCore { + +class CachedScript; +class DocumentFragment; +class Document; +class HTMLDocument; +class HTMLScriptElement; +class HTMLViewSourceDocument; +class FrameView; +class LegacyHTMLTreeBuilder; +class Node; +class LegacyPreloadScanner; +class ScriptSourceCode; + +/** + * @internal + * represents one HTML tag. Consists of a numerical id, and the list + * of attributes. Can also represent text. 
In this case the id = 0 and + * text contains the text. + */ +struct Token { + Token() + : beginTag(true) + , selfClosingTag(false) + , brokenXMLStyle(false) + , m_sourceInfo(0) + { } + ~Token() { } + + void addAttribute(AtomicString& attrName, const AtomicString& v, bool viewSourceMode); + + bool isOpenTag(const QualifiedName& fullName) const { return beginTag && fullName.localName() == tagName; } + bool isCloseTag(const QualifiedName& fullName) const { return !beginTag && fullName.localName() == tagName; } + + void reset() + { + attrs = 0; + text = 0; + tagName = nullAtom; + beginTag = true; + selfClosingTag = false; + brokenXMLStyle = false; + if (m_sourceInfo) + m_sourceInfo->clear(); + } + + void addViewSourceChar(UChar c) { if (!m_sourceInfo.get()) m_sourceInfo.set(new Vector); m_sourceInfo->append(c); } + + RefPtr attrs; + RefPtr text; + AtomicString tagName; + bool beginTag; + bool selfClosingTag; + bool brokenXMLStyle; + OwnPtr > m_sourceInfo; +}; + +enum DoctypeState { + DoctypeBegin, + DoctypeBeforeName, + DoctypeName, + DoctypeAfterName, + DoctypeBeforePublicID, + DoctypePublicID, + DoctypeAfterPublicID, + DoctypeBeforeSystemID, + DoctypeSystemID, + DoctypeAfterSystemID, + DoctypeBogus +}; + +class DoctypeToken { +public: + DoctypeToken() {} + + void reset() + { + m_name.clear(); + m_publicID.clear(); + m_systemID.clear(); + m_state = DoctypeBegin; + m_source.clear(); + m_forceQuirks = false; + } + + DoctypeState state() { return m_state; } + void setState(DoctypeState s) { m_state = s; } + + Vector m_name; + Vector m_publicID; + Vector m_systemID; + DoctypeState m_state; + + Vector m_source; + + bool m_forceQuirks; // Used by the HTML5 parser. +}; + +//----------------------------------------------------------------------------- + +// FIXME: This class does too much. 
Right now it is both an HTML tokenizer as well
+// as handling all of the non-tokenizer-specific junk related to tokenizing HTML
+// (like dealing with "  -- NOTE(review): this copy of the patch is truncated here; the rest of this comment, the class declaration header and any members before searchBuffer are not visible.
+    UChar searchBuffer[10];
+
+    // Counts where we are in the string we are scanning for
+    int searchCount;
+    // The stopper string
+    const char* m_searchStopper;
+    int m_searchStopperLength; // presumably the length of m_searchStopper -- confirm in the .cpp
+
+    // If no more data is coming, just parse what we have (including external scripts that
+    // may be still downloading) and finish
+    bool m_noMoreData;
+    // URL to get source code of script from
+    String m_scriptTagSrcAttrValue;
+    String m_scriptTagCharsetAttrValue; // presumably the charset attribute of the same script tag -- confirm
+    // The HTML code we will parse after the external script we are waiting for has loaded
+    SegmentedString m_pendingSrc;
+
+    // The HTML code we will parse after this particular script has
+    // loaded, but before all pending HTML
+    SegmentedString* m_currentPrependingSrc;
+
+    // Non-zero while we are executing a script during document parsing (declared as int, so presumably a nesting count, not a flag -- confirm). This causes the parsing of
+    // the output of the script to be postponed until after the script has finished executing
+    int m_executingScript;
+    Deque > m_pendingScripts; // NOTE(review): template arguments are missing in this copy of the patch
+    RefPtr m_scriptNode; // NOTE(review): template argument is missing in this copy of the patch
+
+    bool m_requestingScript;
+    bool m_hasScriptsWaitingForStylesheets;
+
+    // If we found one broken comment, there are most likely others as well;
+    // store a flag to get rid of the O(n^2) behaviour in such a case.
+    bool m_brokenComments;
+    // Current line number
+    int m_lineNumber;
+    int m_currentScriptTagStartLineNumber;
+    int m_currentTagStartLineNumber;
+
+    double m_tokenizerTimeDelay;
+    int m_tokenizerChunkSize;
+
+    // The timer for continued processing.
+    Timer m_timer; // NOTE(review): template argument is missing in this copy of the patch
+
+    // The timer for continued executing external scripts.
+    Timer m_externalScriptsTimer;
+
+// This buffer can hold arbitrarily long user-defined attribute names, such as in EMBED tags.
+// So any fixed number might be too small, but rather than rewriting all usage of this buffer
+// we'll just make it large enough to handle all imaginable cases. 
+#define CBUFLEN 1024
+    UChar m_cBuffer[CBUFLEN + 2]; // CBUFLEN + 2 elements
+    unsigned int m_cBufferPos; // presumably the current position within m_cBuffer -- confirm
+
+    SegmentedString m_src;
+    OwnPtr m_treeBuilder; // NOTE(review): template argument is missing in this copy of the patch
+    bool m_inWrite;
+    bool m_fragment;
+    FragmentScriptingPermission m_scriptingPermission;
+
+    OwnPtr m_preloadScanner; // NOTE(review): template argument is missing in this copy of the patch
+};
+// Declared here only; the definition is not part of this header.
+UChar decodeNamedEntity(const char*);
+
+} // namespace WebCore
+
+#endif // LegacyHTMLDocumentParser_h