diff -r 000000000000 -r 4f2f89ce4247 WebCore/html/LegacyHTMLDocumentParser.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/WebCore/html/LegacyHTMLDocumentParser.h Fri Sep 17 09:02:29 2010 +0300
@@ -0,0 +1,452 @@
+/*
+ Copyright (C) 1997 Martin Jones (mjones@kde.org)
+ (C) 1997 Torben Weis (weis@kde.org)
+ (C) 1998 Waldo Bastian (bastian@kde.org)
+ (C) 2001 Dirk Mueller (mueller@kde.org)
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to
+ the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
+*/
+
+#ifndef LegacyHTMLDocumentParser_h
+#define LegacyHTMLDocumentParser_h
+
+#include "CachedResourceClient.h"
+#include "CachedResourceHandle.h"
+#include "FragmentScriptingPermission.h"
+#include "NamedNodeMap.h"
+#include "ScriptableDocumentParser.h"
+#include "SegmentedString.h"
+#include "Timer.h"
+#include
+#include
+#include
+
+namespace WebCore {
+
+class CachedScript;
+class DocumentFragment;
+class Document;
+class HTMLDocument;
+class HTMLScriptElement;
+class HTMLViewSourceDocument;
+class FrameView;
+class LegacyHTMLTreeBuilder;
+class Node;
+class LegacyPreloadScanner;
+class ScriptSourceCode;
+
+/**
+ * @internal
+ * One unit of tokenizer output: a tag, identified by tagName and carrying an
+ * optional attribute list in attrs. It can also stand for a run of text, in
+ * which case text holds that text.
+ *
+ * Older wording of this comment spoke of a numerical id; no such member exists
+ * in this struct - the tag is named through tagName.
+ */
+struct Token {
+    // A new token is a begin tag with neither the self-closing nor the
+    // broken-XML-style marker set, and with no view-source buffer allocated.
+    Token()
+        : beginTag(true)
+        , selfClosingTag(false)
+        , brokenXMLStyle(false)
+        , m_sourceInfo(0)
+    {
+    }
+
+    ~Token()
+    {
+    }
+
+    // Declared here, defined out of line.
+    void addAttribute(AtomicString& attrName, const AtomicString& v, bool viewSourceMode);
+
+    // True only for a begin tag whose tagName equals fullName's local name.
+    bool isOpenTag(const QualifiedName& fullName) const
+    {
+        if (!beginTag)
+            return false;
+        return fullName.localName() == tagName;
+    }
+
+    // True only for an end tag whose tagName equals fullName's local name.
+    bool isCloseTag(const QualifiedName& fullName) const
+    {
+        if (beginTag)
+            return false;
+        return fullName.localName() == tagName;
+    }
+
+    // Releases attrs and text, sets tagName to nullAtom and puts the three
+    // flags back to their constructor values. An existing view-source buffer
+    // is emptied but kept allocated.
+    void reset()
+    {
+        attrs = 0;
+        text = 0;
+        tagName = nullAtom;
+        beginTag = true;
+        selfClosingTag = false;
+        brokenXMLStyle = false;
+        if (m_sourceInfo)
+            m_sourceInfo->clear();
+    }
+
+    // Appends c to the view-source buffer, allocating the buffer on first use.
+    void addViewSourceChar(UChar c)
+    {
+        if (!m_sourceInfo.get())
+            m_sourceInfo.set(new Vector);
+        m_sourceInfo->append(c);
+    }
+
+    // NOTE(review): the template arguments of RefPtr, OwnPtr and Vector are not
+    // visible in this copy of the header (here and in addViewSourceChar above);
+    // they appear to have been lost - confirm against the original file.
+    RefPtr attrs;
+    RefPtr text;
+    AtomicString tagName;
+    bool beginTag;
+    bool selfClosingTag;
+    bool brokenXMLStyle;
+    OwnPtr > m_sourceInfo;
+};
+
+enum DoctypeState { // Values held in DoctypeToken::m_state and exchanged through DoctypeToken::state()/setState().
+ DoctypeBegin, // First enumerator (value 0); the state DoctypeToken::reset() assigns.
+ DoctypeBeforeName, // NOTE(review): the Before*/After* states presumably bracket the name, public ID and system ID that DoctypeToken buffers - confirm against the tokenizer.
+ DoctypeName,
+ DoctypeAfterName,
+ DoctypeBeforePublicID,
+ DoctypePublicID,
+ DoctypeAfterPublicID,
+ DoctypeBeforeSystemID,
+ DoctypeSystemID,
+ DoctypeAfterSystemID,
+ DoctypeBogus // Last enumerator. NOTE(review): presumably marks a malformed doctype - confirm against the tokenizer.
+};
+
+/**
+ * Buffers and state for one doctype token: the name, public ID, system ID and
+ * source buffers, the current DoctypeState, and the force-quirks flag.
+ */
+class DoctypeToken {
+public:
+    // Give the two scalar members the same values reset() assigns, so that
+    // state() and m_forceQuirks are well defined on a freshly constructed
+    // token even when reset() has not been called yet.
+    DoctypeToken()
+        : m_state(DoctypeBegin)
+        , m_forceQuirks(false)
+    {
+    }
+
+    // Empties all four buffers, returns to DoctypeBegin and clears the
+    // force-quirks flag.
+    void reset()
+    {
+        m_name.clear();
+        m_publicID.clear();
+        m_systemID.clear();
+        m_state = DoctypeBegin;
+        m_source.clear();
+        m_forceQuirks = false;
+    }
+
+    // Current state; const so it can be queried through a const reference.
+    DoctypeState state() const { return m_state; }
+    void setState(DoctypeState s) { m_state = s; }
+
+    // NOTE(review): the element type of these Vectors is not visible in this
+    // copy of the header (the template arguments appear to have been lost) -
+    // confirm against the original file.
+    Vector m_name;
+    Vector m_publicID;
+    Vector m_systemID;
+    DoctypeState m_state;
+
+    Vector m_source;
+
+    bool m_forceQuirks; // Used by the HTML5 parser.
+};
+
+//-----------------------------------------------------------------------------
+
+// FIXME: This class does too much. Right now it is both an HTML tokenizer as well
+// as handling all of the non-tokenizer-specific junk related to tokenizing HTML
+// (like dealing with script tags).
+ UChar searchBuffer[10];
+
+ // Counts where we are in the string we are scanning for
+ int searchCount;
+ // the stopper string
+ const char* m_searchStopper;
+ int m_searchStopperLength;
+
+ // if no more data is coming, just parse what we have (including ext scripts that
+ // may be still downloading) and finish
+ bool m_noMoreData;
+ // URL to get source code of script from
+ String m_scriptTagSrcAttrValue;
+ String m_scriptTagCharsetAttrValue;
+ // the HTML code we will parse after the external script we are waiting for has loaded
+ SegmentedString m_pendingSrc;
+
+ // the HTML code we will parse after this particular script has
+ // loaded, but before all pending HTML
+ SegmentedString* m_currentPrependingSrc;
+
+ // true if we are executing a script while parsing a document. This causes the parsing of
+ // the output of the script to be postponed until after the script has finished executing
+ int m_executingScript;
+ Deque > m_pendingScripts;
+ RefPtr m_scriptNode;
+
+ bool m_requestingScript;
+ bool m_hasScriptsWaitingForStylesheets;
+
+ // if we found one broken comment, there are most likely others as well
+ // store a flag to get rid of the O(n^2) behaviour in such a case.
+ bool m_brokenComments;
+ // current line number
+ int m_lineNumber;
+ int m_currentScriptTagStartLineNumber;
+ int m_currentTagStartLineNumber;
+
+ double m_tokenizerTimeDelay;
+ int m_tokenizerChunkSize;
+
+ // The timer for continued processing.
+ Timer m_timer;
+
+ // The timer for continued executing external scripts.
+ Timer m_externalScriptsTimer;
+
+// This buffer can hold arbitrarily long user-defined attribute names, such as in EMBED tags.
+// So any fixed number might be too small, but rather than rewriting all usage of this buffer
+// we'll just make it large enough to handle all imaginable cases.
+#define CBUFLEN 1024
+ UChar m_cBuffer[CBUFLEN + 2];
+ unsigned int m_cBufferPos;
+
+ SegmentedString m_src;
+ OwnPtr m_treeBuilder;
+ bool m_inWrite;
+ bool m_fragment;
+ FragmentScriptingPermission m_scriptingPermission;
+
+ OwnPtr m_preloadScanner;
+};
+
+UChar decodeNamedEntity(const char*); // Declared here, defined elsewhere. NOTE(review): presumably maps an entity name to its character; the result for an unknown name is not visible here - confirm.
+
+} // namespace WebCore
+
+#endif // LegacyHTMLDocumentParser_h