diff -r 000000000000 -r 4f2f89ce4247 WebCore/html/HTMLToken.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/WebCore/html/HTMLToken.h Fri Sep 17 09:02:29 2010 +0300 @@ -0,0 +1,467 @@ +/* + * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HTMLToken_h +#define HTMLToken_h + +#include "NamedNodeMap.h" +#include +#include +#include + +namespace WebCore { + +class HTMLToken : public Noncopyable { +public: + enum Type { + Uninitialized, + DOCTYPE, + StartTag, + EndTag, + Comment, + Character, + EndOfFile, + }; + + class Attribute { + public: + WTF::Vector m_name; + WTF::Vector m_value; + }; + + typedef WTF::Vector AttributeList; + typedef WTF::Vector DataVector; + + HTMLToken() { clear(); } + + void clear() + { + m_type = Uninitialized; + } + + void makeEndOfFile() + { + ASSERT(m_type == Uninitialized); + m_type = EndOfFile; + } + + void beginStartTag(UChar character) + { + ASSERT(character); + ASSERT(m_type == Uninitialized); + m_type = StartTag; + m_data.clear(); + m_selfClosing = false; + m_currentAttribute = 0; + m_attributes.clear(); + + m_data.append(character); + } + + template + void beginEndTag(T characters) + { + ASSERT(m_type == Uninitialized); + m_type = EndTag; + m_data.clear(); + m_selfClosing = false; + m_currentAttribute = 0; + m_attributes.clear(); + + m_data.append(characters); + } + + void beginCharacter(UChar character) + { + ASSERT(character); + ASSERT(m_type == Uninitialized); + m_type = Character; + m_data.clear(); + m_data.append(character); + } + + void beginComment() + { + ASSERT(m_type == Uninitialized); + m_type = Comment; + m_data.clear(); + } + + void beginDOCTYPE() + { + ASSERT(m_type == Uninitialized); + m_type = DOCTYPE; + m_data.clear(); + m_doctypeData.set(new DoctypeData()); + } + + void beginDOCTYPE(UChar character) + { + ASSERT(character); + beginDOCTYPE(); + m_data.append(character); + } + + void appendToName(UChar character) + { + ASSERT(character); + ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE); + m_data.append(character); + } + + template + void appendToCharacter(T characters) + { + ASSERT(m_type == Character); + m_data.append(characters); + } + + void appendToComment(UChar character) + { + ASSERT(character); + ASSERT(m_type == Comment); + m_data.append(character); + } + + void addNewAttribute() + { + ASSERT(m_type == StartTag || m_type == EndTag); + m_attributes.grow(m_attributes.size() + 1); + m_currentAttribute = &m_attributes.last(); + } + + void appendToAttributeName(UChar character) + { + ASSERT(character); + ASSERT(m_type == StartTag || m_type == EndTag); + m_currentAttribute->m_name.append(character); + } + + void appendToAttributeValue(UChar character) + { + ASSERT(character); + ASSERT(m_type == StartTag || m_type == EndTag); + m_currentAttribute->m_value.append(character); + } + + Type type() const { return m_type; } + + bool selfClosing() const + { + ASSERT(m_type == StartTag || m_type == EndTag); + return m_selfClosing; + } + + void setSelfClosing() + { + ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag); + m_selfClosing = true; + } + + const AttributeList& attributes() const + { + ASSERT(m_type == StartTag || m_type == EndTag); + return m_attributes; + } + + const DataVector& name() const + { + ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE); + return m_data; + } + + const DataVector& characters() const + { + ASSERT(m_type == Character); + return m_data; + } + + const DataVector& comment() const + { + ASSERT(m_type == Comment); + return m_data; + } + + // FIXME: Distinguish between a missing public identifer and an empty one. + const WTF::Vector& publicIdentifier() const + { + ASSERT(m_type == DOCTYPE); + return m_doctypeData->m_publicIdentifier; + } + + // FIXME: Distinguish between a missing system identifer and an empty one. + const WTF::Vector& systemIdentifier() const + { + ASSERT(m_type == DOCTYPE); + return m_doctypeData->m_systemIdentifier; + } + + void setPublicIdentifierToEmptyString() + { + ASSERT(m_type == DOCTYPE); + m_doctypeData->m_hasPublicIdentifier = true; + m_doctypeData->m_publicIdentifier.clear(); + } + + void setSystemIdentifierToEmptyString() + { + ASSERT(m_type == DOCTYPE); + m_doctypeData->m_hasSystemIdentifier = true; + m_doctypeData->m_systemIdentifier.clear(); + } + + bool forceQuirks() const + { + ASSERT(m_type == DOCTYPE); + return m_doctypeData->m_forceQuirks; + } + + void setForceQuirks() + { + ASSERT(m_type == DOCTYPE); + m_doctypeData->m_forceQuirks = true; + } + + void appendToPublicIdentifier(UChar character) + { + ASSERT(character); + ASSERT(m_type == DOCTYPE); + ASSERT(m_doctypeData->m_hasPublicIdentifier); + m_doctypeData->m_publicIdentifier.append(character); + } + + void appendToSystemIdentifier(UChar character) + { + ASSERT(character); + ASSERT(m_type == DOCTYPE); + ASSERT(m_doctypeData->m_hasSystemIdentifier); + m_doctypeData->m_systemIdentifier.append(character); + } + +private: + // FIXME: I'm not sure what the final relationship between HTMLToken and + // AtomicHTMLToken will be. I'm marking this a friend for now, but we'll + // want to end up with a cleaner interface between the two classes. + friend class AtomicHTMLToken; + + class DoctypeData { + public: + DoctypeData() + : m_hasPublicIdentifier(false) + , m_hasSystemIdentifier(false) + , m_forceQuirks(false) + { + } + + bool m_hasPublicIdentifier; + bool m_hasSystemIdentifier; + bool m_forceQuirks; + WTF::Vector m_publicIdentifier; + WTF::Vector m_systemIdentifier; + }; + + Type m_type; + + // "name" for DOCTYPE, StartTag, and EndTag + // "characters" for Character + // "data" for Comment + DataVector m_data; + + // For DOCTYPE + OwnPtr m_doctypeData; + + // For StartTag and EndTag + bool m_selfClosing; + AttributeList m_attributes; // Old tokenizer reserves 10. + + // A pointer into m_attributes used during lexing. + Attribute* m_currentAttribute; +}; + +// FIXME: This class should eventually be named HTMLToken once we move the +// exiting HTMLToken to be internal to the HTMLTokenizer. +class AtomicHTMLToken : public Noncopyable { +public: + AtomicHTMLToken(HTMLToken& token) + : m_type(token.type()) + { + switch (m_type) { + case HTMLToken::Uninitialized: + ASSERT_NOT_REACHED(); + break; + case HTMLToken::DOCTYPE: + m_name = AtomicString(token.name().data(), token.name().size()); + m_doctypeData = token.m_doctypeData.release(); + break; + case HTMLToken::EndOfFile: + break; + case HTMLToken::StartTag: + case HTMLToken::EndTag: { + m_selfClosing = token.selfClosing(); + m_name = AtomicString(token.name().data(), token.name().size()); + const HTMLToken::AttributeList& attributes = token.attributes(); + for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); + iter != attributes.end(); ++iter) { + if (!iter->m_name.isEmpty()) { + String name(iter->m_name.data(), iter->m_name.size()); + String value(iter->m_value.data(), iter->m_value.size()); + RefPtr mappedAttribute = Attribute::createMapped(name, value); + if (!m_attributes) + m_attributes = NamedNodeMap::create(); + m_attributes->insertAttribute(mappedAttribute.release(), false); + } + } + break; + } + case HTMLToken::Comment: + m_data = String(token.comment().data(), token.comment().size()); + break; + case HTMLToken::Character: + m_externalCharacters = &token.characters(); + break; + } + } + + AtomicHTMLToken(HTMLToken::Type type, AtomicString name, PassRefPtr attributes = 0) + : m_type(type) + , m_name(name) + , m_attributes(attributes) + { + ASSERT(usesName()); + } + + HTMLToken::Type type() const { return m_type; } + + const AtomicString& name() const + { + ASSERT(usesName()); + return m_name; + } + + void setName(const AtomicString& name) + { + ASSERT(usesName()); + m_name = name; + } + + bool selfClosing() const + { + ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag); + return m_selfClosing; + } + + Attribute* getAttributeItem(const QualifiedName& attributeName) + { + ASSERT(usesAttributes()); + if (!m_attributes) + return 0; + return m_attributes->getAttributeItem(attributeName); + } + + NamedNodeMap* attributes() const + { + ASSERT(usesAttributes()); + return m_attributes.get(); + } + + PassRefPtr takeAtributes() + { + ASSERT(usesAttributes()); + return m_attributes.release(); + } + + const HTMLToken::DataVector& characters() const + { + ASSERT(m_type == HTMLToken::Character); + return *m_externalCharacters; + } + + const String& comment() const + { + ASSERT(m_type == HTMLToken::Comment); + return m_data; + } + + // FIXME: Distinguish between a missing public identifer and an empty one. + WTF::Vector& publicIdentifier() const + { + ASSERT(m_type == HTMLToken::DOCTYPE); + return m_doctypeData->m_publicIdentifier; + } + + // FIXME: Distinguish between a missing system identifer and an empty one. + WTF::Vector& systemIdentifier() const + { + ASSERT(m_type == HTMLToken::DOCTYPE); + return m_doctypeData->m_systemIdentifier; + } + + bool forceQuirks() const + { + ASSERT(m_type == HTMLToken::DOCTYPE); + return m_doctypeData->m_forceQuirks; + } + +private: + HTMLToken::Type m_type; + + bool usesName() const + { + return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag || m_type == HTMLToken::DOCTYPE; + } + + bool usesAttributes() const + { + return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; + } + + // "name" for DOCTYPE, StartTag, and EndTag + AtomicString m_name; + + // "data" for Comment + String m_data; + + // "characters" for Character + // + // We don't want to copy the the characters out of the HTMLToken, so we + // keep a pointer to its buffer instead. This buffer is owned by the + // HTMLToken and causes a lifetime dependence between these objects. + // + // FIXME: Add a mechanism for "internalizing" the characters when the + // HTMLToken is destructed. + const HTMLToken::DataVector* m_externalCharacters; + + // For DOCTYPE + OwnPtr m_doctypeData; + + // For StartTag and EndTag + bool m_selfClosing; + + RefPtr m_attributes; +}; + +} + +#endif