/*
* Copyright (C) 2009 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "WebEntities.h"
#include <string.h>
#include "PlatformString.h"
#include "StringBuilder.h"
#include <wtf/HashMap.h>
#include "WebString.h"
using namespace WebCore;
namespace {
// Note that this file is also included by LegacyHTMLDocumentParser.cpp so we are getting
// two copies of the data in memory. We can fix this by changing the script
// that generated the array to create a static const that is its length, but
// this is low priority since the data is less than 4K. We use anonymous
// namespace to prevent name collisions.
#include "HTMLEntityNames.cpp" // NOLINT
}
namespace WebKit {
void populateMap(WTF::HashMap<int, WebCore::String>& map,
const Entity* entities,
size_t entitiesCount,
bool standardHTML)
{
ASSERT(map.isEmpty());
const Entity* entity = &entities[0];
for (size_t i = 0; i < entitiesCount; i++, entity++) {
int code = entity->code;
String name = entity->name;
// For consistency, use the lowe case for entities that have both.
if (map.contains(code) && map.get(code) == name.lower())
continue;
// Don't register %, ⊅ and &supl;.
if (standardHTML && (code == '%' || code == 0x2285 || code == 0x00b9))
continue;
map.set(code, name);
}
if (standardHTML)
map.set(static_cast<int>(0x0027), String("#39"));
}
static const Entity xmlBuiltInEntityCodes[] = {
{ "lt", 0x003c },
{ "gt", 0x003e },
{ "amp", 0x0026 },
{ "apos", 0x0027 },
{ "quot", 0x0022 }
};
WebEntities::WebEntities(bool xmlEntities)
{
if (xmlEntities)
populateMap(m_entitiesMap,
xmlBuiltInEntityCodes,
sizeof(xmlBuiltInEntityCodes) / sizeof(Entity),
false);
else
populateMap(m_entitiesMap,
wordlist,
sizeof(wordlist) / sizeof(Entity),
true);
}
String WebEntities::entityNameByCode(int code) const
{
if (m_entitiesMap.contains(code))
return m_entitiesMap.get(code);
return "";
}
String WebEntities::convertEntitiesInString(const String& value) const
{
unsigned len = value.length();
const UChar* startPos = value.characters();
const UChar* curPos = startPos;
// FIXME: Optimize - create StringBuilder only if value has any entities.
StringBuilder result;
while (len--) {
if (m_entitiesMap.contains(*curPos)) {
// Append content before entity code.
if (curPos > startPos)
result.append(String(startPos, curPos - startPos));
result.append("&");
result.append(m_entitiesMap.get(*curPos));
result.append(";");
startPos = ++curPos;
} else
curPos++;
}
// Append the remaining content.
if (curPos > startPos)
result.append(String(startPos, curPos - startPos));
return result.toString();
}
} // namespace WebKit