JavaScriptCore/wtf/text/StringImpl.h
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
       
     3  * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
       
     4  * Copyright (C) 2009 Google Inc. All rights reserved.
       
     5  *
       
     6  * This library is free software; you can redistribute it and/or
       
     7  * modify it under the terms of the GNU Library General Public
       
     8  * License as published by the Free Software Foundation; either
       
     9  * version 2 of the License, or (at your option) any later version.
       
    10  *
       
    11  * This library is distributed in the hope that it will be useful,
       
    12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    14  * Library General Public License for more details.
       
    15  *
       
    16  * You should have received a copy of the GNU Library General Public License
       
    17  * along with this library; see the file COPYING.LIB.  If not, write to
       
    18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
       
    19  * Boston, MA 02110-1301, USA.
       
    20  *
       
    21  */
       
    22 
       
    23 #ifndef StringImpl_h
       
    24 #define StringImpl_h
       
    25 
       
    26 #include <limits.h>
       
    27 #include <wtf/ASCIICType.h>
       
    28 #include <wtf/CrossThreadRefCounted.h>
       
    29 #include <wtf/OwnFastMallocPtr.h>
       
    30 #include <wtf/StdLibExtras.h>
       
    31 #include <wtf/StringHashFunctions.h>
       
    32 #include <wtf/Vector.h>
       
    33 #include <wtf/text/StringImplBase.h>
       
    34 #include <wtf/unicode/Unicode.h>
       
    35 
       
    36 #if PLATFORM(CF)
       
    37 typedef const struct __CFString * CFStringRef;
       
    38 #endif
       
    39 
       
    40 #ifdef __OBJC__
       
    41 @class NSString;
       
    42 #endif
       
    43 
       
    44 // FIXME: This is a temporary layering violation while we move string code to WTF.
       
    45 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
       
    46 namespace JSC {
       
    47 
       
    48 struct IdentifierCStringTranslator;
       
    49 struct IdentifierUCharBufferTranslator;
       
    50 
       
    51 }
       
    52 
       
    53 // FIXME: This is a temporary layering violation while we move string code to WTF.
       
    54 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
       
    55 namespace WebCore {
       
    56 
       
    57 class StringBuffer;
       
    58 
       
    59 struct CStringTranslator;
       
    60 struct HashAndCharactersTranslator;
       
    61 struct StringHash;
       
    62 struct UCharBufferTranslator;
       
    63 
       
    64 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
       
    65 
       
    66 typedef OwnFastMallocPtr<const UChar> SharableUChar;
       
    67 typedef CrossThreadRefCounted<SharableUChar> SharedUChar;
       
    68 typedef bool (*CharacterMatchFunctionPtr)(UChar);
       
    69 
       
    70 class StringImpl : public StringImplBase {
       
    71     friend struct JSC::IdentifierCStringTranslator;
       
    72     friend struct JSC::IdentifierUCharBufferTranslator;
       
    73     friend struct CStringTranslator;
       
    74     friend struct HashAndCharactersTranslator;
       
    75     friend struct UCharBufferTranslator;
       
    76     friend class AtomicStringImpl;
       
    77 private:
       
    78     // Used to construct static strings, which have an special refCount that can never hit zero.
       
    79     // This means that the static string will never be destroyed, which is important because
       
    80     // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
       
    81     StringImpl(const UChar* characters, unsigned length, StaticStringConstructType)
       
    82         : StringImplBase(length, ConstructStaticString)
       
    83         , m_data(characters)
       
    84         , m_buffer(0)
       
    85         , m_hash(0)
       
    86     {
       
    87         // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
       
    88         // with impunity. The empty string is special because it is never entered into
       
    89         // AtomicString's HashKey, but still needs to compare correctly.
       
    90         hash();
       
    91     }
       
    92 
       
    93     // Create a normal string with internal storage (BufferInternal)
       
    94     StringImpl(unsigned length)
       
    95         : StringImplBase(length, BufferInternal)
       
    96         , m_data(reinterpret_cast<const UChar*>(this + 1))
       
    97         , m_buffer(0)
       
    98         , m_hash(0)
       
    99     {
       
   100         ASSERT(m_data);
       
   101         ASSERT(m_length);
       
   102     }
       
   103 
       
   104     // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
       
   105     StringImpl(const UChar* characters, unsigned length)
       
   106         : StringImplBase(length, BufferOwned)
       
   107         , m_data(characters)
       
   108         , m_buffer(0)
       
   109         , m_hash(0)
       
   110     {
       
   111         ASSERT(m_data);
       
   112         ASSERT(m_length);
       
   113     }
       
   114 
       
   115     // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
       
   116     StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
       
   117         : StringImplBase(length, BufferSubstring)
       
   118         , m_data(characters)
       
   119         , m_substringBuffer(base.releaseRef())
       
   120         , m_hash(0)
       
   121     {
       
   122         ASSERT(m_data);
       
   123         ASSERT(m_length);
       
   124         ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
       
   125     }
       
   126 
       
   127     // Used to construct new strings sharing an existing SharedUChar (BufferShared)
       
   128     StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
       
   129         : StringImplBase(length, BufferShared)
       
   130         , m_data(characters)
       
   131         , m_sharedBuffer(sharedBuffer.releaseRef())
       
   132         , m_hash(0)
       
   133     {
       
   134         ASSERT(m_data);
       
   135         ASSERT(m_length);
       
   136     }
       
   137 
       
   138     // For use only by AtomicString's XXXTranslator helpers.
       
   139     void setHash(unsigned hash)
       
   140     {
       
   141         ASSERT(!isStatic());
       
   142         ASSERT(!m_hash);
       
   143         ASSERT(hash == computeHash(m_data, m_length));
       
   144         m_hash = hash;
       
   145     }
       
   146 
       
   147 public:
       
   148     ~StringImpl();
       
   149 
       
   150     static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
       
   151     static PassRefPtr<StringImpl> create(const char*, unsigned length);
       
   152     static PassRefPtr<StringImpl> create(const char*);
       
   153     static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer);
       
   154     static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
       
   155     {
       
   156         ASSERT(rep);
       
   157         ASSERT(length <= rep->length());
       
   158 
       
   159         if (!length)
       
   160             return empty();
       
   161 
       
   162         StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
       
   163         return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep));
       
   164     }
       
   165 
       
   166     static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
       
   167     static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output)
       
   168     {
       
   169         if (!length) {
       
   170             output = 0;
       
   171             return empty();
       
   172         }
       
   173 
       
   174         if (length > ((std::numeric_limits<size_t>::max() - sizeof(StringImpl)) / sizeof(UChar))) {
       
   175             output = 0;
       
   176             return 0;
       
   177         }
       
   178         StringImpl* resultImpl;
       
   179         if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) {
       
   180             output = 0;
       
   181             return 0;
       
   182         }
       
   183         output = reinterpret_cast<UChar*>(resultImpl + 1);
       
   184         return adoptRef(new(resultImpl) StringImpl(length));
       
   185     }
       
   186 
       
   187     static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); }
       
   188     static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
       
   189     static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
       
   190 
       
   191     template<size_t inlineCapacity>
       
   192     static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
       
   193     {
       
   194         if (size_t size = vector.size()) {
       
   195             ASSERT(vector.data());
       
   196             return adoptRef(new StringImpl(vector.releaseBuffer(), size));
       
   197         }
       
   198         return empty();
       
   199     }
       
   200     static PassRefPtr<StringImpl> adopt(StringBuffer&);
       
   201 
       
   202     SharedUChar* sharedBuffer();
       
   203     const UChar* characters() const { return m_data; }
       
   204 
       
   205     size_t cost()
       
   206     {
       
   207         // For substrings, return the cost of the base string.
       
   208         if (bufferOwnership() == BufferSubstring)
       
   209             return m_substringBuffer->cost();
       
   210 
       
   211         if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) {
       
   212             m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost;
       
   213             return m_length;
       
   214         }
       
   215         return 0;
       
   216     }
       
   217 
       
   218     bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; }
       
   219     void setIsIdentifier(bool isIdentifier)
       
   220     {
       
   221         ASSERT(!isStatic());
       
   222         if (isIdentifier)
       
   223             m_refCountAndFlags |= s_refCountFlagIsIdentifier;
       
   224         else
       
   225             m_refCountAndFlags &= ~s_refCountFlagIsIdentifier;
       
   226     }
       
   227 
       
   228     bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; }
       
   229 
       
   230     bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; }
       
   231     void setIsAtomic(bool isIdentifier)
       
   232     {
       
   233         ASSERT(!isStatic());
       
   234         if (isIdentifier)
       
   235             m_refCountAndFlags |= s_refCountFlagIsAtomic;
       
   236         else
       
   237             m_refCountAndFlags &= ~s_refCountFlagIsAtomic;
       
   238     }
       
   239 
       
   240     unsigned hash() const { if (!m_hash) m_hash = computeHash(m_data, m_length); return m_hash; }
       
   241     unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
       
   242     static unsigned computeHash(const UChar* data, unsigned length) { return WTF::stringHash(data, length); }
       
   243     static unsigned computeHash(const char* data, unsigned length) { return WTF::stringHash(data, length); }
       
   244     static unsigned computeHash(const char* data) { return WTF::stringHash(data); }
       
   245 
       
   246     ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; }
       
   247     ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; }
       
   248 
       
   249     static StringImpl* empty();
       
   250 
       
   251     static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
       
   252     {
       
   253         if (numCharacters <= s_copyCharsInlineCutOff) {
       
   254             for (unsigned i = 0; i < numCharacters; ++i)
       
   255                 destination[i] = source[i];
       
   256         } else
       
   257             memcpy(destination, source, numCharacters * sizeof(UChar));
       
   258     }
       
   259 
       
   260     // Returns a StringImpl suitable for use on another thread.
       
   261     PassRefPtr<StringImpl> crossThreadString();
       
   262     // Makes a deep copy. Helpful only if you need to use a String on another thread
       
   263     // (use crossThreadString if the method call doesn't need to be threadsafe).
       
   264     // Since StringImpl objects are immutable, there's no other reason to make a copy.
       
   265     PassRefPtr<StringImpl> threadsafeCopy() const;
       
   266 
       
   267     PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
       
   268 
       
   269     UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
       
   270     UChar32 characterStartingAt(unsigned);
       
   271 
       
   272     bool containsOnlyWhitespace();
       
   273 
       
   274     int toIntStrict(bool* ok = 0, int base = 10);
       
   275     unsigned toUIntStrict(bool* ok = 0, int base = 10);
       
   276     int64_t toInt64Strict(bool* ok = 0, int base = 10);
       
   277     uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
       
   278     intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
       
   279 
       
   280     int toInt(bool* ok = 0); // ignores trailing garbage
       
   281     unsigned toUInt(bool* ok = 0); // ignores trailing garbage
       
   282     int64_t toInt64(bool* ok = 0); // ignores trailing garbage
       
   283     uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
       
   284     intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
       
   285 
       
   286     double toDouble(bool* ok = 0);
       
   287     float toFloat(bool* ok = 0);
       
   288 
       
   289     PassRefPtr<StringImpl> lower();
       
   290     PassRefPtr<StringImpl> upper();
       
   291     PassRefPtr<StringImpl> secure(UChar aChar);
       
   292     PassRefPtr<StringImpl> foldCase();
       
   293 
       
   294     PassRefPtr<StringImpl> stripWhiteSpace();
       
   295     PassRefPtr<StringImpl> simplifyWhiteSpace();
       
   296 
       
   297     PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
       
   298 
       
   299     int find(const char*, int index = 0, bool caseSensitive = true);
       
   300     int find(UChar, int index = 0);
       
   301     int find(CharacterMatchFunctionPtr, int index = 0);
       
   302     int find(StringImpl*, int index, bool caseSensitive = true);
       
   303 
       
   304     int reverseFind(UChar, int index);
       
   305     int reverseFind(StringImpl*, int index, bool caseSensitive = true);
       
   306     
       
   307     bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; }
       
   308     bool endsWith(StringImpl*, bool caseSensitive = true);
       
   309 
       
   310     PassRefPtr<StringImpl> replace(UChar, UChar);
       
   311     PassRefPtr<StringImpl> replace(UChar, StringImpl*);
       
   312     PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
       
   313     PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
       
   314 
       
   315     Vector<char> ascii();
       
   316 
       
   317     WTF::Unicode::Direction defaultWritingDirection();
       
   318 
       
   319 #if PLATFORM(CF)
       
   320     CFStringRef createCFString();
       
   321 #endif
       
   322 #ifdef __OBJC__
       
   323     operator NSString*();
       
   324 #endif
       
   325 
       
   326 private:
       
   327     // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
       
   328     static const unsigned s_copyCharsInlineCutOff = 20;
       
   329 
       
   330     static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
       
   331     
       
   332     BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); }
       
   333     bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; }
       
   334     const UChar* m_data;
       
   335     union {
       
   336         void* m_buffer;
       
   337         StringImpl* m_substringBuffer;
       
   338         SharedUChar* m_sharedBuffer;
       
   339     };
       
   340     mutable unsigned m_hash;
       
   341 };
       
   342 
       
   343 bool equal(const StringImpl*, const StringImpl*);
       
   344 bool equal(const StringImpl*, const char*);
       
   345 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
       
   346 
       
   347 bool equalIgnoringCase(StringImpl*, StringImpl*);
       
   348 bool equalIgnoringCase(StringImpl*, const char*);
       
   349 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
       
   350 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
       
   351 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
       
   352 
       
   353 bool equalIgnoringNullity(StringImpl*, StringImpl*);
       
   354 
       
   355 int codePointCompare(const StringImpl*, const StringImpl*);
       
   356 
       
   357 static inline bool isSpaceOrNewline(UChar c)
       
   358 {
       
   359     // Use isASCIISpace() for basic Latin-1.
       
   360     // This will include newlines, which aren't included in Unicode DirWS.
       
   361     return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
       
   362 }
       
   363 
       
   364 // This is a hot function because it's used when parsing HTML.
       
   365 inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
       
   366 {
       
   367     ASSERT(characters);
       
   368     ASSERT(length);
       
   369 
       
   370     // Optimize for the case where there are no Null characters by quickly
       
   371     // searching for nulls, and then using StringImpl::create, which will
       
   372     // memcpy the whole buffer.  This is faster than assigning character by
       
   373     // character during the loop. 
       
   374 
       
   375     // Fast case.
       
   376     int foundNull = 0;
       
   377     for (unsigned i = 0; !foundNull && i < length; i++) {
       
   378         int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
       
   379         foundNull |= !c;
       
   380     }
       
   381     if (!foundNull)
       
   382         return StringImpl::create(characters, length);
       
   383 
       
   384     return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
       
   385 }
       
   386 
       
   387 }
       
   388 
       
   389 using WebCore::equal;
       
   390 
       
   391 namespace WTF {
       
   392 
       
   393     // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
       
   394     template<typename T> struct DefaultHash;
       
   395     template<> struct DefaultHash<WebCore::StringImpl*> {
       
   396         typedef WebCore::StringHash Hash;
       
   397     };
       
   398     template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > {
       
   399         typedef WebCore::StringHash Hash;
       
   400     };
       
   401 
       
   402 }
       
   403 
       
   404 #endif