|
1 /* |
|
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) |
|
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. |
|
4 * Copyright (C) 2009 Google Inc. All rights reserved. |
|
5 * |
|
6 * This library is free software; you can redistribute it and/or |
|
7 * modify it under the terms of the GNU Library General Public |
|
8 * License as published by the Free Software Foundation; either |
|
9 * version 2 of the License, or (at your option) any later version. |
|
10 * |
|
11 * This library is distributed in the hope that it will be useful, |
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 * Library General Public License for more details. |
|
15 * |
|
16 * You should have received a copy of the GNU Library General Public License |
|
17 * along with this library; see the file COPYING.LIB. If not, write to |
|
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
|
19 * Boston, MA 02110-1301, USA. |
|
20 * |
|
21 */ |
|
22 |
|
23 #ifndef StringImpl_h |
|
24 #define StringImpl_h |
|
25 |
|
#include <limits.h>
#include <limits>
#include <wtf/ASCIICType.h>
#include <wtf/CrossThreadRefCounted.h>
#include <wtf/OwnFastMallocPtr.h>
#include <wtf/StdLibExtras.h>
#include <wtf/StringHashFunctions.h>
#include <wtf/Vector.h>
#include <wtf/text/StringImplBase.h>
#include <wtf/unicode/Unicode.h>
|
35 |
|
// CFStringRef is declared locally here; it is the return type of
// StringImpl::createCFString() further down in this header.
#if PLATFORM(CF)
typedef const struct __CFString * CFStringRef;
#endif

// NSString is only declared when this header is compiled as Objective-C(++);
// it is the target type of StringImpl's NSString* conversion operator.
#ifdef __OBJC__
@class NSString;
#endif

// FIXME: This is a temporary layering violation while we move string code to WTF.
// Landing the file moves in one patch, will follow on with patches to change the namespaces.
namespace JSC {

// Both translators are named as friends of WebCore::StringImpl below.
struct IdentifierCStringTranslator;
struct IdentifierUCharBufferTranslator;

}
|
52 |
|
53 // FIXME: This is a temporary layering violation while we move string code to WTF. |
|
54 // Landing the file moves in one patch, will follow on with patches to change the namespaces. |
|
55 namespace WebCore { |
|
56 |
|
// Taken by reference in StringImpl::adopt(StringBuffer&).
class StringBuffer;

// The translators are named as friends of StringImpl; StringHash is the hash
// type that the WTF::DefaultHash specializations at the end of this header select.
struct CStringTranslator;
struct HashAndCharactersTranslator;
struct StringHash;
struct UCharBufferTranslator;

enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };

// SharedUChar is the buffer type stored by StringImpl's BufferShared strings.
typedef OwnFastMallocPtr<const UChar> SharableUChar;
typedef CrossThreadRefCounted<SharableUChar> SharedUChar;
// Single-character predicate accepted by StringImpl::find() and
// StringImpl::removeCharacters().
typedef bool (*CharacterMatchFunctionPtr)(UChar);
|
69 |
|
70 class StringImpl : public StringImplBase { |
|
71 friend struct JSC::IdentifierCStringTranslator; |
|
72 friend struct JSC::IdentifierUCharBufferTranslator; |
|
73 friend struct CStringTranslator; |
|
74 friend struct HashAndCharactersTranslator; |
|
75 friend struct UCharBufferTranslator; |
|
76 friend class AtomicStringImpl; |
|
77 private: |
|
78 // Used to construct static strings, which have an special refCount that can never hit zero. |
|
79 // This means that the static string will never be destroyed, which is important because |
|
80 // static strings will be shared across threads & ref-counted in a non-threadsafe manner. |
|
81 StringImpl(const UChar* characters, unsigned length, StaticStringConstructType) |
|
82 : StringImplBase(length, ConstructStaticString) |
|
83 , m_data(characters) |
|
84 , m_buffer(0) |
|
85 , m_hash(0) |
|
86 { |
|
87 // Ensure that the hash is computed so that AtomicStringHash can call existingHash() |
|
88 // with impunity. The empty string is special because it is never entered into |
|
89 // AtomicString's HashKey, but still needs to compare correctly. |
|
90 hash(); |
|
91 } |
|
92 |
|
93 // Create a normal string with internal storage (BufferInternal) |
|
94 StringImpl(unsigned length) |
|
95 : StringImplBase(length, BufferInternal) |
|
96 , m_data(reinterpret_cast<const UChar*>(this + 1)) |
|
97 , m_buffer(0) |
|
98 , m_hash(0) |
|
99 { |
|
100 ASSERT(m_data); |
|
101 ASSERT(m_length); |
|
102 } |
|
103 |
|
104 // Create a StringImpl adopting ownership of the provided buffer (BufferOwned) |
|
105 StringImpl(const UChar* characters, unsigned length) |
|
106 : StringImplBase(length, BufferOwned) |
|
107 , m_data(characters) |
|
108 , m_buffer(0) |
|
109 , m_hash(0) |
|
110 { |
|
111 ASSERT(m_data); |
|
112 ASSERT(m_length); |
|
113 } |
|
114 |
|
115 // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring) |
|
116 StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base) |
|
117 : StringImplBase(length, BufferSubstring) |
|
118 , m_data(characters) |
|
119 , m_substringBuffer(base.releaseRef()) |
|
120 , m_hash(0) |
|
121 { |
|
122 ASSERT(m_data); |
|
123 ASSERT(m_length); |
|
124 ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); |
|
125 } |
|
126 |
|
127 // Used to construct new strings sharing an existing SharedUChar (BufferShared) |
|
128 StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer) |
|
129 : StringImplBase(length, BufferShared) |
|
130 , m_data(characters) |
|
131 , m_sharedBuffer(sharedBuffer.releaseRef()) |
|
132 , m_hash(0) |
|
133 { |
|
134 ASSERT(m_data); |
|
135 ASSERT(m_length); |
|
136 } |
|
137 |
|
138 // For use only by AtomicString's XXXTranslator helpers. |
|
139 void setHash(unsigned hash) |
|
140 { |
|
141 ASSERT(!isStatic()); |
|
142 ASSERT(!m_hash); |
|
143 ASSERT(hash == computeHash(m_data, m_length)); |
|
144 m_hash = hash; |
|
145 } |
|
146 |
|
147 public: |
|
148 ~StringImpl(); |
|
149 |
|
150 static PassRefPtr<StringImpl> create(const UChar*, unsigned length); |
|
151 static PassRefPtr<StringImpl> create(const char*, unsigned length); |
|
152 static PassRefPtr<StringImpl> create(const char*); |
|
153 static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer); |
|
154 static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length) |
|
155 { |
|
156 ASSERT(rep); |
|
157 ASSERT(length <= rep->length()); |
|
158 |
|
159 if (!length) |
|
160 return empty(); |
|
161 |
|
162 StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get(); |
|
163 return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep)); |
|
164 } |
|
165 |
|
166 static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data); |
|
167 static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output) |
|
168 { |
|
169 if (!length) { |
|
170 output = 0; |
|
171 return empty(); |
|
172 } |
|
173 |
|
174 if (length > ((std::numeric_limits<size_t>::max() - sizeof(StringImpl)) / sizeof(UChar))) { |
|
175 output = 0; |
|
176 return 0; |
|
177 } |
|
178 StringImpl* resultImpl; |
|
179 if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) { |
|
180 output = 0; |
|
181 return 0; |
|
182 } |
|
183 output = reinterpret_cast<UChar*>(resultImpl + 1); |
|
184 return adoptRef(new(resultImpl) StringImpl(length)); |
|
185 } |
|
186 |
|
187 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); } |
|
188 static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&); |
|
189 static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length); |
|
190 |
|
191 template<size_t inlineCapacity> |
|
192 static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector) |
|
193 { |
|
194 if (size_t size = vector.size()) { |
|
195 ASSERT(vector.data()); |
|
196 return adoptRef(new StringImpl(vector.releaseBuffer(), size)); |
|
197 } |
|
198 return empty(); |
|
199 } |
|
200 static PassRefPtr<StringImpl> adopt(StringBuffer&); |
|
201 |
|
202 SharedUChar* sharedBuffer(); |
|
203 const UChar* characters() const { return m_data; } |
|
204 |
|
205 size_t cost() |
|
206 { |
|
207 // For substrings, return the cost of the base string. |
|
208 if (bufferOwnership() == BufferSubstring) |
|
209 return m_substringBuffer->cost(); |
|
210 |
|
211 if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) { |
|
212 m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost; |
|
213 return m_length; |
|
214 } |
|
215 return 0; |
|
216 } |
|
217 |
|
218 bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; } |
|
219 void setIsIdentifier(bool isIdentifier) |
|
220 { |
|
221 ASSERT(!isStatic()); |
|
222 if (isIdentifier) |
|
223 m_refCountAndFlags |= s_refCountFlagIsIdentifier; |
|
224 else |
|
225 m_refCountAndFlags &= ~s_refCountFlagIsIdentifier; |
|
226 } |
|
227 |
|
228 bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; } |
|
229 |
|
230 bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; } |
|
231 void setIsAtomic(bool isIdentifier) |
|
232 { |
|
233 ASSERT(!isStatic()); |
|
234 if (isIdentifier) |
|
235 m_refCountAndFlags |= s_refCountFlagIsAtomic; |
|
236 else |
|
237 m_refCountAndFlags &= ~s_refCountFlagIsAtomic; |
|
238 } |
|
239 |
|
240 unsigned hash() const { if (!m_hash) m_hash = computeHash(m_data, m_length); return m_hash; } |
|
241 unsigned existingHash() const { ASSERT(m_hash); return m_hash; } |
|
242 static unsigned computeHash(const UChar* data, unsigned length) { return WTF::stringHash(data, length); } |
|
243 static unsigned computeHash(const char* data, unsigned length) { return WTF::stringHash(data, length); } |
|
244 static unsigned computeHash(const char* data) { return WTF::stringHash(data); } |
|
245 |
|
246 ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; } |
|
247 ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; } |
|
248 |
|
249 static StringImpl* empty(); |
|
250 |
|
251 static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters) |
|
252 { |
|
253 if (numCharacters <= s_copyCharsInlineCutOff) { |
|
254 for (unsigned i = 0; i < numCharacters; ++i) |
|
255 destination[i] = source[i]; |
|
256 } else |
|
257 memcpy(destination, source, numCharacters * sizeof(UChar)); |
|
258 } |
|
259 |
|
260 // Returns a StringImpl suitable for use on another thread. |
|
261 PassRefPtr<StringImpl> crossThreadString(); |
|
262 // Makes a deep copy. Helpful only if you need to use a String on another thread |
|
263 // (use crossThreadString if the method call doesn't need to be threadsafe). |
|
264 // Since StringImpl objects are immutable, there's no other reason to make a copy. |
|
265 PassRefPtr<StringImpl> threadsafeCopy() const; |
|
266 |
|
267 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); |
|
268 |
|
269 UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; } |
|
270 UChar32 characterStartingAt(unsigned); |
|
271 |
|
272 bool containsOnlyWhitespace(); |
|
273 |
|
274 int toIntStrict(bool* ok = 0, int base = 10); |
|
275 unsigned toUIntStrict(bool* ok = 0, int base = 10); |
|
276 int64_t toInt64Strict(bool* ok = 0, int base = 10); |
|
277 uint64_t toUInt64Strict(bool* ok = 0, int base = 10); |
|
278 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); |
|
279 |
|
280 int toInt(bool* ok = 0); // ignores trailing garbage |
|
281 unsigned toUInt(bool* ok = 0); // ignores trailing garbage |
|
282 int64_t toInt64(bool* ok = 0); // ignores trailing garbage |
|
283 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage |
|
284 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage |
|
285 |
|
286 double toDouble(bool* ok = 0); |
|
287 float toFloat(bool* ok = 0); |
|
288 |
|
289 PassRefPtr<StringImpl> lower(); |
|
290 PassRefPtr<StringImpl> upper(); |
|
291 PassRefPtr<StringImpl> secure(UChar aChar); |
|
292 PassRefPtr<StringImpl> foldCase(); |
|
293 |
|
294 PassRefPtr<StringImpl> stripWhiteSpace(); |
|
295 PassRefPtr<StringImpl> simplifyWhiteSpace(); |
|
296 |
|
297 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); |
|
298 |
|
299 int find(const char*, int index = 0, bool caseSensitive = true); |
|
300 int find(UChar, int index = 0); |
|
301 int find(CharacterMatchFunctionPtr, int index = 0); |
|
302 int find(StringImpl*, int index, bool caseSensitive = true); |
|
303 |
|
304 int reverseFind(UChar, int index); |
|
305 int reverseFind(StringImpl*, int index, bool caseSensitive = true); |
|
306 |
|
307 bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; } |
|
308 bool endsWith(StringImpl*, bool caseSensitive = true); |
|
309 |
|
310 PassRefPtr<StringImpl> replace(UChar, UChar); |
|
311 PassRefPtr<StringImpl> replace(UChar, StringImpl*); |
|
312 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*); |
|
313 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*); |
|
314 |
|
315 Vector<char> ascii(); |
|
316 |
|
317 WTF::Unicode::Direction defaultWritingDirection(); |
|
318 |
|
319 #if PLATFORM(CF) |
|
320 CFStringRef createCFString(); |
|
321 #endif |
|
322 #ifdef __OBJC__ |
|
323 operator NSString*(); |
|
324 #endif |
|
325 |
|
326 private: |
|
327 // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings. |
|
328 static const unsigned s_copyCharsInlineCutOff = 20; |
|
329 |
|
330 static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length); |
|
331 |
|
332 BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); } |
|
333 bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; } |
|
334 const UChar* m_data; |
|
335 union { |
|
336 void* m_buffer; |
|
337 StringImpl* m_substringBuffer; |
|
338 SharedUChar* m_sharedBuffer; |
|
339 }; |
|
340 mutable unsigned m_hash; |
|
341 }; |
|
342 |
|
343 bool equal(const StringImpl*, const StringImpl*); |
|
344 bool equal(const StringImpl*, const char*); |
|
345 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); } |
|
346 |
|
347 bool equalIgnoringCase(StringImpl*, StringImpl*); |
|
348 bool equalIgnoringCase(StringImpl*, const char*); |
|
349 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); } |
|
350 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length); |
|
351 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } |
|
352 |
|
353 bool equalIgnoringNullity(StringImpl*, StringImpl*); |
|
354 |
|
355 int codePointCompare(const StringImpl*, const StringImpl*); |
|
356 |
|
357 static inline bool isSpaceOrNewline(UChar c) |
|
358 { |
|
359 // Use isASCIISpace() for basic Latin-1. |
|
360 // This will include newlines, which aren't included in Unicode DirWS. |
|
361 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral; |
|
362 } |
|
363 |
|
364 // This is a hot function because it's used when parsing HTML. |
|
365 inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length) |
|
366 { |
|
367 ASSERT(characters); |
|
368 ASSERT(length); |
|
369 |
|
370 // Optimize for the case where there are no Null characters by quickly |
|
371 // searching for nulls, and then using StringImpl::create, which will |
|
372 // memcpy the whole buffer. This is faster than assigning character by |
|
373 // character during the loop. |
|
374 |
|
375 // Fast case. |
|
376 int foundNull = 0; |
|
377 for (unsigned i = 0; !foundNull && i < length; i++) { |
|
378 int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS) |
|
379 foundNull |= !c; |
|
380 } |
|
381 if (!foundNull) |
|
382 return StringImpl::create(characters, length); |
|
383 |
|
384 return StringImpl::createStrippingNullCharactersSlowCase(characters, length); |
|
385 } |
|
386 |
|
387 } |
|
388 |
|
// Makes WebCore::equal nameable without qualification from the enclosing scope.
using WebCore::equal;

namespace WTF {

    // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
    // The primary template is only declared here; the two specializations below
    // select StringHash through their nested Hash typedef.
    template<typename T> struct DefaultHash;
    template<> struct DefaultHash<WebCore::StringImpl*> {
        typedef WebCore::StringHash Hash;
    };
    template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > {
        typedef WebCore::StringHash Hash;
    };

}
|
403 |
|
404 #endif |