JavaScriptCore/runtime/UString.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
       
     3  *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
       
     4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
       
     5  *  Copyright (C) 2009 Google Inc. All rights reserved.
       
     6  *
       
     7  *  This library is free software; you can redistribute it and/or
       
     8  *  modify it under the terms of the GNU Library General Public
       
     9  *  License as published by the Free Software Foundation; either
       
    10  *  version 2 of the License, or (at your option) any later version.
       
    11  *
       
    12  *  This library is distributed in the hope that it will be useful,
       
    13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    15  *  Library General Public License for more details.
       
    16  *
       
    17  *  You should have received a copy of the GNU Library General Public License
       
    18  *  along with this library; see the file COPYING.LIB.  If not, write to
       
    19  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
       
    20  *  Boston, MA 02110-1301, USA.
       
    21  *
       
    22  */
       
    23 
       
    24 #include "config.h"
       
    25 #include "UString.h"
       
    26 
       
    27 #include "JSGlobalObjectFunctions.h"
       
    28 #include "Collector.h"
       
    29 #include "dtoa.h"
       
    30 #include "Identifier.h"
       
    31 #include "Operations.h"
       
    32 #include <ctype.h>
       
    33 #include <limits.h>
       
    34 #include <limits>
       
    35 #include <stdio.h>
       
    36 #include <stdlib.h>
       
    37 #include <wtf/ASCIICType.h>
       
    38 #include <wtf/Assertions.h>
       
    39 #include <wtf/MathExtras.h>
       
    40 #include <wtf/StringExtras.h>
       
    41 #include <wtf/Vector.h>
       
    42 #include <wtf/unicode/UTF8.h>
       
    43 
       
    44 #if HAVE(STRINGS_H)
       
    45 #include <strings.h>
       
    46 #endif
       
    47 
       
    48 using namespace WTF;
       
    49 using namespace WTF::Unicode;
       
    50 using namespace std;
       
    51 
       
    52 namespace JSC {
       
    53 
       
// NaN and Inf are only declared here; their definitions live outside this file.
// They are used below as the not-a-number and infinity results of number parsing.
extern const double NaN;
extern const double Inf;

// The null string is immutable, except for refCount.
// This pointer is assigned by initializeUString().
UString* UString::s_nullUString;

// Compile-time check that a UString is exactly the size of one pointer.
COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
       
    61 
       
    62 void initializeUString()
       
    63 {
       
    64     // UStringImpl::empty() does not construct its static string in a threadsafe fashion,
       
    65     // so ensure it has been initialized from here.
       
    66     UStringImpl::empty();
       
    67 
       
    68     UString::s_nullUString = new UString;
       
    69 }
       
    70 
       
    71 UString::UString(const char* c)
       
    72     : m_rep(Rep::create(c))
       
    73 {
       
    74 }
       
    75 
       
    76 UString::UString(const char* c, unsigned length)
       
    77     : m_rep(Rep::create(c, length))
       
    78 {
       
    79 }
       
    80 
       
    81 UString::UString(const UChar* c, unsigned length)
       
    82     : m_rep(Rep::create(c, length))
       
    83 {
       
    84 }
       
    85 
       
    86 UString UString::from(int i)
       
    87 {
       
    88     UChar buf[1 + sizeof(i) * 3];
       
    89     UChar* end = buf + sizeof(buf) / sizeof(UChar);
       
    90     UChar* p = end;
       
    91 
       
    92     if (i == 0)
       
    93         *--p = '0';
       
    94     else if (i == INT_MIN) {
       
    95         char minBuf[1 + sizeof(i) * 3];
       
    96         snprintf(minBuf, sizeof(minBuf), "%d", INT_MIN);
       
    97         return UString(minBuf);
       
    98     } else {
       
    99         bool negative = false;
       
   100         if (i < 0) {
       
   101             negative = true;
       
   102             i = -i;
       
   103         }
       
   104         while (i) {
       
   105             *--p = static_cast<unsigned short>((i % 10) + '0');
       
   106             i /= 10;
       
   107         }
       
   108         if (negative)
       
   109             *--p = '-';
       
   110     }
       
   111 
       
   112     return UString(p, static_cast<unsigned>(end - p));
       
   113 }
       
   114 
       
   115 UString UString::from(long long i)
       
   116 {
       
   117     UChar buf[1 + sizeof(i) * 3];
       
   118     UChar* end = buf + sizeof(buf) / sizeof(UChar);
       
   119     UChar* p = end;
       
   120 
       
   121     if (i == 0)
       
   122         *--p = '0';
       
   123     else if (i == std::numeric_limits<long long>::min()) {
       
   124         char minBuf[1 + sizeof(i) * 3];
       
   125 #if OS(WINDOWS)
       
   126         snprintf(minBuf, sizeof(minBuf), "%I64d", std::numeric_limits<long long>::min());
       
   127 #else
       
   128         snprintf(minBuf, sizeof(minBuf), "%lld", std::numeric_limits<long long>::min());
       
   129 #endif
       
   130         return UString(minBuf);
       
   131     } else {
       
   132         bool negative = false;
       
   133         if (i < 0) {
       
   134             negative = true;
       
   135             i = -i;
       
   136         }
       
   137         while (i) {
       
   138             *--p = static_cast<unsigned short>((i % 10) + '0');
       
   139             i /= 10;
       
   140         }
       
   141         if (negative)
       
   142             *--p = '-';
       
   143     }
       
   144 
       
   145     return UString(p, static_cast<unsigned>(end - p));
       
   146 }
       
   147 
       
   148 UString UString::from(unsigned u)
       
   149 {
       
   150     UChar buf[sizeof(u) * 3];
       
   151     UChar* end = buf + sizeof(buf) / sizeof(UChar);
       
   152     UChar* p = end;
       
   153 
       
   154     if (u == 0)
       
   155         *--p = '0';
       
   156     else {
       
   157         while (u) {
       
   158             *--p = static_cast<unsigned short>((u % 10) + '0');
       
   159             u /= 10;
       
   160         }
       
   161     }
       
   162 
       
   163     return UString(p, static_cast<unsigned>(end - p));
       
   164 }
       
   165 
       
   166 UString UString::from(long l)
       
   167 {
       
   168     UChar buf[1 + sizeof(l) * 3];
       
   169     UChar* end = buf + sizeof(buf) / sizeof(UChar);
       
   170     UChar* p = end;
       
   171 
       
   172     if (l == 0)
       
   173         *--p = '0';
       
   174     else if (l == LONG_MIN) {
       
   175         char minBuf[1 + sizeof(l) * 3];
       
   176         snprintf(minBuf, sizeof(minBuf), "%ld", LONG_MIN);
       
   177         return UString(minBuf);
       
   178     } else {
       
   179         bool negative = false;
       
   180         if (l < 0) {
       
   181             negative = true;
       
   182             l = -l;
       
   183         }
       
   184         while (l) {
       
   185             *--p = static_cast<unsigned short>((l % 10) + '0');
       
   186             l /= 10;
       
   187         }
       
   188         if (negative)
       
   189             *--p = '-';
       
   190     }
       
   191 
       
   192     return UString(p, end - p);
       
   193 }
       
   194 
       
   195 UString UString::from(double d)
       
   196 {
       
   197     DtoaBuffer buffer;
       
   198     unsigned length;
       
   199     doubleToStringInJavaScriptFormat(d, buffer, &length);
       
   200     return UString(buffer, length);
       
   201 }
       
   202 
       
   203 char* UString::ascii() const
       
   204 {
       
   205     static char* asciiBuffer = 0;
       
   206 
       
   207     unsigned length = size();
       
   208     unsigned neededSize = length + 1;
       
   209     delete[] asciiBuffer;
       
   210     asciiBuffer = new char[neededSize];
       
   211 
       
   212     const UChar* p = data();
       
   213     char* q = asciiBuffer;
       
   214     const UChar* limit = p + length;
       
   215     while (p != limit) {
       
   216         *q = static_cast<char>(p[0]);
       
   217         ++p;
       
   218         ++q;
       
   219     }
       
   220     *q = '\0';
       
   221 
       
   222     return asciiBuffer;
       
   223 }
       
   224 
       
   225 bool UString::is8Bit() const
       
   226 {
       
   227     const UChar* u = data();
       
   228     const UChar* limit = u + size();
       
   229     while (u < limit) {
       
   230         if (u[0] > 0xFF)
       
   231             return false;
       
   232         ++u;
       
   233     }
       
   234 
       
   235     return true;
       
   236 }
       
   237 
       
   238 UChar UString::operator[](unsigned pos) const
       
   239 {
       
   240     if (pos >= size())
       
   241         return '\0';
       
   242     return data()[pos];
       
   243 }
       
   244 
       
   245 static inline bool isInfinity(double number)
       
   246 {
       
   247     return number == Inf || number == -Inf;
       
   248 }
       
   249 
       
   250 static bool isInfinity(const UChar* data, const UChar* end)
       
   251 {
       
   252     return data + 7 < end
       
   253         && data[0] == 'I'
       
   254         && data[1] == 'n'
       
   255         && data[2] == 'f'
       
   256         && data[3] == 'i'
       
   257         && data[4] == 'n'
       
   258         && data[5] == 'i'
       
   259         && data[6] == 't'
       
   260         && data[7] == 'y';
       
   261 }
       
   262 
       
// Parses this string as a number and returns the result as a double.
//
// Leading white space (as defined by isStrWhiteSpace) is skipped, then either a
// hexadecimal literal ("0x"/"0X" followed by hex digits) or a decimal number is
// parsed. The decimal path also accepts an optionally signed "Infinity".
//
// tolerateTrailingJunk: when false, anything other than white space after the
//     number makes the result NaN; when true, whatever follows is ignored.
// tolerateEmptyString: when true, an empty or all-white-space string yields 0;
//     when false, it yields NaN.
double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
{
    unsigned size = this->size();

    // Fast path for one-character strings: a digit is its own value, a single
    // white-space character counts as an empty string, anything else is NaN.
    if (size == 1) {
        UChar c = data()[0];
        if (isASCIIDigit(c))
            return c - '0';
        if (isStrWhiteSpace(c) && tolerateEmptyString)
            return 0;
        return NaN;
    }

    // NOTE(review): two FIXMEs used to sit here. One asked to stop using
    // UTF8String for the conversion; the other said only ASCII spaces were
    // skipped. Neither matches the code below any more: the decimal path
    // builds its own byte buffer, and white space is skipped by calling
    // isStrWhiteSpace on each UChar.

    const UChar* data = this->data();
    const UChar* end = data + size;

    // Skip leading white space.
    for (; data < end; ++data) {
        if (!isStrWhiteSpace(*data))
            break;
    }

    // Empty string.
    if (data == end)
        return tolerateEmptyString ? 0.0 : NaN;

    double number;

    // "| 0x20" folds 'X' onto 'x', so both "0x" and "0X" prefixes match. At
    // least one hex digit must follow the prefix for the hex path to be taken.
    if (data[0] == '0' && data + 2 < end && (data[1] | 0x20) == 'x' && isASCIIHexDigit(data[2])) {
        // Hex number.
        data += 2;
        const UChar* firstDigitPosition = data;
        number = 0;
        // data points at a hex digit on entry to every iteration.
        while (true) {
            number = number * 16 + toASCIIHexValue(*data);
            ++data;
            if (data == end)
                break;
            if (!isASCIIHexDigit(*data))
                break;
        }
        // For large values the running sum is replaced by the result of
        // parseIntOverflow over the same digits.
        // NOTE(review): presumably this is because the double accumulation
        // above loses precision past the mantissa -- parseIntOverflow and
        // mantissaOverflowLowerBound are defined outside this file; confirm.
        if (number >= mantissaOverflowLowerBound)
            number = parseIntOverflow(firstDigitPosition, data - firstDigitPosition, 16);
    } else {
        // Decimal number.

        // Put into a null-terminated byte buffer. Non-ASCII characters are
        // written as 0, so they terminate the text that strtod sees.
        Vector<char, 32> byteBuffer;
        for (const UChar* characters = data; characters < end; ++characters) {
            UChar character = *characters;
            byteBuffer.append(isASCII(character) ? character : 0);
        }
        byteBuffer.append(0);

        char* byteBufferEnd;
        number = WTF::strtod(byteBuffer.data(), &byteBufferEnd);
        // The byte buffer and the UChar range correspond one-to-one, so the
        // offset strtod stopped at maps straight back onto the original text.
        const UChar* pastNumber = data + (byteBufferEnd - byteBuffer.data());

        // Accept the strtod result when it consumed something (or produced a
        // nonzero value) and the value is finite.
        if ((number || pastNumber != data) && !isInfinity(number))
            data = pastNumber;
        else {
            // We used strtod() to do the conversion. However, strtod() handles
            // infinite values slightly differently than JavaScript in that it
            // converts the string "inf" with any capitalization to infinity,
            // whereas the ECMA spec requires that it be converted to NaN.

            // Skip an optional sign, remembering which infinity it selects.
            double signedInfinity = Inf;
            if (data < end) {
                if (*data == '+')
                    data++;
                else if (*data == '-') {
                    signedInfinity = -Inf;
                    data++;
                }
            }
            if (isInfinity(data, end)) {
                // The exact spelling "Infinity": consume its eight characters.
                number = signedInfinity;
                data += 8;
            } else if (isInfinity(number) && data < end && (*data | 0x20) != 'i')
                // strtod returned an infinity for text that does not start
                // with 'i'/'I' (so it was not an "inf"-style word); keep it.
                data = pastNumber;
            else
                return NaN;
        }
    }

    // Look for trailing junk.
    if (!tolerateTrailingJunk) {
        // Allow trailing white space.
        for (; data < end; ++data) {
            if (!isStrWhiteSpace(*data))
                break;
        }
        if (data != end)
            return NaN;
    }

    return number;
}
       
   372 
       
   373 double UString::toDouble(bool tolerateTrailingJunk) const
       
   374 {
       
   375     return toDouble(tolerateTrailingJunk, true);
       
   376 }
       
   377 
       
   378 double UString::toDouble() const
       
   379 {
       
   380     return toDouble(false, true);
       
   381 }
       
   382 
       
   383 uint32_t UString::toUInt32(bool* ok) const
       
   384 {
       
   385     double d = toDouble();
       
   386     bool b = true;
       
   387 
       
   388     if (d != static_cast<uint32_t>(d)) {
       
   389         b = false;
       
   390         d = 0;
       
   391     }
       
   392 
       
   393     if (ok)
       
   394         *ok = b;
       
   395 
       
   396     return static_cast<uint32_t>(d);
       
   397 }
       
   398 
       
   399 uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
       
   400 {
       
   401     double d = toDouble(false, tolerateEmptyString);
       
   402     bool b = true;
       
   403 
       
   404     if (d != static_cast<uint32_t>(d)) {
       
   405         b = false;
       
   406         d = 0;
       
   407     }
       
   408 
       
   409     if (ok)
       
   410         *ok = b;
       
   411 
       
   412     return static_cast<uint32_t>(d);
       
   413 }
       
   414 
       
   415 uint32_t UString::toStrictUInt32(bool* ok) const
       
   416 {
       
   417     if (ok)
       
   418         *ok = false;
       
   419 
       
   420     // Empty string is not OK.
       
   421     unsigned len = m_rep->length();
       
   422     if (len == 0)
       
   423         return 0;
       
   424     const UChar* p = m_rep->characters();
       
   425     unsigned short c = p[0];
       
   426 
       
   427     // If the first digit is 0, only 0 itself is OK.
       
   428     if (c == '0') {
       
   429         if (len == 1 && ok)
       
   430             *ok = true;
       
   431         return 0;
       
   432     }
       
   433 
       
   434     // Convert to UInt32, checking for overflow.
       
   435     uint32_t i = 0;
       
   436     while (1) {
       
   437         // Process character, turning it into a digit.
       
   438         if (c < '0' || c > '9')
       
   439             return 0;
       
   440         const unsigned d = c - '0';
       
   441 
       
   442         // Multiply by 10, checking for overflow out of 32 bits.
       
   443         if (i > 0xFFFFFFFFU / 10)
       
   444             return 0;
       
   445         i *= 10;
       
   446 
       
   447         // Add in the digit, checking for overflow out of 32 bits.
       
   448         const unsigned max = 0xFFFFFFFFU - d;
       
   449         if (i > max)
       
   450             return 0;
       
   451         i += d;
       
   452 
       
   453         // Handle end of string.
       
   454         if (--len == 0) {
       
   455             if (ok)
       
   456                 *ok = true;
       
   457             return i;
       
   458         }
       
   459 
       
   460         // Get next character.
       
   461         c = *(++p);
       
   462     }
       
   463 }
       
   464 
       
   465 unsigned UString::find(const UString& f, unsigned pos) const
       
   466 {
       
   467     unsigned fsz = f.size();
       
   468 
       
   469     if (fsz == 1) {
       
   470         UChar ch = f[0];
       
   471         const UChar* end = data() + size();
       
   472         for (const UChar* c = data() + pos; c < end; c++) {
       
   473             if (*c == ch)
       
   474                 return static_cast<unsigned>(c - data());
       
   475         }
       
   476         return NotFound;
       
   477     }
       
   478 
       
   479     unsigned sz = size();
       
   480     if (sz < fsz)
       
   481         return NotFound;
       
   482     if (fsz == 0)
       
   483         return pos;
       
   484     const UChar* end = data() + sz - fsz;
       
   485     unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
       
   486     const UChar* fdata = f.data();
       
   487     unsigned short fchar = fdata[0];
       
   488     ++fdata;
       
   489     for (const UChar* c = data() + pos; c <= end; c++) {
       
   490         if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
       
   491             return static_cast<unsigned>(c - data());
       
   492     }
       
   493 
       
   494     return NotFound;
       
   495 }
       
   496 
       
   497 unsigned UString::find(UChar ch, unsigned pos) const
       
   498 {
       
   499     const UChar* end = data() + size();
       
   500     for (const UChar* c = data() + pos; c < end; c++) {
       
   501         if (*c == ch)
       
   502             return static_cast<unsigned>(c - data());
       
   503     }
       
   504 
       
   505     return NotFound;
       
   506 }
       
   507 
       
   508 unsigned UString::rfind(const UString& f, unsigned pos) const
       
   509 {
       
   510     unsigned sz = size();
       
   511     unsigned fsz = f.size();
       
   512     if (sz < fsz)
       
   513         return NotFound;
       
   514     if (pos > sz - fsz)
       
   515         pos = sz - fsz;
       
   516     if (fsz == 0)
       
   517         return pos;
       
   518     unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
       
   519     const UChar* fdata = f.data();
       
   520     for (const UChar* c = data() + pos; c >= data(); c--) {
       
   521         if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
       
   522             return static_cast<unsigned>(c - data());
       
   523     }
       
   524 
       
   525     return NotFound;
       
   526 }
       
   527 
       
   528 unsigned UString::rfind(UChar ch, unsigned pos) const
       
   529 {
       
   530     if (isEmpty())
       
   531         return NotFound;
       
   532     if (pos + 1 >= size())
       
   533         pos = size() - 1;
       
   534     for (const UChar* c = data() + pos; c >= data(); c--) {
       
   535         if (*c == ch)
       
   536             return static_cast<unsigned>(c - data());
       
   537     }
       
   538 
       
   539     return NotFound;
       
   540 }
       
   541 
       
   542 UString UString::substr(unsigned pos, unsigned len) const
       
   543 {
       
   544     unsigned s = size();
       
   545 
       
   546     if (pos >= s)
       
   547         pos = s;
       
   548     unsigned limit = s - pos;
       
   549     if (len > limit)
       
   550         len = limit;
       
   551 
       
   552     if (pos == 0 && len == s)
       
   553         return *this;
       
   554 
       
   555     return UString(Rep::create(m_rep, pos, len));
       
   556 }
       
   557 
       
   558 bool operator==(const UString& s1, const char *s2)
       
   559 {
       
   560     if (s2 == 0)
       
   561         return s1.isEmpty();
       
   562 
       
   563     const UChar* u = s1.data();
       
   564     const UChar* uend = u + s1.size();
       
   565     while (u != uend && *s2) {
       
   566         if (u[0] != (unsigned char)*s2)
       
   567             return false;
       
   568         s2++;
       
   569         u++;
       
   570     }
       
   571 
       
   572     return u == uend && *s2 == 0;
       
   573 }
       
   574 
       
   575 bool operator<(const UString& s1, const UString& s2)
       
   576 {
       
   577     const unsigned l1 = s1.size();
       
   578     const unsigned l2 = s2.size();
       
   579     const unsigned lmin = l1 < l2 ? l1 : l2;
       
   580     const UChar* c1 = s1.data();
       
   581     const UChar* c2 = s2.data();
       
   582     unsigned l = 0;
       
   583     while (l < lmin && *c1 == *c2) {
       
   584         c1++;
       
   585         c2++;
       
   586         l++;
       
   587     }
       
   588     if (l < lmin)
       
   589         return (c1[0] < c2[0]);
       
   590 
       
   591     return (l1 < l2);
       
   592 }
       
   593 
       
   594 bool operator>(const UString& s1, const UString& s2)
       
   595 {
       
   596     const unsigned l1 = s1.size();
       
   597     const unsigned l2 = s2.size();
       
   598     const unsigned lmin = l1 < l2 ? l1 : l2;
       
   599     const UChar* c1 = s1.data();
       
   600     const UChar* c2 = s2.data();
       
   601     unsigned l = 0;
       
   602     while (l < lmin && *c1 == *c2) {
       
   603         c1++;
       
   604         c2++;
       
   605         l++;
       
   606     }
       
   607     if (l < lmin)
       
   608         return (c1[0] > c2[0]);
       
   609 
       
   610     return (l1 > l2);
       
   611 }
       
   612 
       
   613 CString UString::UTF8String(bool strict) const
       
   614 {
       
   615     // Allocate a buffer big enough to hold all the characters.
       
   616     const unsigned length = size();
       
   617     Vector<char, 1024> buffer(length * 3);
       
   618 
       
   619     // Convert to runs of 8-bit characters.
       
   620     char* p = buffer.data();
       
   621     const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
       
   622     ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
       
   623     if (result != conversionOK)
       
   624         return CString();
       
   625 
       
   626     return CString(buffer.data(), p - buffer.data());
       
   627 }
       
   628 
       
   629 } // namespace JSC