JavaScriptCore/runtime/LiteralParser.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2009 Apple Inc. All rights reserved.
       
     3  *
       
     4  * Redistribution and use in source and binary forms, with or without
       
     5  * modification, are permitted provided that the following conditions
       
     6  * are met:
       
     7  * 1. Redistributions of source code must retain the above copyright
       
     8  *    notice, this list of conditions and the following disclaimer.
       
     9  * 2. Redistributions in binary form must reproduce the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer in the
       
    11  *    documentation and/or other materials provided with the distribution.
       
    12  *
       
    13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
       
    14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
       
    17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
       
    18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
       
    19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
       
    20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
       
    21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
       
    24  */
       
    25 
       
    26 #include "config.h"
       
    27 #include "LiteralParser.h"
       
    28 
       
    29 #include "JSArray.h"
       
    30 #include "JSString.h"
       
    31 #include "Lexer.h"
       
    32 #include "StringBuilder.h"
       
    33 #include <wtf/ASCIICType.h>
       
    34 #include <wtf/dtoa.h>
       
    35 
       
    36 namespace JSC {
       
    37 
       
    38 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
       
    39 {
       
    40     while (m_ptr < m_end && isASCIISpace(*m_ptr))
       
    41         ++m_ptr;
       
    42 
       
    43     ASSERT(m_ptr <= m_end);
       
    44     if (m_ptr >= m_end) {
       
    45         token.type = TokEnd;
       
    46         token.start = token.end = m_ptr;
       
    47         return TokEnd;
       
    48     }
       
    49     token.type = TokError;
       
    50     token.start = m_ptr;
       
    51     switch (*m_ptr) {
       
    52         case '[':
       
    53             token.type = TokLBracket;
       
    54             token.end = ++m_ptr;
       
    55             return TokLBracket;
       
    56         case ']':
       
    57             token.type = TokRBracket;
       
    58             token.end = ++m_ptr;
       
    59             return TokRBracket;
       
    60         case '(':
       
    61             token.type = TokLParen;
       
    62             token.end = ++m_ptr;
       
    63             return TokLBracket;
       
    64         case ')':
       
    65             token.type = TokRParen;
       
    66             token.end = ++m_ptr;
       
    67             return TokRBracket;
       
    68         case '{':
       
    69             token.type = TokLBrace;
       
    70             token.end = ++m_ptr;
       
    71             return TokLBrace;
       
    72         case '}':
       
    73             token.type = TokRBrace;
       
    74             token.end = ++m_ptr;
       
    75             return TokRBrace;
       
    76         case ',':
       
    77             token.type = TokComma;
       
    78             token.end = ++m_ptr;
       
    79             return TokComma;
       
    80         case ':':
       
    81             token.type = TokColon;
       
    82             token.end = ++m_ptr;
       
    83             return TokColon;
       
    84         case '"':
       
    85             if (m_mode == StrictJSON)
       
    86                 return lexString<StrictJSON>(token);
       
    87             return lexString<NonStrictJSON>(token);
       
    88         case 't':
       
    89             if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
       
    90                 m_ptr += 4;
       
    91                 token.type = TokTrue;
       
    92                 token.end = m_ptr;
       
    93                 return TokTrue;
       
    94             }
       
    95             break;
       
    96         case 'f':
       
    97             if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
       
    98                 m_ptr += 5;
       
    99                 token.type = TokFalse;
       
   100                 token.end = m_ptr;
       
   101                 return TokFalse;
       
   102             }
       
   103             break;
       
   104         case 'n':
       
   105             if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
       
   106                 m_ptr += 4;
       
   107                 token.type = TokNull;
       
   108                 token.end = m_ptr;
       
   109                 return TokNull;
       
   110             }
       
   111             break;    
       
   112         case '-':
       
   113         case '0':
       
   114         case '1':
       
   115         case '2':
       
   116         case '3':
       
   117         case '4':
       
   118         case '5':
       
   119         case '6':
       
   120         case '7':
       
   121         case '8':
       
   122         case '9':
       
   123             return lexNumber(token);
       
   124     }
       
   125     return TokError;
       
   126 }
       
   127 
       
   128 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
       
   129 {
       
   130     return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
       
   131 }
       
   132 
       
   133 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
       
   134 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
       
   135 {
       
   136     ++m_ptr;
       
   137     const UChar* runStart;
       
   138     StringBuilder builder;
       
   139     do {
       
   140         runStart = m_ptr;
       
   141         while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
       
   142             ++m_ptr;
       
   143         if (runStart < m_ptr)
       
   144             builder.append(runStart, m_ptr - runStart);
       
   145         if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
       
   146             ++m_ptr;
       
   147             if (m_ptr >= m_end)
       
   148                 return TokError;
       
   149             switch (*m_ptr) {
       
   150                 case '"':
       
   151                     builder.append('"');
       
   152                     m_ptr++;
       
   153                     break;
       
   154                 case '\\':
       
   155                     builder.append('\\');
       
   156                     m_ptr++;
       
   157                     break;
       
   158                 case '/':
       
   159                     builder.append('/');
       
   160                     m_ptr++;
       
   161                     break;
       
   162                 case 'b':
       
   163                     builder.append('\b');
       
   164                     m_ptr++;
       
   165                     break;
       
   166                 case 'f':
       
   167                     builder.append('\f');
       
   168                     m_ptr++;
       
   169                     break;
       
   170                 case 'n':
       
   171                     builder.append('\n');
       
   172                     m_ptr++;
       
   173                     break;
       
   174                 case 'r':
       
   175                     builder.append('\r');
       
   176                     m_ptr++;
       
   177                     break;
       
   178                 case 't':
       
   179                     builder.append('\t');
       
   180                     m_ptr++;
       
   181                     break;
       
   182 
       
   183                 case 'u':
       
   184                     if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
       
   185                         return TokError;
       
   186                     for (int i = 1; i < 5; i++) {
       
   187                         if (!isASCIIHexDigit(m_ptr[i]))
       
   188                             return TokError;
       
   189                     }
       
   190                     builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
       
   191                     m_ptr += 5;
       
   192                     break;
       
   193 
       
   194                 default:
       
   195                     return TokError;
       
   196             }
       
   197         }
       
   198     } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
       
   199 
       
   200     if (m_ptr >= m_end || *m_ptr != '"')
       
   201         return TokError;
       
   202 
       
   203     token.stringToken = builder.build();
       
   204     token.type = TokString;
       
   205     token.end = ++m_ptr;
       
   206     return TokString;
       
   207 }
       
   208 
       
   209 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
       
   210 {
       
   211     // ES5 and json.org define numbers as
       
   212     // number
       
   213     //     int
       
   214     //     int frac? exp?
       
   215     //
       
   216     // int
       
   217     //     -? 0
       
   218     //     -? digit1-9 digits?
       
   219     //
       
   220     // digits
       
   221     //     digit digits?
       
   222     //
       
   223     // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
       
   224 
       
   225     if (m_ptr < m_end && *m_ptr == '-') // -?
       
   226         ++m_ptr;
       
   227     
       
   228     // (0 | [1-9][0-9]*)
       
   229     if (m_ptr < m_end && *m_ptr == '0') // 0
       
   230         ++m_ptr;
       
   231     else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
       
   232         ++m_ptr;
       
   233         // [0-9]*
       
   234         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
       
   235             ++m_ptr;
       
   236     } else
       
   237         return TokError;
       
   238 
       
   239     // ('.' [0-9]+)?
       
   240     if (m_ptr < m_end && *m_ptr == '.') {
       
   241         ++m_ptr;
       
   242         // [0-9]+
       
   243         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
       
   244             return TokError;
       
   245 
       
   246         ++m_ptr;
       
   247         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
       
   248             ++m_ptr;
       
   249     }
       
   250 
       
   251     //  ([eE][+-]? [0-9]+)?
       
   252     if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
       
   253         ++m_ptr;
       
   254 
       
   255         // [-+]?
       
   256         if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
       
   257             ++m_ptr;
       
   258 
       
   259         // [0-9]+
       
   260         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
       
   261             return TokError;
       
   262         
       
   263         ++m_ptr;
       
   264         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
       
   265             ++m_ptr;
       
   266     }
       
   267     
       
   268     token.type = TokNumber;
       
   269     token.end = m_ptr;
       
   270     Vector<char, 64> buffer(token.end - token.start + 1);
       
   271     int i;
       
   272     for (i = 0; i < token.end - token.start; i++) {
       
   273         ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
       
   274         buffer[i] = static_cast<char>(token.start[i]);
       
   275     }
       
   276     buffer[i] = 0;
       
   277     char* end;
       
   278     token.numberToken = WTF::strtod(buffer.data(), &end);
       
   279     ASSERT(buffer.data() + (token.end - token.start) == end);
       
   280     return TokNumber;
       
   281 }
       
   282 
       
   283 JSValue LiteralParser::parse(ParserState initialState)
       
   284 {
       
   285     ParserState state = initialState;
       
   286     MarkedArgumentBuffer objectStack;
       
   287     JSValue lastValue;
       
   288     Vector<ParserState, 16> stateStack;
       
   289     Vector<Identifier, 16> identifierStack;
       
   290     while (1) {
       
   291         switch(state) {
       
   292             startParseArray:
       
   293             case StartParseArray: {
       
   294                 JSArray* array = constructEmptyArray(m_exec);
       
   295                 objectStack.append(array);
       
   296                 // fallthrough
       
   297             }
       
   298             doParseArrayStartExpression:
       
   299             case DoParseArrayStartExpression: {
       
   300                 TokenType lastToken = m_lexer.currentToken().type;
       
   301                 if (m_lexer.next() == TokRBracket) {
       
   302                     if (lastToken == TokComma)
       
   303                         return JSValue();
       
   304                     m_lexer.next();
       
   305                     lastValue = objectStack.last();
       
   306                     objectStack.removeLast();
       
   307                     break;
       
   308                 }
       
   309 
       
   310                 stateStack.append(DoParseArrayEndExpression);
       
   311                 goto startParseExpression;
       
   312             }
       
   313             case DoParseArrayEndExpression: {
       
   314                  asArray(objectStack.last())->push(m_exec, lastValue);
       
   315                 
       
   316                 if (m_lexer.currentToken().type == TokComma)
       
   317                     goto doParseArrayStartExpression;
       
   318 
       
   319                 if (m_lexer.currentToken().type != TokRBracket)
       
   320                     return JSValue();
       
   321                 
       
   322                 m_lexer.next();
       
   323                 lastValue = objectStack.last();
       
   324                 objectStack.removeLast();
       
   325                 break;
       
   326             }
       
   327             startParseObject:
       
   328             case StartParseObject: {
       
   329                 JSObject* object = constructEmptyObject(m_exec);
       
   330                 objectStack.append(object);
       
   331 
       
   332                 TokenType type = m_lexer.next();
       
   333                 if (type == TokString) {
       
   334                     Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
       
   335 
       
   336                     // Check for colon
       
   337                     if (m_lexer.next() != TokColon)
       
   338                         return JSValue();
       
   339                     
       
   340                     m_lexer.next();
       
   341                     identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
       
   342                     stateStack.append(DoParseObjectEndExpression);
       
   343                     goto startParseExpression;
       
   344                 } else if (type != TokRBrace) 
       
   345                     return JSValue();
       
   346                 m_lexer.next();
       
   347                 lastValue = objectStack.last();
       
   348                 objectStack.removeLast();
       
   349                 break;
       
   350             }
       
   351             doParseObjectStartExpression:
       
   352             case DoParseObjectStartExpression: {
       
   353                 TokenType type = m_lexer.next();
       
   354                 if (type != TokString)
       
   355                     return JSValue();
       
   356                 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
       
   357 
       
   358                 // Check for colon
       
   359                 if (m_lexer.next() != TokColon)
       
   360                     return JSValue();
       
   361 
       
   362                 m_lexer.next();
       
   363                 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
       
   364                 stateStack.append(DoParseObjectEndExpression);
       
   365                 goto startParseExpression;
       
   366             }
       
   367             case DoParseObjectEndExpression:
       
   368             {
       
   369                 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
       
   370                 identifierStack.removeLast();
       
   371                 if (m_lexer.currentToken().type == TokComma)
       
   372                     goto doParseObjectStartExpression;
       
   373                 if (m_lexer.currentToken().type != TokRBrace)
       
   374                     return JSValue();
       
   375                 m_lexer.next();
       
   376                 lastValue = objectStack.last();
       
   377                 objectStack.removeLast();
       
   378                 break;
       
   379             }
       
   380             startParseExpression:
       
   381             case StartParseExpression: {
       
   382                 switch (m_lexer.currentToken().type) {
       
   383                     case TokLBracket:
       
   384                         goto startParseArray;
       
   385                     case TokLBrace:
       
   386                         goto startParseObject;
       
   387                     case TokString: {
       
   388                         Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
       
   389                         m_lexer.next();
       
   390                         lastValue = jsString(m_exec, stringToken.stringToken);
       
   391                         break;
       
   392                     }
       
   393                     case TokNumber: {
       
   394                         Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
       
   395                         m_lexer.next();
       
   396                         lastValue = jsNumber(m_exec, numberToken.numberToken);
       
   397                         break;
       
   398                     }
       
   399                     case TokNull:
       
   400                         m_lexer.next();
       
   401                         lastValue = jsNull();
       
   402                         break;
       
   403 
       
   404                     case TokTrue:
       
   405                         m_lexer.next();
       
   406                         lastValue = jsBoolean(true);
       
   407                         break;
       
   408 
       
   409                     case TokFalse:
       
   410                         m_lexer.next();
       
   411                         lastValue = jsBoolean(false);
       
   412                         break;
       
   413 
       
   414                     default:
       
   415                         // Error
       
   416                         return JSValue();
       
   417                 }
       
   418                 break;
       
   419             }
       
   420             case StartParseStatement: {
       
   421                 switch (m_lexer.currentToken().type) {
       
   422                     case TokLBracket:
       
   423                     case TokNumber:
       
   424                     case TokString:
       
   425                         goto startParseExpression;
       
   426 
       
   427                     case TokLParen: {
       
   428                         m_lexer.next();
       
   429                         stateStack.append(StartParseStatementEndStatement);
       
   430                         goto startParseExpression;
       
   431                     }
       
   432                     default:
       
   433                         return JSValue();
       
   434                 }
       
   435             }
       
   436             case StartParseStatementEndStatement: {
       
   437                 ASSERT(stateStack.isEmpty());
       
   438                 if (m_lexer.currentToken().type != TokRParen)
       
   439                     return JSValue();
       
   440                 if (m_lexer.next() == TokEnd)
       
   441                     return lastValue;
       
   442                 return JSValue();
       
   443             }
       
   444             default:
       
   445                 ASSERT_NOT_REACHED();
       
   446         }
       
   447         if (stateStack.isEmpty())
       
   448             return lastValue;
       
   449         state = stateStack.last();
       
   450         stateStack.removeLast();
       
   451         continue;
       
   452     }
       
   453 }
       
   454 
       
   455 }