JavaScriptCore/runtime/LiteralParser.cpp
changeset 0 4f2f89ce4247
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/JavaScriptCore/runtime/LiteralParser.cpp	Fri Sep 17 09:02:29 2010 +0300
@@ -0,0 +1,455 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "LiteralParser.h"
+
+#include "JSArray.h"
+#include "JSString.h"
+#include "Lexer.h"
+#include "StringBuilder.h"
+#include <wtf/ASCIICType.h>
+#include <wtf/dtoa.h>
+
+namespace JSC {
+
+// Scans the next token from the input, skipping any leading ASCII whitespace.
+//
+// On return token.type holds the kind of token that was recognised and the
+// same value is returned to the caller. token.start is set to the first
+// character of the token and, for every successfully recognised token,
+// token.end is set to one past its last character. At end of input TokEnd is
+// produced with token.start == token.end. Unrecognised or malformed input
+// yields TokError.
+LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
+{
+    while (m_ptr < m_end && isASCIISpace(*m_ptr))
+        ++m_ptr;
+
+    ASSERT(m_ptr <= m_end);
+    if (m_ptr >= m_end) {
+        token.type = TokEnd;
+        token.start = token.end = m_ptr;
+        return TokEnd;
+    }
+    // Assume failure until one of the cases below recognises a token.
+    token.type = TokError;
+    token.start = m_ptr;
+    switch (*m_ptr) {
+        case '[':
+            token.type = TokLBracket;
+            token.end = ++m_ptr;
+            return TokLBracket;
+        case ']':
+            token.type = TokRBracket;
+            token.end = ++m_ptr;
+            return TokRBracket;
+        case '(':
+            token.type = TokLParen;
+            token.end = ++m_ptr;
+            // The returned value must agree with token.type; returning the
+            // bracket token here would make a caller that tests the return
+            // value mistake '(' for '['.
+            return TokLParen;
+        case ')':
+            token.type = TokRParen;
+            token.end = ++m_ptr;
+            // Likewise, ')' must never be reported as ']'.
+            return TokRParen;
+        case '{':
+            token.type = TokLBrace;
+            token.end = ++m_ptr;
+            return TokLBrace;
+        case '}':
+            token.type = TokRBrace;
+            token.end = ++m_ptr;
+            return TokRBrace;
+        case ',':
+            token.type = TokComma;
+            token.end = ++m_ptr;
+            return TokComma;
+        case ':':
+            token.type = TokColon;
+            token.end = ++m_ptr;
+            return TokColon;
+        case '"':
+            // The string lexer is specialised on the parser mode.
+            if (m_mode == StrictJSON)
+                return lexString<StrictJSON>(token);
+            return lexString<NonStrictJSON>(token);
+        case 't':
+            // The length check guarantees m_ptr[1..3] are inside the buffer.
+            if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
+                m_ptr += 4;
+                token.type = TokTrue;
+                token.end = m_ptr;
+                return TokTrue;
+            }
+            break;
+        case 'f':
+            if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
+                m_ptr += 5;
+                token.type = TokFalse;
+                token.end = m_ptr;
+                return TokFalse;
+            }
+            break;
+        case 'n':
+            if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
+                m_ptr += 4;
+                token.type = TokNull;
+                token.end = m_ptr;
+                return TokNull;
+            }
+            break;
+        case '-':
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+            return lexNumber(token);
+    }
+    // Unknown leading character, or a partial keyword such as "tru".
+    return TokError;
+}
+
+// Returns true when c may be copied into a string literal's value verbatim,
+// i.e. it neither terminates the string nor starts an escape sequence.
+// A tab is always accepted; any other character below ' ' is rejected, as are
+// the backslash and the double quote. Outside StrictJSON mode characters
+// above 0xff are rejected as well.
+template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
+{
+    if (c == '\t')
+        return true;
+    if (c < ' ' || c == '\\' || c == '"')
+        return false;
+    return mode == LiteralParser::StrictJSON || c <= 0xff;
+}
+
+// Lexes a double-quoted string literal; m_ptr points at the opening quote.
+//
+// Runs of "safe" characters (see isSafeStringCharacter) are appended in bulk.
+// In StrictJSON mode the escapes \" \\ \/ \b \f \n \r \t and \uNNNN are
+// decoded; in any other mode escapes are not processed, so the literal must be
+// a single safe run followed by the closing quote. On success the decoded text
+// is stored in token.stringToken, token.end points just past the closing quote
+// and TokString is returned. Every other outcome returns TokError.
+//
+// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
+template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
+{
+    // Step over the opening quote.
+    ++m_ptr;
+    StringBuilder builder;
+    const UChar* runStart;
+    do {
+        // Copy the longest run of characters that need no decoding.
+        runStart = m_ptr;
+        while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
+            ++m_ptr;
+        if (runStart < m_ptr)
+            builder.append(runStart, m_ptr - runStart);
+
+        // Only StrictJSON mode decodes escapes; anything else is left for the
+        // loop condition and the closing-quote check below.
+        if (mode != StrictJSON || m_ptr >= m_end || *m_ptr != '\\')
+            continue;
+
+        // Step over the backslash; an escape selector must follow.
+        ++m_ptr;
+        if (m_ptr >= m_end)
+            return TokError;
+
+        const UChar selector = *m_ptr;
+        if (selector == 'u') {
+            if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
+                return TokError;
+            for (int digit = 1; digit < 5; ++digit) {
+                if (!isASCIIHexDigit(m_ptr[digit]))
+                    return TokError;
+            }
+            builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
+            m_ptr += 5;
+            continue;
+        }
+
+        // Single-character escapes.
+        UChar decoded;
+        switch (selector) {
+            case '"':
+            case '\\':
+            case '/':
+                // These escapes stand for themselves.
+                decoded = selector;
+                break;
+            case 'b':
+                decoded = '\b';
+                break;
+            case 'f':
+                decoded = '\f';
+                break;
+            case 'n':
+                decoded = '\n';
+                break;
+            case 'r':
+                decoded = '\r';
+                break;
+            case 't':
+                decoded = '\t';
+                break;
+            default:
+                return TokError;
+        }
+        builder.append(decoded);
+        ++m_ptr;
+    } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
+
+    // Whatever stopped the loop, only a closing quote is acceptable here.
+    if (m_ptr >= m_end || *m_ptr != '"')
+        return TokError;
+
+    token.stringToken = builder.build();
+    token.type = TokString;
+    token.end = ++m_ptr;
+    return TokString;
+}
+
+// Lexes a numeric literal beginning at token.start (== m_ptr on entry).
+//
+// The accepted grammar is the one ES5 and json.org define:
+//
+//     -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
+//
+// On success token.type is TokNumber, token.end points just past the literal
+// and token.numberToken holds the value produced by WTF::strtod. Input that
+// does not match the grammar returns TokError.
+LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
+{
+    // Optional sign: -?
+    if (m_ptr < m_end && *m_ptr == '-')
+        ++m_ptr;
+
+    // Integer part: (0 | [1-9][0-9]*). At least one digit is mandatory.
+    if (m_ptr >= m_end)
+        return TokError;
+    if (*m_ptr == '0')
+        ++m_ptr; // A leading zero stands alone; no further digits are consumed here.
+    else if (*m_ptr >= '1' && *m_ptr <= '9') {
+        do
+            ++m_ptr;
+        while (m_ptr < m_end && isASCIIDigit(*m_ptr));
+    } else
+        return TokError;
+
+    // Optional fraction: ('.' [0-9]+)?
+    if (m_ptr < m_end && *m_ptr == '.') {
+        ++m_ptr;
+        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
+            return TokError;
+        do
+            ++m_ptr;
+        while (m_ptr < m_end && isASCIIDigit(*m_ptr));
+    }
+
+    // Optional exponent: ([eE][+-]? [0-9]+)?
+    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) {
+        ++m_ptr;
+
+        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
+            ++m_ptr;
+
+        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
+            return TokError;
+        do
+            ++m_ptr;
+        while (m_ptr < m_end && isASCIIDigit(*m_ptr));
+    }
+
+    token.type = TokNumber;
+    token.end = m_ptr;
+
+    // Narrow the literal into a NUL-terminated char buffer for strtod.
+    const size_t length = token.end - token.start;
+    Vector<char, 64> buffer(length + 1);
+    for (size_t index = 0; index < length; ++index) {
+        ASSERT(static_cast<char>(token.start[index]) == token.start[index]);
+        buffer[index] = static_cast<char>(token.start[index]);
+    }
+    buffer[length] = 0;
+
+    char* end;
+    token.numberToken = WTF::strtod(buffer.data(), &end);
+    // strtod is expected to consume exactly the characters validated above.
+    ASSERT(buffer.data() + length == end);
+    return TokNumber;
+}
+
+// Parses the token stream into a JSValue without recursion, using an explicit
+// state machine.
+//
+// 'state' selects which case of the switch runs. When a case needs a nested
+// value it pushes the state to resume in onto stateStack and jumps to
+// startParseExpression. Whenever a case finishes a value it stores it in
+// lastValue and breaks out of the switch; the bottom of the loop then either
+// returns lastValue (stateStack empty) or pops the next state to resume.
+// objectStack holds the arrays/objects currently under construction and
+// identifierStack the property names whose values are still being parsed.
+//
+// Returns an empty JSValue() on any syntax error.
+JSValue LiteralParser::parse(ParserState initialState)
+{
+    ParserState state = initialState;
+    MarkedArgumentBuffer objectStack;
+    JSValue lastValue;
+    Vector<ParserState, 16> stateStack;
+    Vector<Identifier, 16> identifierStack;
+    while (1) {
+        switch(state) {
+            // Current token is '[': start a new array.
+            startParseArray:
+            case StartParseArray: {
+                JSArray* array = constructEmptyArray(m_exec);
+                objectStack.append(array);
+                // fallthrough
+            }
+            // Entered either right after '[' (fallthrough above) or right
+            // after a ',' separating elements (goto from
+            // DoParseArrayEndExpression).
+            doParseArrayStartExpression:
+            case DoParseArrayStartExpression: {
+                // Remember the token we arrived on so that a ']' directly
+                // after a ',' (a trailing comma) can be rejected.
+                TokenType lastToken = m_lexer.currentToken().type;
+                if (m_lexer.next() == TokRBracket) {
+                    if (lastToken == TokComma)
+                        return JSValue();
+                    // Step past ']' and hand the finished array back.
+                    m_lexer.next();
+                    lastValue = objectStack.last();
+                    objectStack.removeLast();
+                    break;
+                }
+
+                // Parse one element, then resume in DoParseArrayEndExpression.
+                stateStack.append(DoParseArrayEndExpression);
+                goto startParseExpression;
+            }
+            // An element has just been parsed into lastValue.
+            case DoParseArrayEndExpression: {
+                 asArray(objectStack.last())->push(m_exec, lastValue);
+                
+                if (m_lexer.currentToken().type == TokComma)
+                    goto doParseArrayStartExpression;
+
+                // Anything other than ',' or ']' after an element is an error.
+                if (m_lexer.currentToken().type != TokRBracket)
+                    return JSValue();
+                
+                m_lexer.next();
+                lastValue = objectStack.last();
+                objectStack.removeLast();
+                break;
+            }
+            // Current token is '{': start a new object. The next token must
+            // be either a string key or '}' (empty object).
+            startParseObject:
+            case StartParseObject: {
+                JSObject* object = constructEmptyObject(m_exec);
+                objectStack.append(object);
+
+                TokenType type = m_lexer.next();
+                if (type == TokString) {
+                    // Copy the key token before the lexer advances past it.
+                    Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
+
+                    // Check for colon
+                    if (m_lexer.next() != TokColon)
+                        return JSValue();
+                    
+                    // Advance to the first token of the property value.
+                    m_lexer.next();
+                    identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
+                    stateStack.append(DoParseObjectEndExpression);
+                    goto startParseExpression;
+                } else if (type != TokRBrace) 
+                    return JSValue();
+                // Empty object: step past '}' and hand it back.
+                m_lexer.next();
+                lastValue = objectStack.last();
+                objectStack.removeLast();
+                break;
+            }
+            // Entered after a ',' between properties. Unlike StartParseObject
+            // a string key is mandatory here, so '}' directly after ',' is
+            // rejected.
+            doParseObjectStartExpression:
+            case DoParseObjectStartExpression: {
+                TokenType type = m_lexer.next();
+                if (type != TokString)
+                    return JSValue();
+                Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
+
+                // Check for colon
+                if (m_lexer.next() != TokColon)
+                    return JSValue();
+
+                m_lexer.next();
+                identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
+                stateStack.append(DoParseObjectEndExpression);
+                goto startParseExpression;
+            }
+            // A property value has just been parsed into lastValue; store it
+            // under the most recently pushed key.
+            case DoParseObjectEndExpression:
+            {
+                asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
+                identifierStack.removeLast();
+                if (m_lexer.currentToken().type == TokComma)
+                    goto doParseObjectStartExpression;
+                if (m_lexer.currentToken().type != TokRBrace)
+                    return JSValue();
+                m_lexer.next();
+                lastValue = objectStack.last();
+                objectStack.removeLast();
+                break;
+            }
+            // Parse a single value starting at the current token. Arrays and
+            // objects jump to their own states; scalars consume their token,
+            // set lastValue and fall out to the bottom of the loop.
+            startParseExpression:
+            case StartParseExpression: {
+                switch (m_lexer.currentToken().type) {
+                    case TokLBracket:
+                        goto startParseArray;
+                    case TokLBrace:
+                        goto startParseObject;
+                    case TokString: {
+                        // Copy the token first: next() moves the lexer on.
+                        Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
+                        m_lexer.next();
+                        lastValue = jsString(m_exec, stringToken.stringToken);
+                        break;
+                    }
+                    case TokNumber: {
+                        Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
+                        m_lexer.next();
+                        lastValue = jsNumber(m_exec, numberToken.numberToken);
+                        break;
+                    }
+                    case TokNull:
+                        m_lexer.next();
+                        lastValue = jsNull();
+                        break;
+
+                    case TokTrue:
+                        m_lexer.next();
+                        lastValue = jsBoolean(true);
+                        break;
+
+                    case TokFalse:
+                        m_lexer.next();
+                        lastValue = jsBoolean(false);
+                        break;
+
+                    default:
+                        // Error
+                        return JSValue();
+                }
+                break;
+            }
+            // Statement-level entry point. Only '[', a number, a string or a
+            // parenthesised expression is accepted as the first token; every
+            // other token (including '{') is rejected.
+            case StartParseStatement: {
+                switch (m_lexer.currentToken().type) {
+                    case TokLBracket:
+                    case TokNumber:
+                    case TokString:
+                        goto startParseExpression;
+
+                    case TokLParen: {
+                        // Skip '(' and parse the enclosed expression; the
+                        // closing ')' is checked in
+                        // StartParseStatementEndStatement.
+                        m_lexer.next();
+                        stateStack.append(StartParseStatementEndStatement);
+                        goto startParseExpression;
+                    }
+                    default:
+                        return JSValue();
+                }
+            }
+            // The expression inside '(' ... ')' has been parsed: require ')'
+            // followed immediately by the end of input.
+            case StartParseStatementEndStatement: {
+                ASSERT(stateStack.isEmpty());
+                if (m_lexer.currentToken().type != TokRParen)
+                    return JSValue();
+                if (m_lexer.next() == TokEnd)
+                    return lastValue;
+                return JSValue();
+            }
+            default:
+                ASSERT_NOT_REACHED();
+        }
+        // A value was completed: return it if nothing is waiting for it,
+        // otherwise resume the state that requested it.
+        if (stateStack.isEmpty())
+            return lastValue;
+        state = stateStack.last();
+        stateStack.removeLast();
+        continue;
+    }
+}
+
+}