JavaScriptCore/parser/Lexer.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
       
     3  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
       
     4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
       
     5  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
       
     6  *
       
     7  *  This library is free software; you can redistribute it and/or
       
     8  *  modify it under the terms of the GNU Library General Public
       
     9  *  License as published by the Free Software Foundation; either
       
    10  *  version 2 of the License, or (at your option) any later version.
       
    11  *
       
    12  *  This library is distributed in the hope that it will be useful,
       
    13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    15  *  Library General Public License for more details.
       
    16  *
       
    17  *  You should have received a copy of the GNU Library General Public License
       
    18  *  along with this library; see the file COPYING.LIB.  If not, write to
       
    19  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
       
    20  *  Boston, MA 02110-1301, USA.
       
    21  *
       
    22  */
       
    23 
       
    24 #include "config.h"
       
    25 #include "Lexer.h"
       
    26 
       
    27 #include "JSFunction.h"
       
    28 
       
    29 #include "JSGlobalObjectFunctions.h"
       
    30 #include "Identifier.h"
       
    31 #include "NodeInfo.h"
       
    32 #include "Nodes.h"
       
    33 #include "dtoa.h"
       
    34 #include <ctype.h>
       
    35 #include <limits.h>
       
    36 #include <string.h>
       
    37 #include <wtf/Assertions.h>
       
    38 
       
    39 using namespace WTF;
       
    40 using namespace Unicode;
       
    41 
       
    42 #include "JSParser.h"
       
    43 #include "Lookup.h"
       
    44 #include "Lexer.lut.h"
       
    45 
       
    46 namespace JSC {
       
    47 
       
    48 
       
    49 enum CharacterType {
       
    50     // Types for the main switch
       
    51 
       
    52     // The first three types are fixed, and also used for identifying
       
    53     // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
       
    54     CharacterIdentifierStart,
       
    55     CharacterZero,
       
    56     CharacterNumber,
       
    57 
       
    58     CharacterInvalid,
       
    59     CharacterLineTerminator,
       
    60     CharacterExclamationMark,
       
    61     CharacterOpenParen,
       
    62     CharacterCloseParen,
       
    63     CharacterOpenBracket,
       
    64     CharacterCloseBracket,
       
    65     CharacterComma,
       
    66     CharacterColon,
       
    67     CharacterQuestion,
       
    68     CharacterTilde,
       
    69     CharacterQuote,
       
    70     CharacterDot,
       
    71     CharacterSlash,
       
    72     CharacterBackSlash,
       
    73     CharacterSemicolon,
       
    74     CharacterOpenBrace,
       
    75     CharacterCloseBrace,
       
    76 
       
    77     CharacterAdd,
       
    78     CharacterSub,
       
    79     CharacterMultiply,
       
    80     CharacterModulo,
       
    81     CharacterAnd,
       
    82     CharacterXor,
       
    83     CharacterOr,
       
    84     CharacterLess,
       
    85     CharacterGreater,
       
    86     CharacterEqual,
       
    87 
       
    88     // Other types (only one so far)
       
    89     CharacterWhiteSpace,
       
    90 };
       
    91 
       
    92 // 128 ASCII codes
       
    93 static const unsigned short typesOfASCIICharacters[128] = {
       
    94 /*   0 - Null               */ CharacterInvalid,
       
    95 /*   1 - Start of Heading   */ CharacterInvalid,
       
    96 /*   2 - Start of Text      */ CharacterInvalid,
       
    97 /*   3 - End of Text        */ CharacterInvalid,
       
    98 /*   4 - End of Transm.     */ CharacterInvalid,
       
    99 /*   5 - Enquiry            */ CharacterInvalid,
       
   100 /*   6 - Acknowledgment     */ CharacterInvalid,
       
   101 /*   7 - Bell               */ CharacterInvalid,
       
   102 /*   8 - Back Space         */ CharacterInvalid,
       
   103 /*   9 - Horizontal Tab     */ CharacterWhiteSpace,
       
   104 /*  10 - Line Feed          */ CharacterLineTerminator,
       
   105 /*  11 - Vertical Tab       */ CharacterWhiteSpace,
       
   106 /*  12 - Form Feed          */ CharacterWhiteSpace,
       
   107 /*  13 - Carriage Return    */ CharacterLineTerminator,
       
   108 /*  14 - Shift Out          */ CharacterInvalid,
       
   109 /*  15 - Shift In           */ CharacterInvalid,
       
   110 /*  16 - Data Line Escape   */ CharacterInvalid,
       
   111 /*  17 - Device Control 1   */ CharacterInvalid,
       
   112 /*  18 - Device Control 2   */ CharacterInvalid,
       
   113 /*  19 - Device Control 3   */ CharacterInvalid,
       
   114 /*  20 - Device Control 4   */ CharacterInvalid,
       
   115 /*  21 - Negative Ack.      */ CharacterInvalid,
       
   116 /*  22 - Synchronous Idle   */ CharacterInvalid,
       
   117 /*  23 - End of Transmit    */ CharacterInvalid,
       
   118 /*  24 - Cancel             */ CharacterInvalid,
       
   119 /*  25 - End of Medium      */ CharacterInvalid,
       
   120 /*  26 - Substitute         */ CharacterInvalid,
       
   121 /*  27 - Escape             */ CharacterInvalid,
       
   122 /*  28 - File Separator     */ CharacterInvalid,
       
   123 /*  29 - Group Separator    */ CharacterInvalid,
       
   124 /*  30 - Record Separator   */ CharacterInvalid,
       
   125 /*  31 - Unit Separator     */ CharacterInvalid,
       
   126 /*  32 - Space              */ CharacterWhiteSpace,
       
   127 /*  33 - !                  */ CharacterExclamationMark,
       
   128 /*  34 - "                  */ CharacterQuote,
       
   129 /*  35 - #                  */ CharacterInvalid,
       
   130 /*  36 - $                  */ CharacterIdentifierStart,
       
   131 /*  37 - %                  */ CharacterModulo,
       
   132 /*  38 - &                  */ CharacterAnd,
       
   133 /*  39 - '                  */ CharacterQuote,
       
   134 /*  40 - (                  */ CharacterOpenParen,
       
   135 /*  41 - )                  */ CharacterCloseParen,
       
   136 /*  42 - *                  */ CharacterMultiply,
       
   137 /*  43 - +                  */ CharacterAdd,
       
   138 /*  44 - ,                  */ CharacterComma,
       
   139 /*  45 - -                  */ CharacterSub,
       
   140 /*  46 - .                  */ CharacterDot,
       
   141 /*  47 - /                  */ CharacterSlash,
       
   142 /*  48 - 0                  */ CharacterZero,
       
   143 /*  49 - 1                  */ CharacterNumber,
       
   144 /*  50 - 2                  */ CharacterNumber,
       
   145 /*  51 - 3                  */ CharacterNumber,
       
   146 /*  52 - 4                  */ CharacterNumber,
       
   147 /*  53 - 5                  */ CharacterNumber,
       
   148 /*  54 - 6                  */ CharacterNumber,
       
   149 /*  55 - 7                  */ CharacterNumber,
       
   150 /*  56 - 8                  */ CharacterNumber,
       
   151 /*  57 - 9                  */ CharacterNumber,
       
   152 /*  58 - :                  */ CharacterColon,
       
   153 /*  59 - ;                  */ CharacterSemicolon,
       
   154 /*  60 - <                  */ CharacterLess,
       
   155 /*  61 - =                  */ CharacterEqual,
       
   156 /*  62 - >                  */ CharacterGreater,
       
   157 /*  63 - ?                  */ CharacterQuestion,
       
   158 /*  64 - @                  */ CharacterInvalid,
       
   159 /*  65 - A                  */ CharacterIdentifierStart,
       
   160 /*  66 - B                  */ CharacterIdentifierStart,
       
   161 /*  67 - C                  */ CharacterIdentifierStart,
       
   162 /*  68 - D                  */ CharacterIdentifierStart,
       
   163 /*  69 - E                  */ CharacterIdentifierStart,
       
   164 /*  70 - F                  */ CharacterIdentifierStart,
       
   165 /*  71 - G                  */ CharacterIdentifierStart,
       
   166 /*  72 - H                  */ CharacterIdentifierStart,
       
   167 /*  73 - I                  */ CharacterIdentifierStart,
       
   168 /*  74 - J                  */ CharacterIdentifierStart,
       
   169 /*  75 - K                  */ CharacterIdentifierStart,
       
   170 /*  76 - L                  */ CharacterIdentifierStart,
       
   171 /*  77 - M                  */ CharacterIdentifierStart,
       
   172 /*  78 - N                  */ CharacterIdentifierStart,
       
   173 /*  79 - O                  */ CharacterIdentifierStart,
       
   174 /*  80 - P                  */ CharacterIdentifierStart,
       
   175 /*  81 - Q                  */ CharacterIdentifierStart,
       
   176 /*  82 - R                  */ CharacterIdentifierStart,
       
   177 /*  83 - S                  */ CharacterIdentifierStart,
       
   178 /*  84 - T                  */ CharacterIdentifierStart,
       
   179 /*  85 - U                  */ CharacterIdentifierStart,
       
   180 /*  86 - V                  */ CharacterIdentifierStart,
       
   181 /*  87 - W                  */ CharacterIdentifierStart,
       
   182 /*  88 - X                  */ CharacterIdentifierStart,
       
   183 /*  89 - Y                  */ CharacterIdentifierStart,
       
   184 /*  90 - Z                  */ CharacterIdentifierStart,
       
   185 /*  91 - [                  */ CharacterOpenBracket,
       
   186 /*  92 - \                  */ CharacterBackSlash,
       
   187 /*  93 - ]                  */ CharacterCloseBracket,
       
   188 /*  94 - ^                  */ CharacterXor,
       
   189 /*  95 - _                  */ CharacterIdentifierStart,
       
   190 /*  96 - `                  */ CharacterInvalid,
       
   191 /*  97 - a                  */ CharacterIdentifierStart,
       
   192 /*  98 - b                  */ CharacterIdentifierStart,
       
   193 /*  99 - c                  */ CharacterIdentifierStart,
       
   194 /* 100 - d                  */ CharacterIdentifierStart,
       
   195 /* 101 - e                  */ CharacterIdentifierStart,
       
   196 /* 102 - f                  */ CharacterIdentifierStart,
       
   197 /* 103 - g                  */ CharacterIdentifierStart,
       
   198 /* 104 - h                  */ CharacterIdentifierStart,
       
   199 /* 105 - i                  */ CharacterIdentifierStart,
       
   200 /* 106 - j                  */ CharacterIdentifierStart,
       
   201 /* 107 - k                  */ CharacterIdentifierStart,
       
   202 /* 108 - l                  */ CharacterIdentifierStart,
       
   203 /* 109 - m                  */ CharacterIdentifierStart,
       
   204 /* 110 - n                  */ CharacterIdentifierStart,
       
   205 /* 111 - o                  */ CharacterIdentifierStart,
       
   206 /* 112 - p                  */ CharacterIdentifierStart,
       
   207 /* 113 - q                  */ CharacterIdentifierStart,
       
   208 /* 114 - r                  */ CharacterIdentifierStart,
       
   209 /* 115 - s                  */ CharacterIdentifierStart,
       
   210 /* 116 - t                  */ CharacterIdentifierStart,
       
   211 /* 117 - u                  */ CharacterIdentifierStart,
       
   212 /* 118 - v                  */ CharacterIdentifierStart,
       
   213 /* 119 - w                  */ CharacterIdentifierStart,
       
   214 /* 120 - x                  */ CharacterIdentifierStart,
       
   215 /* 121 - y                  */ CharacterIdentifierStart,
       
   216 /* 122 - z                  */ CharacterIdentifierStart,
       
   217 /* 123 - {                  */ CharacterOpenBrace,
       
   218 /* 124 - |                  */ CharacterOr,
       
   219 /* 125 - }                  */ CharacterCloseBrace,
       
   220 /* 126 - ~                  */ CharacterTilde,
       
   221 /* 127 - Delete             */ CharacterInvalid,
       
   222 };
       
   223 
       
   224 Lexer::Lexer(JSGlobalData* globalData)
       
   225     : m_isReparsing(false)
       
   226     , m_globalData(globalData)
       
   227     , m_keywordTable(JSC::mainTable)
       
   228 {
       
   229 }
       
   230 
       
   231 Lexer::~Lexer()
       
   232 {
       
   233     m_keywordTable.deleteTable();
       
   234 }
       
   235 
       
   236 ALWAYS_INLINE const UChar* Lexer::currentCharacter() const
       
   237 {
       
   238     ASSERT(m_code <= m_codeEnd);
       
   239     return m_code;
       
   240 }
       
   241 
       
   242 ALWAYS_INLINE int Lexer::currentOffset() const
       
   243 {
       
   244     return currentCharacter() - m_codeStart;
       
   245 }
       
   246 
       
   247 void Lexer::setCode(const SourceCode& source, ParserArena& arena)
       
   248 {
       
   249     m_arena = &arena.identifierArena();
       
   250 
       
   251     m_lineNumber = source.firstLine();
       
   252     m_delimited = false;
       
   253     m_lastToken = -1;
       
   254 
       
   255     const UChar* data = source.provider()->data();
       
   256 
       
   257     m_source = &source;
       
   258     m_codeStart = data;
       
   259     m_code = data + source.startOffset();
       
   260     m_codeEnd = data + source.endOffset();
       
   261     m_error = false;
       
   262     m_atLineStart = true;
       
   263 
       
   264     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
       
   265     m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
       
   266 
       
   267     if (LIKELY(m_code < m_codeEnd))
       
   268         m_current = *m_code;
       
   269     else
       
   270         m_current = -1;
       
   271     ASSERT(currentOffset() == source.startOffset());
       
   272 }
       
   273 
       
   274 ALWAYS_INLINE void Lexer::shift()
       
   275 {
       
   276     // Faster than an if-else sequence
       
   277     ASSERT(m_current != -1);
       
   278     m_current = -1;
       
   279     ++m_code;
       
   280     if (LIKELY(m_code < m_codeEnd))
       
   281         m_current = *m_code;
       
   282 }
       
   283 
       
   284 ALWAYS_INLINE int Lexer::peek(int offset)
       
   285 {
       
   286     // Only use if necessary
       
   287     ASSERT(offset > 0 && offset < 5);
       
   288     const UChar* code = m_code + offset;
       
   289     return (code < m_codeEnd) ? *code : -1;
       
   290 }
       
   291 
       
   292 int Lexer::getUnicodeCharacter()
       
   293 {
       
   294     int char1 = peek(1);
       
   295     int char2 = peek(2);
       
   296     int char3 = peek(3);
       
   297 
       
   298     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3)))
       
   299         return -1;
       
   300 
       
   301     int result = convertUnicode(m_current, char1, char2, char3);
       
   302     shift();
       
   303     shift();
       
   304     shift();
       
   305     shift();
       
   306     return result;
       
   307 }
       
   308 
       
   309 void Lexer::shiftLineTerminator()
       
   310 {
       
   311     ASSERT(isLineTerminator(m_current));
       
   312 
       
   313     int m_prev = m_current;
       
   314     shift();
       
   315 
       
   316     // Allow both CRLF and LFCR.
       
   317     if (m_prev + m_current == '\n' + '\r')
       
   318         shift();
       
   319 
       
   320     ++m_lineNumber;
       
   321 }
       
   322 
       
   323 ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
       
   324 {
       
   325     return &m_arena->makeIdentifier(m_globalData, characters, length);
       
   326 }
       
   327 
       
   328 ALWAYS_INLINE bool Lexer::lastTokenWasRestrKeyword() const
       
   329 {
       
   330     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
       
   331 }
       
   332 
       
   333 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
       
   334 {
       
   335     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
       
   336 }
       
   337 
       
   338 static inline bool isIdentStart(int c)
       
   339 {
       
   340     return isASCII(c) ? typesOfASCIICharacters[c] == CharacterIdentifierStart : isNonASCIIIdentStart(c);
       
   341 }
       
   342 
       
   343 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
       
   344 {
       
   345     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
       
   346         | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
       
   347 }
       
   348 
       
   349 static inline bool isIdentPart(int c)
       
   350 {
       
   351     // Character types are divided into two groups depending on whether they can be part of an
       
   352     // identifier or not. Those whose type value is less or equal than CharacterNumber can be
       
   353     // part of an identifier. (See the CharacterType definition for more details.)
       
   354     return isASCII(c) ? typesOfASCIICharacters[c] <= CharacterNumber : isNonASCIIIdentPart(c);
       
   355 }
       
   356 
       
   357 static inline int singleEscape(int c)
       
   358 {
       
   359     switch (c) {
       
   360     case 'b':
       
   361         return 0x08;
       
   362     case 't':
       
   363         return 0x09;
       
   364     case 'n':
       
   365         return 0x0A;
       
   366     case 'v':
       
   367         return 0x0B;
       
   368     case 'f':
       
   369         return 0x0C;
       
   370     case 'r':
       
   371         return 0x0D;
       
   372     case '\\':
       
   373         return '\\';
       
   374     case '\'':
       
   375         return '\'';
       
   376     case '"':
       
   377         return '"';
       
   378     default:
       
   379         return 0;
       
   380     }
       
   381 }
       
   382 
       
   383 inline void Lexer::record8(int c)
       
   384 {
       
   385     ASSERT(c >= 0);
       
   386     ASSERT(c <= 0xFF);
       
   387     m_buffer8.append(static_cast<char>(c));
       
   388 }
       
   389 
       
   390 inline void Lexer::record16(UChar c)
       
   391 {
       
   392     m_buffer16.append(c);
       
   393 }
       
   394 
       
   395 inline void Lexer::record16(int c)
       
   396 {
       
   397     ASSERT(c >= 0);
       
   398     ASSERT(c <= USHRT_MAX);
       
   399     record16(UChar(static_cast<unsigned short>(c)));
       
   400 }
       
   401 
       
   402 ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp)
       
   403 {
       
   404     int stringQuoteCharacter = m_current;
       
   405     shift();
       
   406 
       
   407     const UChar* stringStart = currentCharacter();
       
   408 
       
   409     while (m_current != stringQuoteCharacter) {
       
   410         if (UNLIKELY(m_current == '\\')) {
       
   411             if (stringStart != currentCharacter())
       
   412                 m_buffer16.append(stringStart, currentCharacter() - stringStart);
       
   413             shift();
       
   414 
       
   415             int escape = singleEscape(m_current);
       
   416 
       
   417             // Most common escape sequences first
       
   418             if (escape) {
       
   419                 record16(escape);
       
   420                 shift();
       
   421             } else if (UNLIKELY(isLineTerminator(m_current)))
       
   422                 shiftLineTerminator();
       
   423             else if (m_current == 'x') {
       
   424                 shift();
       
   425                 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) {
       
   426                     int prev = m_current;
       
   427                     shift();
       
   428                     record16(convertHex(prev, m_current));
       
   429                     shift();
       
   430                 } else
       
   431                     record16('x');
       
   432             } else if (m_current == 'u') {
       
   433                 shift();
       
   434                 int character = getUnicodeCharacter();
       
   435                 if (character != -1)
       
   436                     record16(character);
       
   437                 else if (m_current == stringQuoteCharacter)
       
   438                     record16('u');
       
   439                 else // Only stringQuoteCharacter allowed after \u
       
   440                     return false;
       
   441             } else if (isASCIIOctalDigit(m_current)) {
       
   442                 // Octal character sequences
       
   443                 int character1 = m_current;
       
   444                 shift();
       
   445                 if (isASCIIOctalDigit(m_current)) {
       
   446                     // Two octal characters
       
   447                     int character2 = m_current;
       
   448                     shift();
       
   449                     if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
       
   450                         record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
       
   451                         shift();
       
   452                     } else
       
   453                         record16((character1 - '0') * 8 + character2 - '0');
       
   454                 } else
       
   455                     record16(character1 - '0');
       
   456             } else if (m_current != -1) {
       
   457                 record16(m_current);
       
   458                 shift();
       
   459             } else
       
   460                 return false;
       
   461 
       
   462             stringStart = currentCharacter();
       
   463             continue;
       
   464         }
       
   465         // Fast check for characters that require special handling.
       
   466         // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
       
   467         // as possible, and lets through all common ASCII characters.
       
   468         if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
       
   469             // New-line or end of input is not allowed
       
   470             if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1))
       
   471                 return false;
       
   472             // Anything else is just a normal character
       
   473         }
       
   474         shift();
       
   475     }
       
   476 
       
   477     if (currentCharacter() != stringStart)
       
   478         m_buffer16.append(stringStart, currentCharacter() - stringStart);
       
   479     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
       
   480     m_buffer16.resize(0);
       
   481     return true;
       
   482 }
       
   483 
       
   484 JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType lexType)
       
   485 {
       
   486     ASSERT(!m_error);
       
   487     ASSERT(m_buffer8.isEmpty());
       
   488     ASSERT(m_buffer16.isEmpty());
       
   489 
       
   490     JSTokenType token = ERRORTOK;
       
   491     int identChar = 0;
       
   492     m_terminator = false;
       
   493 
       
   494 start:
       
   495     while (isWhiteSpace(m_current))
       
   496         shift();
       
   497 
       
   498     int startOffset = currentOffset();
       
   499 
       
   500     if (UNLIKELY(m_current == -1))
       
   501         return EOFTOK;
       
   502 
       
   503     m_delimited = false;
       
   504 
       
   505     CharacterType type;
       
   506     if (LIKELY(isASCII(m_current)))
       
   507         type = static_cast<CharacterType>(typesOfASCIICharacters[m_current]);
       
   508     else if (isNonASCIIIdentStart(m_current))
       
   509         type = CharacterIdentifierStart;
       
   510     else if (isLineTerminator(m_current))
       
   511         type = CharacterLineTerminator;
       
   512     else
       
   513         type = CharacterInvalid;
       
   514 
       
   515     switch (type) {
       
   516     case CharacterGreater:
       
   517         shift();
       
   518         if (m_current == '>') {
       
   519             shift();
       
   520             if (m_current == '>') {
       
   521                 shift();
       
   522                 if (m_current == '=') {
       
   523                     shift();
       
   524                     token = URSHIFTEQUAL;
       
   525                     break;
       
   526                 }
       
   527                 token = URSHIFT;
       
   528                 break;
       
   529             }
       
   530             if (m_current == '=') {
       
   531                 shift();
       
   532                 token = RSHIFTEQUAL;
       
   533                 break;
       
   534             }
       
   535             token = RSHIFT;
       
   536             break;
       
   537         }
       
   538         if (m_current == '=') {
       
   539             shift();
       
   540             token = GE;
       
   541             break;
       
   542         }
       
   543         token = GT;
       
   544         break;
       
   545     case CharacterEqual:
       
   546         shift();
       
   547         if (m_current == '=') {
       
   548             shift();
       
   549             if (m_current == '=') {
       
   550                 shift();
       
   551                 token = STREQ;
       
   552                 break;
       
   553             }
       
   554             token = EQEQ;
       
   555             break;
       
   556         }
       
   557         token = EQUAL;
       
   558         break;
       
   559     case CharacterLess:
       
   560         shift();
       
   561         if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
       
   562             // <!-- marks the beginning of a line comment (for www usage)
       
   563             goto inSingleLineComment;
       
   564         }
       
   565         if (m_current == '<') {
       
   566             shift();
       
   567             if (m_current == '=') {
       
   568                 shift();
       
   569                 token = LSHIFTEQUAL;
       
   570                 break;
       
   571             }
       
   572             token = LSHIFT;
       
   573             break;
       
   574         }
       
   575         if (m_current == '=') {
       
   576             shift();
       
   577             token = LE;
       
   578             break;
       
   579         }
       
   580         token = LT;
       
   581         break;
       
   582     case CharacterExclamationMark:
       
   583         shift();
       
   584         if (m_current == '=') {
       
   585             shift();
       
   586             if (m_current == '=') {
       
   587                 shift();
       
   588                 token = STRNEQ;
       
   589                 break;
       
   590             }
       
   591             token = NE;
       
   592             break;
       
   593         }
       
   594         token = EXCLAMATION;
       
   595         break;
       
   596     case CharacterAdd:
       
   597         shift();
       
   598         if (m_current == '+') {
       
   599             shift();
       
   600             token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
       
   601             break;
       
   602         }
       
   603         if (m_current == '=') {
       
   604             shift();
       
   605             token = PLUSEQUAL;
       
   606             break;
       
   607         }
       
   608         token = PLUS;
       
   609         break;
       
   610     case CharacterSub:
       
   611         shift();
       
   612         if (m_current == '-') {
       
   613             shift();
       
   614             if (m_atLineStart && m_current == '>') {
       
   615                 shift();
       
   616                 goto inSingleLineComment;
       
   617             }
       
   618             token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
       
   619             break;
       
   620         }
       
   621         if (m_current == '=') {
       
   622             shift();
       
   623             token = MINUSEQUAL;
       
   624             break;
       
   625         }
       
   626         token = MINUS;
       
   627         break;
       
   628     case CharacterMultiply:
       
   629         shift();
       
   630         if (m_current == '=') {
       
   631             shift();
       
   632             token = MULTEQUAL;
       
   633             break;
       
   634         }
       
   635         token = TIMES;
       
   636         break;
       
   637     case CharacterSlash:
       
   638         shift();
       
   639         if (m_current == '/') {
       
   640             shift();
       
   641             goto inSingleLineComment;
       
   642         }
       
   643         if (m_current == '*') {
       
   644             shift();
       
   645             goto inMultiLineComment;
       
   646         }
       
   647         if (m_current == '=') {
       
   648             shift();
       
   649             token = DIVEQUAL;
       
   650             break;
       
   651         }
       
   652         token = DIVIDE;
       
   653         break;
       
   654     case CharacterAnd:
       
   655         shift();
       
   656         if (m_current == '&') {
       
   657             shift();
       
   658             token = AND;
       
   659             break;
       
   660         }
       
   661         if (m_current == '=') {
       
   662             shift();
       
   663             token = ANDEQUAL;
       
   664             break;
       
   665         }
       
   666         token = BITAND;
       
   667         break;
       
   668     case CharacterXor:
       
   669         shift();
       
   670         if (m_current == '=') {
       
   671             shift();
       
   672             token = XOREQUAL;
       
   673             break;
       
   674         }
       
   675         token = BITXOR;
       
   676         break;
       
   677     case CharacterModulo:
       
   678         shift();
       
   679         if (m_current == '=') {
       
   680             shift();
       
   681             token = MODEQUAL;
       
   682             break;
       
   683         }
       
   684         token = MOD;
       
   685         break;
       
   686     case CharacterOr:
       
   687         shift();
       
   688         if (m_current == '=') {
       
   689             shift();
       
   690             token = OREQUAL;
       
   691             break;
       
   692         }
       
   693         if (m_current == '|') {
       
   694             shift();
       
   695             token = OR;
       
   696             break;
       
   697         }
       
   698         token = BITOR;
       
   699         break;
       
   700     case CharacterDot:
       
   701         shift();
       
   702         if (isASCIIDigit(m_current)) {
       
   703             record8('.');
       
   704             goto inNumberAfterDecimalPoint;
       
   705         }
       
   706         token = DOT;
       
   707         break;
       
   708     case CharacterOpenParen:
       
   709         token = OPENPAREN;
       
   710         shift();
       
   711         break;
       
   712     case CharacterCloseParen:
       
   713         token = CLOSEPAREN;
       
   714         shift();
       
   715         break;
       
   716     case CharacterOpenBracket:
       
   717         token = OPENBRACKET;
       
   718         shift();
       
   719         break;
       
   720     case CharacterCloseBracket:
       
   721         token = CLOSEBRACKET;
       
   722         shift();
       
   723         break;
       
   724     case CharacterComma:
       
   725         token = COMMA;
       
   726         shift();
       
   727         break;
       
   728     case CharacterColon:
       
   729         token = COLON;
       
   730         shift();
       
   731         break;
       
   732     case CharacterQuestion:
       
   733         token = QUESTION;
       
   734         shift();
       
   735         break;
       
   736     case CharacterTilde:
       
   737         token = TILDE;
       
   738         shift();
       
   739         break;
       
   740     case CharacterSemicolon:
       
   741         m_delimited = true;
       
   742         shift();
       
   743         token = SEMICOLON;
       
   744         break;
       
   745     case CharacterOpenBrace:
       
   746         lvalp->intValue = currentOffset();
       
   747         shift();
       
   748         token = OPENBRACE;
       
   749         break;
       
   750     case CharacterCloseBrace:
       
   751         lvalp->intValue = currentOffset();
       
   752         m_delimited = true;
       
   753         shift();
       
   754         token = CLOSEBRACE;
       
   755         break;
       
   756     case CharacterBackSlash:
       
   757         goto startIdentifierWithBackslash;
       
   758     case CharacterZero:
       
   759         goto startNumberWithZeroDigit;
       
   760     case CharacterNumber:
       
   761         goto startNumber;
       
   762     case CharacterQuote:
       
   763         if (UNLIKELY(!parseString(lvalp)))
       
   764             goto returnError;
       
   765         shift();
       
   766         m_delimited = false;
       
   767         token = STRING;
       
   768         break;
       
   769     case CharacterIdentifierStart:
       
   770         ASSERT(isIdentStart(m_current));
       
   771         goto startIdentifierOrKeyword;
       
   772     case CharacterLineTerminator:
       
   773         ASSERT(isLineTerminator(m_current));
       
   774         shiftLineTerminator();
       
   775         m_atLineStart = true;
       
   776         m_terminator = true;
       
   777         if (lastTokenWasRestrKeyword()) {
       
   778             token = SEMICOLON;
       
   779             goto doneSemicolon;
       
   780         }
       
   781         goto start;
       
   782     case CharacterInvalid:
       
   783         goto returnError;
       
   784     default:
       
   785         ASSERT_NOT_REACHED();
       
   786         goto returnError;
       
   787     }
       
   788 
       
   789     m_atLineStart = false;
       
   790     goto returnToken;
       
   791 
       
   792 startIdentifierWithBackslash: {
       
   793     shift();
       
   794     if (UNLIKELY(m_current != 'u'))
       
   795         goto returnError;
       
   796     shift();
       
   797 
       
   798     identChar = getUnicodeCharacter();
       
   799     if (UNLIKELY(identChar == -1))
       
   800         goto returnError;
       
   801     if (UNLIKELY(!isIdentStart(identChar)))
       
   802         goto returnError;
       
   803     goto inIdentifierAfterCharacterCheck;
       
   804 }
       
   805 
       
   806 startIdentifierOrKeyword: {
       
   807     const UChar* identifierStart = currentCharacter();
       
   808     shift();
       
   809     while (isIdentPart(m_current))
       
   810         shift();
       
   811     if (LIKELY(m_current != '\\')) {
       
   812         // Fast case for idents which does not contain \uCCCC characters
       
   813         lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
       
   814         goto doneIdentifierOrKeyword;
       
   815     }
       
   816     m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
       
   817 }
       
   818 
       
   819     do {
       
   820         shift();
       
   821         if (UNLIKELY(m_current != 'u'))
       
   822             goto returnError;
       
   823         shift();
       
   824         identChar = getUnicodeCharacter();
       
   825         if (UNLIKELY(identChar == -1))
       
   826             goto returnError;
       
   827         if (UNLIKELY(!isIdentPart(identChar)))
       
   828             goto returnError;
       
   829 inIdentifierAfterCharacterCheck:
       
   830         record16(identChar);
       
   831 
       
   832         while (isIdentPart(m_current)) {
       
   833             record16(m_current);
       
   834             shift();
       
   835         }
       
   836     } while (UNLIKELY(m_current == '\\'));
       
   837     goto doneIdentifier;
       
   838 
       
   839 inSingleLineComment:
       
   840     while (!isLineTerminator(m_current)) {
       
   841         if (UNLIKELY(m_current == -1))
       
   842             return EOFTOK;
       
   843         shift();
       
   844     }
       
   845     shiftLineTerminator();
       
   846     m_atLineStart = true;
       
   847     m_terminator = true;
       
   848     if (lastTokenWasRestrKeyword())
       
   849         goto doneSemicolon;
       
   850     goto start;
       
   851 
       
   852 inMultiLineComment:
       
   853     while (true) {
       
   854         if (UNLIKELY(m_current == '*')) {
       
   855             shift();
       
   856             if (m_current == '/')
       
   857                 break;
       
   858             if (m_current == '*')
       
   859                 continue;
       
   860         }
       
   861 
       
   862         if (UNLIKELY(m_current == -1))
       
   863             goto returnError;
       
   864 
       
   865         if (isLineTerminator(m_current))
       
   866             shiftLineTerminator();
       
   867         else
       
   868             shift();
       
   869     }
       
   870     shift();
       
   871     m_atLineStart = false;
       
   872     goto start;
       
   873 
       
   874 startNumberWithZeroDigit:
       
   875     shift();
       
   876     if ((m_current | 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
       
   877         shift();
       
   878         goto inHex;
       
   879     }
       
   880     if (m_current == '.') {
       
   881         record8('0');
       
   882         record8('.');
       
   883         shift();
       
   884         goto inNumberAfterDecimalPoint;
       
   885     }
       
   886     if ((m_current | 0x20) == 'e') {
       
   887         record8('0');
       
   888         record8('e');
       
   889         shift();
       
   890         goto inExponentIndicator;
       
   891     }
       
   892     if (isASCIIOctalDigit(m_current))
       
   893         goto inOctal;
       
   894     if (isASCIIDigit(m_current))
       
   895         goto startNumber;
       
   896     lvalp->doubleValue = 0;
       
   897     goto doneNumeric;
       
   898 
       
   899 inNumberAfterDecimalPoint:
       
   900     while (isASCIIDigit(m_current)) {
       
   901         record8(m_current);
       
   902         shift();
       
   903     }
       
   904     if ((m_current | 0x20) == 'e') {
       
   905         record8('e');
       
   906         shift();
       
   907         goto inExponentIndicator;
       
   908     }
       
   909     goto doneNumber;
       
   910 
       
   911 inExponentIndicator:
       
   912     if (m_current == '+' || m_current == '-') {
       
   913         record8(m_current);
       
   914         shift();
       
   915     }
       
   916     if (!isASCIIDigit(m_current))
       
   917         goto returnError;
       
   918     do {
       
   919         record8(m_current);
       
   920         shift();
       
   921     } while (isASCIIDigit(m_current));
       
   922     goto doneNumber;
       
   923 
       
   924 inOctal: {
       
   925     do {
       
   926         record8(m_current);
       
   927         shift();
       
   928     } while (isASCIIOctalDigit(m_current));
       
   929     if (isASCIIDigit(m_current))
       
   930         goto startNumber;
       
   931 
       
   932     double dval = 0;
       
   933 
       
   934     const char* end = m_buffer8.end();
       
   935     for (const char* p = m_buffer8.data(); p < end; ++p) {
       
   936         dval *= 8;
       
   937         dval += *p - '0';
       
   938     }
       
   939     if (dval >= mantissaOverflowLowerBound)
       
   940         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
       
   941 
       
   942     m_buffer8.resize(0);
       
   943 
       
   944     lvalp->doubleValue = dval;
       
   945     goto doneNumeric;
       
   946 }
       
   947 
       
   948 inHex: {
       
   949     do {
       
   950         record8(m_current);
       
   951         shift();
       
   952     } while (isASCIIHexDigit(m_current));
       
   953 
       
   954     double dval = 0;
       
   955 
       
   956     const char* end = m_buffer8.end();
       
   957     for (const char* p = m_buffer8.data(); p < end; ++p) {
       
   958         dval *= 16;
       
   959         dval += toASCIIHexValue(*p);
       
   960     }
       
   961     if (dval >= mantissaOverflowLowerBound)
       
   962         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
       
   963 
       
   964     m_buffer8.resize(0);
       
   965 
       
   966     lvalp->doubleValue = dval;
       
   967     goto doneNumeric;
       
   968 }
       
   969 
       
   970 startNumber:
       
   971     record8(m_current);
       
   972     shift();
       
   973     while (isASCIIDigit(m_current)) {
       
   974         record8(m_current);
       
   975         shift();
       
   976     }
       
   977     if (m_current == '.') {
       
   978         record8('.');
       
   979         shift();
       
   980         goto inNumberAfterDecimalPoint;
       
   981     }
       
   982     if ((m_current | 0x20) == 'e') {
       
   983         record8('e');
       
   984         shift();
       
   985         goto inExponentIndicator;
       
   986     }
       
   987 
       
   988     // Fall through into doneNumber.
       
   989 
       
   990 doneNumber:
       
   991     // Null-terminate string for strtod.
       
   992     m_buffer8.append('\0');
       
   993     lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
       
   994     m_buffer8.resize(0);
       
   995 
       
   996     // Fall through into doneNumeric.
       
   997 
       
   998 doneNumeric:
       
   999     // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
       
  1000     if (UNLIKELY(isIdentStart(m_current)))
       
  1001         goto returnError;
       
  1002 
       
  1003     m_atLineStart = false;
       
  1004     m_delimited = false;
       
  1005     token = NUMBER;
       
  1006     goto returnToken;
       
  1007 
       
  1008 doneSemicolon:
       
  1009     token = SEMICOLON;
       
  1010     m_delimited = true;
       
  1011     goto returnToken;
       
  1012 
       
  1013 doneIdentifier:
       
  1014     m_atLineStart = false;
       
  1015     m_delimited = false;
       
  1016     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
       
  1017     m_buffer16.resize(0);
       
  1018     token = IDENT;
       
  1019     goto returnToken;
       
  1020 
       
  1021 doneIdentifierOrKeyword: {
       
  1022     m_atLineStart = false;
       
  1023     m_delimited = false;
       
  1024     m_buffer16.resize(0);
       
  1025     if (lexType == IdentifyReservedWords) {
       
  1026         const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
       
  1027         token = entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT;
       
  1028     } else
       
  1029         token = IDENT;
       
  1030     // Fall through into returnToken.
       
  1031 }
       
  1032 
       
  1033 returnToken: {
       
  1034     int lineNumber = m_lineNumber;
       
  1035     llocp->line = lineNumber;
       
  1036     llocp->startOffset = startOffset;
       
  1037     llocp->endOffset = currentOffset();
       
  1038     m_lastToken = token;
       
  1039     return token;
       
  1040 }
       
  1041 
       
  1042 returnError:
       
  1043     m_error = true;
       
  1044     return ERRORTOK;
       
  1045 }
       
  1046 
       
  1047 bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
       
  1048 {
       
  1049     ASSERT(m_buffer16.isEmpty());
       
  1050 
       
  1051     bool lastWasEscape = false;
       
  1052     bool inBrackets = false;
       
  1053 
       
  1054     if (patternPrefix) {
       
  1055         ASSERT(!isLineTerminator(patternPrefix));
       
  1056         ASSERT(patternPrefix != '/');
       
  1057         ASSERT(patternPrefix != '[');
       
  1058         record16(patternPrefix);
       
  1059     }
       
  1060 
       
  1061     while (true) {
       
  1062         int current = m_current;
       
  1063 
       
  1064         if (isLineTerminator(current) || current == -1) {
       
  1065             m_buffer16.resize(0);
       
  1066             return false;
       
  1067         }
       
  1068 
       
  1069         shift();
       
  1070 
       
  1071         if (current == '/' && !lastWasEscape && !inBrackets)
       
  1072             break;
       
  1073 
       
  1074         record16(current);
       
  1075 
       
  1076         if (lastWasEscape) {
       
  1077             lastWasEscape = false;
       
  1078             continue;
       
  1079         }
       
  1080 
       
  1081         switch (current) {
       
  1082         case '[':
       
  1083             inBrackets = true;
       
  1084             break;
       
  1085         case ']':
       
  1086             inBrackets = false;
       
  1087             break;
       
  1088         case '\\':
       
  1089             lastWasEscape = true;
       
  1090             break;
       
  1091         }
       
  1092     }
       
  1093 
       
  1094     pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
       
  1095     m_buffer16.resize(0);
       
  1096 
       
  1097     while (isIdentPart(m_current)) {
       
  1098         record16(m_current);
       
  1099         shift();
       
  1100     }
       
  1101 
       
  1102     flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
       
  1103     m_buffer16.resize(0);
       
  1104 
       
  1105     return true;
       
  1106 }
       
  1107 
       
  1108 bool Lexer::skipRegExp()
       
  1109 {
       
  1110     bool lastWasEscape = false;
       
  1111     bool inBrackets = false;
       
  1112 
       
  1113     while (true) {
       
  1114         int current = m_current;
       
  1115 
       
  1116         if (isLineTerminator(current) || current == -1)
       
  1117             return false;
       
  1118 
       
  1119         shift();
       
  1120 
       
  1121         if (current == '/' && !lastWasEscape && !inBrackets)
       
  1122             break;
       
  1123 
       
  1124         if (lastWasEscape) {
       
  1125             lastWasEscape = false;
       
  1126             continue;
       
  1127         }
       
  1128 
       
  1129         switch (current) {
       
  1130         case '[':
       
  1131             inBrackets = true;
       
  1132             break;
       
  1133         case ']':
       
  1134             inBrackets = false;
       
  1135             break;
       
  1136         case '\\':
       
  1137             lastWasEscape = true;
       
  1138             break;
       
  1139         }
       
  1140     }
       
  1141 
       
  1142     while (isIdentPart(m_current))
       
  1143         shift();
       
  1144 
       
  1145     return true;
       
  1146 }
       
  1147 
       
  1148 void Lexer::clear()
       
  1149 {
       
  1150     m_arena = 0;
       
  1151 
       
  1152     Vector<char> newBuffer8;
       
  1153     m_buffer8.swap(newBuffer8);
       
  1154 
       
  1155     Vector<UChar> newBuffer16;
       
  1156     m_buffer16.swap(newBuffer16);
       
  1157 
       
  1158     m_isReparsing = false;
       
  1159 }
       
  1160 
       
  1161 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
       
  1162 {
       
  1163     ASSERT(m_source->provider()->data()[openBrace] == '{');
       
  1164     ASSERT(m_source->provider()->data()[closeBrace] == '}');
       
  1165     return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
       
  1166 }
       
  1167 
       
  1168 } // namespace JSC