JavaScriptCore/parser/Lexer.h
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
       
     3  *  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
       
     4  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
       
     5  *
       
     6  *  This library is free software; you can redistribute it and/or
       
     7  *  modify it under the terms of the GNU Library General Public
       
     8  *  License as published by the Free Software Foundation; either
       
     9  *  version 2 of the License, or (at your option) any later version.
       
    10  *
       
    11  *  This library is distributed in the hope that it will be useful,
       
    12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    14  *  Library General Public License for more details.
       
    15  *
       
    16  *  You should have received a copy of the GNU Library General Public License
       
    17  *  along with this library; see the file COPYING.LIB.  If not, write to
       
    18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
       
    19  *  Boston, MA 02110-1301, USA.
       
    20  *
       
    21  */
       
    22 
       
    23 #ifndef Lexer_h
       
    24 #define Lexer_h
       
    25 
       
    26 #include "JSParser.h"
       
    27 #include "Lookup.h"
       
    28 #include "ParserArena.h"
       
    29 #include "SourceCode.h"
       
    30 #include <wtf/ASCIICType.h>
       
    31 #include <wtf/AlwaysInline.h>
       
    32 #include <wtf/SegmentedVector.h>
       
    33 #include <wtf/Vector.h>
       
    34 #include <wtf/unicode/Unicode.h>
       
    35 
       
    36 namespace JSC {
       
    37 
       
    38     class RegExp;
       
    39 
       
    40     class Lexer : public Noncopyable {
       
    41     public:
       
    42         // Character manipulation functions.
       
    43         static bool isWhiteSpace(int character);
       
    44         static bool isLineTerminator(int character);
       
    45         static unsigned char convertHex(int c1, int c2);
       
    46         static UChar convertUnicode(int c1, int c2, int c3, int c4);
       
    47 
       
    48         // Functions to set up parsing.
       
    49         void setCode(const SourceCode&, ParserArena&);
       
    50         void setIsReparsing() { m_isReparsing = true; }
       
    51 
       
    52         // Functions for the parser itself.
       
    53         enum LexType { IdentifyReservedWords, IgnoreReservedWords };
       
    54         JSTokenType lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType);
       
    55         int lineNumber() const { return m_lineNumber; }
       
    56         void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
       
    57         int lastLineNumber() const { return m_lastLineNumber; }
       
    58         bool prevTerminator() const { return m_terminator; }
       
    59         SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
       
    60         bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
       
    61         bool skipRegExp();
       
    62 
       
    63         // Functions for use after parsing.
       
    64         bool sawError() const { return m_error; }
       
    65         void clear();
       
    66         int currentOffset() { return m_code - m_codeStart; }
       
    67         void setOffset(int offset)
       
    68         {
       
    69             m_code = m_codeStart + offset;
       
    70             m_current = *m_code;
       
    71         }
       
    72 
       
    73     private:
       
    74         friend class JSGlobalData;
       
    75 
       
    76         Lexer(JSGlobalData*);
       
    77         ~Lexer();
       
    78 
       
    79         void record8(int);
       
    80         void record16(int);
       
    81         void record16(UChar);
       
    82 
       
    83         void copyCodeWithoutBOMs();
       
    84 
       
    85         ALWAYS_INLINE void shift();
       
    86         ALWAYS_INLINE int peek(int offset);
       
    87         int getUnicodeCharacter();
       
    88         void shiftLineTerminator();
       
    89 
       
    90         ALWAYS_INLINE const UChar* currentCharacter() const;
       
    91         ALWAYS_INLINE int currentOffset() const;
       
    92 
       
    93         ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
       
    94 
       
    95         ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
       
    96 
       
    97         ALWAYS_INLINE bool parseString(JSTokenData* lvalp);
       
    98 
       
    99         static const size_t initialReadBufferCapacity = 32;
       
   100 
       
   101         int m_lineNumber;
       
   102         int m_lastLineNumber;
       
   103 
       
   104         Vector<char> m_buffer8;
       
   105         Vector<UChar> m_buffer16;
       
   106         bool m_terminator;
       
   107         bool m_delimited; // encountered delimiter like "'" and "}" on last run
       
   108         int m_lastToken;
       
   109 
       
   110         const SourceCode* m_source;
       
   111         const UChar* m_code;
       
   112         const UChar* m_codeStart;
       
   113         const UChar* m_codeEnd;
       
   114         bool m_isReparsing;
       
   115         bool m_atLineStart;
       
   116         bool m_error;
       
   117 
       
   118         // current and following unicode characters (int to allow for -1 for end-of-file marker)
       
   119         int m_current;
       
   120 
       
   121         IdentifierArena* m_arena;
       
   122 
       
   123         JSGlobalData* m_globalData;
       
   124 
       
   125         const HashTable m_keywordTable;
       
   126     };
       
   127 
       
   128     inline bool Lexer::isWhiteSpace(int ch)
       
   129     {
       
   130         return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : (WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF);
       
   131     }
       
   132 
       
   133     inline bool Lexer::isLineTerminator(int ch)
       
   134     {
       
   135         return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
       
   136     }
       
   137 
       
   138     inline unsigned char Lexer::convertHex(int c1, int c2)
       
   139     {
       
   140         return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
       
   141     }
       
   142 
       
   143     inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
       
   144     {
       
   145         return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
       
   146     }
       
   147 
       
   148 } // namespace JSC
       
   149 
       
   150 #endif // Lexer_h