|
1 /* |
|
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
|
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
|
4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu) |
|
5 * |
|
6 * This library is free software; you can redistribute it and/or |
|
7 * modify it under the terms of the GNU Library General Public |
|
8 * License as published by the Free Software Foundation; either |
|
9 * version 2 of the License, or (at your option) any later version. |
|
10 * |
|
11 * This library is distributed in the hope that it will be useful, |
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 * Library General Public License for more details. |
|
15 * |
|
16 * You should have received a copy of the GNU Library General Public License |
|
17 * along with this library; see the file COPYING.LIB. If not, write to |
|
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
|
19 * Boston, MA 02110-1301, USA. |
|
20 * |
|
21 */ |
|
22 |
|
23 #ifndef Lexer_h |
|
24 #define Lexer_h |
|
25 |
|
26 #include "JSParser.h" |
|
27 #include "Lookup.h" |
|
28 #include "ParserArena.h" |
|
29 #include "SourceCode.h" |
|
30 #include <wtf/ASCIICType.h> |
|
31 #include <wtf/AlwaysInline.h> |
|
32 #include <wtf/SegmentedVector.h> |
|
33 #include <wtf/Vector.h> |
|
34 #include <wtf/unicode/Unicode.h> |
|
35 |
|
36 namespace JSC { |
|
37 |
|
38 class RegExp; |
|
39 |
|
40 class Lexer : public Noncopyable { |
|
41 public: |
|
42 // Character manipulation functions. |
|
43 static bool isWhiteSpace(int character); |
|
44 static bool isLineTerminator(int character); |
|
45 static unsigned char convertHex(int c1, int c2); |
|
46 static UChar convertUnicode(int c1, int c2, int c3, int c4); |
|
47 |
|
48 // Functions to set up parsing. |
|
49 void setCode(const SourceCode&, ParserArena&); |
|
50 void setIsReparsing() { m_isReparsing = true; } |
|
51 |
|
52 // Functions for the parser itself. |
|
53 enum LexType { IdentifyReservedWords, IgnoreReservedWords }; |
|
54 JSTokenType lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType); |
|
55 int lineNumber() const { return m_lineNumber; } |
|
56 void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; } |
|
57 int lastLineNumber() const { return m_lastLineNumber; } |
|
58 bool prevTerminator() const { return m_terminator; } |
|
59 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine); |
|
60 bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0); |
|
61 bool skipRegExp(); |
|
62 |
|
63 // Functions for use after parsing. |
|
64 bool sawError() const { return m_error; } |
|
65 void clear(); |
|
66 int currentOffset() { return m_code - m_codeStart; } |
|
67 void setOffset(int offset) |
|
68 { |
|
69 m_code = m_codeStart + offset; |
|
70 m_current = *m_code; |
|
71 } |
|
72 |
|
73 private: |
|
74 friend class JSGlobalData; |
|
75 |
|
76 Lexer(JSGlobalData*); |
|
77 ~Lexer(); |
|
78 |
|
79 void record8(int); |
|
80 void record16(int); |
|
81 void record16(UChar); |
|
82 |
|
83 void copyCodeWithoutBOMs(); |
|
84 |
|
85 ALWAYS_INLINE void shift(); |
|
86 ALWAYS_INLINE int peek(int offset); |
|
87 int getUnicodeCharacter(); |
|
88 void shiftLineTerminator(); |
|
89 |
|
90 ALWAYS_INLINE const UChar* currentCharacter() const; |
|
91 ALWAYS_INLINE int currentOffset() const; |
|
92 |
|
93 ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length); |
|
94 |
|
95 ALWAYS_INLINE bool lastTokenWasRestrKeyword() const; |
|
96 |
|
97 ALWAYS_INLINE bool parseString(JSTokenData* lvalp); |
|
98 |
|
99 static const size_t initialReadBufferCapacity = 32; |
|
100 |
|
101 int m_lineNumber; |
|
102 int m_lastLineNumber; |
|
103 |
|
104 Vector<char> m_buffer8; |
|
105 Vector<UChar> m_buffer16; |
|
106 bool m_terminator; |
|
107 bool m_delimited; // encountered delimiter like "'" and "}" on last run |
|
108 int m_lastToken; |
|
109 |
|
110 const SourceCode* m_source; |
|
111 const UChar* m_code; |
|
112 const UChar* m_codeStart; |
|
113 const UChar* m_codeEnd; |
|
114 bool m_isReparsing; |
|
115 bool m_atLineStart; |
|
116 bool m_error; |
|
117 |
|
118 // current and following unicode characters (int to allow for -1 for end-of-file marker) |
|
119 int m_current; |
|
120 |
|
121 IdentifierArena* m_arena; |
|
122 |
|
123 JSGlobalData* m_globalData; |
|
124 |
|
125 const HashTable m_keywordTable; |
|
126 }; |
|
127 |
|
128 inline bool Lexer::isWhiteSpace(int ch) |
|
129 { |
|
130 return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : (WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF); |
|
131 } |
|
132 |
|
133 inline bool Lexer::isLineTerminator(int ch) |
|
134 { |
|
135 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028; |
|
136 } |
|
137 |
|
138 inline unsigned char Lexer::convertHex(int c1, int c2) |
|
139 { |
|
140 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2); |
|
141 } |
|
142 |
|
143 inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) |
|
144 { |
|
145 return (convertHex(c1, c2) << 8) | convertHex(c3, c4); |
|
146 } |
|
147 |
|
148 } // namespace JSC |
|
149 |
|
150 #endif // Lexer_h |