|
1 /* |
|
2 * Copyright (C) 2009 Apple Inc. All rights reserved. |
|
3 * |
|
4 * Redistribution and use in source and binary forms, with or without |
|
5 * modification, are permitted provided that the following conditions |
|
6 * are met: |
|
7 * 1. Redistributions of source code must retain the above copyright |
|
8 * notice, this list of conditions and the following disclaimer. |
|
9 * 2. Redistributions in binary form must reproduce the above copyright |
|
10 * notice, this list of conditions and the following disclaimer in the |
|
11 * documentation and/or other materials provided with the distribution. |
|
12 * |
|
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
|
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
|
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
24 */ |
|
25 |
|
26 #include "config.h" |
|
27 #include "LiteralParser.h" |
|
28 |
|
29 #include "JSArray.h" |
|
30 #include "JSString.h" |
|
31 #include "Lexer.h" |
|
32 #include "StringBuilder.h" |
|
33 #include <wtf/ASCIICType.h> |
|
34 #include <wtf/dtoa.h> |
|
35 |
|
36 namespace JSC { |
|
37 |
|
38 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) |
|
39 { |
|
40 while (m_ptr < m_end && isASCIISpace(*m_ptr)) |
|
41 ++m_ptr; |
|
42 |
|
43 ASSERT(m_ptr <= m_end); |
|
44 if (m_ptr >= m_end) { |
|
45 token.type = TokEnd; |
|
46 token.start = token.end = m_ptr; |
|
47 return TokEnd; |
|
48 } |
|
49 token.type = TokError; |
|
50 token.start = m_ptr; |
|
51 switch (*m_ptr) { |
|
52 case '[': |
|
53 token.type = TokLBracket; |
|
54 token.end = ++m_ptr; |
|
55 return TokLBracket; |
|
56 case ']': |
|
57 token.type = TokRBracket; |
|
58 token.end = ++m_ptr; |
|
59 return TokRBracket; |
|
60 case '(': |
|
61 token.type = TokLParen; |
|
62 token.end = ++m_ptr; |
|
63 return TokLBracket; |
|
64 case ')': |
|
65 token.type = TokRParen; |
|
66 token.end = ++m_ptr; |
|
67 return TokRBracket; |
|
68 case '{': |
|
69 token.type = TokLBrace; |
|
70 token.end = ++m_ptr; |
|
71 return TokLBrace; |
|
72 case '}': |
|
73 token.type = TokRBrace; |
|
74 token.end = ++m_ptr; |
|
75 return TokRBrace; |
|
76 case ',': |
|
77 token.type = TokComma; |
|
78 token.end = ++m_ptr; |
|
79 return TokComma; |
|
80 case ':': |
|
81 token.type = TokColon; |
|
82 token.end = ++m_ptr; |
|
83 return TokColon; |
|
84 case '"': |
|
85 if (m_mode == StrictJSON) |
|
86 return lexString<StrictJSON>(token); |
|
87 return lexString<NonStrictJSON>(token); |
|
88 case 't': |
|
89 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') { |
|
90 m_ptr += 4; |
|
91 token.type = TokTrue; |
|
92 token.end = m_ptr; |
|
93 return TokTrue; |
|
94 } |
|
95 break; |
|
96 case 'f': |
|
97 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') { |
|
98 m_ptr += 5; |
|
99 token.type = TokFalse; |
|
100 token.end = m_ptr; |
|
101 return TokFalse; |
|
102 } |
|
103 break; |
|
104 case 'n': |
|
105 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') { |
|
106 m_ptr += 4; |
|
107 token.type = TokNull; |
|
108 token.end = m_ptr; |
|
109 return TokNull; |
|
110 } |
|
111 break; |
|
112 case '-': |
|
113 case '0': |
|
114 case '1': |
|
115 case '2': |
|
116 case '3': |
|
117 case '4': |
|
118 case '5': |
|
119 case '6': |
|
120 case '7': |
|
121 case '8': |
|
122 case '9': |
|
123 return lexNumber(token); |
|
124 } |
|
125 return TokError; |
|
126 } |
|
127 |
|
128 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c) |
|
129 { |
|
130 return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t'; |
|
131 } |
|
132 |
|
133 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions. |
|
134 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) |
|
135 { |
|
136 ++m_ptr; |
|
137 const UChar* runStart; |
|
138 StringBuilder builder; |
|
139 do { |
|
140 runStart = m_ptr; |
|
141 while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr)) |
|
142 ++m_ptr; |
|
143 if (runStart < m_ptr) |
|
144 builder.append(runStart, m_ptr - runStart); |
|
145 if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') { |
|
146 ++m_ptr; |
|
147 if (m_ptr >= m_end) |
|
148 return TokError; |
|
149 switch (*m_ptr) { |
|
150 case '"': |
|
151 builder.append('"'); |
|
152 m_ptr++; |
|
153 break; |
|
154 case '\\': |
|
155 builder.append('\\'); |
|
156 m_ptr++; |
|
157 break; |
|
158 case '/': |
|
159 builder.append('/'); |
|
160 m_ptr++; |
|
161 break; |
|
162 case 'b': |
|
163 builder.append('\b'); |
|
164 m_ptr++; |
|
165 break; |
|
166 case 'f': |
|
167 builder.append('\f'); |
|
168 m_ptr++; |
|
169 break; |
|
170 case 'n': |
|
171 builder.append('\n'); |
|
172 m_ptr++; |
|
173 break; |
|
174 case 'r': |
|
175 builder.append('\r'); |
|
176 m_ptr++; |
|
177 break; |
|
178 case 't': |
|
179 builder.append('\t'); |
|
180 m_ptr++; |
|
181 break; |
|
182 |
|
183 case 'u': |
|
184 if ((m_end - m_ptr) < 5) // uNNNN == 5 characters |
|
185 return TokError; |
|
186 for (int i = 1; i < 5; i++) { |
|
187 if (!isASCIIHexDigit(m_ptr[i])) |
|
188 return TokError; |
|
189 } |
|
190 builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4])); |
|
191 m_ptr += 5; |
|
192 break; |
|
193 |
|
194 default: |
|
195 return TokError; |
|
196 } |
|
197 } |
|
198 } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"'); |
|
199 |
|
200 if (m_ptr >= m_end || *m_ptr != '"') |
|
201 return TokError; |
|
202 |
|
203 token.stringToken = builder.build(); |
|
204 token.type = TokString; |
|
205 token.end = ++m_ptr; |
|
206 return TokString; |
|
207 } |
|
208 |
|
209 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token) |
|
210 { |
|
211 // ES5 and json.org define numbers as |
|
212 // number |
|
213 // int |
|
214 // int frac? exp? |
|
215 // |
|
216 // int |
|
217 // -? 0 |
|
218 // -? digit1-9 digits? |
|
219 // |
|
220 // digits |
|
221 // digit digits? |
|
222 // |
|
223 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)? |
|
224 |
|
225 if (m_ptr < m_end && *m_ptr == '-') // -? |
|
226 ++m_ptr; |
|
227 |
|
228 // (0 | [1-9][0-9]*) |
|
229 if (m_ptr < m_end && *m_ptr == '0') // 0 |
|
230 ++m_ptr; |
|
231 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9] |
|
232 ++m_ptr; |
|
233 // [0-9]* |
|
234 while (m_ptr < m_end && isASCIIDigit(*m_ptr)) |
|
235 ++m_ptr; |
|
236 } else |
|
237 return TokError; |
|
238 |
|
239 // ('.' [0-9]+)? |
|
240 if (m_ptr < m_end && *m_ptr == '.') { |
|
241 ++m_ptr; |
|
242 // [0-9]+ |
|
243 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) |
|
244 return TokError; |
|
245 |
|
246 ++m_ptr; |
|
247 while (m_ptr < m_end && isASCIIDigit(*m_ptr)) |
|
248 ++m_ptr; |
|
249 } |
|
250 |
|
251 // ([eE][+-]? [0-9]+)? |
|
252 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE] |
|
253 ++m_ptr; |
|
254 |
|
255 // [-+]? |
|
256 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+')) |
|
257 ++m_ptr; |
|
258 |
|
259 // [0-9]+ |
|
260 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) |
|
261 return TokError; |
|
262 |
|
263 ++m_ptr; |
|
264 while (m_ptr < m_end && isASCIIDigit(*m_ptr)) |
|
265 ++m_ptr; |
|
266 } |
|
267 |
|
268 token.type = TokNumber; |
|
269 token.end = m_ptr; |
|
270 Vector<char, 64> buffer(token.end - token.start + 1); |
|
271 int i; |
|
272 for (i = 0; i < token.end - token.start; i++) { |
|
273 ASSERT(static_cast<char>(token.start[i]) == token.start[i]); |
|
274 buffer[i] = static_cast<char>(token.start[i]); |
|
275 } |
|
276 buffer[i] = 0; |
|
277 char* end; |
|
278 token.numberToken = WTF::strtod(buffer.data(), &end); |
|
279 ASSERT(buffer.data() + (token.end - token.start) == end); |
|
280 return TokNumber; |
|
281 } |
|
282 |
|
283 JSValue LiteralParser::parse(ParserState initialState) |
|
284 { |
|
285 ParserState state = initialState; |
|
286 MarkedArgumentBuffer objectStack; |
|
287 JSValue lastValue; |
|
288 Vector<ParserState, 16> stateStack; |
|
289 Vector<Identifier, 16> identifierStack; |
|
290 while (1) { |
|
291 switch(state) { |
|
292 startParseArray: |
|
293 case StartParseArray: { |
|
294 JSArray* array = constructEmptyArray(m_exec); |
|
295 objectStack.append(array); |
|
296 // fallthrough |
|
297 } |
|
298 doParseArrayStartExpression: |
|
299 case DoParseArrayStartExpression: { |
|
300 TokenType lastToken = m_lexer.currentToken().type; |
|
301 if (m_lexer.next() == TokRBracket) { |
|
302 if (lastToken == TokComma) |
|
303 return JSValue(); |
|
304 m_lexer.next(); |
|
305 lastValue = objectStack.last(); |
|
306 objectStack.removeLast(); |
|
307 break; |
|
308 } |
|
309 |
|
310 stateStack.append(DoParseArrayEndExpression); |
|
311 goto startParseExpression; |
|
312 } |
|
313 case DoParseArrayEndExpression: { |
|
314 asArray(objectStack.last())->push(m_exec, lastValue); |
|
315 |
|
316 if (m_lexer.currentToken().type == TokComma) |
|
317 goto doParseArrayStartExpression; |
|
318 |
|
319 if (m_lexer.currentToken().type != TokRBracket) |
|
320 return JSValue(); |
|
321 |
|
322 m_lexer.next(); |
|
323 lastValue = objectStack.last(); |
|
324 objectStack.removeLast(); |
|
325 break; |
|
326 } |
|
327 startParseObject: |
|
328 case StartParseObject: { |
|
329 JSObject* object = constructEmptyObject(m_exec); |
|
330 objectStack.append(object); |
|
331 |
|
332 TokenType type = m_lexer.next(); |
|
333 if (type == TokString) { |
|
334 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); |
|
335 |
|
336 // Check for colon |
|
337 if (m_lexer.next() != TokColon) |
|
338 return JSValue(); |
|
339 |
|
340 m_lexer.next(); |
|
341 identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); |
|
342 stateStack.append(DoParseObjectEndExpression); |
|
343 goto startParseExpression; |
|
344 } else if (type != TokRBrace) |
|
345 return JSValue(); |
|
346 m_lexer.next(); |
|
347 lastValue = objectStack.last(); |
|
348 objectStack.removeLast(); |
|
349 break; |
|
350 } |
|
351 doParseObjectStartExpression: |
|
352 case DoParseObjectStartExpression: { |
|
353 TokenType type = m_lexer.next(); |
|
354 if (type != TokString) |
|
355 return JSValue(); |
|
356 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); |
|
357 |
|
358 // Check for colon |
|
359 if (m_lexer.next() != TokColon) |
|
360 return JSValue(); |
|
361 |
|
362 m_lexer.next(); |
|
363 identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); |
|
364 stateStack.append(DoParseObjectEndExpression); |
|
365 goto startParseExpression; |
|
366 } |
|
367 case DoParseObjectEndExpression: |
|
368 { |
|
369 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue); |
|
370 identifierStack.removeLast(); |
|
371 if (m_lexer.currentToken().type == TokComma) |
|
372 goto doParseObjectStartExpression; |
|
373 if (m_lexer.currentToken().type != TokRBrace) |
|
374 return JSValue(); |
|
375 m_lexer.next(); |
|
376 lastValue = objectStack.last(); |
|
377 objectStack.removeLast(); |
|
378 break; |
|
379 } |
|
380 startParseExpression: |
|
381 case StartParseExpression: { |
|
382 switch (m_lexer.currentToken().type) { |
|
383 case TokLBracket: |
|
384 goto startParseArray; |
|
385 case TokLBrace: |
|
386 goto startParseObject; |
|
387 case TokString: { |
|
388 Lexer::LiteralParserToken stringToken = m_lexer.currentToken(); |
|
389 m_lexer.next(); |
|
390 lastValue = jsString(m_exec, stringToken.stringToken); |
|
391 break; |
|
392 } |
|
393 case TokNumber: { |
|
394 Lexer::LiteralParserToken numberToken = m_lexer.currentToken(); |
|
395 m_lexer.next(); |
|
396 lastValue = jsNumber(m_exec, numberToken.numberToken); |
|
397 break; |
|
398 } |
|
399 case TokNull: |
|
400 m_lexer.next(); |
|
401 lastValue = jsNull(); |
|
402 break; |
|
403 |
|
404 case TokTrue: |
|
405 m_lexer.next(); |
|
406 lastValue = jsBoolean(true); |
|
407 break; |
|
408 |
|
409 case TokFalse: |
|
410 m_lexer.next(); |
|
411 lastValue = jsBoolean(false); |
|
412 break; |
|
413 |
|
414 default: |
|
415 // Error |
|
416 return JSValue(); |
|
417 } |
|
418 break; |
|
419 } |
|
420 case StartParseStatement: { |
|
421 switch (m_lexer.currentToken().type) { |
|
422 case TokLBracket: |
|
423 case TokNumber: |
|
424 case TokString: |
|
425 goto startParseExpression; |
|
426 |
|
427 case TokLParen: { |
|
428 m_lexer.next(); |
|
429 stateStack.append(StartParseStatementEndStatement); |
|
430 goto startParseExpression; |
|
431 } |
|
432 default: |
|
433 return JSValue(); |
|
434 } |
|
435 } |
|
436 case StartParseStatementEndStatement: { |
|
437 ASSERT(stateStack.isEmpty()); |
|
438 if (m_lexer.currentToken().type != TokRParen) |
|
439 return JSValue(); |
|
440 if (m_lexer.next() == TokEnd) |
|
441 return lastValue; |
|
442 return JSValue(); |
|
443 } |
|
444 default: |
|
445 ASSERT_NOT_REACHED(); |
|
446 } |
|
447 if (stateStack.isEmpty()) |
|
448 return lastValue; |
|
449 state = stateStack.last(); |
|
450 stateStack.removeLast(); |
|
451 continue; |
|
452 } |
|
453 } |
|
454 |
|
455 } |