|
1 /* |
|
2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 #include "cpixparsetools.h" |
|
18 #include "itk.h" |
|
19 |
|
20 #include <iostream> |
|
21 #include <memory> |
|
22 |
|
23 using namespace Cpt::Lex; |
|
24 using namespace Cpt::Parser; |
|
25 using namespace std; |
|
26 |
|
27 enum TokenType { |
|
28 TOKEN_LEFT_BRACKET = Cpt::Lex::TOKEN_LAST_RESERVED, // 8 |
|
29 TOKEN_RIGHT_BRACKET, |
|
30 TOKEN_COMMA, // 10 |
|
31 TOKEN_PIPE, |
|
32 TOKEN_SWITCH, |
|
33 TOKEN_CASE, |
|
34 TOKEN_DEFAULT, |
|
35 TOKEN_LEFT_BRACE, // 15 |
|
36 TOKEN_RIGHT_BRACE, |
|
37 TOKEN_COLON, |
|
38 TOKEN_TERMINATOR |
|
39 }; |
|
40 |
|
41 void PrintToken(Cpt::Lex::Token token) { |
|
42 switch (token.type()) { |
|
43 case TOKEN_WS: wcout<<L"space"; break; |
|
44 case TOKEN_ID: wcout<<"id"; break; |
|
45 case TOKEN_LIT: wcout<<"lit"; break; |
|
46 case TOKEN_STRLIT: wcout<<"str-lit"; break; |
|
47 case TOKEN_REALLIT: wcout<<"real-lit"; break; |
|
48 case TOKEN_INTLIT: wcout<<"int-lit"; break; |
|
49 case TOKEN_LEFT_BRACKET: wcout<<"lbr"; break; |
|
50 case TOKEN_RIGHT_BRACKET: wcout<<"rbr"; break; |
|
51 case TOKEN_COMMA: wcout<<"comma"; break; |
|
52 case TOKEN_PIPE: wcout<<"pipe"; break; |
|
53 case TOKEN_SWITCH : wcout<<"sw"; break; |
|
54 case TOKEN_CASE : wcout<<"case"; break; |
|
55 case TOKEN_DEFAULT : wcout<<"default"; break; |
|
56 case TOKEN_LEFT_BRACE : wcout<<"lbc"; break; |
|
57 case TOKEN_RIGHT_BRACE : wcout<<"rbc"; break; |
|
58 case TOKEN_COLON : wcout<<"cl"; break; |
|
59 case TOKEN_TERMINATOR : wcout<<"tr"; break; |
|
60 |
|
61 default: wcout<<"unknown"; break; |
|
62 } |
|
63 wcout<<L"('"<<token.text()<<L"')"; |
|
64 } |
|
65 |
|
66 void TestTokenization(Itk::TestMgr * , |
|
67 const wchar_t * inputStr) |
|
68 { |
|
69 WhitespaceTokenizer ws; |
|
70 IdTokenizer ids; |
|
71 IntLitTokenizer ints; |
|
72 RealLitTokenizer reals; |
|
73 LitTokenizer lits('\''); |
|
74 SymbolTokenizer lb(TOKEN_LEFT_BRACKET, L"("); |
|
75 SymbolTokenizer rb(TOKEN_RIGHT_BRACKET, L")"); |
|
76 SymbolTokenizer cm(TOKEN_COMMA, L","); |
|
77 SymbolTokenizer pp(TOKEN_PIPE, L">"); |
|
78 |
|
79 // NOTE: ints and reals are before lits, so even if lits |
|
80 // itself can recognize strings, ints and reals, the ints and |
|
81 // reals are taking precedence - just for the test cases now |
|
82 // (to check if those types are recognized correctly). So |
|
83 // basically, in test cases, lit will mean string literals, |
|
84 // and int-lit, real-lit will mean integer and real literals, |
|
85 // respectively. |
|
86 Tokenizer* tokenizers[] = { |
|
87 &ws, &lb, &rb, &cm, &pp, &ids, &ints, &reals, &lits, 0 |
|
88 }; |
|
89 MultiTokenizer tokenizer(tokenizers); |
|
90 |
|
91 Tokens |
|
92 source(tokenizer, |
|
93 inputStr); |
|
94 WhiteSpaceFilter tokens(source); |
|
95 |
|
96 while (tokens) PrintToken(tokens++); |
|
97 cout<<endl; |
|
98 } |
|
99 |
|
100 |
|
101 void TestTokenization1(Itk::TestMgr * testMgr) |
|
102 { |
|
103 TestTokenization(testMgr, |
|
104 L"stdtokens>lowercase>stopwords('a', 'an','the')>stem('en')"); |
|
105 } |
|
106 |
|
107 void TestTokenization2(Itk::TestMgr * testMgr) |
|
108 { |
|
109 TestTokenization(testMgr, |
|
110 L"'foo' 0 1 -2 'bar' +234 -34"); |
|
111 } |
|
112 |
|
113 |
|
114 void TestTokenization3(Itk::TestMgr * testMgr) |
|
115 { |
|
116 TestTokenization(testMgr, |
|
117 L"'hallo' 0.0 .0 .5 -1.0 -.05 45 'bar' +.123 +3.1415"); |
|
118 } |
|
119 |
|
120 |
|
121 void TestTokenization4(Itk::TestMgr * testMgr) |
|
122 { |
|
123 TestTokenization(testMgr, |
|
124 L"'\\' ''\\\\' '\\a' '\\\n'"); |
|
125 } |
|
126 |
|
127 |
|
128 void TestTokenization5(Itk::TestMgr * ) |
|
129 { |
|
130 WhitespaceTokenizer |
|
131 ws; |
|
132 IdTokenizer |
|
133 ids; |
|
134 SymbolTokenizer |
|
135 for_(0xf00, L"for"); |
|
136 SymbolTokenizer |
|
137 if_(0xbeef, L"if"); |
|
138 Tokenizer* tokenizers[] = { |
|
139 &ws, &for_, &if_, &ids, 0 |
|
140 }; |
|
141 |
|
142 MultiTokenizer |
|
143 tokenizer(tokenizers); |
|
144 |
|
145 Tokens |
|
146 source(tokenizer, |
|
147 L"fo for fore forth ofor oforo i if ifdom ifer fif fifi forfi fifor"); // test escape in literals |
|
148 WhiteSpaceFilter |
|
149 tokens(source); |
|
150 |
|
151 while (tokens) PrintToken(tokens++); |
|
152 cout<<endl; |
|
153 } |
|
154 |
|
155 void TestTokenizationErrors(Itk::TestMgr* ) |
|
156 { |
|
157 WhitespaceTokenizer ws; |
|
158 IdTokenizer ids; |
|
159 LitTokenizer lits('\''); |
|
160 SymbolTokenizer lb(TOKEN_LEFT_BRACKET, L"("); |
|
161 SymbolTokenizer rb(TOKEN_RIGHT_BRACKET, L")"); |
|
162 SymbolTokenizer cm(TOKEN_COMMA, L","); |
|
163 SymbolTokenizer pp(TOKEN_PIPE, L">"); |
|
164 Tokenizer* tokenizers[] = { |
|
165 &ws, &lb, &rb, &cm, &pp, &ids, &lits, 0 |
|
166 }; |
|
167 MultiTokenizer tokenizer(tokenizers); |
|
168 const wchar_t* text; |
|
169 { |
|
170 Tokens tokens(tokenizer, text = L"stdtokens>lowercase>stopwords('a', 'an','the)>stem('en')"); |
|
171 try { |
|
172 while (tokens) PrintToken(tokens++); |
|
173 } catch (LexException& exc) { |
|
174 /* OBS |
|
175 wcout<<endl<<L"LexException: "<<exc.describe(text)<<endl; |
|
176 */ |
|
177 exc.setContext(text); |
|
178 wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl; |
|
179 } catch (exception& exc) { |
|
180 cout<<endl<<"Exception: "<<exc.what()<<endl; |
|
181 } |
|
182 } |
|
183 { |
|
184 Tokens tokens(tokenizer, text = L"fas-324we?`213ff3*21(+"); |
|
185 try { |
|
186 while (tokens) PrintToken(tokens++); |
|
187 } catch (LexException& exc) { |
|
188 /* OBS |
|
189 wcout<<endl<<L"LexException: "<<exc.describe(text)<<endl; |
|
190 */ |
|
191 exc.setContext(text); |
|
192 wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl; |
|
193 } catch (exception& exc) { |
|
194 cout<<endl<<"Exception: "<<exc.what()<<endl; |
|
195 } |
|
196 } |
|
197 } |
|
198 |
|
199 Itk::TesterBase * CreateParsingTests() |
|
200 { |
|
201 using namespace Itk; |
|
202 |
|
203 SuiteTester |
|
204 * parsingTests = new SuiteTester("parsing"); |
|
205 |
|
206 |
|
207 parsingTests->add("tokenization1", |
|
208 TestTokenization1, |
|
209 "tokenization1"); |
|
210 |
|
211 parsingTests->add("tokenization2", |
|
212 TestTokenization2, |
|
213 "tokenization2"); |
|
214 |
|
215 parsingTests->add("tokenization3", |
|
216 TestTokenization3, |
|
217 "tokenization3"); |
|
218 |
|
219 parsingTests->add("tokenization4", |
|
220 TestTokenization4, |
|
221 "tokenization4"); |
|
222 |
|
223 parsingTests->add("tokenization5", |
|
224 TestTokenization5, |
|
225 "tokenization5"); |
|
226 |
|
227 parsingTests->add("syntaxerrors", |
|
228 TestTokenizationErrors, |
|
229 "syntaxerrors"); |
|
230 |
|
231 return parsingTests; |
|
232 } |
|
233 |
|
234 |