searchengine/util/tsrc/cpixtoolsunittest/src/parseunittest.cpp
changeset 0 671dee74050a
child 8 6547bf8ca13a
equal deleted inserted replaced
-1:000000000000 0:671dee74050a
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 #include "cpixparsetools.h"
       
    18 #include "itk.h"
       
    19 
       
    20 #include <iostream>
       
    21 #include <memory>
       
    22 
       
    23 using namespace Cpt::Lex; 
       
    24 using namespace Cpt::Parser; 
       
    25 using namespace std; 
       
    26 
       
    27 enum TokenType {
       
    28 	TOKEN_LEFT_BRACKET = Cpt::Lex::TOKEN_LAST_RESERVED,  // 8
       
    29 	TOKEN_RIGHT_BRACKET, 
       
    30 	TOKEN_COMMA, // 10
       
    31 	TOKEN_PIPE,
       
    32 	TOKEN_SWITCH,
       
    33 	TOKEN_CASE,
       
    34 	TOKEN_DEFAULT,
       
    35 	TOKEN_LEFT_BRACE, // 15
       
    36 	TOKEN_RIGHT_BRACE,
       
    37 	TOKEN_COLON,
       
    38 	TOKEN_TERMINATOR
       
    39 };
       
    40 
       
    41 void PrintToken(Cpt::Lex::Token token) {
       
    42 	switch (token.type()) {
       
    43 		case TOKEN_WS: wcout<<L"space"; break; 
       
    44 		case TOKEN_ID: wcout<<"id"; break;
       
    45 		case TOKEN_LIT: wcout<<"lit"; break;
       
    46 		case TOKEN_STRLIT: wcout<<"str-lit"; break;
       
    47 		case TOKEN_REALLIT: wcout<<"real-lit"; break;
       
    48 		case TOKEN_INTLIT: wcout<<"int-lit"; break;
       
    49 		case TOKEN_LEFT_BRACKET: wcout<<"lbr"; break;
       
    50 		case TOKEN_RIGHT_BRACKET: wcout<<"rbr"; break;
       
    51 		case TOKEN_COMMA: wcout<<"comma"; break;
       
    52 		case TOKEN_PIPE: wcout<<"pipe"; break;
       
    53 		case TOKEN_SWITCH : wcout<<"sw"; break;
       
    54 		case TOKEN_CASE : wcout<<"case"; break;
       
    55 		case TOKEN_DEFAULT : wcout<<"default"; break;
       
    56 		case TOKEN_LEFT_BRACE : wcout<<"lbc"; break;
       
    57 		case TOKEN_RIGHT_BRACE : wcout<<"rbc"; break;
       
    58 		case TOKEN_COLON : wcout<<"cl"; break;
       
    59 		case TOKEN_TERMINATOR : wcout<<"tr"; break;
       
    60 
       
    61 		default: wcout<<"unknown"; break;
       
    62 	}
       
    63 	wcout<<L"('"<<token.text()<<L"')";  
       
    64 }
       
    65 
       
    66 void TestTokenization(Itk::TestMgr  * ,
       
    67                       const wchar_t * inputStr)
       
    68 {
       
    69 	WhitespaceTokenizer ws; 
       
    70 	IdTokenizer ids; 
       
    71         IntLitTokenizer ints;
       
    72         RealLitTokenizer reals;
       
    73 	LitTokenizer lits('\''); 
       
    74 	SymbolTokenizer lb(TOKEN_LEFT_BRACKET, L"("); 
       
    75 	SymbolTokenizer rb(TOKEN_RIGHT_BRACKET, L")"); 
       
    76 	SymbolTokenizer cm(TOKEN_COMMA, L","); 
       
    77 	SymbolTokenizer pp(TOKEN_PIPE, L">");
       
    78 
       
    79         // NOTE: ints and reals are before lits, so even if lits
       
    80         // itself can recognize strings, ints and reals, the ints and
       
    81         // reals are taking precedence - just for the test cases now
       
    82         // (to check if those types are recognized correctly). So
       
    83         // basically, in test cases, lit will mean string literals,
       
    84         // and int-lit, real-lit will mean integer and real literals,
       
    85         // respectively.
       
    86 	Tokenizer* tokenizers[] = {
       
    87 		&ws, &lb, &rb, &cm, &pp, &ids, &ints, &reals, &lits, 0
       
    88 	};
       
    89 	MultiTokenizer tokenizer(tokenizers);
       
    90 	
       
    91 	Tokens 
       
    92             source(tokenizer, 
       
    93                    inputStr);
       
    94 	WhiteSpaceFilter tokens(source); 
       
    95 	
       
    96 	while (tokens) PrintToken(tokens++); 
       
    97 	cout<<endl;
       
    98 }
       
    99 
       
   100 
       
   101 void TestTokenization1(Itk::TestMgr * testMgr)
       
   102 {
       
   103     TestTokenization(testMgr,
       
   104                      L"stdtokens>lowercase>stopwords('a', 'an','the')>stem('en')");
       
   105 }
       
   106 
       
   107 void TestTokenization2(Itk::TestMgr * testMgr)
       
   108 {
       
   109     TestTokenization(testMgr,
       
   110                      L"'foo' 0 1 -2 'bar' +234 -34");
       
   111 }
       
   112 
       
   113 
       
   114 void TestTokenization3(Itk::TestMgr * testMgr)
       
   115 {
       
   116     TestTokenization(testMgr,
       
   117                      L"'hallo' 0.0 .0 .5 -1.0 -.05 45 'bar' +.123 +3.1415");
       
   118 }
       
   119 
       
   120 
       
   121 void TestTokenization4(Itk::TestMgr * testMgr)
       
   122 {
       
   123     TestTokenization(testMgr,
       
   124                      L"'\\' ''\\\\' '\\a' '\\\n'");
       
   125 }
       
   126 
       
   127 
       
   128 void TestTokenization5(Itk::TestMgr * )
       
   129 {
       
   130     WhitespaceTokenizer 
       
   131         ws; 
       
   132     IdTokenizer 
       
   133         ids; 
       
   134     SymbolTokenizer 
       
   135         for_(0xf00, L"for"); 
       
   136     SymbolTokenizer 
       
   137         if_(0xbeef, L"if"); 
       
   138     Tokenizer* tokenizers[] = {
       
   139         &ws, &for_, &if_, &ids, 0
       
   140     };
       
   141 
       
   142     MultiTokenizer 
       
   143         tokenizer(tokenizers);
       
   144 
       
   145     Tokens 
       
   146         source(tokenizer, 
       
   147                L"fo for fore forth ofor oforo i if ifdom ifer fif fifi forfi fifor"); // test escape in literals
       
   148     WhiteSpaceFilter 
       
   149         tokens(source); 
       
   150 
       
   151     while (tokens) PrintToken(tokens++); 
       
   152     cout<<endl;
       
   153 }
       
   154 
       
   155 void TestTokenizationErrors(Itk::TestMgr* ) 
       
   156 {
       
   157 	WhitespaceTokenizer ws; 
       
   158 	IdTokenizer ids; 
       
   159 	LitTokenizer lits('\''); 
       
   160 	SymbolTokenizer lb(TOKEN_LEFT_BRACKET, L"("); 
       
   161 	SymbolTokenizer rb(TOKEN_RIGHT_BRACKET, L")"); 
       
   162 	SymbolTokenizer cm(TOKEN_COMMA, L","); 
       
   163 	SymbolTokenizer pp(TOKEN_PIPE, L">");
       
   164 	Tokenizer* tokenizers[] = {
       
   165 		&ws, &lb, &rb, &cm, &pp, &ids, &lits, 0
       
   166 	};
       
   167 	MultiTokenizer tokenizer(tokenizers);
       
   168 	const wchar_t* text;
       
   169 	{
       
   170 		Tokens tokens(tokenizer, text = L"stdtokens>lowercase>stopwords('a', 'an','the)>stem('en')");
       
   171 		try {
       
   172 			while (tokens) PrintToken(tokens++); 
       
   173 		} catch (LexException& exc) {
       
   174                     /* OBS
       
   175 			wcout<<endl<<L"LexException: "<<exc.describe(text)<<endl; 
       
   176                     */
       
   177                     exc.setContext(text);
       
   178                     wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl; 
       
   179 		} catch (exception& exc) {
       
   180 			cout<<endl<<"Exception: "<<exc.what()<<endl; 
       
   181 		}
       
   182 	}
       
   183 	{
       
   184 		Tokens tokens(tokenizer, text = L"fas-324we?`213ff3*21(+");
       
   185 		try {
       
   186 			while (tokens) PrintToken(tokens++); 
       
   187 		} catch (LexException& exc) {
       
   188                     /* OBS
       
   189 			wcout<<endl<<L"LexException: "<<exc.describe(text)<<endl; 
       
   190                     */
       
   191                     exc.setContext(text);
       
   192                     wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl; 
       
   193 		} catch (exception& exc) {
       
   194 			cout<<endl<<"Exception: "<<exc.what()<<endl; 
       
   195 		}
       
   196 	}
       
   197 }
       
   198 
       
   199 Itk::TesterBase * CreateParsingTests()
       
   200 {
       
   201     using namespace Itk;
       
   202 
       
   203     SuiteTester
       
   204         * parsingTests = new SuiteTester("parsing");
       
   205    
       
   206 
       
   207     parsingTests->add("tokenization1",
       
   208                       TestTokenization1,
       
   209                       "tokenization1");
       
   210 
       
   211     parsingTests->add("tokenization2",
       
   212                       TestTokenization2,
       
   213                       "tokenization2");
       
   214 
       
   215     parsingTests->add("tokenization3",
       
   216                       TestTokenization3,
       
   217                       "tokenization3");
       
   218 
       
   219     parsingTests->add("tokenization4",
       
   220                       TestTokenization4,
       
   221                       "tokenization4");
       
   222 
       
   223     parsingTests->add("tokenization5",
       
   224                       TestTokenization5,
       
   225                       "tokenization5");
       
   226 
       
   227     parsingTests->add("syntaxerrors",
       
   228                       TestTokenizationErrors,
       
   229                       "syntaxerrors");
       
   230 	    
       
   231     return parsingTests;
       
   232 }
       
   233 
       
   234