FCL/sf/mw/searchsrv: comparison searchengine/util/tsrc/cpixtoolsunittest/src/parseunittest.cpp

equal deleted inserted replaced

--1:000000000000
+:671dee74050a
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: Unit tests for the Cpt::Lex tokenizers (cpixparsetools).
+*
+*/
+#include "cpixparsetools.h"
+#include "itk.h"
+#include <iostream>
+#include <memory>
+using namespace Cpt::Lex;
+using namespace Cpt::Parser;
+using namespace std;
+enum TokenType { // Test-local token ids; the first one takes the value of Cpt::Lex::TOKEN_LAST_RESERVED and the rest count up from it
+	TOKEN_LEFT_BRACKET = Cpt::Lex::TOKEN_LAST_RESERVED,  // 8 per the original note (assumes TOKEN_LAST_RESERVED == 8 -- TODO confirm); bound to "(" by the tests below
+	TOKEN_RIGHT_BRACKET, // bound to ")" by the tests below
+	TOKEN_COMMA, // 10 under the same assumption; bound to "," by the tests below
+	TOKEN_PIPE, // bound to ">" by the tests below
+	TOKEN_SWITCH, // this id and those after it are only referenced by PrintToken in this file
+	TOKEN_CASE,
+	TOKEN_DEFAULT,
+	TOKEN_LEFT_BRACE, // 15 under the same assumption
+	TOKEN_RIGHT_BRACE,
+	TOKEN_COLON,
+	TOKEN_TERMINATOR
+};
+// Writes one token to wcout in the form <kind>('<text>'), where <kind> is a
+// short label chosen from the token's type and <text> is token.text().
+// Types without a label below are reported as "unknown".
+void PrintToken(Cpt::Lex::Token token) {
+	const char* kind;
+	switch (token.type()) {
+		// Types provided by Cpt::Lex.
+		case TOKEN_WS:            kind = "space";    break;
+		case TOKEN_ID:            kind = "id";       break;
+		case TOKEN_LIT:           kind = "lit";      break;
+		case TOKEN_STRLIT:        kind = "str-lit";  break;
+		case TOKEN_REALLIT:       kind = "real-lit"; break;
+		case TOKEN_INTLIT:        kind = "int-lit";  break;
+		// Types declared by this test file (see TokenType).
+		case TOKEN_LEFT_BRACKET:  kind = "lbr";      break;
+		case TOKEN_RIGHT_BRACKET: kind = "rbr";      break;
+		case TOKEN_COMMA:         kind = "comma";    break;
+		case TOKEN_PIPE:          kind = "pipe";     break;
+		case TOKEN_SWITCH:        kind = "sw";       break;
+		case TOKEN_CASE:          kind = "case";     break;
+		case TOKEN_DEFAULT:       kind = "default";  break;
+		case TOKEN_LEFT_BRACE:    kind = "lbc";      break;
+		case TOKEN_RIGHT_BRACE:   kind = "rbc";      break;
+		case TOKEN_COLON:         kind = "cl";       break;
+		case TOKEN_TERMINATOR:    kind = "tr";       break;
+		default:                  kind = "unknown";  break;
+	}
+	// The narrow label is widened by wcout; the delimiters stay wide literals.
+	wcout<<kind<<L"('"<<token.text()<<L"')";
+}
+// Tokenizes inputStr with the tokenizer set built below, passes the token
+// stream through a WhiteSpaceFilter and prints each resulting token with
+// PrintToken, followed by a newline. The TestMgr argument is not used.
+void TestTokenization(Itk::TestMgr  * ,
+		const wchar_t * inputStr)
+{
+	WhitespaceTokenizer ws;
+	IdTokenizer ids;
+	IntLitTokenizer ints;
+	RealLitTokenizer reals;
+	LitTokenizer lits('\'');
+	SymbolTokenizer lb(TOKEN_LEFT_BRACKET, L"(");
+	SymbolTokenizer rb(TOKEN_RIGHT_BRACKET, L")");
+	SymbolTokenizer cm(TOKEN_COMMA, L",");
+	SymbolTokenizer pp(TOKEN_PIPE, L">");
+	// NOTE: ints and reals are listed before lits. lits can itself
+	// recognize strings, ints and reals, but the earlier entries take
+	// precedence, so the test cases can check that integer and real
+	// literals are recognized as such. In the test output "lit" therefore
+	// means a string literal, while "int-lit" and "real-lit" mean integer
+	// and real literals respectively.
+	Tokenizer* tokenizers[] = {
+		&ws, &lb, &rb, &cm, &pp, &ids, &ints, &reals, &lits, 0
+	};
+	MultiTokenizer tokenizer(tokenizers);
+	Tokens source(tokenizer, inputStr);
+	WhiteSpaceFilter tokens(source);
+	while (tokens) PrintToken(tokens++);
+	// The tokens were written through wcout, so the newline goes there too:
+	// narrow output on a standard stream already used for wide output is
+	// not reliable.
+	wcout<<endl;
+}
+// Tokenizes identifiers chained with '>' where some carry bracketed,
+// comma-separated quoted arguments.
+void TestTokenization1(Itk::TestMgr * testMgr)
+{
+	const wchar_t * const input =
+		L"stdtokens>lowercase>stopwords('a', 'an','the')>stem('en')";
+	TestTokenization(testMgr, input);
+}
+// Tokenizes quoted strings mixed with unsigned, '+'-signed and '-'-signed
+// integers.
+void TestTokenization2(Itk::TestMgr * testMgr)
+{
+	const wchar_t * const input = L"'foo' 0 1 -2 'bar' +234 -34";
+	TestTokenization(testMgr, input);
+}
+// Tokenizes quoted strings mixed with real numbers (with and without a
+// leading digit or sign) and one plain integer.
+void TestTokenization3(Itk::TestMgr * testMgr)
+{
+	const wchar_t * const input =
+		L"'hallo' 0.0 .0 .5 -1.0 -.05 45 'bar' +.123 +3.1415";
+	TestTokenization(testMgr, input);
+}
+// Tokenizes quoted literals that contain backslashes, including a backslash
+// directly followed by a quote, by 'a' and by a newline character.
+void TestTokenization4(Itk::TestMgr * testMgr)
+{
+	const wchar_t * const input = L"'\\' ''\\\\' '\\a' '\\\n'";
+	TestTokenization(testMgr, input);
+}
+// Prints the tokens produced for a run of words that equal, start with,
+// end with or merely contain the symbols "for" and "if". The two
+// SymbolTokenizers are listed ahead of the IdTokenizer. The TestMgr
+// argument is not used.
+void TestTokenization5(Itk::TestMgr * )
+{
+	WhitespaceTokenizer ws;
+	IdTokenizer ids;
+	// Ad-hoc type ids for the two symbols; they are not part of TokenType.
+	SymbolTokenizer for_(0xf00, L"for");
+	SymbolTokenizer if_(0xbeef, L"if");
+	Tokenizer* tokenizers[] = {
+		&ws, &for_, &if_, &ids, 0
+	};
+	MultiTokenizer tokenizer(tokenizers);
+	Tokens source(tokenizer,
+		L"fo for fore forth ofor oforo i if ifdom ifer fif fifi forfi fifor");
+	WhiteSpaceFilter tokens(source);
+	while (tokens) PrintToken(tokens++);
+	// The tokens were written through wcout, so the newline goes there too:
+	// narrow output on a standard stream already used for wide output is
+	// not reliable.
+	wcout<<endl;
+}
+// Prints every token of text with PrintToken (no whitespace filter is
+// applied) and reports an exception thrown while iterating on wcout instead
+// of letting it propagate.
+static void PrintTokensReportingErrors(MultiTokenizer& tokenizer,
+		const wchar_t* text)
+{
+	Tokens tokens(tokenizer, text);
+	try {
+		while (tokens) PrintToken(tokens++);
+	} catch (LexException& exc) {
+		// Caught by non-const reference because setContext() is called on
+		// the exception before its message is read.
+		exc.setContext(text);
+		wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl;
+	} catch (const exception& exc) {
+		// Reported through wcout like all other output of this file;
+		// narrow output on a standard stream already used for wide output
+		// is not reliable.
+		wcout<<endl<<L"Exception: "<<exc.what()<<endl;
+	}
+}
+// Runs the tokenizer over two inputs and prints the tokens and any reported
+// error: the first input has a quoted literal with no closing quote, the
+// second contains characters that none of the listed SymbolTokenizers is
+// set up for. The TestMgr argument is not used.
+void TestTokenizationErrors(Itk::TestMgr* )
+{
+	WhitespaceTokenizer ws;
+	IdTokenizer ids;
+	LitTokenizer lits('\'');
+	SymbolTokenizer lb(TOKEN_LEFT_BRACKET, L"(");
+	SymbolTokenizer rb(TOKEN_RIGHT_BRACKET, L")");
+	SymbolTokenizer cm(TOKEN_COMMA, L",");
+	SymbolTokenizer pp(TOKEN_PIPE, L">");
+	Tokenizer* tokenizers[] = {
+		&ws, &lb, &rb, &cm, &pp, &ids, &lits, 0
+	};
+	MultiTokenizer tokenizer(tokenizers);
+	PrintTokensReportingErrors(tokenizer,
+		L"stdtokens>lowercase>stopwords('a', 'an','the)>stem('en')");
+	PrintTokensReportingErrors(tokenizer,
+		L"fas-324we?`213ff3*21(+");
+}
+// Builds the "parsing" suite. Every tokenization test is registered under
+// its name, and the same string is passed again as the third argument of
+// add(). Returns the suite allocated with new; this function does not
+// delete it.
+Itk::TesterBase * CreateParsingTests()
+{
+	using namespace Itk;
+	typedef void (*TokenTestFn)(TestMgr *);
+	struct Entry {
+		const char * name;
+		TokenTestFn  func;
+	};
+	// Registration order is the order of this table.
+	const Entry entries[] = {
+		{ "tokenization1", TestTokenization1 },
+		{ "tokenization2", TestTokenization2 },
+		{ "tokenization3", TestTokenization3 },
+		{ "tokenization4", TestTokenization4 },
+		{ "tokenization5", TestTokenization5 },
+		{ "syntaxerrors",  TestTokenizationErrors }
+	};
+	const int entryCount = sizeof(entries) / sizeof(entries[0]);
+	SuiteTester * suite = new SuiteTester("parsing");
+	for (int i = 0; i < entryCount; ++i) {
+		suite->add(entries[i].name, entries[i].func, entries[i].name);
+	}
+	return suite;
+}

changeset 0	671dee74050a
child 8	6547bf8ca13a