searchengine/cpix/tsrc/cpixunittest/src/analysis.cpp
changeset 0 671dee74050a
child 3 ae3f1779f6da
equal deleted inserted replaced
-1:000000000000 0:671dee74050a
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 #include <wchar.h>
       
    19 #include <stddef.h>
       
    20 
       
    21 #include <iostream>
       
    22 
       
    23 #include "cpixidxdb.h"
       
    24 
       
    25 #include "itk.h"
       
    26 
       
    27 #include "config.h"
       
    28 #include "testutils.h"
       
    29 #include "cpixanalyzer.h"
       
    30 
       
    31 #include "cpixdoc.h"
       
    32 
       
    33 const char * AnalysisTestDocsToIndex[5] = {
       
    34     FILE_TEST_CORPUS_PATH "\\en\\1.txt",
       
    35     FILE_TEST_CORPUS_PATH "\\en\\2.txt",
       
    36     FILE_TEST_CORPUS_PATH "\\en\\3.txt",
       
    37     FILE_TEST_CORPUS_PATH "\\en\\4.txt",
       
    38     NULL
       
    39 };
       
    40 
       
    41 const wchar_t * AnalyzerTestTermsToSearch[5] = {
       
    42 	L"happy",
       
    43 	L"happiness",
       
    44 	L"happening",
       
    45     NULL
       
    46 };
       
    47 
       
    48 
       
    49 void TestAnalyzerParsing(Itk::TestMgr * , const wchar_t* definition) 
       
    50 {
       
    51 	cpix_Result result; 
       
    52 	
       
    53 	printf("Creating analyzer %S\n", definition);
       
    54 	
       
    55 	cpix_Analyzer* analyzer = cpix_Analyzer_create( &result, definition);
       
    56 
       
    57 	if ( cpix_Failed( &result) ) {
       
    58 		printf("Analyzer creation failed with %S\n", result.err_->msg_);
       
    59 		return; 
       
    60 	}
       
    61 	cpix_Analyzer_destroy( analyzer ); 
       
    62 }
       
    63 
       
    64 void TestAnalyzersParsing(Itk::TestMgr * testMgr) 
       
    65 {
       
    66 	TestAnalyzerParsing(testMgr, L"stdtokens>lowercase"); 
       
    67 	TestAnalyzerParsing(testMgr, L"whitespace>lowercase"); 
       
    68 	TestAnalyzerParsing(testMgr, L"letter>lowercase"); 
       
    69 	// special syntax
       
    70 	TestAnalyzerParsing(testMgr, L"stdtokens()>lowercase"); 
       
    71 	TestAnalyzerParsing(testMgr, L"stdtokens>lowercase()"); 
       
    72 	// parameteres
       
    73 	TestAnalyzerParsing(testMgr, L"stdtokens>lowercase>stem(en)"); 
       
    74 	TestAnalyzerParsing(testMgr, L"letter>lowercase>stop(en)"); 
       
    75 	TestAnalyzerParsing(testMgr, L"letter>lowercase>stop('a', 'an', 'the')");
       
    76 
       
    77 	// bad syntaxes
       
    78 	TestAnalyzerParsing(testMgr, L"letter><lowercase" ); 
       
    79 	TestAnalyzerParsing(testMgr, L"38j_d fad23 4?q ca'wRA" ); 
       
    80 	// parsing failures
       
    81 	TestAnalyzerParsing(testMgr, L"letter>>lowercase" ); 
       
    82 	TestAnalyzerParsing(testMgr, L">letter>>lowercase lowercase" ); 
       
    83 	TestAnalyzerParsing(testMgr, L"letter lowercase" ); 
       
    84 }
       
    85 
       
    86 void TestSwitchParsing(Itk::TestMgr * testMgr) 
       
    87 {
       
    88 	// Per field query syntax
       
    89 	TestAnalyzerParsing(testMgr, L"switch {"
       
    90 									 L"case '_docuid':          keyword; "
       
    91 									 L"case '_appclass':        whitespace>lowercase;"
       
    92 									 L"case 'title', 'message': standard>lowercase>stem(en)>stop(en);"
       
    93 									 L"default:                 standard;"
       
    94 								 L"}");
       
    95 	TestAnalyzerParsing(testMgr, L"switch{ case '_qnr': whitespace; default: standard; }>lowercase");
       
    96 	TestAnalyzerParsing(testMgr, L"switch{ default: 	standard; }");
       
    97 	TestAnalyzerParsing(testMgr, L"switch{ case '_qnr': switch{ case '_docuid': keyword; default: whitespace; }; default: standard; }");
       
    98 }
       
    99 
       
   100 void TestAnalyzerUsage(Itk::TestMgr * testMgr, const wchar_t* definition) 
       
   101 {
       
   102 	printf("Indexing and searching with %S\n", definition); 
       
   103 	
       
   104 	cpix_Result
       
   105         result;
       
   106 
       
   107     cpix_IdxDb_dbgScrapAll(&result);
       
   108 
       
   109 	std::auto_ptr<FileIdxUtil> util( new FileIdxUtil ); 
       
   110 	
       
   111 	util->init(); 
       
   112 	
       
   113 	cpix_Analyzer* analyzer = cpix_Analyzer_create( &result, definition );
       
   114 			
       
   115 	if ( cpix_Failed( &result) ) {
       
   116 		printf("Analyzer creation failed with %S\n", result.err_->msg_); 
       
   117 		return; 
       
   118 	}
       
   119 	
       
   120 	//
       
   121 	// Add first few simple documents from english stem corpus
       
   122 	// English test corpus is used, because part of the analyzers contain
       
   123 	// english specific functinality, like stop words and stemming. 
       
   124 	
       
   125 	for (int i = 0; AnalysisTestDocsToIndex[i]; i++) 
       
   126 	{
       
   127 		util->indexFile( AnalysisTestDocsToIndex[i], analyzer, testMgr ); 
       
   128 	}
       
   129 	
       
   130 	//
       
   131 	// Then continue by adding an empty document. It is inserted as 
       
   132 	// a special case.
       
   133 	
       
   134 	cpix_Document* doc = cpix_Document_create( &result, L"empty", "root file text", L"", LTEXTFILE_MIMETYPE );
       
   135 	
       
   136 	cpix_Field field;
       
   137 	cpix_Field_initialize( &field, CONTENTS_FIELD, L"", cpix_STORE_YES | cpix_INDEX_TOKENIZED );
       
   138 	
       
   139 	cpix_Document_add( doc, &field ); 
       
   140 	
       
   141 	cpix_IdxDb_add( util->idxDb(), doc, analyzer );
       
   142 	
       
   143 	cpix_Document_destroy( doc );
       
   144 	
       
   145 	printf("\nIndexed empty item.\n"); 
       
   146 	
       
   147         util->flush();
       
   148 
       
   149 	// 
       
   150 	// Commit searches and print the results
       
   151 	
       
   152 	cpix_QueryParser
       
   153 		* queryParser = cpix_QueryParser_create(&result,
       
   154 												CONTENTS_FIELD,
       
   155 												analyzer );
       
   156 	if (queryParser == NULL)
       
   157 		{
       
   158 			cpix_Analyzer_destroy( analyzer );
       
   159 			ITK_PANIC("Could not create query parser");
       
   160 		}
       
   161 	
       
   162 	for (int i = 0; AnalyzerTestTermsToSearch[i]; i++) 
       
   163 	{
       
   164 		cpix_Query* query = cpix_QueryParser_parse(queryParser,
       
   165 												    AnalyzerTestTermsToSearch[i]);
       
   166 		if (cpix_Failed(queryParser)
       
   167 			|| query == NULL)
       
   168 			{
       
   169 				cpix_Analyzer_destroy(analyzer);
       
   170 				cpix_ClearError(queryParser);
       
   171 				cpix_QueryParser_destroy(queryParser);
       
   172 				ITK_PANIC("Could not parse query string");
       
   173 			}
       
   174 		cpix_Hits
       
   175 			* hits = cpix_IdxDb_search(util->idxDb(),
       
   176 									   query );
       
   177 
       
   178 		cpix_Query_destroy( query ); 
       
   179 
       
   180 		if (cpix_Failed(util->idxDb())) 
       
   181 			{
       
   182 			cpix_Analyzer_destroy(analyzer);
       
   183 			cpix_ClearError(queryParser);
       
   184 			cpix_QueryParser_destroy(queryParser);
       
   185 			ITK_PANIC("Searching index database failed.");
       
   186 			}
       
   187 		else 
       
   188 			{
       
   189 			util->printHits( hits, testMgr ); 
       
   190 			cpix_Hits_destroy( hits ); 
       
   191 			}
       
   192 	}
       
   193 	cpix_QueryParser_destroy(queryParser);
       
   194 	cpix_Analyzer_destroy( analyzer ); 
       
   195 }
       
   196 
       
   197 void TestAnalyzersUsage(Itk::TestMgr * testMgr) 
       
   198 	{
       
   199 	TestAnalyzerUsage(testMgr, L"whitespace" ); 
       
   200 	TestAnalyzerUsage(testMgr, L"letter>lowercase" ); 
       
   201 	TestAnalyzerUsage(testMgr, L"stdtokens>lowercase>stem(en)"); 
       
   202 	TestAnalyzerUsage(testMgr, L"letter>lowercase>stop(en)"); 
       
   203 	TestAnalyzerUsage(testMgr, L"letter>lowercase>stop('a', 'an', 'the')"); 
       
   204 	}
       
   205 
       
   206 
       
   207 Itk::TesterBase * CreateAnalysisWhiteBoxTests();
       
   208 
       
   209 
       
   210 Itk::TesterBase * CreateAnalysisTests()
       
   211 {
       
   212     using namespace Itk;
       
   213 
       
   214     SuiteTester
       
   215         * analysis = new SuiteTester("analysis");
       
   216 
       
   217 
       
   218     analysis->add(CreateAnalysisWhiteBoxTests());
       
   219 
       
   220     analysis->add("parsing",
       
   221                   &TestAnalyzersParsing,
       
   222                   "parsing");
       
   223 
       
   224     analysis->add("switchParsing",
       
   225                   &TestSwitchParsing,
       
   226                   "switchParsing");
       
   227 
       
   228     analysis->add("usage",
       
   229                   &TestAnalyzersUsage,
       
   230                   "usage");
       
   231 
       
   232     // TODO add more
       
   233 
       
   234     return analysis;
       
   235 }