|
/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/
|
17 |
|
18 #include <wchar.h> |
|
19 #include <stddef.h> |
|
20 |
|
21 #include <iostream> |
|
22 |
|
23 #include "cpixidxdb.h" |
|
24 |
|
25 #include "itk.h" |
|
26 |
|
27 #include "config.h" |
|
28 #include "testutils.h" |
|
29 #include "cpixanalyzer.h" |
|
30 |
|
31 #include "cpixdoc.h" |
|
32 |
|
33 const char * AnalysisTestDocsToIndex[5] = { |
|
34 FILE_TEST_CORPUS_PATH "\\en\\1.txt", |
|
35 FILE_TEST_CORPUS_PATH "\\en\\2.txt", |
|
36 FILE_TEST_CORPUS_PATH "\\en\\3.txt", |
|
37 FILE_TEST_CORPUS_PATH "\\en\\4.txt", |
|
38 NULL |
|
39 }; |
|
40 |
|
// Terms searched for after indexing. The list is terminated by NULL; the
// array has five slots but only four initializers, so the last slot is
// implicitly null as well.
const wchar_t * AnalyzerTestTermsToSearch[5] = {
    L"happy",
    L"happiness",
    L"happening",
    NULL
};
|
47 |
|
48 |
|
49 void TestAnalyzerParsing(Itk::TestMgr * , const wchar_t* definition) |
|
50 { |
|
51 cpix_Result result; |
|
52 |
|
53 printf("Creating analyzer %S\n", definition); |
|
54 |
|
55 cpix_Analyzer* analyzer = cpix_Analyzer_create( &result, definition); |
|
56 |
|
57 if ( cpix_Failed( &result) ) { |
|
58 printf("Analyzer creation failed with %S\n", result.err_->msg_); |
|
59 return; |
|
60 } |
|
61 cpix_Analyzer_destroy( analyzer ); |
|
62 } |
|
63 |
|
64 void TestAnalyzersParsing(Itk::TestMgr * testMgr) |
|
65 { |
|
66 TestAnalyzerParsing(testMgr, L"stdtokens>lowercase"); |
|
67 TestAnalyzerParsing(testMgr, L"whitespace>lowercase"); |
|
68 TestAnalyzerParsing(testMgr, L"letter>lowercase"); |
|
69 // special syntax |
|
70 TestAnalyzerParsing(testMgr, L"stdtokens()>lowercase"); |
|
71 TestAnalyzerParsing(testMgr, L"stdtokens>lowercase()"); |
|
72 // parameteres |
|
73 TestAnalyzerParsing(testMgr, L"stdtokens>lowercase>stem(en)"); |
|
74 TestAnalyzerParsing(testMgr, L"letter>lowercase>stop(en)"); |
|
75 TestAnalyzerParsing(testMgr, L"letter>lowercase>stop('a', 'an', 'the')"); |
|
76 |
|
77 // bad syntaxes |
|
78 TestAnalyzerParsing(testMgr, L"letter><lowercase" ); |
|
79 TestAnalyzerParsing(testMgr, L"38j_d fad23 4?q ca'wRA" ); |
|
80 // parsing failures |
|
81 TestAnalyzerParsing(testMgr, L"letter>>lowercase" ); |
|
82 TestAnalyzerParsing(testMgr, L">letter>>lowercase lowercase" ); |
|
83 TestAnalyzerParsing(testMgr, L"letter lowercase" ); |
|
84 } |
|
85 |
|
86 void TestSwitchParsing(Itk::TestMgr * testMgr) |
|
87 { |
|
88 // Per field query syntax |
|
89 TestAnalyzerParsing(testMgr, L"switch {" |
|
90 L"case '_docuid': keyword; " |
|
91 L"case '_appclass': whitespace>lowercase;" |
|
92 L"case 'title', 'message': standard>lowercase>stem(en)>stop(en);" |
|
93 L"default: standard;" |
|
94 L"}"); |
|
95 TestAnalyzerParsing(testMgr, L"switch{ case '_qnr': whitespace; default: standard; }>lowercase"); |
|
96 TestAnalyzerParsing(testMgr, L"switch{ default: standard; }"); |
|
97 TestAnalyzerParsing(testMgr, L"switch{ case '_qnr': switch{ case '_docuid': keyword; default: whitespace; }; default: standard; }"); |
|
98 } |
|
99 |
|
100 void TestAnalyzerUsage(Itk::TestMgr * testMgr, const wchar_t* definition) |
|
101 { |
|
102 printf("Indexing and searching with %S\n", definition); |
|
103 |
|
104 cpix_Result |
|
105 result; |
|
106 |
|
107 cpix_IdxDb_dbgScrapAll(&result); |
|
108 |
|
109 std::auto_ptr<FileIdxUtil> util( new FileIdxUtil ); |
|
110 |
|
111 util->init(); |
|
112 |
|
113 cpix_Analyzer* analyzer = cpix_Analyzer_create( &result, definition ); |
|
114 |
|
115 if ( cpix_Failed( &result) ) { |
|
116 printf("Analyzer creation failed with %S\n", result.err_->msg_); |
|
117 return; |
|
118 } |
|
119 |
|
120 // |
|
121 // Add first few simple documents from english stem corpus |
|
122 // English test corpus is used, because part of the analyzers contain |
|
123 // english specific functinality, like stop words and stemming. |
|
124 |
|
125 for (int i = 0; AnalysisTestDocsToIndex[i]; i++) |
|
126 { |
|
127 util->indexFile( AnalysisTestDocsToIndex[i], analyzer, testMgr ); |
|
128 } |
|
129 |
|
130 // |
|
131 // Then continue by adding an empty document. It is inserted as |
|
132 // a special case. |
|
133 |
|
134 cpix_Document* doc = cpix_Document_create( &result, L"empty", "root file text", L"", LTEXTFILE_MIMETYPE ); |
|
135 |
|
136 cpix_Field field; |
|
137 cpix_Field_initialize( &field, CONTENTS_FIELD, L"", cpix_STORE_YES | cpix_INDEX_TOKENIZED ); |
|
138 |
|
139 cpix_Document_add( doc, &field ); |
|
140 |
|
141 cpix_IdxDb_add( util->idxDb(), doc, analyzer ); |
|
142 |
|
143 cpix_Document_destroy( doc ); |
|
144 |
|
145 printf("\nIndexed empty item.\n"); |
|
146 |
|
147 util->flush(); |
|
148 |
|
149 // |
|
150 // Commit searches and print the results |
|
151 |
|
152 cpix_QueryParser |
|
153 * queryParser = cpix_QueryParser_create(&result, |
|
154 CONTENTS_FIELD, |
|
155 analyzer ); |
|
156 if (queryParser == NULL) |
|
157 { |
|
158 cpix_Analyzer_destroy( analyzer ); |
|
159 ITK_PANIC("Could not create query parser"); |
|
160 } |
|
161 |
|
162 for (int i = 0; AnalyzerTestTermsToSearch[i]; i++) |
|
163 { |
|
164 cpix_Query* query = cpix_QueryParser_parse(queryParser, |
|
165 AnalyzerTestTermsToSearch[i]); |
|
166 if (cpix_Failed(queryParser) |
|
167 || query == NULL) |
|
168 { |
|
169 cpix_Analyzer_destroy(analyzer); |
|
170 cpix_ClearError(queryParser); |
|
171 cpix_QueryParser_destroy(queryParser); |
|
172 ITK_PANIC("Could not parse query string"); |
|
173 } |
|
174 cpix_Hits |
|
175 * hits = cpix_IdxDb_search(util->idxDb(), |
|
176 query ); |
|
177 |
|
178 cpix_Query_destroy( query ); |
|
179 |
|
180 if (cpix_Failed(util->idxDb())) |
|
181 { |
|
182 cpix_Analyzer_destroy(analyzer); |
|
183 cpix_ClearError(queryParser); |
|
184 cpix_QueryParser_destroy(queryParser); |
|
185 ITK_PANIC("Searching index database failed."); |
|
186 } |
|
187 else |
|
188 { |
|
189 util->printHits( hits, testMgr ); |
|
190 cpix_Hits_destroy( hits ); |
|
191 } |
|
192 } |
|
193 cpix_QueryParser_destroy(queryParser); |
|
194 cpix_Analyzer_destroy( analyzer ); |
|
195 } |
|
196 |
|
197 void TestAnalyzersUsage(Itk::TestMgr * testMgr) |
|
198 { |
|
199 TestAnalyzerUsage(testMgr, L"whitespace" ); |
|
200 TestAnalyzerUsage(testMgr, L"letter>lowercase" ); |
|
201 TestAnalyzerUsage(testMgr, L"stdtokens>lowercase>stem(en)"); |
|
202 TestAnalyzerUsage(testMgr, L"letter>lowercase>stop(en)"); |
|
203 TestAnalyzerUsage(testMgr, L"letter>lowercase>stop('a', 'an', 'the')"); |
|
204 } |
|
205 |
|
206 |
|
207 Itk::TesterBase * CreateAnalysisWhiteBoxTests(); |
|
208 |
|
209 |
|
210 Itk::TesterBase * CreateAnalysisTests() |
|
211 { |
|
212 using namespace Itk; |
|
213 |
|
214 SuiteTester |
|
215 * analysis = new SuiteTester("analysis"); |
|
216 |
|
217 |
|
218 analysis->add(CreateAnalysisWhiteBoxTests()); |
|
219 |
|
220 analysis->add("parsing", |
|
221 &TestAnalyzersParsing, |
|
222 "parsing"); |
|
223 |
|
224 analysis->add("switchParsing", |
|
225 &TestSwitchParsing, |
|
226 "switchParsing"); |
|
227 |
|
228 analysis->add("usage", |
|
229 &TestAnalyzersUsage, |
|
230 "usage"); |
|
231 |
|
232 // TODO add more |
|
233 |
|
234 return analysis; |
|
235 } |