author | Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com> |
Mon, 04 Oct 2010 01:32:07 +0300 | |
changeset 2 | 303757a437d3 |
parent 0 | 4f2f89ce4247 |
permissions | -rw-r--r-- |
0
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
1 |
/* |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
2 |
* Copyright (C) 2009 Apple Inc. All rights reserved. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
3 |
* |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
4 |
* Redistribution and use in source and binary forms, with or without |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
5 |
* modification, are permitted provided that the following conditions |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
6 |
* are met: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
7 |
* 1. Redistributions of source code must retain the above copyright |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
8 |
* notice, this list of conditions and the following disclaimer. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
9 |
* 2. Redistributions in binary form must reproduce the above copyright |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
10 |
* notice, this list of conditions and the following disclaimer in the |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
11 |
* documentation and/or other materials provided with the distribution. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
12 |
* |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
13 |
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
14 |
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
15 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
16 |
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
17 |
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
18 |
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
19 |
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
20 |
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
21 |
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
22 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
23 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
24 |
*/ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
25 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
26 |
#include "config.h" |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
27 |
#include "RegexCompiler.h" |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
28 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
29 |
#include "RegexInterpreter.h" |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
30 |
#include "RegexPattern.h" |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
31 |
#include <wtf/Vector.h> |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
32 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
33 |
#if ENABLE(YARR) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
34 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
35 |
using namespace WTF; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
36 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
37 |
namespace JSC { namespace Yarr { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
38 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
39 |
#include "RegExpJitTables.h" |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
40 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
41 |
class CharacterClassConstructor { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
42 |
public: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
43 |
CharacterClassConstructor(bool isCaseInsensitive = false) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
44 |
: m_isCaseInsensitive(isCaseInsensitive) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
45 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
46 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
47 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
48 |
void reset() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
49 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
50 |
m_matches.clear(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
51 |
m_ranges.clear(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
52 |
m_matchesUnicode.clear(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
53 |
m_rangesUnicode.clear(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
54 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
55 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
56 |
void append(const CharacterClass* other) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
57 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
58 |
for (size_t i = 0; i < other->m_matches.size(); ++i) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
59 |
addSorted(m_matches, other->m_matches[i]); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
60 |
for (size_t i = 0; i < other->m_ranges.size(); ++i) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
61 |
addSortedRange(m_ranges, other->m_ranges[i].begin, other->m_ranges[i].end); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
62 |
for (size_t i = 0; i < other->m_matchesUnicode.size(); ++i) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
63 |
addSorted(m_matchesUnicode, other->m_matchesUnicode[i]); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
64 |
for (size_t i = 0; i < other->m_rangesUnicode.size(); ++i) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
65 |
addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
66 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
67 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
68 |
void putChar(UChar ch) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
69 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
70 |
if (ch <= 0x7f) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
71 |
if (m_isCaseInsensitive && isASCIIAlpha(ch)) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
72 |
addSorted(m_matches, toASCIIUpper(ch)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
73 |
addSorted(m_matches, toASCIILower(ch)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
74 |
} else |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
75 |
addSorted(m_matches, ch); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
76 |
} else { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
77 |
UChar upper, lower; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
78 |
if (m_isCaseInsensitive && ((upper = Unicode::toUpper(ch)) != (lower = Unicode::toLower(ch)))) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
79 |
addSorted(m_matchesUnicode, upper); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
80 |
addSorted(m_matchesUnicode, lower); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
81 |
} else |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
82 |
addSorted(m_matchesUnicode, ch); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
83 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
84 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
85 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
86 |
// returns true if this character has another case, and 'ch' is the upper case form. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
87 |
static inline bool isUnicodeUpper(UChar ch) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
88 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
89 |
return ch != Unicode::toLower(ch); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
90 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
91 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
92 |
// returns true if this character has another case, and 'ch' is the lower case form. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
93 |
static inline bool isUnicodeLower(UChar ch) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
94 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
95 |
return ch != Unicode::toUpper(ch); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
96 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
97 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
98 |
void putRange(UChar lo, UChar hi) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
99 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
100 |
if (lo <= 0x7f) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
101 |
char asciiLo = lo; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
102 |
char asciiHi = std::min(hi, (UChar)0x7f); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
103 |
addSortedRange(m_ranges, lo, asciiHi); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
104 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
105 |
if (m_isCaseInsensitive) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
106 |
if ((asciiLo <= 'Z') && (asciiHi >= 'A')) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
107 |
addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A')); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
108 |
if ((asciiLo <= 'z') && (asciiHi >= 'a')) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
109 |
addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a')); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
110 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
111 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
112 |
if (hi >= 0x80) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
113 |
uint32_t unicodeCurr = std::max(lo, (UChar)0x80); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
114 |
addSortedRange(m_rangesUnicode, unicodeCurr, hi); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
115 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
116 |
if (m_isCaseInsensitive) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
117 |
while (unicodeCurr <= hi) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
118 |
// If the upper bound of the range (hi) is 0xffff, the increments to |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
119 |
// unicodeCurr in this loop may take it to 0x10000. This is fine |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
120 |
// (if so we won't re-enter the loop, since the loop condition above |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
121 |
// will definitely fail) - but this does mean we cannot use a UChar |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
122 |
// to represent unicodeCurr, we must use a 32-bit value instead. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
123 |
ASSERT(unicodeCurr <= 0xffff); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
124 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
125 |
if (isUnicodeUpper(unicodeCurr)) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
126 |
UChar lowerCaseRangeBegin = Unicode::toLower(unicodeCurr); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
127 |
UChar lowerCaseRangeEnd = lowerCaseRangeBegin; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
128 |
while ((++unicodeCurr <= hi) && isUnicodeUpper(unicodeCurr) && (Unicode::toLower(unicodeCurr) == (lowerCaseRangeEnd + 1))) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
129 |
lowerCaseRangeEnd++; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
130 |
addSortedRange(m_rangesUnicode, lowerCaseRangeBegin, lowerCaseRangeEnd); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
131 |
} else if (isUnicodeLower(unicodeCurr)) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
132 |
UChar upperCaseRangeBegin = Unicode::toUpper(unicodeCurr); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
133 |
UChar upperCaseRangeEnd = upperCaseRangeBegin; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
134 |
while ((++unicodeCurr <= hi) && isUnicodeLower(unicodeCurr) && (Unicode::toUpper(unicodeCurr) == (upperCaseRangeEnd + 1))) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
135 |
upperCaseRangeEnd++; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
136 |
addSortedRange(m_rangesUnicode, upperCaseRangeBegin, upperCaseRangeEnd); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
137 |
} else |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
138 |
++unicodeCurr; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
139 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
140 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
141 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
142 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
143 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
144 |
CharacterClass* charClass() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
145 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
146 |
CharacterClass* characterClass = new CharacterClass(0); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
147 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
148 |
characterClass->m_matches.append(m_matches); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
149 |
characterClass->m_ranges.append(m_ranges); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
150 |
characterClass->m_matchesUnicode.append(m_matchesUnicode); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
151 |
characterClass->m_rangesUnicode.append(m_rangesUnicode); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
152 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
153 |
reset(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
154 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
155 |
return characterClass; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
156 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
157 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
158 |
private: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
159 |
void addSorted(Vector<UChar>& matches, UChar ch) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
160 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
161 |
unsigned pos = 0; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
162 |
unsigned range = matches.size(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
163 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
164 |
// binary chop, find position to insert char. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
165 |
while (range) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
166 |
unsigned index = range >> 1; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
167 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
168 |
int val = matches[pos+index] - ch; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
169 |
if (!val) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
170 |
return; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
171 |
else if (val > 0) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
172 |
range = index; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
173 |
else { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
174 |
pos += (index+1); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
175 |
range -= (index+1); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
176 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
177 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
178 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
179 |
if (pos == matches.size()) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
180 |
matches.append(ch); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
181 |
else |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
182 |
matches.insert(pos, ch); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
183 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
184 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
185 |
void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
186 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
187 |
unsigned end = ranges.size(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
188 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
189 |
// Simple linear scan - I doubt there are that many ranges anyway... |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
190 |
// feel free to fix this with something faster (eg binary chop). |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
191 |
for (unsigned i = 0; i < end; ++i) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
192 |
// does the new range fall before the current position in the array |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
193 |
if (hi < ranges[i].begin) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
194 |
// optional optimization: concatenate appending ranges? - may not be worthwhile. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
195 |
if (hi == (ranges[i].begin - 1)) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
196 |
ranges[i].begin = lo; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
197 |
return; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
198 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
199 |
ranges.insert(i, CharacterRange(lo, hi)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
200 |
return; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
201 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
202 |
// Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
203 |
// If the new range start at or before the end of the last range, then the overlap (if it starts one after the |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
204 |
// end of the last range they concatenate, which is just as good. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
205 |
if (lo <= (ranges[i].end + 1)) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
206 |
// found an intersect! we'll replace this entry in the array. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
207 |
ranges[i].begin = std::min(ranges[i].begin, lo); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
208 |
ranges[i].end = std::max(ranges[i].end, hi); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
209 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
210 |
// now check if the new range can subsume any subsequent ranges. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
211 |
unsigned next = i+1; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
212 |
// each iteration of the loop we will either remove something from the list, or break the loop. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
213 |
while (next < ranges.size()) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
214 |
if (ranges[next].begin <= (ranges[i].end + 1)) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
215 |
// the next entry now overlaps / concatenates this one. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
216 |
ranges[i].end = std::max(ranges[i].end, ranges[next].end); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
217 |
ranges.remove(next); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
218 |
} else |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
219 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
220 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
221 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
222 |
return; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
223 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
224 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
225 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
226 |
// CharacterRange comes after all existing ranges. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
227 |
ranges.append(CharacterRange(lo, hi)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
228 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
229 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
230 |
bool m_isCaseInsensitive; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
231 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
232 |
Vector<UChar> m_matches; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
233 |
Vector<CharacterRange> m_ranges; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
234 |
Vector<UChar> m_matchesUnicode; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
235 |
Vector<CharacterRange> m_rangesUnicode; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
236 |
}; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
237 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
238 |
class RegexPatternConstructor { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
239 |
public: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
240 |
RegexPatternConstructor(RegexPattern& pattern) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
241 |
: m_pattern(pattern) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
242 |
, m_characterClassConstructor(pattern.m_ignoreCase) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
243 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
244 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
245 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
246 |
~RegexPatternConstructor() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
247 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
248 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
249 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
250 |
void reset() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
251 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
252 |
m_pattern.reset(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
253 |
m_characterClassConstructor.reset(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
254 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
255 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
256 |
void assertionBOL() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
257 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
258 |
m_alternative->m_terms.append(PatternTerm::BOL()); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
259 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
260 |
void assertionEOL() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
261 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
262 |
m_alternative->m_terms.append(PatternTerm::EOL()); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
263 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
264 |
void assertionWordBoundary(bool invert) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
265 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
266 |
m_alternative->m_terms.append(PatternTerm::WordBoundary(invert)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
267 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
268 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
269 |
void atomPatternCharacter(UChar ch) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
270 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
271 |
// We handle case-insensitive checking of unicode characters which do have both |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
272 |
// cases by handling them as if they were defined using a CharacterClass. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
273 |
if (m_pattern.m_ignoreCase && !isASCII(ch) && (Unicode::toUpper(ch) != Unicode::toLower(ch))) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
274 |
atomCharacterClassBegin(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
275 |
atomCharacterClassAtom(ch); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
276 |
atomCharacterClassEnd(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
277 |
} else |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
278 |
m_alternative->m_terms.append(PatternTerm(ch)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
279 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
280 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
281 |
void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
282 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
283 |
switch (classID) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
284 |
case DigitClassID: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
285 |
m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
286 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
287 |
case SpaceClassID: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
288 |
m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
289 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
290 |
case WordClassID: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
291 |
m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
292 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
293 |
case NewlineClassID: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
294 |
m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
295 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
296 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
297 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
298 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
299 |
void atomCharacterClassBegin(bool invert = false) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
300 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
301 |
m_invertCharacterClass = invert; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
302 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
303 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
304 |
void atomCharacterClassAtom(UChar ch) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
305 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
306 |
m_characterClassConstructor.putChar(ch); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
307 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
308 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
309 |
void atomCharacterClassRange(UChar begin, UChar end) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
310 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
311 |
m_characterClassConstructor.putRange(begin, end); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
312 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
313 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
314 |
void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
315 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
316 |
ASSERT(classID != NewlineClassID); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
317 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
318 |
switch (classID) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
319 |
case DigitClassID: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
320 |
m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass()); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
321 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
322 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
323 |
case SpaceClassID: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
324 |
m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass()); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
325 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
326 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
327 |
case WordClassID: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
328 |
m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass()); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
329 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
330 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
331 |
default: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
332 |
ASSERT_NOT_REACHED(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
333 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
334 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
335 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
336 |
void atomCharacterClassEnd() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
337 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
338 |
CharacterClass* newCharacterClass = m_characterClassConstructor.charClass(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
339 |
m_pattern.m_userCharacterClasses.append(newCharacterClass); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
340 |
m_alternative->m_terms.append(PatternTerm(newCharacterClass, m_invertCharacterClass)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
341 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
342 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
343 |
void atomParenthesesSubpatternBegin(bool capture = true) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
344 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
345 |
unsigned subpatternId = m_pattern.m_numSubpatterns + 1; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
346 |
if (capture) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
347 |
m_pattern.m_numSubpatterns++; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
348 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
349 |
PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
350 |
m_pattern.m_disjunctions.append(parenthesesDisjunction); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
351 |
m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
352 |
m_alternative = parenthesesDisjunction->addNewAlternative(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
353 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
354 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
355 |
void atomParentheticalAssertionBegin(bool invert = false) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
356 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
357 |
PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
358 |
m_pattern.m_disjunctions.append(parenthesesDisjunction); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
359 |
m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, invert)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
360 |
m_alternative = parenthesesDisjunction->addNewAlternative(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
361 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
362 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
363 |
void atomParenthesesEnd() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
364 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
365 |
ASSERT(m_alternative->m_parent); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
366 |
ASSERT(m_alternative->m_parent->m_parent); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
367 |
m_alternative = m_alternative->m_parent->m_parent; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
368 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
369 |
m_alternative->lastTerm().parentheses.lastSubpatternId = m_pattern.m_numSubpatterns; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
370 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
371 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
372 |
void atomBackReference(unsigned subpatternId) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
373 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
374 |
ASSERT(subpatternId); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
375 |
m_pattern.m_containsBackreferences = true; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
376 |
m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
377 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
378 |
if (subpatternId > m_pattern.m_numSubpatterns) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
379 |
m_alternative->m_terms.append(PatternTerm::ForwardReference()); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
380 |
return; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
381 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
382 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
383 |
PatternAlternative* currentAlternative = m_alternative; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
384 |
ASSERT(currentAlternative); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
385 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
386 |
// Note to self: if we waited until the AST was baked, we could also remove forwards refs |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
387 |
while ((currentAlternative = currentAlternative->m_parent->m_parent)) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
388 |
PatternTerm& term = currentAlternative->lastTerm(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
389 |
ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
390 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
391 |
if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.subpatternId)) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
392 |
m_alternative->m_terms.append(PatternTerm::ForwardReference()); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
393 |
return; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
394 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
395 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
396 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
397 |
m_alternative->m_terms.append(PatternTerm(subpatternId)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
398 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
399 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
400 |
PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
401 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
402 |
PatternDisjunction* newDisjunction = new PatternDisjunction(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
403 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
404 |
newDisjunction->m_parent = disjunction->m_parent; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
405 |
for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
406 |
PatternAlternative* alternative = disjunction->m_alternatives[alt]; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
407 |
PatternAlternative* newAlternative = newDisjunction->addNewAlternative(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
408 |
for (unsigned i = 0; i < alternative->m_terms.size(); ++i) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
409 |
newAlternative->m_terms.append(copyTerm(alternative->m_terms[i])); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
410 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
411 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
412 |
m_pattern.m_disjunctions.append(newDisjunction); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
413 |
return newDisjunction; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
414 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
415 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
416 |
PatternTerm copyTerm(PatternTerm& term) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
417 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
418 |
if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion)) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
419 |
return PatternTerm(term); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
420 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
421 |
PatternTerm termCopy = term; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
422 |
termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
423 |
return termCopy; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
424 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
425 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
426 |
void quantifyAtom(unsigned min, unsigned max, bool greedy) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
427 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
428 |
ASSERT(min <= max); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
429 |
ASSERT(m_alternative->m_terms.size()); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
430 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
431 |
if (!max) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
432 |
m_alternative->removeLastTerm(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
433 |
return; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
434 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
435 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
436 |
PatternTerm& term = m_alternative->lastTerm(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
437 |
ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
438 |
ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
439 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
440 |
// For any assertion with a zero minimum, not matching is valid and has no effect, |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
441 |
// remove it. Otherwise, we need to match as least once, but there is no point |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
442 |
// matching more than once, so remove the quantifier. It is not entirely clear |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
443 |
// from the spec whether or not this behavior is correct, but I believe this |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
444 |
// matches Firefox. :-/ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
445 |
if (term.type == PatternTerm::TypeParentheticalAssertion) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
446 |
if (!min) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
447 |
m_alternative->removeLastTerm(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
448 |
return; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
449 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
450 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
451 |
if (min == 0) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
452 |
term.quantify(max, greedy ? QuantifierGreedy : QuantifierNonGreedy); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
453 |
else if (min == max) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
454 |
term.quantify(min, QuantifierFixedCount); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
455 |
else { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
456 |
term.quantify(min, QuantifierFixedCount); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
457 |
m_alternative->m_terms.append(copyTerm(term)); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
458 |
// NOTE: this term is interesting from an analysis perspective, in that it can be ignored..... |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
459 |
m_alternative->lastTerm().quantify((max == UINT_MAX) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
460 |
if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
461 |
m_alternative->lastTerm().parentheses.isCopy = true; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
462 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
463 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
464 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
465 |
void disjunction() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
466 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
467 |
m_alternative = m_alternative->m_parent->addNewAlternative(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
468 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
469 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
470 |
void regexBegin() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
471 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
472 |
m_pattern.m_body = new PatternDisjunction(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
473 |
m_alternative = m_pattern.m_body->addNewAlternative(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
474 |
m_pattern.m_disjunctions.append(m_pattern.m_body); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
475 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
476 |
void regexEnd() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
477 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
478 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
479 |
void regexError() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
480 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
481 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
482 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
483 |
unsigned setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
484 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
485 |
alternative->m_hasFixedSize = true; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
486 |
unsigned currentInputPosition = initialInputPosition; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
487 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
488 |
for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
489 |
PatternTerm& term = alternative->m_terms[i]; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
490 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
491 |
switch (term.type) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
492 |
case PatternTerm::TypeAssertionBOL: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
493 |
case PatternTerm::TypeAssertionEOL: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
494 |
case PatternTerm::TypeAssertionWordBoundary: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
495 |
term.inputPosition = currentInputPosition; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
496 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
497 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
498 |
case PatternTerm::TypeBackReference: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
499 |
term.inputPosition = currentInputPosition; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
500 |
term.frameLocation = currentCallFrameSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
501 |
currentCallFrameSize += RegexStackSpaceForBackTrackInfoBackReference; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
502 |
alternative->m_hasFixedSize = false; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
503 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
504 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
505 |
case PatternTerm::TypeForwardReference: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
506 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
507 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
508 |
case PatternTerm::TypePatternCharacter: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
509 |
term.inputPosition = currentInputPosition; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
510 |
if (term.quantityType != QuantifierFixedCount) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
511 |
term.frameLocation = currentCallFrameSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
512 |
currentCallFrameSize += RegexStackSpaceForBackTrackInfoPatternCharacter; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
513 |
alternative->m_hasFixedSize = false; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
514 |
} else |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
515 |
currentInputPosition += term.quantityCount; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
516 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
517 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
518 |
case PatternTerm::TypeCharacterClass: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
519 |
term.inputPosition = currentInputPosition; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
520 |
if (term.quantityType != QuantifierFixedCount) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
521 |
term.frameLocation = currentCallFrameSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
522 |
currentCallFrameSize += RegexStackSpaceForBackTrackInfoCharacterClass; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
523 |
alternative->m_hasFixedSize = false; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
524 |
} else |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
525 |
currentInputPosition += term.quantityCount; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
526 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
527 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
528 |
case PatternTerm::TypeParenthesesSubpattern: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
529 |
// Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
530 |
term.frameLocation = currentCallFrameSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
531 |
if ((term.quantityCount == 1) && !term.parentheses.isCopy) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
532 |
if (term.quantityType == QuantifierFixedCount) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
533 |
currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
534 |
currentInputPosition += term.parentheses.disjunction->m_minimumSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
535 |
} else { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
536 |
currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
537 |
currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
538 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
539 |
term.inputPosition = currentInputPosition; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
540 |
} else { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
541 |
term.inputPosition = currentInputPosition; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
542 |
setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
543 |
currentCallFrameSize += RegexStackSpaceForBackTrackInfoParentheses; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
544 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
545 |
// Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
546 |
alternative->m_hasFixedSize = false; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
547 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
548 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
549 |
case PatternTerm::TypeParentheticalAssertion: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
550 |
term.inputPosition = currentInputPosition; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
551 |
term.frameLocation = currentCallFrameSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
552 |
currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
553 |
break; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
554 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
555 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
556 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
557 |
alternative->m_minimumSize = currentInputPosition - initialInputPosition; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
558 |
return currentCallFrameSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
559 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
560 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
561 |
unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
562 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
563 |
if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1)) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
564 |
initialCallFrameSize += RegexStackSpaceForBackTrackInfoAlternative; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
565 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
566 |
unsigned minimumInputSize = UINT_MAX; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
567 |
unsigned maximumCallFrameSize = 0; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
568 |
bool hasFixedSize = true; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
569 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
570 |
for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
571 |
PatternAlternative* alternative = disjunction->m_alternatives[alt]; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
572 |
unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
573 |
minimumInputSize = min(minimumInputSize, alternative->m_minimumSize); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
574 |
maximumCallFrameSize = max(maximumCallFrameSize, currentAlternativeCallFrameSize); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
575 |
hasFixedSize &= alternative->m_hasFixedSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
576 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
577 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
578 |
ASSERT(minimumInputSize != UINT_MAX); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
579 |
ASSERT(maximumCallFrameSize >= initialCallFrameSize); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
580 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
581 |
disjunction->m_hasFixedSize = hasFixedSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
582 |
disjunction->m_minimumSize = minimumInputSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
583 |
disjunction->m_callFrameSize = maximumCallFrameSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
584 |
return maximumCallFrameSize; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
585 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
586 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
587 |
void setupOffsets() |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
588 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
589 |
setupDisjunctionOffsets(m_pattern.m_body, 0, 0); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
590 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
591 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
592 |
private: |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
593 |
RegexPattern& m_pattern; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
594 |
PatternAlternative* m_alternative; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
595 |
CharacterClassConstructor m_characterClassConstructor; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
596 |
bool m_invertCharacterClass; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
597 |
}; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
598 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
599 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
600 |
const char* compileRegex(const UString& patternString, RegexPattern& pattern) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
601 |
{ |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
602 |
RegexPatternConstructor constructor(pattern); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
603 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
604 |
if (const char* error = parse(constructor, patternString)) |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
605 |
return error; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
606 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
607 |
// If the pattern contains illegal backreferences reset & reparse. |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
608 |
// Quoting Netscape's "What's new in JavaScript 1.2", |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
609 |
// "Note: if the number of left parentheses is less than the number specified |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
610 |
// in \#, the \# is taken as an octal escape as described in the next row." |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
611 |
if (pattern.containsIllegalBackReference()) { |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
612 |
unsigned numSubpatterns = pattern.m_numSubpatterns; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
613 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
614 |
constructor.reset(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
615 |
#if !ASSERT_DISABLED |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
616 |
const char* error = |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
617 |
#endif |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
618 |
parse(constructor, patternString, numSubpatterns); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
619 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
620 |
ASSERT(!error); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
621 |
ASSERT(numSubpatterns == pattern.m_numSubpatterns); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
622 |
} |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
623 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
624 |
constructor.setupOffsets(); |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
625 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
626 |
return 0; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
627 |
}; |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
628 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
629 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
630 |
} } |
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
631 |
|
4f2f89ce4247
Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
632 |
#endif |