|
1 /* |
|
2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. |
|
3 * |
|
4 * Redistribution and use in source and binary forms, with or without |
|
5 * modification, are permitted provided that the following conditions |
|
6 * are met: |
|
7 * 1. Redistributions of source code must retain the above copyright |
|
8 * notice, this list of conditions and the following disclaimer. |
|
9 * 2. Redistributions in binary form must reproduce the above copyright |
|
10 * notice, this list of conditions and the following disclaimer in the |
|
11 * documentation and/or other materials provided with the distribution. |
|
12 * |
|
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY |
|
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR |
|
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
24 */ |
|
25 |
|
26 #include "config.h" |
|
27 #include "UserContentURLPattern.h" |
|
28 #include "KURL.h" |
|
29 #include <wtf/StdLibExtras.h> |
|
30 |
|
31 namespace WebCore { |
|
32 |
|
33 bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>* whitelist, const Vector<String>* blacklist) |
|
34 { |
|
35 // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist. |
|
36 // If there is no whitelist at all, then all URLs are assumed to be in the whitelist. |
|
37 bool matchesWhitelist = !whitelist || whitelist->isEmpty(); |
|
38 if (!matchesWhitelist) { |
|
39 for (unsigned i = 0; i < whitelist->size(); ++i) { |
|
40 UserContentURLPattern contentPattern(whitelist->at(i)); |
|
41 if (contentPattern.matches(url)) { |
|
42 matchesWhitelist = true; |
|
43 break; |
|
44 } |
|
45 } |
|
46 } |
|
47 |
|
48 bool matchesBlacklist = false; |
|
49 if (blacklist) { |
|
50 for (unsigned i = 0; i < blacklist->size(); ++i) { |
|
51 UserContentURLPattern contentPattern(blacklist->at(i)); |
|
52 if (contentPattern.matches(url)) { |
|
53 matchesBlacklist = true; |
|
54 break; |
|
55 } |
|
56 } |
|
57 } |
|
58 |
|
59 return matchesWhitelist && !matchesBlacklist; |
|
60 } |
|
61 |
|
62 bool UserContentURLPattern::parse(const String& pattern) |
|
63 { |
|
64 DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://")); |
|
65 |
|
66 int schemeEndPos = pattern.find(schemeSeparator); |
|
67 if (schemeEndPos == -1) |
|
68 return false; |
|
69 |
|
70 m_scheme = pattern.left(schemeEndPos); |
|
71 |
|
72 int hostStartPos = schemeEndPos + schemeSeparator.length(); |
|
73 if (hostStartPos >= static_cast<int>(pattern.length())) |
|
74 return false; |
|
75 |
|
76 int pathStartPos = 0; |
|
77 |
|
78 if (equalIgnoringCase(m_scheme, "file")) |
|
79 pathStartPos = hostStartPos; |
|
80 else { |
|
81 int hostEndPos = pattern.find("/", hostStartPos); |
|
82 if (hostEndPos == -1) |
|
83 return false; |
|
84 |
|
85 m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos); |
|
86 m_matchSubdomains = false; |
|
87 |
|
88 if (m_host == "*") { |
|
89 // The pattern can be just '*', which means match all domains. |
|
90 m_host = ""; |
|
91 m_matchSubdomains = true; |
|
92 } else if (m_host.startsWith("*.")) { |
|
93 // The first component can be '*', which means to match all subdomains. |
|
94 m_host = m_host.substring(2); // Length of "*." |
|
95 m_matchSubdomains = true; |
|
96 } |
|
97 |
|
98 // No other '*' can occur in the host. |
|
99 if (m_host.find("*") != -1) |
|
100 return false; |
|
101 |
|
102 pathStartPos = hostEndPos; |
|
103 } |
|
104 |
|
105 m_path = pattern.right(pattern.length() - pathStartPos); |
|
106 |
|
107 return true; |
|
108 } |
|
109 |
|
110 bool UserContentURLPattern::matches(const KURL& test) const |
|
111 { |
|
112 if (m_invalid) |
|
113 return false; |
|
114 |
|
115 if (!equalIgnoringCase(test.protocol(), m_scheme)) |
|
116 return false; |
|
117 |
|
118 if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test)) |
|
119 return false; |
|
120 |
|
121 return matchesPath(test); |
|
122 } |
|
123 |
|
124 bool UserContentURLPattern::matchesHost(const KURL& test) const |
|
125 { |
|
126 const String& host = test.host(); |
|
127 if (equalIgnoringCase(host, m_host)) |
|
128 return true; |
|
129 |
|
130 if (!m_matchSubdomains) |
|
131 return false; |
|
132 |
|
133 // If we're matching subdomains, and we have no host, that means the pattern |
|
134 // was <scheme>://*/<whatever>, so we match anything. |
|
135 if (!m_host.length()) |
|
136 return true; |
|
137 |
|
138 // Check if the domain is a subdomain of our host. |
|
139 if (!host.endsWith(m_host, false)) |
|
140 return false; |
|
141 |
|
142 ASSERT(host.length() > m_host.length()); |
|
143 |
|
144 // Check that the character before the suffix is a period. |
|
145 return host[host.length() - m_host.length() - 1] == '.'; |
|
146 } |
|
147 |
|
148 struct MatchTester |
|
149 { |
|
150 const String m_pattern; |
|
151 unsigned m_patternIndex; |
|
152 |
|
153 const String m_test; |
|
154 unsigned m_testIndex; |
|
155 |
|
156 MatchTester(const String& pattern, const String& test) |
|
157 : m_pattern(pattern) |
|
158 , m_patternIndex(0) |
|
159 , m_test(test) |
|
160 , m_testIndex(0) |
|
161 { |
|
162 } |
|
163 |
|
164 bool testStringFinished() const { return m_testIndex >= m_test.length(); } |
|
165 bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); } |
|
166 |
|
167 void eatWildcard() |
|
168 { |
|
169 while (!patternStringFinished()) { |
|
170 if (m_pattern[m_patternIndex] != '*') |
|
171 return; |
|
172 m_patternIndex++; |
|
173 } |
|
174 } |
|
175 |
|
176 void eatSameChars() |
|
177 { |
|
178 while (!patternStringFinished() && !testStringFinished()) { |
|
179 if (m_pattern[m_patternIndex] == '*') |
|
180 return; |
|
181 if (m_pattern[m_patternIndex] != m_test[m_testIndex]) |
|
182 return; |
|
183 m_patternIndex++; |
|
184 m_testIndex++; |
|
185 } |
|
186 } |
|
187 |
|
188 bool test() |
|
189 { |
|
190 // Eat all the matching chars. |
|
191 eatSameChars(); |
|
192 |
|
193 // If the string is finished, then the pattern must be empty too, or contains |
|
194 // only wildcards. |
|
195 if (testStringFinished()) { |
|
196 eatWildcard(); |
|
197 if (patternStringFinished()) |
|
198 return true; |
|
199 return false; |
|
200 } |
|
201 |
|
202 // Pattern is empty but not string, this is not a match. |
|
203 if (patternStringFinished()) |
|
204 return false; |
|
205 |
|
206 // If we don't encounter a *, then we're hosed. |
|
207 if (m_pattern[m_patternIndex] != '*') |
|
208 return false; |
|
209 |
|
210 while (!testStringFinished()) { |
|
211 MatchTester nextMatch(*this); |
|
212 nextMatch.m_patternIndex++; |
|
213 if (nextMatch.test()) |
|
214 return true; |
|
215 m_testIndex++; |
|
216 } |
|
217 |
|
218 // We reached the end of the string. Let's see if the pattern contains only |
|
219 // wildcards. |
|
220 eatWildcard(); |
|
221 return patternStringFinished(); |
|
222 } |
|
223 }; |
|
224 |
|
225 bool UserContentURLPattern::matchesPath(const KURL& test) const |
|
226 { |
|
227 MatchTester match(m_path, test.path()); |
|
228 return match.test(); |
|
229 } |
|
230 |
|
231 } // namespace WebCore |