WebCore/page/UserContentURLPattern.cpp
changeset 0 4f2f89ce4247
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/WebCore/page/UserContentURLPattern.cpp	Fri Sep 17 09:02:29 2010 +0300
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "UserContentURLPattern.h"
+#include "KURL.h"
+#include <wtf/StdLibExtras.h>
+
+namespace WebCore {
+
+bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>* whitelist, const Vector<String>* blacklist)
+{
+    // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist.
+    // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
+    bool matchesWhitelist = !whitelist || whitelist->isEmpty();
+    if (!matchesWhitelist) {
+        for (unsigned i = 0; i < whitelist->size(); ++i) {
+            UserContentURLPattern contentPattern(whitelist->at(i));
+            if (contentPattern.matches(url)) {
+                matchesWhitelist = true;
+                break;
+            }
+        }
+    }
+
+    bool matchesBlacklist = false;
+    if (blacklist) {
+        for (unsigned i = 0; i < blacklist->size(); ++i) {
+            UserContentURLPattern contentPattern(blacklist->at(i));
+            if (contentPattern.matches(url)) {
+                matchesBlacklist = true;
+                break;
+            }
+        }
+    }
+
+    return matchesWhitelist && !matchesBlacklist;
+}
+
+bool UserContentURLPattern::parse(const String& pattern)
+{
+    DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://"));
+
+    int schemeEndPos = pattern.find(schemeSeparator);
+    if (schemeEndPos == -1)
+        return false;
+
+    m_scheme = pattern.left(schemeEndPos);
+
+    int hostStartPos = schemeEndPos + schemeSeparator.length();
+    if (hostStartPos >= static_cast<int>(pattern.length()))
+        return false;
+
+    int pathStartPos = 0;
+
+    if (equalIgnoringCase(m_scheme, "file"))
+        pathStartPos = hostStartPos;
+    else {
+        int hostEndPos = pattern.find("/", hostStartPos);
+        if (hostEndPos == -1)
+            return false;
+
+        m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
+        m_matchSubdomains = false;
+
+        if (m_host == "*") {
+            // The pattern can be just '*', which means match all domains.
+            m_host = "";
+            m_matchSubdomains = true;
+        } else if (m_host.startsWith("*.")) {
+            // The first component can be '*', which means to match all subdomains.
+            m_host = m_host.substring(2); // Length of "*."
+            m_matchSubdomains = true;
+        }
+
+        // No other '*' can occur in the host.
+        if (m_host.find("*") != -1)
+            return false;
+
+        pathStartPos = hostEndPos;
+    }
+
+    m_path = pattern.right(pattern.length() - pathStartPos);
+
+    return true;
+}
+
+bool UserContentURLPattern::matches(const KURL& test) const
+{
+    if (m_invalid)
+        return false;
+
+    if (!equalIgnoringCase(test.protocol(), m_scheme))
+        return false;
+
+    if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test))
+        return false;
+
+    return matchesPath(test);
+}
+
+bool UserContentURLPattern::matchesHost(const KURL& test) const
+{
+    const String& host = test.host();
+    if (equalIgnoringCase(host, m_host))
+        return true;
+
+    if (!m_matchSubdomains)
+        return false;
+
+    // If we're matching subdomains, and we have no host, that means the pattern
+    // was <scheme>://*/<whatever>, so we match anything.
+    if (!m_host.length())
+        return true;
+
+    // Check if the domain is a subdomain of our host.
+    if (!host.endsWith(m_host, false))
+        return false;
+
+    ASSERT(host.length() > m_host.length());
+
+    // Check that the character before the suffix is a period.
+    return host[host.length() - m_host.length() - 1] == '.';
+}
+
+struct MatchTester
+{
+    const String m_pattern;
+    unsigned m_patternIndex;
+    
+    const String m_test;
+    unsigned m_testIndex;
+    
+    MatchTester(const String& pattern, const String& test)
+    : m_pattern(pattern)
+    , m_patternIndex(0)
+    , m_test(test)
+    , m_testIndex(0)
+    {
+    }
+    
+    bool testStringFinished() const { return m_testIndex >= m_test.length(); }
+    bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }
+
+    void eatWildcard()
+    {
+        while (!patternStringFinished()) {
+            if (m_pattern[m_patternIndex] != '*')
+                return;
+            m_patternIndex++;
+        }
+    }
+    
+    void eatSameChars()
+    {
+        while (!patternStringFinished() && !testStringFinished()) {
+            if (m_pattern[m_patternIndex] == '*')
+                return;
+            if (m_pattern[m_patternIndex] != m_test[m_testIndex])
+                return;
+            m_patternIndex++;
+            m_testIndex++;
+        }
+    }
+
+    bool test()
+    {
+        // Eat all the matching chars.
+        eatSameChars();
+
+        // If the string is finished, then the pattern must be empty too, or contains
+        // only wildcards.
+        if (testStringFinished()) {
+            eatWildcard();
+            if (patternStringFinished())
+                return true;
+            return false;
+        }
+  
+        // Pattern is empty but not string, this is not a match.
+        if (patternStringFinished())
+            return false;
+        
+        // If we don't encounter a *, then we're hosed.
+        if (m_pattern[m_patternIndex] != '*')
+            return false;
+        
+        while (!testStringFinished()) {
+            MatchTester nextMatch(*this);
+            nextMatch.m_patternIndex++;
+            if (nextMatch.test())
+                return true;
+            m_testIndex++;
+        }
+
+        // We reached the end of the string.  Let's see if the pattern contains only
+        // wildcards.
+        eatWildcard();
+        return patternStringFinished();
+    }
+};
+
+bool UserContentURLPattern::matchesPath(const KURL& test) const
+{
+    MatchTester match(m_path, test.path());
+    return match.test();
+}
+
+} // namespace WebCore