WebCore/page/UserContentURLPattern.cpp
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 /*
       
     2  * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
       
     3  *
       
     4  * Redistribution and use in source and binary forms, with or without
       
     5  * modification, are permitted provided that the following conditions
       
     6  * are met:
       
     7  * 1. Redistributions of source code must retain the above copyright
       
     8  *    notice, this list of conditions and the following disclaimer.
       
     9  * 2. Redistributions in binary form must reproduce the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer in the
       
    11  *    documentation and/or other materials provided with the distribution.
       
    12  *
       
    13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
       
    14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
       
    17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
       
    18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
       
    19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
       
    20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
       
    21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
       
    24  */
       
    25 
       
    26 #include "config.h"
       
    27 #include "UserContentURLPattern.h"
       
    28 #include "KURL.h"
       
    29 #include <wtf/StdLibExtras.h>
       
    30 
       
    31 namespace WebCore {
       
    32 
       
    33 bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>* whitelist, const Vector<String>* blacklist)
       
    34 {
       
    35     // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist.
       
    36     // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
       
    37     bool matchesWhitelist = !whitelist || whitelist->isEmpty();
       
    38     if (!matchesWhitelist) {
       
    39         for (unsigned i = 0; i < whitelist->size(); ++i) {
       
    40             UserContentURLPattern contentPattern(whitelist->at(i));
       
    41             if (contentPattern.matches(url)) {
       
    42                 matchesWhitelist = true;
       
    43                 break;
       
    44             }
       
    45         }
       
    46     }
       
    47 
       
    48     bool matchesBlacklist = false;
       
    49     if (blacklist) {
       
    50         for (unsigned i = 0; i < blacklist->size(); ++i) {
       
    51             UserContentURLPattern contentPattern(blacklist->at(i));
       
    52             if (contentPattern.matches(url)) {
       
    53                 matchesBlacklist = true;
       
    54                 break;
       
    55             }
       
    56         }
       
    57     }
       
    58 
       
    59     return matchesWhitelist && !matchesBlacklist;
       
    60 }
       
    61 
       
    62 bool UserContentURLPattern::parse(const String& pattern)
       
    63 {
       
    64     DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://"));
       
    65 
       
    66     int schemeEndPos = pattern.find(schemeSeparator);
       
    67     if (schemeEndPos == -1)
       
    68         return false;
       
    69 
       
    70     m_scheme = pattern.left(schemeEndPos);
       
    71 
       
    72     int hostStartPos = schemeEndPos + schemeSeparator.length();
       
    73     if (hostStartPos >= static_cast<int>(pattern.length()))
       
    74         return false;
       
    75 
       
    76     int pathStartPos = 0;
       
    77 
       
    78     if (equalIgnoringCase(m_scheme, "file"))
       
    79         pathStartPos = hostStartPos;
       
    80     else {
       
    81         int hostEndPos = pattern.find("/", hostStartPos);
       
    82         if (hostEndPos == -1)
       
    83             return false;
       
    84 
       
    85         m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
       
    86         m_matchSubdomains = false;
       
    87 
       
    88         if (m_host == "*") {
       
    89             // The pattern can be just '*', which means match all domains.
       
    90             m_host = "";
       
    91             m_matchSubdomains = true;
       
    92         } else if (m_host.startsWith("*.")) {
       
    93             // The first component can be '*', which means to match all subdomains.
       
    94             m_host = m_host.substring(2); // Length of "*."
       
    95             m_matchSubdomains = true;
       
    96         }
       
    97 
       
    98         // No other '*' can occur in the host.
       
    99         if (m_host.find("*") != -1)
       
   100             return false;
       
   101 
       
   102         pathStartPos = hostEndPos;
       
   103     }
       
   104 
       
   105     m_path = pattern.right(pattern.length() - pathStartPos);
       
   106 
       
   107     return true;
       
   108 }
       
   109 
       
   110 bool UserContentURLPattern::matches(const KURL& test) const
       
   111 {
       
   112     if (m_invalid)
       
   113         return false;
       
   114 
       
   115     if (!equalIgnoringCase(test.protocol(), m_scheme))
       
   116         return false;
       
   117 
       
   118     if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test))
       
   119         return false;
       
   120 
       
   121     return matchesPath(test);
       
   122 }
       
   123 
       
   124 bool UserContentURLPattern::matchesHost(const KURL& test) const
       
   125 {
       
   126     const String& host = test.host();
       
   127     if (equalIgnoringCase(host, m_host))
       
   128         return true;
       
   129 
       
   130     if (!m_matchSubdomains)
       
   131         return false;
       
   132 
       
   133     // If we're matching subdomains, and we have no host, that means the pattern
       
   134     // was <scheme>://*/<whatever>, so we match anything.
       
   135     if (!m_host.length())
       
   136         return true;
       
   137 
       
   138     // Check if the domain is a subdomain of our host.
       
   139     if (!host.endsWith(m_host, false))
       
   140         return false;
       
   141 
       
   142     ASSERT(host.length() > m_host.length());
       
   143 
       
   144     // Check that the character before the suffix is a period.
       
   145     return host[host.length() - m_host.length() - 1] == '.';
       
   146 }
       
   147 
       
   148 struct MatchTester
       
   149 {
       
   150     const String m_pattern;
       
   151     unsigned m_patternIndex;
       
   152     
       
   153     const String m_test;
       
   154     unsigned m_testIndex;
       
   155     
       
   156     MatchTester(const String& pattern, const String& test)
       
   157     : m_pattern(pattern)
       
   158     , m_patternIndex(0)
       
   159     , m_test(test)
       
   160     , m_testIndex(0)
       
   161     {
       
   162     }
       
   163     
       
   164     bool testStringFinished() const { return m_testIndex >= m_test.length(); }
       
   165     bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }
       
   166 
       
   167     void eatWildcard()
       
   168     {
       
   169         while (!patternStringFinished()) {
       
   170             if (m_pattern[m_patternIndex] != '*')
       
   171                 return;
       
   172             m_patternIndex++;
       
   173         }
       
   174     }
       
   175     
       
   176     void eatSameChars()
       
   177     {
       
   178         while (!patternStringFinished() && !testStringFinished()) {
       
   179             if (m_pattern[m_patternIndex] == '*')
       
   180                 return;
       
   181             if (m_pattern[m_patternIndex] != m_test[m_testIndex])
       
   182                 return;
       
   183             m_patternIndex++;
       
   184             m_testIndex++;
       
   185         }
       
   186     }
       
   187 
       
   188     bool test()
       
   189     {
       
   190         // Eat all the matching chars.
       
   191         eatSameChars();
       
   192 
       
   193         // If the string is finished, then the pattern must be empty too, or contains
       
   194         // only wildcards.
       
   195         if (testStringFinished()) {
       
   196             eatWildcard();
       
   197             if (patternStringFinished())
       
   198                 return true;
       
   199             return false;
       
   200         }
       
   201   
       
   202         // Pattern is empty but not string, this is not a match.
       
   203         if (patternStringFinished())
       
   204             return false;
       
   205         
       
   206         // If we don't encounter a *, then we're hosed.
       
   207         if (m_pattern[m_patternIndex] != '*')
       
   208             return false;
       
   209         
       
   210         while (!testStringFinished()) {
       
   211             MatchTester nextMatch(*this);
       
   212             nextMatch.m_patternIndex++;
       
   213             if (nextMatch.test())
       
   214                 return true;
       
   215             m_testIndex++;
       
   216         }
       
   217 
       
   218         // We reached the end of the string.  Let's see if the pattern contains only
       
   219         // wildcards.
       
   220         eatWildcard();
       
   221         return patternStringFinished();
       
   222     }
       
   223 };
       
   224 
       
   225 bool UserContentURLPattern::matchesPath(const KURL& test) const
       
   226 {
       
   227     MatchTester match(m_path, test.path());
       
   228     return match.test();
       
   229 }
       
   230 
       
   231 } // namespace WebCore