symbian-qemu-0.9.1-12/python-2.6.1/Lib/test/test_robotparser.py
author Gareth Stockwell <gareth.stockwell@accenture.com>
Wed, 22 Sep 2010 15:40:40 +0100
branchgraphics-phase-3
changeset 111 345f1c88c950
parent 1 2fb8b9db1c86
permissions -rw-r--r--
Fixes to syborg-graphicswrapper.vcproj These changes allow syborg-graphicswrapper to link against the hostthreadadapter and khronosapiwrapper libraries built by the graphics.simulator component. The .vcproj file uses relative paths, which requires that the following three packages are laid out as follows: os/ graphics adapt/ graphics.simulator qemu
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     1
import unittest, StringIO, robotparser
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     2
from test import test_support
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     3
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     4
class RobotTestCase(unittest.TestCase):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     5
    def __init__(self, index, parser, url, good, agent):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     6
        unittest.TestCase.__init__(self)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     7
        if good:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     8
            self.str = "RobotTest(%d, good, %s)" % (index, url)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     9
        else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    10
            self.str = "RobotTest(%d, bad, %s)" % (index, url)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    11
        self.parser = parser
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    12
        self.url = url
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    13
        self.good = good
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    14
        self.agent = agent
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    15
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    16
    def runTest(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    17
        if isinstance(self.url, tuple):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    18
            agent, url = self.url
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    19
        else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    20
            url = self.url
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    21
            agent = self.agent
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    22
        if self.good:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    23
            self.failUnless(self.parser.can_fetch(agent, url))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    24
        else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    25
            self.failIf(self.parser.can_fetch(agent, url))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    26
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    27
    def __str__(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    28
        return self.str
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    29
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    30
tests = unittest.TestSuite()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    31
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    32
def RobotTest(index, robots_txt, good_urls, bad_urls,
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    33
              agent="test_robotparser"):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    34
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    35
    lines = StringIO.StringIO(robots_txt).readlines()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    36
    parser = robotparser.RobotFileParser()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    37
    parser.parse(lines)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    38
    for url in good_urls:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    39
        tests.addTest(RobotTestCase(index, parser, url, 1, agent))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    40
    for url in bad_urls:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    41
        tests.addTest(RobotTestCase(index, parser, url, 0, agent))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    42
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    43
# Examples from http://www.robotstxt.org/wc/norobots.html (fetched 2002)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    44
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    45
# 1.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    46
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    47
User-agent: *
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    48
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    49
Disallow: /tmp/ # these will soon disappear
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    50
Disallow: /foo.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    51
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    52
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    53
good = ['/','/test.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    54
bad = ['/cyberworld/map/index.html','/tmp/xxx','/foo.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    55
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    56
RobotTest(1, doc, good, bad)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    57
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    58
# 2.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    59
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    60
# robots.txt for http://www.example.com/
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    61
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    62
User-agent: *
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    63
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    64
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    65
# Cybermapper knows where to go.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    66
User-agent: cybermapper
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    67
Disallow:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    68
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    69
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    70
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    71
good = ['/','/test.html',('cybermapper','/cyberworld/map/index.html')]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    72
bad = ['/cyberworld/map/index.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    73
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    74
RobotTest(2, doc, good, bad)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    75
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    76
# 3.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    77
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    78
# go away
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    79
User-agent: *
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    80
Disallow: /
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    81
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    82
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    83
good = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    84
bad = ['/cyberworld/map/index.html','/','/tmp/']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    85
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    86
RobotTest(3, doc, good, bad)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    87
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    88
# Examples from http://www.robotstxt.org/wc/norobots-rfc.html (fetched 2002)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    89
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    90
# 4.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    91
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    92
User-agent: figtree
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    93
Disallow: /tmp
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    94
Disallow: /a%3cd.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    95
Disallow: /a%2fb.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    96
Disallow: /%7ejoe/index.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    97
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    98
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    99
good = [] # XFAIL '/a/b.html'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   100
bad = ['/tmp','/tmp.html','/tmp/a.html',
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   101
       '/a%3cd.html','/a%3Cd.html','/a%2fb.html',
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   102
       '/~joe/index.html'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   103
       ]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   104
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   105
RobotTest(4, doc, good, bad, 'figtree')
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   106
RobotTest(5, doc, good, bad, 'FigTree Robot libwww-perl/5.04')
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   107
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   108
# 6.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   109
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   110
User-agent: *
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   111
Disallow: /tmp/
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   112
Disallow: /a%3Cd.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   113
Disallow: /a/b.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   114
Disallow: /%7ejoe/index.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   115
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   116
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   117
good = ['/tmp',] # XFAIL: '/a%2fb.html'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   118
bad = ['/tmp/','/tmp/a.html',
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   119
       '/a%3cd.html','/a%3Cd.html',"/a/b.html",
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   120
       '/%7Ejoe/index.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   121
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   122
RobotTest(6, doc, good, bad)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   123
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   124
# From bug report #523041
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   125
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   126
# 7.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   127
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   128
User-Agent: *
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   129
Disallow: /.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   130
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   131
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   132
good = ['/foo.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   133
bad = [] # Bug report says "/" should be denied, but that is not in the RFC
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   134
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   135
RobotTest(7, doc, good, bad)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   136
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   137
# From Google: http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=40364
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   138
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   139
# 8.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   140
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   141
User-agent: Googlebot
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   142
Allow: /folder1/myfile.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   143
Disallow: /folder1/
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   144
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   145
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   146
good = ['/folder1/myfile.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   147
bad = ['/folder1/anotherfile.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   148
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   149
RobotTest(8, doc, good, bad, agent="Googlebot")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   150
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   151
# 9.  This file is incorrect because "Googlebot" is a substring of
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   152
#     "Googlebot-Mobile", so test 10 works just like test 9.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   153
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   154
User-agent: Googlebot
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   155
Disallow: /
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   156
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   157
User-agent: Googlebot-Mobile
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   158
Allow: /
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   159
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   160
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   161
good = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   162
bad = ['/something.jpg']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   163
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   164
RobotTest(9, doc, good, bad, agent="Googlebot")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   165
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   166
good = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   167
bad = ['/something.jpg']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   168
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   169
RobotTest(10, doc, good, bad, agent="Googlebot-Mobile")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   170
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   171
# 11.  Get the order correct.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   172
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   173
User-agent: Googlebot-Mobile
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   174
Allow: /
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   175
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   176
User-agent: Googlebot
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   177
Disallow: /
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   178
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   179
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   180
good = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   181
bad = ['/something.jpg']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   182
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   183
RobotTest(11, doc, good, bad, agent="Googlebot")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   184
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   185
good = ['/something.jpg']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   186
bad = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   187
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   188
RobotTest(12, doc, good, bad, agent="Googlebot-Mobile")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   189
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   190
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   191
# 13.  Google also got the order wrong in #8.  You need to specify the
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   192
#      URLs from more specific to more general.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   193
doc = """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   194
User-agent: Googlebot
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   195
Allow: /folder1/myfile.html
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   196
Disallow: /folder1/
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   197
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   198
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   199
good = ['/folder1/myfile.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   200
bad = ['/folder1/anotherfile.html']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   201
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   202
RobotTest(13, doc, good, bad, agent="googlebot")
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   203
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   204
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   205
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   206
class TestCase(unittest.TestCase):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   207
    def runTest(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   208
        test_support.requires('network')
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   209
        # whole site is password-protected.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   210
        url = 'http://mueblesmoraleda.com'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   211
        parser = robotparser.RobotFileParser()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   212
        parser.set_url(url)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   213
        parser.read()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   214
        self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   215
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   216
def test_main():
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   217
    test_support.run_unittest(tests)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   218
    TestCase().run()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   219
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   220
if __name__=='__main__':
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   221
    test_support.verbose = 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   222
    test_main()