python-2.5.2/win32/Lib/cookielib.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 """HTTP cookie handling for web clients.
       
     2 
       
     3 This module has (now fairly distant) origins in Gisle Aas' Perl module
       
     4 HTTP::Cookies, from the libwww-perl library.
       
     5 
       
     6 Docstrings, comments and debug strings in this code refer to the
       
     7 attributes of the HTTP cookie system as cookie-attributes, to distinguish
       
     8 them clearly from Python attributes.
       
     9 
       
    10 Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
       
    11 distributed with the Python standard library, but are available from
       
    12 http://wwwsearch.sf.net/):
       
    13 
       
    14                         CookieJar____
       
    15                         /     \      \
       
    16             FileCookieJar      \      \
       
    17              /    |   \         \      \
       
    18  MozillaCookieJar | LWPCookieJar \      \
       
    19                   |               |      \
       
    20                   |   ---MSIEBase |       \
       
    21                   |  /      |     |        \
       
    22                   | /   MSIEDBCookieJar BSDDBCookieJar
       
    23                   |/
       
    24                MSIECookieJar
       
    25 
       
    26 """
       
    27 
       
    28 __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
       
    29            'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
       
    30 
       
    31 import re, urlparse, copy, time, urllib
       
    32 try:
       
    33     import threading as _threading
       
    34 except ImportError:
       
    35     import dummy_threading as _threading
       
    36 import httplib  # only for the default HTTP port
       
    37 from calendar import timegm
       
    38 
       
    39 debug = False   # set to True to enable debugging via the logging module
       
    40 logger = None
       
    41 
       
    42 def _debug(*args):
       
    43     if not debug:
       
    44         return
       
    45     global logger
       
    46     if not logger:
       
    47         import logging
       
    48         logger = logging.getLogger("cookielib")
       
    49     return logger.debug(*args)
       
    50 
       
    51 
       
    52 DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
       
    53 MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
       
    54                          "instance initialised with one)")
       
    55 
       
    56 def _warn_unhandled_exception():
       
    57     # There are a few catch-all except: statements in this module, for
       
    58     # catching input that's bad in unexpected ways.  Warn if any
       
    59     # exceptions are caught there.
       
    60     import warnings, traceback, StringIO
       
    61     f = StringIO.StringIO()
       
    62     traceback.print_exc(None, f)
       
    63     msg = f.getvalue()
       
    64     warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)
       
    65 
       
    66 
       
    67 # Date/time conversion
       
    68 # -----------------------------------------------------------------------------
       
    69 
       
    70 EPOCH_YEAR = 1970
       
    71 def _timegm(tt):
       
    72     year, month, mday, hour, min, sec = tt[:6]
       
    73     if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
       
    74         (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
       
    75         return timegm(tt)
       
    76     else:
       
    77         return None
       
    78 
       
    79 DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
       
    80 MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
       
    81           "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
       
    82 MONTHS_LOWER = []
       
    83 for month in MONTHS: MONTHS_LOWER.append(month.lower())
       
    84 
       
    85 def time2isoz(t=None):
       
    86     """Return a string representing time in seconds since epoch, t.
       
    87 
       
    88     If the function is called without an argument, it will use the current
       
    89     time.
       
    90 
       
    91     The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
       
    92     representing Universal Time (UTC, aka GMT).  An example of this format is:
       
    93 
       
    94     1994-11-24 08:49:37Z
       
    95 
       
    96     """
       
    97     if t is None: t = time.time()
       
    98     year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
       
    99     return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
       
   100         year, mon, mday, hour, min, sec)
       
   101 
       
   102 def time2netscape(t=None):
       
   103     """Return a string representing time in seconds since epoch, t.
       
   104 
       
   105     If the function is called without an argument, it will use the current
       
   106     time.
       
   107 
       
   108     The format of the returned string is like this:
       
   109 
       
   110     Wed, DD-Mon-YYYY HH:MM:SS GMT
       
   111 
       
   112     """
       
   113     if t is None: t = time.time()
       
   114     year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
       
   115     return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
       
   116         DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec)
       
   117 
       
   118 
       
   119 UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
       
   120 
       
   121 TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
       
   122 def offset_from_tz_string(tz):
       
   123     offset = None
       
   124     if tz in UTC_ZONES:
       
   125         offset = 0
       
   126     else:
       
   127         m = TIMEZONE_RE.search(tz)
       
   128         if m:
       
   129             offset = 3600 * int(m.group(2))
       
   130             if m.group(3):
       
   131                 offset = offset + 60 * int(m.group(3))
       
   132             if m.group(1) == '-':
       
   133                 offset = -offset
       
   134     return offset
       
   135 
       
   136 def _str2time(day, mon, yr, hr, min, sec, tz):
       
   137     # translate month name to number
       
   138     # month numbers start with 1 (January)
       
   139     try:
       
   140         mon = MONTHS_LOWER.index(mon.lower())+1
       
   141     except ValueError:
       
   142         # maybe it's already a number
       
   143         try:
       
   144             imon = int(mon)
       
   145         except ValueError:
       
   146             return None
       
   147         if 1 <= imon <= 12:
       
   148             mon = imon
       
   149         else:
       
   150             return None
       
   151 
       
   152     # make sure clock elements are defined
       
   153     if hr is None: hr = 0
       
   154     if min is None: min = 0
       
   155     if sec is None: sec = 0
       
   156 
       
   157     yr = int(yr)
       
   158     day = int(day)
       
   159     hr = int(hr)
       
   160     min = int(min)
       
   161     sec = int(sec)
       
   162 
       
   163     if yr < 1000:
       
   164         # find "obvious" year
       
   165         cur_yr = time.localtime(time.time())[0]
       
   166         m = cur_yr % 100
       
   167         tmp = yr
       
   168         yr = yr + cur_yr - m
       
   169         m = m - tmp
       
   170         if abs(m) > 50:
       
   171             if m > 0: yr = yr + 100
       
   172             else: yr = yr - 100
       
   173 
       
   174     # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
       
   175     t = _timegm((yr, mon, day, hr, min, sec, tz))
       
   176 
       
   177     if t is not None:
       
   178         # adjust time using timezone string, to get absolute time since epoch
       
   179         if tz is None:
       
   180             tz = "UTC"
       
   181         tz = tz.upper()
       
   182         offset = offset_from_tz_string(tz)
       
   183         if offset is None:
       
   184             return None
       
   185         t = t - offset
       
   186 
       
   187     return t
       
   188 
       
   189 STRICT_DATE_RE = re.compile(
       
   190     r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
       
   191     "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
       
   192 WEEKDAY_RE = re.compile(
       
   193     r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
       
   194 LOOSE_HTTP_DATE_RE = re.compile(
       
   195     r"""^
       
   196     (\d\d?)            # day
       
   197        (?:\s+|[-\/])
       
   198     (\w+)              # month
       
   199         (?:\s+|[-\/])
       
   200     (\d+)              # year
       
   201     (?:
       
   202           (?:\s+|:)    # separator before clock
       
   203        (\d\d?):(\d\d)  # hour:min
       
   204        (?::(\d\d))?    # optional seconds
       
   205     )?                 # optional clock
       
   206        \s*
       
   207     ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       
   208        \s*
       
   209     (?:\(\w+\))?       # ASCII representation of timezone in parens.
       
   210        \s*$""", re.X)
       
   211 def http2time(text):
       
   212     """Returns time in seconds since epoch of time represented by a string.
       
   213 
       
   214     Return value is an integer.
       
   215 
       
   216     None is returned if the format of str is unrecognized, the time is outside
       
   217     the representable range, or the timezone string is not recognized.  If the
       
   218     string contains no timezone, UTC is assumed.
       
   219 
       
   220     The timezone in the string may be numerical (like "-0800" or "+0100") or a
       
   221     string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
       
   222     timezone strings equivalent to UTC (zero offset) are known to the function.
       
   223 
       
   224     The function loosely parses the following formats:
       
   225 
       
   226     Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
       
   227     Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
       
   228     Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
       
   229     09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
       
   230     08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
       
   231     08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)
       
   232 
       
   233     The parser ignores leading and trailing whitespace.  The time may be
       
   234     absent.
       
   235 
       
   236     If the year is given with only 2 digits, the function will select the
       
   237     century that makes the year closest to the current date.
       
   238 
       
   239     """
       
   240     # fast exit for strictly conforming string
       
   241     m = STRICT_DATE_RE.search(text)
       
   242     if m:
       
   243         g = m.groups()
       
   244         mon = MONTHS_LOWER.index(g[1].lower()) + 1
       
   245         tt = (int(g[2]), mon, int(g[0]),
       
   246               int(g[3]), int(g[4]), float(g[5]))
       
   247         return _timegm(tt)
       
   248 
       
   249     # No, we need some messy parsing...
       
   250 
       
   251     # clean up
       
   252     text = text.lstrip()
       
   253     text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday
       
   254 
       
   255     # tz is time zone specifier string
       
   256     day, mon, yr, hr, min, sec, tz = [None]*7
       
   257 
       
   258     # loose regexp parse
       
   259     m = LOOSE_HTTP_DATE_RE.search(text)
       
   260     if m is not None:
       
   261         day, mon, yr, hr, min, sec, tz = m.groups()
       
   262     else:
       
   263         return None  # bad format
       
   264 
       
   265     return _str2time(day, mon, yr, hr, min, sec, tz)
       
   266 
       
   267 ISO_DATE_RE = re.compile(
       
   268     """^
       
   269     (\d{4})              # year
       
   270        [-\/]?
       
   271     (\d\d?)              # numerical month
       
   272        [-\/]?
       
   273     (\d\d?)              # day
       
   274    (?:
       
   275          (?:\s+|[-:Tt])  # separator before clock
       
   276       (\d\d?):?(\d\d)    # hour:min
       
   277       (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
       
   278    )?                    # optional clock
       
   279       \s*
       
   280    ([-+]?\d\d?:?(:?\d\d)?
       
   281     |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
       
   282       \s*$""", re.X)
       
   283 def iso2time(text):
       
   284     """
       
   285     As for http2time, but parses the ISO 8601 formats:
       
   286 
       
   287     1994-02-03 14:15:29 -0100    -- ISO 8601 format
       
   288     1994-02-03 14:15:29          -- zone is optional
       
   289     1994-02-03                   -- only date
       
   290     1994-02-03T14:15:29          -- Use T as separator
       
   291     19940203T141529Z             -- ISO 8601 compact format
       
   292     19940203                     -- only date
       
   293 
       
   294     """
       
   295     # clean up
       
   296     text = text.lstrip()
       
   297 
       
   298     # tz is time zone specifier string
       
   299     day, mon, yr, hr, min, sec, tz = [None]*7
       
   300 
       
   301     # loose regexp parse
       
   302     m = ISO_DATE_RE.search(text)
       
   303     if m is not None:
       
   304         # XXX there's an extra bit of the timezone I'm ignoring here: is
       
   305         #   this the right thing to do?
       
   306         yr, mon, day, hr, min, sec, tz, _ = m.groups()
       
   307     else:
       
   308         return None  # bad format
       
   309 
       
   310     return _str2time(day, mon, yr, hr, min, sec, tz)
       
   311 
       
   312 
       
   313 # Header parsing
       
   314 # -----------------------------------------------------------------------------
       
   315 
       
   316 def unmatched(match):
       
   317     """Return unmatched part of re.Match object."""
       
   318     start, end = match.span(0)
       
   319     return match.string[:start]+match.string[end:]
       
   320 
       
   321 HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
       
   322 HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
       
   323 HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
       
   324 HEADER_ESCAPE_RE = re.compile(r"\\(.)")
       
   325 def split_header_words(header_values):
       
   326     r"""Parse header values into a list of lists containing key,value pairs.
       
   327 
       
   328     The function knows how to deal with ",", ";" and "=" as well as quoted
       
   329     values after "=".  A list of space separated tokens are parsed as if they
       
   330     were separated by ";".
       
   331 
       
   332     If the header_values passed as argument contains multiple values, then they
       
   333     are treated as if they were a single value separated by comma ",".
       
   334 
       
   335     This means that this function is useful for parsing header fields that
       
   336     follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
       
   337     the requirement for tokens).
       
   338 
       
   339       headers           = #header
       
   340       header            = (token | parameter) *( [";"] (token | parameter))
       
   341 
       
   342       token             = 1*<any CHAR except CTLs or separators>
       
   343       separators        = "(" | ")" | "<" | ">" | "@"
       
   344                         | "," | ";" | ":" | "\" | <">
       
   345                         | "/" | "[" | "]" | "?" | "="
       
   346                         | "{" | "}" | SP | HT
       
   347 
       
   348       quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
       
   349       qdtext            = <any TEXT except <">>
       
   350       quoted-pair       = "\" CHAR
       
   351 
       
   352       parameter         = attribute "=" value
       
   353       attribute         = token
       
   354       value             = token | quoted-string
       
   355 
       
   356     Each header is represented by a list of key/value pairs.  The value for a
       
   357     simple token (not part of a parameter) is None.  Syntactically incorrect
       
   358     headers will not necessarily be parsed as you would want.
       
   359 
       
   360     This is easier to describe with some examples:
       
   361 
       
   362     >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
       
   363     [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
       
   364     >>> split_header_words(['text/html; charset="iso-8859-1"'])
       
   365     [[('text/html', None), ('charset', 'iso-8859-1')]]
       
   366     >>> split_header_words([r'Basic realm="\"foo\bar\""'])
       
   367     [[('Basic', None), ('realm', '"foobar"')]]
       
   368 
       
   369     """
       
   370     assert not isinstance(header_values, basestring)
       
   371     result = []
       
   372     for text in header_values:
       
   373         orig_text = text
       
   374         pairs = []
       
   375         while text:
       
   376             m = HEADER_TOKEN_RE.search(text)
       
   377             if m:
       
   378                 text = unmatched(m)
       
   379                 name = m.group(1)
       
   380                 m = HEADER_QUOTED_VALUE_RE.search(text)
       
   381                 if m:  # quoted value
       
   382                     text = unmatched(m)
       
   383                     value = m.group(1)
       
   384                     value = HEADER_ESCAPE_RE.sub(r"\1", value)
       
   385                 else:
       
   386                     m = HEADER_VALUE_RE.search(text)
       
   387                     if m:  # unquoted value
       
   388                         text = unmatched(m)
       
   389                         value = m.group(1)
       
   390                         value = value.rstrip()
       
   391                     else:
       
   392                         # no value, a lone token
       
   393                         value = None
       
   394                 pairs.append((name, value))
       
   395             elif text.lstrip().startswith(","):
       
   396                 # concatenated headers, as per RFC 2616 section 4.2
       
   397                 text = text.lstrip()[1:]
       
   398                 if pairs: result.append(pairs)
       
   399                 pairs = []
       
   400             else:
       
   401                 # skip junk
       
   402                 non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
       
   403                 assert nr_junk_chars > 0, (
       
   404                     "split_header_words bug: '%s', '%s', %s" %
       
   405                     (orig_text, text, pairs))
       
   406                 text = non_junk
       
   407         if pairs: result.append(pairs)
       
   408     return result
       
   409 
       
   410 HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
       
   411 def join_header_words(lists):
       
   412     """Do the inverse (almost) of the conversion done by split_header_words.
       
   413 
       
   414     Takes a list of lists of (key, value) pairs and produces a single header
       
   415     value.  Attribute values are quoted if needed.
       
   416 
       
   417     >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
       
   418     'text/plain; charset="iso-8859/1"'
       
   419     >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
       
   420     'text/plain, charset="iso-8859/1"'
       
   421 
       
   422     """
       
   423     headers = []
       
   424     for pairs in lists:
       
   425         attr = []
       
   426         for k, v in pairs:
       
   427             if v is not None:
       
   428                 if not re.search(r"^\w+$", v):
       
   429                     v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
       
   430                     v = '"%s"' % v
       
   431                 k = "%s=%s" % (k, v)
       
   432             attr.append(k)
       
   433         if attr: headers.append("; ".join(attr))
       
   434     return ", ".join(headers)
       
   435 
       
   436 def parse_ns_headers(ns_headers):
       
   437     """Ad-hoc parser for Netscape protocol cookie-attributes.
       
   438 
       
   439     The old Netscape cookie format for Set-Cookie can for instance contain
       
   440     an unquoted "," in the expires field, so we have to use this ad-hoc
       
   441     parser instead of split_header_words.
       
   442 
       
   443     XXX This may not make the best possible effort to parse all the crap
       
   444     that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
       
   445     parser is probably better, so could do worse than following that if
       
   446     this ever gives any trouble.
       
   447 
       
   448     Currently, this is also used for parsing RFC 2109 cookies.
       
   449 
       
   450     """
       
   451     known_attrs = ("expires", "domain", "path", "secure",
       
   452                    # RFC 2109 attrs (may turn up in Netscape cookies, too)
       
   453                    "port", "max-age")
       
   454 
       
   455     result = []
       
   456     for ns_header in ns_headers:
       
   457         pairs = []
       
   458         version_set = False
       
   459         for ii, param in enumerate(re.split(r";\s*", ns_header)):
       
   460             param = param.rstrip()
       
   461             if param == "": continue
       
   462             if "=" not in param:
       
   463                 k, v = param, None
       
   464             else:
       
   465                 k, v = re.split(r"\s*=\s*", param, 1)
       
   466                 k = k.lstrip()
       
   467             if ii != 0:
       
   468                 lc = k.lower()
       
   469                 if lc in known_attrs:
       
   470                     k = lc
       
   471                 if k == "version":
       
   472                     # This is an RFC 2109 cookie.
       
   473                     version_set = True
       
   474                 if k == "expires":
       
   475                     # convert expires date to seconds since epoch
       
   476                     if v.startswith('"'): v = v[1:]
       
   477                     if v.endswith('"'): v = v[:-1]
       
   478                     v = http2time(v)  # None if invalid
       
   479             pairs.append((k, v))
       
   480 
       
   481         if pairs:
       
   482             if not version_set:
       
   483                 pairs.append(("version", "0"))
       
   484             result.append(pairs)
       
   485 
       
   486     return result
       
   487 
       
   488 
       
   489 IPV4_RE = re.compile(r"\.\d+$")
       
   490 def is_HDN(text):
       
   491     """Return True if text is a host domain name."""
       
   492     # XXX
       
   493     # This may well be wrong.  Which RFC is HDN defined in, if any (for
       
   494     #  the purposes of RFC 2965)?
       
   495     # For the current implementation, what about IPv6?  Remember to look
       
   496     #  at other uses of IPV4_RE also, if change this.
       
   497     if IPV4_RE.search(text):
       
   498         return False
       
   499     if text == "":
       
   500         return False
       
   501     if text[0] == "." or text[-1] == ".":
       
   502         return False
       
   503     return True
       
   504 
       
   505 def domain_match(A, B):
       
   506     """Return True if domain A domain-matches domain B, according to RFC 2965.
       
   507 
       
   508     A and B may be host domain names or IP addresses.
       
   509 
       
   510     RFC 2965, section 1:
       
   511 
       
   512     Host names can be specified either as an IP address or a HDN string.
       
   513     Sometimes we compare one host name with another.  (Such comparisons SHALL
       
   514     be case-insensitive.)  Host A's name domain-matches host B's if
       
   515 
       
   516          *  their host name strings string-compare equal; or
       
   517 
       
   518          * A is a HDN string and has the form NB, where N is a non-empty
       
   519             name string, B has the form .B', and B' is a HDN string.  (So,
       
   520             x.y.com domain-matches .Y.com but not Y.com.)
       
   521 
       
   522     Note that domain-match is not a commutative operation: a.b.c.com
       
   523     domain-matches .c.com, but not the reverse.
       
   524 
       
   525     """
       
   526     # Note that, if A or B are IP addresses, the only relevant part of the
       
   527     # definition of the domain-match algorithm is the direct string-compare.
       
   528     A = A.lower()
       
   529     B = B.lower()
       
   530     if A == B:
       
   531         return True
       
   532     if not is_HDN(A):
       
   533         return False
       
   534     i = A.rfind(B)
       
   535     if i == -1 or i == 0:
       
   536         # A does not have form NB, or N is the empty string
       
   537         return False
       
   538     if not B.startswith("."):
       
   539         return False
       
   540     if not is_HDN(B[1:]):
       
   541         return False
       
   542     return True
       
   543 
       
   544 def liberal_is_HDN(text):
       
   545     """Return True if text is a sort-of-like a host domain name.
       
   546 
       
   547     For accepting/blocking domains.
       
   548 
       
   549     """
       
   550     if IPV4_RE.search(text):
       
   551         return False
       
   552     return True
       
   553 
       
   554 def user_domain_match(A, B):
       
   555     """For blocking/accepting domains.
       
   556 
       
   557     A and B may be host domain names or IP addresses.
       
   558 
       
   559     """
       
   560     A = A.lower()
       
   561     B = B.lower()
       
   562     if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
       
   563         if A == B:
       
   564             # equal IP addresses
       
   565             return True
       
   566         return False
       
   567     initial_dot = B.startswith(".")
       
   568     if initial_dot and A.endswith(B):
       
   569         return True
       
   570     if not initial_dot and A == B:
       
   571         return True
       
   572     return False
       
   573 
       
   574 cut_port_re = re.compile(r":\d+$")
       
   575 def request_host(request):
       
   576     """Return request-host, as defined by RFC 2965.
       
   577 
       
   578     Variation from RFC: returned value is lowercased, for convenient
       
   579     comparison.
       
   580 
       
   581     """
       
   582     url = request.get_full_url()
       
   583     host = urlparse.urlparse(url)[1]
       
   584     if host == "":
       
   585         host = request.get_header("Host", "")
       
   586 
       
   587     # remove port, if present
       
   588     host = cut_port_re.sub("", host, 1)
       
   589     return host.lower()
       
   590 
       
   591 def eff_request_host(request):
       
   592     """Return a tuple (request-host, effective request-host name).
       
   593 
       
   594     As defined by RFC 2965, except both are lowercased.
       
   595 
       
   596     """
       
   597     erhn = req_host = request_host(request)
       
   598     if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
       
   599         erhn = req_host + ".local"
       
   600     return req_host, erhn
       
   601 
       
   602 def request_path(request):
       
   603     """request-URI, as defined by RFC 2965."""
       
   604     url = request.get_full_url()
       
   605     #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
       
   606     #req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
       
   607     path, parameters, query, frag = urlparse.urlparse(url)[2:]
       
   608     if parameters:
       
   609         path = "%s;%s" % (path, parameters)
       
   610     path = escape_path(path)
       
   611     req_path = urlparse.urlunparse(("", "", path, "", query, frag))
       
   612     if not req_path.startswith("/"):
       
   613         # fix bad RFC 2396 absoluteURI
       
   614         req_path = "/"+req_path
       
   615     return req_path
       
   616 
       
   617 def request_port(request):
       
   618     host = request.get_host()
       
   619     i = host.find(':')
       
   620     if i >= 0:
       
   621         port = host[i+1:]
       
   622         try:
       
   623             int(port)
       
   624         except ValueError:
       
   625             _debug("nonnumeric port: '%s'", port)
       
   626             return None
       
   627     else:
       
   628         port = DEFAULT_HTTP_PORT
       
   629     return port
       
   630 
       
   631 # Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
       
   632 # need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
       
   633 HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
       
   634 ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
       
   635 def uppercase_escaped_char(match):
       
   636     return "%%%s" % match.group(1).upper()
       
   637 def escape_path(path):
       
   638     """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
       
   639     # There's no knowing what character encoding was used to create URLs
       
   640     # containing %-escapes, but since we have to pick one to escape invalid
       
   641     # path characters, we pick UTF-8, as recommended in the HTML 4.0
       
   642     # specification:
       
   643     # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
       
   644     # And here, kind of: draft-fielding-uri-rfc2396bis-03
       
   645     # (And in draft IRI specification: draft-duerst-iri-05)
       
   646     # (And here, for new URI schemes: RFC 2718)
       
   647     if isinstance(path, unicode):
       
   648         path = path.encode("utf-8")
       
   649     path = urllib.quote(path, HTTP_PATH_SAFE)
       
   650     path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
       
   651     return path
       
   652 
       
   653 def reach(h):
       
   654     """Return reach of host h, as defined by RFC 2965, section 1.
       
   655 
       
   656     The reach R of a host name H is defined as follows:
       
   657 
       
   658        *  If
       
   659 
       
   660           -  H is the host domain name of a host; and,
       
   661 
       
   662           -  H has the form A.B; and
       
   663 
       
   664           -  A has no embedded (that is, interior) dots; and
       
   665 
       
   666           -  B has at least one embedded dot, or B is the string "local".
       
   667              then the reach of H is .B.
       
   668 
       
   669        *  Otherwise, the reach of H is H.
       
   670 
       
   671     >>> reach("www.acme.com")
       
   672     '.acme.com'
       
   673     >>> reach("acme.com")
       
   674     'acme.com'
       
   675     >>> reach("acme.local")
       
   676     '.local'
       
   677 
       
   678     """
       
   679     i = h.find(".")
       
   680     if i >= 0:
       
   681         #a = h[:i]  # this line is only here to show what a is
       
   682         b = h[i+1:]
       
   683         i = b.find(".")
       
   684         if is_HDN(h) and (i >= 0 or b == "local"):
       
   685             return "."+b
       
   686     return h
       
   687 
       
   688 def is_third_party(request):
       
   689     """
       
   690 
       
   691     RFC 2965, section 3.3.6:
       
   692 
       
   693         An unverifiable transaction is to a third-party host if its request-
       
   694         host U does not domain-match the reach R of the request-host O in the
       
   695         origin transaction.
       
   696 
       
   697     """
       
   698     req_host = request_host(request)
       
   699     if not domain_match(req_host, reach(request.get_origin_req_host())):
       
   700         return True
       
   701     else:
       
   702         return False
       
   703 
       
   704 
       
   705 class Cookie:
       
   706     """HTTP Cookie.
       
   707 
       
   708     This class represents both Netscape and RFC 2965 cookies.
       
   709 
       
   710     This is deliberately a very simple class.  It just holds attributes.  It's
       
   711     possible to construct Cookie instances that don't comply with the cookie
       
   712     standards.  CookieJar.make_cookies is the factory function for Cookie
       
   713     objects -- it deals with cookie parsing, supplying defaults, and
       
   714     normalising to the representation used in this class.  CookiePolicy is
       
   715     responsible for checking them to see whether they should be accepted from
       
   716     and returned to the server.
       
   717 
       
   718     Note that the port may be present in the headers, but unspecified ("Port"
       
   719     rather than"Port=80", for example); if this is the case, port is None.
       
   720 
       
   721     """
       
   722 
       
   723     def __init__(self, version, name, value,
       
   724                  port, port_specified,
       
   725                  domain, domain_specified, domain_initial_dot,
       
   726                  path, path_specified,
       
   727                  secure,
       
   728                  expires,
       
   729                  discard,
       
   730                  comment,
       
   731                  comment_url,
       
   732                  rest,
       
   733                  rfc2109=False,
       
   734                  ):
       
   735 
       
   736         if version is not None: version = int(version)
       
   737         if expires is not None: expires = int(expires)
       
   738         if port is None and port_specified is True:
       
   739             raise ValueError("if port is None, port_specified must be false")
       
   740 
       
   741         self.version = version
       
   742         self.name = name
       
   743         self.value = value
       
   744         self.port = port
       
   745         self.port_specified = port_specified
       
   746         # normalise case, as per RFC 2965 section 3.3.3
       
   747         self.domain = domain.lower()
       
   748         self.domain_specified = domain_specified
       
   749         # Sigh.  We need to know whether the domain given in the
       
   750         # cookie-attribute had an initial dot, in order to follow RFC 2965
       
   751         # (as clarified in draft errata).  Needed for the returned $Domain
       
   752         # value.
       
   753         self.domain_initial_dot = domain_initial_dot
       
   754         self.path = path
       
   755         self.path_specified = path_specified
       
   756         self.secure = secure
       
   757         self.expires = expires
       
   758         self.discard = discard
       
   759         self.comment = comment
       
   760         self.comment_url = comment_url
       
   761         self.rfc2109 = rfc2109
       
   762 
       
   763         self._rest = copy.copy(rest)
       
   764 
       
   765     def has_nonstandard_attr(self, name):
       
   766         return name in self._rest
       
   767     def get_nonstandard_attr(self, name, default=None):
       
   768         return self._rest.get(name, default)
       
   769     def set_nonstandard_attr(self, name, value):
       
   770         self._rest[name] = value
       
   771 
       
   772     def is_expired(self, now=None):
       
   773         if now is None: now = time.time()
       
   774         if (self.expires is not None) and (self.expires <= now):
       
   775             return True
       
   776         return False
       
   777 
       
   778     def __str__(self):
       
   779         if self.port is None: p = ""
       
   780         else: p = ":"+self.port
       
   781         limit = self.domain + p + self.path
       
   782         if self.value is not None:
       
   783             namevalue = "%s=%s" % (self.name, self.value)
       
   784         else:
       
   785             namevalue = self.name
       
   786         return "<Cookie %s for %s>" % (namevalue, limit)
       
   787 
       
   788     def __repr__(self):
       
   789         args = []
       
   790         for name in ("version", "name", "value",
       
   791                      "port", "port_specified",
       
   792                      "domain", "domain_specified", "domain_initial_dot",
       
   793                      "path", "path_specified",
       
   794                      "secure", "expires", "discard", "comment", "comment_url",
       
   795                      ):
       
   796             attr = getattr(self, name)
       
   797             args.append("%s=%s" % (name, repr(attr)))
       
   798         args.append("rest=%s" % repr(self._rest))
       
   799         args.append("rfc2109=%s" % repr(self.rfc2109))
       
   800         return "Cookie(%s)" % ", ".join(args)
       
   801 
       
   802 
       
   803 class CookiePolicy:
       
   804     """Defines which cookies get accepted from and returned to server.
       
   805 
       
   806     May also modify cookies, though this is probably a bad idea.
       
   807 
       
   808     The subclass DefaultCookiePolicy defines the standard rules for Netscape
       
   809     and RFC 2965 cookies -- override that if you want a customised policy.
       
   810 
       
   811     """
       
   812     def set_ok(self, cookie, request):
       
   813         """Return true if (and only if) cookie should be accepted from server.
       
   814 
       
   815         Currently, pre-expired cookies never get this far -- the CookieJar
       
   816         class deletes such cookies itself.
       
   817 
       
   818         """
       
   819         raise NotImplementedError()
       
   820 
       
   821     def return_ok(self, cookie, request):
       
   822         """Return true if (and only if) cookie should be returned to server."""
       
   823         raise NotImplementedError()
       
   824 
       
   825     def domain_return_ok(self, domain, request):
       
   826         """Return false if cookies should not be returned, given cookie domain.
       
   827         """
       
   828         return True
       
   829 
       
   830     def path_return_ok(self, path, request):
       
   831         """Return false if cookies should not be returned, given cookie path.
       
   832         """
       
   833         return True
       
   834 
       
   835 
       
   836 class DefaultCookiePolicy(CookiePolicy):
       
   837     """Implements the standard rules for accepting and returning cookies."""
       
   838 
       
   839     DomainStrictNoDots = 1
       
   840     DomainStrictNonDomain = 2
       
   841     DomainRFC2965Match = 4
       
   842 
       
   843     DomainLiberal = 0
       
   844     DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
       
   845 
       
   846     def __init__(self,
       
   847                  blocked_domains=None, allowed_domains=None,
       
   848                  netscape=True, rfc2965=False,
       
   849                  rfc2109_as_netscape=None,
       
   850                  hide_cookie2=False,
       
   851                  strict_domain=False,
       
   852                  strict_rfc2965_unverifiable=True,
       
   853                  strict_ns_unverifiable=False,
       
   854                  strict_ns_domain=DomainLiberal,
       
   855                  strict_ns_set_initial_dollar=False,
       
   856                  strict_ns_set_path=False,
       
   857                  ):
       
   858         """Constructor arguments should be passed as keyword arguments only."""
       
   859         self.netscape = netscape
       
   860         self.rfc2965 = rfc2965
       
   861         self.rfc2109_as_netscape = rfc2109_as_netscape
       
   862         self.hide_cookie2 = hide_cookie2
       
   863         self.strict_domain = strict_domain
       
   864         self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
       
   865         self.strict_ns_unverifiable = strict_ns_unverifiable
       
   866         self.strict_ns_domain = strict_ns_domain
       
   867         self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
       
   868         self.strict_ns_set_path = strict_ns_set_path
       
   869 
       
   870         if blocked_domains is not None:
       
   871             self._blocked_domains = tuple(blocked_domains)
       
   872         else:
       
   873             self._blocked_domains = ()
       
   874 
       
   875         if allowed_domains is not None:
       
   876             allowed_domains = tuple(allowed_domains)
       
   877         self._allowed_domains = allowed_domains
       
   878 
       
   879     def blocked_domains(self):
       
   880         """Return the sequence of blocked domains (as a tuple)."""
       
   881         return self._blocked_domains
       
   882     def set_blocked_domains(self, blocked_domains):
       
   883         """Set the sequence of blocked domains."""
       
   884         self._blocked_domains = tuple(blocked_domains)
       
   885 
       
   886     def is_blocked(self, domain):
       
   887         for blocked_domain in self._blocked_domains:
       
   888             if user_domain_match(domain, blocked_domain):
       
   889                 return True
       
   890         return False
       
   891 
       
   892     def allowed_domains(self):
       
   893         """Return None, or the sequence of allowed domains (as a tuple)."""
       
   894         return self._allowed_domains
       
   895     def set_allowed_domains(self, allowed_domains):
       
   896         """Set the sequence of allowed domains, or None."""
       
   897         if allowed_domains is not None:
       
   898             allowed_domains = tuple(allowed_domains)
       
   899         self._allowed_domains = allowed_domains
       
   900 
       
   901     def is_not_allowed(self, domain):
       
   902         if self._allowed_domains is None:
       
   903             return False
       
   904         for allowed_domain in self._allowed_domains:
       
   905             if user_domain_match(domain, allowed_domain):
       
   906                 return False
       
   907         return True
       
   908 
       
   909     def set_ok(self, cookie, request):
       
   910         """
       
   911         If you override .set_ok(), be sure to call this method.  If it returns
       
   912         false, so should your subclass (assuming your subclass wants to be more
       
   913         strict about which cookies to accept).
       
   914 
       
   915         """
       
   916         _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
       
   917 
       
   918         assert cookie.name is not None
       
   919 
       
   920         for n in "version", "verifiability", "name", "path", "domain", "port":
       
   921             fn_name = "set_ok_"+n
       
   922             fn = getattr(self, fn_name)
       
   923             if not fn(cookie, request):
       
   924                 return False
       
   925 
       
   926         return True
       
   927 
       
   928     def set_ok_version(self, cookie, request):
       
   929         if cookie.version is None:
       
   930             # Version is always set to 0 by parse_ns_headers if it's a Netscape
       
   931             # cookie, so this must be an invalid RFC 2965 cookie.
       
   932             _debug("   Set-Cookie2 without version attribute (%s=%s)",
       
   933                    cookie.name, cookie.value)
       
   934             return False
       
   935         if cookie.version > 0 and not self.rfc2965:
       
   936             _debug("   RFC 2965 cookies are switched off")
       
   937             return False
       
   938         elif cookie.version == 0 and not self.netscape:
       
   939             _debug("   Netscape cookies are switched off")
       
   940             return False
       
   941         return True
       
   942 
       
   943     def set_ok_verifiability(self, cookie, request):
       
   944         if request.is_unverifiable() and is_third_party(request):
       
   945             if cookie.version > 0 and self.strict_rfc2965_unverifiable:
       
   946                 _debug("   third-party RFC 2965 cookie during "
       
   947                              "unverifiable transaction")
       
   948                 return False
       
   949             elif cookie.version == 0 and self.strict_ns_unverifiable:
       
   950                 _debug("   third-party Netscape cookie during "
       
   951                              "unverifiable transaction")
       
   952                 return False
       
   953         return True
       
   954 
       
   955     def set_ok_name(self, cookie, request):
       
   956         # Try and stop servers setting V0 cookies designed to hack other
       
   957         # servers that know both V0 and V1 protocols.
       
   958         if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
       
   959             cookie.name.startswith("$")):
       
   960             _debug("   illegal name (starts with '$'): '%s'", cookie.name)
       
   961             return False
       
   962         return True
       
   963 
       
   964     def set_ok_path(self, cookie, request):
       
   965         if cookie.path_specified:
       
   966             req_path = request_path(request)
       
   967             if ((cookie.version > 0 or
       
   968                  (cookie.version == 0 and self.strict_ns_set_path)) and
       
   969                 not req_path.startswith(cookie.path)):
       
   970                 _debug("   path attribute %s is not a prefix of request "
       
   971                        "path %s", cookie.path, req_path)
       
   972                 return False
       
   973         return True
       
   974 
       
   975     def set_ok_domain(self, cookie, request):
       
   976         if self.is_blocked(cookie.domain):
       
   977             _debug("   domain %s is in user block-list", cookie.domain)
       
   978             return False
       
   979         if self.is_not_allowed(cookie.domain):
       
   980             _debug("   domain %s is not in user allow-list", cookie.domain)
       
   981             return False
       
   982         if cookie.domain_specified:
       
   983             req_host, erhn = eff_request_host(request)
       
   984             domain = cookie.domain
       
   985             if self.strict_domain and (domain.count(".") >= 2):
       
   986                 # XXX This should probably be compared with the Konqueror
       
   987                 # (kcookiejar.cpp) and Mozilla implementations, but it's a
       
   988                 # losing battle.
       
   989                 i = domain.rfind(".")
       
   990                 j = domain.rfind(".", 0, i)
       
   991                 if j == 0:  # domain like .foo.bar
       
   992                     tld = domain[i+1:]
       
   993                     sld = domain[j+1:i]
       
   994                     if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
       
   995                        "gov", "mil", "int", "aero", "biz", "cat", "coop",
       
   996                        "info", "jobs", "mobi", "museum", "name", "pro",
       
   997                        "travel", "eu") and len(tld) == 2:
       
   998                         # domain like .co.uk
       
   999                         _debug("   country-code second level domain %s", domain)
       
  1000                         return False
       
  1001             if domain.startswith("."):
       
  1002                 undotted_domain = domain[1:]
       
  1003             else:
       
  1004                 undotted_domain = domain
       
  1005             embedded_dots = (undotted_domain.find(".") >= 0)
       
  1006             if not embedded_dots and domain != ".local":
       
  1007                 _debug("   non-local domain %s contains no embedded dot",
       
  1008                        domain)
       
  1009                 return False
       
  1010             if cookie.version == 0:
       
  1011                 if (not erhn.endswith(domain) and
       
  1012                     (not erhn.startswith(".") and
       
  1013                      not ("."+erhn).endswith(domain))):
       
  1014                     _debug("   effective request-host %s (even with added "
       
  1015                            "initial dot) does not end end with %s",
       
  1016                            erhn, domain)
       
  1017                     return False
       
  1018             if (cookie.version > 0 or
       
  1019                 (self.strict_ns_domain & self.DomainRFC2965Match)):
       
  1020                 if not domain_match(erhn, domain):
       
  1021                     _debug("   effective request-host %s does not domain-match "
       
  1022                            "%s", erhn, domain)
       
  1023                     return False
       
  1024             if (cookie.version > 0 or
       
  1025                 (self.strict_ns_domain & self.DomainStrictNoDots)):
       
  1026                 host_prefix = req_host[:-len(domain)]
       
  1027                 if (host_prefix.find(".") >= 0 and
       
  1028                     not IPV4_RE.search(req_host)):
       
  1029                     _debug("   host prefix %s for domain %s contains a dot",
       
  1030                            host_prefix, domain)
       
  1031                     return False
       
  1032         return True
       
  1033 
       
  1034     def set_ok_port(self, cookie, request):
       
  1035         if cookie.port_specified:
       
  1036             req_port = request_port(request)
       
  1037             if req_port is None:
       
  1038                 req_port = "80"
       
  1039             else:
       
  1040                 req_port = str(req_port)
       
  1041             for p in cookie.port.split(","):
       
  1042                 try:
       
  1043                     int(p)
       
  1044                 except ValueError:
       
  1045                     _debug("   bad port %s (not numeric)", p)
       
  1046                     return False
       
  1047                 if p == req_port:
       
  1048                     break
       
  1049             else:
       
  1050                 _debug("   request port (%s) not found in %s",
       
  1051                        req_port, cookie.port)
       
  1052                 return False
       
  1053         return True
       
  1054 
       
  1055     def return_ok(self, cookie, request):
       
  1056         """
       
  1057         If you override .return_ok(), be sure to call this method.  If it
       
  1058         returns false, so should your subclass (assuming your subclass wants to
       
  1059         be more strict about which cookies to return).
       
  1060 
       
  1061         """
       
  1062         # Path has already been checked by .path_return_ok(), and domain
       
  1063         # blocking done by .domain_return_ok().
       
  1064         _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
       
  1065 
       
  1066         for n in "version", "verifiability", "secure", "expires", "port", "domain":
       
  1067             fn_name = "return_ok_"+n
       
  1068             fn = getattr(self, fn_name)
       
  1069             if not fn(cookie, request):
       
  1070                 return False
       
  1071         return True
       
  1072 
       
  1073     def return_ok_version(self, cookie, request):
       
  1074         if cookie.version > 0 and not self.rfc2965:
       
  1075             _debug("   RFC 2965 cookies are switched off")
       
  1076             return False
       
  1077         elif cookie.version == 0 and not self.netscape:
       
  1078             _debug("   Netscape cookies are switched off")
       
  1079             return False
       
  1080         return True
       
  1081 
       
  1082     def return_ok_verifiability(self, cookie, request):
       
  1083         if request.is_unverifiable() and is_third_party(request):
       
  1084             if cookie.version > 0 and self.strict_rfc2965_unverifiable:
       
  1085                 _debug("   third-party RFC 2965 cookie during unverifiable "
       
  1086                        "transaction")
       
  1087                 return False
       
  1088             elif cookie.version == 0 and self.strict_ns_unverifiable:
       
  1089                 _debug("   third-party Netscape cookie during unverifiable "
       
  1090                        "transaction")
       
  1091                 return False
       
  1092         return True
       
  1093 
       
  1094     def return_ok_secure(self, cookie, request):
       
  1095         if cookie.secure and request.get_type() != "https":
       
  1096             _debug("   secure cookie with non-secure request")
       
  1097             return False
       
  1098         return True
       
  1099 
       
  1100     def return_ok_expires(self, cookie, request):
       
  1101         if cookie.is_expired(self._now):
       
  1102             _debug("   cookie expired")
       
  1103             return False
       
  1104         return True
       
  1105 
       
  1106     def return_ok_port(self, cookie, request):
       
  1107         if cookie.port:
       
  1108             req_port = request_port(request)
       
  1109             if req_port is None:
       
  1110                 req_port = "80"
       
  1111             for p in cookie.port.split(","):
       
  1112                 if p == req_port:
       
  1113                     break
       
  1114             else:
       
  1115                 _debug("   request port %s does not match cookie port %s",
       
  1116                        req_port, cookie.port)
       
  1117                 return False
       
  1118         return True
       
  1119 
       
  1120     def return_ok_domain(self, cookie, request):
       
  1121         req_host, erhn = eff_request_host(request)
       
  1122         domain = cookie.domain
       
  1123 
       
  1124         # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
       
  1125         if (cookie.version == 0 and
       
  1126             (self.strict_ns_domain & self.DomainStrictNonDomain) and
       
  1127             not cookie.domain_specified and domain != erhn):
       
  1128             _debug("   cookie with unspecified domain does not string-compare "
       
  1129                    "equal to request domain")
       
  1130             return False
       
  1131 
       
  1132         if cookie.version > 0 and not domain_match(erhn, domain):
       
  1133             _debug("   effective request-host name %s does not domain-match "
       
  1134                    "RFC 2965 cookie domain %s", erhn, domain)
       
  1135             return False
       
  1136         if cookie.version == 0 and not ("."+erhn).endswith(domain):
       
  1137             _debug("   request-host %s does not match Netscape cookie domain "
       
  1138                    "%s", req_host, domain)
       
  1139             return False
       
  1140         return True
       
  1141 
       
  1142     def domain_return_ok(self, domain, request):
       
  1143         # Liberal check of.  This is here as an optimization to avoid
       
  1144         # having to load lots of MSIE cookie files unless necessary.
       
  1145         req_host, erhn = eff_request_host(request)
       
  1146         if not req_host.startswith("."):
       
  1147             req_host = "."+req_host
       
  1148         if not erhn.startswith("."):
       
  1149             erhn = "."+erhn
       
  1150         if not (req_host.endswith(domain) or erhn.endswith(domain)):
       
  1151             #_debug("   request domain %s does not match cookie domain %s",
       
  1152             #       req_host, domain)
       
  1153             return False
       
  1154 
       
  1155         if self.is_blocked(domain):
       
  1156             _debug("   domain %s is in user block-list", domain)
       
  1157             return False
       
  1158         if self.is_not_allowed(domain):
       
  1159             _debug("   domain %s is not in user allow-list", domain)
       
  1160             return False
       
  1161 
       
  1162         return True
       
  1163 
       
  1164     def path_return_ok(self, path, request):
       
  1165         _debug("- checking cookie path=%s", path)
       
  1166         req_path = request_path(request)
       
  1167         if not req_path.startswith(path):
       
  1168             _debug("  %s does not path-match %s", req_path, path)
       
  1169             return False
       
  1170         return True
       
  1171 
       
  1172 
       
  1173 def vals_sorted_by_key(adict):
       
  1174     keys = adict.keys()
       
  1175     keys.sort()
       
  1176     return map(adict.get, keys)
       
  1177 
       
  1178 def deepvalues(mapping):
       
  1179     """Iterates over nested mapping, depth-first, in sorted order by key."""
       
  1180     values = vals_sorted_by_key(mapping)
       
  1181     for obj in values:
       
  1182         mapping = False
       
  1183         try:
       
  1184             obj.items
       
  1185         except AttributeError:
       
  1186             pass
       
  1187         else:
       
  1188             mapping = True
       
  1189             for subobj in deepvalues(obj):
       
  1190                 yield subobj
       
  1191         if not mapping:
       
  1192             yield obj
       
  1193 
       
  1194 
       
  1195 # Used as second parameter to dict.get() method, to distinguish absent
       
  1196 # dict key from one with a None value.
       
  1197 class Absent: pass
       
  1198 
       
  1199 class CookieJar:
       
  1200     """Collection of HTTP cookies.
       
  1201 
       
  1202     You may not need to know about this class: try
       
  1203     urllib2.build_opener(HTTPCookieProcessor).open(url).
       
  1204 
       
  1205     """
       
  1206 
       
  1207     non_word_re = re.compile(r"\W")
       
  1208     quote_re = re.compile(r"([\"\\])")
       
  1209     strict_domain_re = re.compile(r"\.?[^.]*")
       
  1210     domain_re = re.compile(r"[^.]*")
       
  1211     dots_re = re.compile(r"^\.+")
       
  1212 
       
  1213     magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
       
  1214 
       
  1215     def __init__(self, policy=None):
       
  1216         if policy is None:
       
  1217             policy = DefaultCookiePolicy()
       
  1218         self._policy = policy
       
  1219 
       
  1220         self._cookies_lock = _threading.RLock()
       
  1221         self._cookies = {}
       
  1222 
       
  1223     def set_policy(self, policy):
       
  1224         self._policy = policy
       
  1225 
       
  1226     def _cookies_for_domain(self, domain, request):
       
  1227         cookies = []
       
  1228         if not self._policy.domain_return_ok(domain, request):
       
  1229             return []
       
  1230         _debug("Checking %s for cookies to return", domain)
       
  1231         cookies_by_path = self._cookies[domain]
       
  1232         for path in cookies_by_path.keys():
       
  1233             if not self._policy.path_return_ok(path, request):
       
  1234                 continue
       
  1235             cookies_by_name = cookies_by_path[path]
       
  1236             for cookie in cookies_by_name.values():
       
  1237                 if not self._policy.return_ok(cookie, request):
       
  1238                     _debug("   not returning cookie")
       
  1239                     continue
       
  1240                 _debug("   it's a match")
       
  1241                 cookies.append(cookie)
       
  1242         return cookies
       
  1243 
       
  1244     def _cookies_for_request(self, request):
       
  1245         """Return a list of cookies to be returned to server."""
       
  1246         cookies = []
       
  1247         for domain in self._cookies.keys():
       
  1248             cookies.extend(self._cookies_for_domain(domain, request))
       
  1249         return cookies
       
  1250 
       
  1251     def _cookie_attrs(self, cookies):
       
  1252         """Return a list of cookie-attributes to be returned to server.
       
  1253 
       
  1254         like ['foo="bar"; $Path="/"', ...]
       
  1255 
       
  1256         The $Version attribute is also added when appropriate (currently only
       
  1257         once per request).
       
  1258 
       
  1259         """
       
  1260         # add cookies in order of most specific (ie. longest) path first
       
  1261         def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
       
  1262         cookies.sort(decreasing_size)
       
  1263 
       
  1264         version_set = False
       
  1265 
       
  1266         attrs = []
       
  1267         for cookie in cookies:
       
  1268             # set version of Cookie header
       
  1269             # XXX
       
  1270             # What should it be if multiple matching Set-Cookie headers have
       
  1271             #  different versions themselves?
       
  1272             # Answer: there is no answer; was supposed to be settled by
       
  1273             #  RFC 2965 errata, but that may never appear...
       
  1274             version = cookie.version
       
  1275             if not version_set:
       
  1276                 version_set = True
       
  1277                 if version > 0:
       
  1278                     attrs.append("$Version=%s" % version)
       
  1279 
       
  1280             # quote cookie value if necessary
       
  1281             # (not for Netscape protocol, which already has any quotes
       
  1282             #  intact, due to the poorly-specified Netscape Cookie: syntax)
       
  1283             if ((cookie.value is not None) and
       
  1284                 self.non_word_re.search(cookie.value) and version > 0):
       
  1285                 value = self.quote_re.sub(r"\\\1", cookie.value)
       
  1286             else:
       
  1287                 value = cookie.value
       
  1288 
       
  1289             # add cookie-attributes to be returned in Cookie header
       
  1290             if cookie.value is None:
       
  1291                 attrs.append(cookie.name)
       
  1292             else:
       
  1293                 attrs.append("%s=%s" % (cookie.name, value))
       
  1294             if version > 0:
       
  1295                 if cookie.path_specified:
       
  1296                     attrs.append('$Path="%s"' % cookie.path)
       
  1297                 if cookie.domain.startswith("."):
       
  1298                     domain = cookie.domain
       
  1299                     if (not cookie.domain_initial_dot and
       
  1300                         domain.startswith(".")):
       
  1301                         domain = domain[1:]
       
  1302                     attrs.append('$Domain="%s"' % domain)
       
  1303                 if cookie.port is not None:
       
  1304                     p = "$Port"
       
  1305                     if cookie.port_specified:
       
  1306                         p = p + ('="%s"' % cookie.port)
       
  1307                     attrs.append(p)
       
  1308 
       
  1309         return attrs
       
  1310 
       
  1311     def add_cookie_header(self, request):
       
  1312         """Add correct Cookie: header to request (urllib2.Request object).
       
  1313 
       
  1314         The Cookie2 header is also added unless policy.hide_cookie2 is true.
       
  1315 
       
  1316         """
       
  1317         _debug("add_cookie_header")
       
  1318         self._cookies_lock.acquire()
       
  1319 
       
  1320         self._policy._now = self._now = int(time.time())
       
  1321 
       
  1322         cookies = self._cookies_for_request(request)
       
  1323 
       
  1324         attrs = self._cookie_attrs(cookies)
       
  1325         if attrs:
       
  1326             if not request.has_header("Cookie"):
       
  1327                 request.add_unredirected_header(
       
  1328                     "Cookie", "; ".join(attrs))
       
  1329 
       
  1330         # if necessary, advertise that we know RFC 2965
       
  1331         if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
       
  1332             not request.has_header("Cookie2")):
       
  1333             for cookie in cookies:
       
  1334                 if cookie.version != 1:
       
  1335                     request.add_unredirected_header("Cookie2", '$Version="1"')
       
  1336                     break
       
  1337 
       
  1338         self._cookies_lock.release()
       
  1339 
       
  1340         self.clear_expired_cookies()
       
  1341 
       
  1342     def _normalized_cookie_tuples(self, attrs_set):
       
  1343         """Return list of tuples containing normalised cookie information.
       
  1344 
       
  1345         attrs_set is the list of lists of key,value pairs extracted from
       
  1346         the Set-Cookie or Set-Cookie2 headers.
       
  1347 
       
  1348         Tuples are name, value, standard, rest, where name and value are the
       
  1349         cookie name and value, standard is a dictionary containing the standard
       
  1350         cookie-attributes (discard, secure, version, expires or max-age,
       
  1351         domain, path and port) and rest is a dictionary containing the rest of
       
  1352         the cookie-attributes.
       
  1353 
       
  1354         """
       
  1355         cookie_tuples = []
       
  1356 
       
  1357         boolean_attrs = "discard", "secure"
       
  1358         value_attrs = ("version",
       
  1359                        "expires", "max-age",
       
  1360                        "domain", "path", "port",
       
  1361                        "comment", "commenturl")
       
  1362 
       
  1363         for cookie_attrs in attrs_set:
       
  1364             name, value = cookie_attrs[0]
       
  1365 
       
  1366             # Build dictionary of standard cookie-attributes (standard) and
       
  1367             # dictionary of other cookie-attributes (rest).
       
  1368 
       
  1369             # Note: expiry time is normalised to seconds since epoch.  V0
       
  1370             # cookies should have the Expires cookie-attribute, and V1 cookies
       
  1371             # should have Max-Age, but since V1 includes RFC 2109 cookies (and
       
  1372             # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
       
  1373             # accept either (but prefer Max-Age).
       
  1374             max_age_set = False
       
  1375 
       
  1376             bad_cookie = False
       
  1377 
       
  1378             standard = {}
       
  1379             rest = {}
       
  1380             for k, v in cookie_attrs[1:]:
       
  1381                 lc = k.lower()
       
  1382                 # don't lose case distinction for unknown fields
       
  1383                 if lc in value_attrs or lc in boolean_attrs:
       
  1384                     k = lc
       
  1385                 if k in boolean_attrs and v is None:
       
  1386                     # boolean cookie-attribute is present, but has no value
       
  1387                     # (like "discard", rather than "port=80")
       
  1388                     v = True
       
  1389                 if k in standard:
       
  1390                     # only first value is significant
       
  1391                     continue
       
  1392                 if k == "domain":
       
  1393                     if v is None:
       
  1394                         _debug("   missing value for domain attribute")
       
  1395                         bad_cookie = True
       
  1396                         break
       
  1397                     # RFC 2965 section 3.3.3
       
  1398                     v = v.lower()
       
  1399                 if k == "expires":
       
  1400                     if max_age_set:
       
  1401                         # Prefer max-age to expires (like Mozilla)
       
  1402                         continue
       
  1403                     if v is None:
       
  1404                         _debug("   missing or invalid value for expires "
       
  1405                               "attribute: treating as session cookie")
       
  1406                         continue
       
  1407                 if k == "max-age":
       
  1408                     max_age_set = True
       
  1409                     try:
       
  1410                         v = int(v)
       
  1411                     except ValueError:
       
  1412                         _debug("   missing or invalid (non-numeric) value for "
       
  1413                               "max-age attribute")
       
  1414                         bad_cookie = True
       
  1415                         break
       
  1416                     # convert RFC 2965 Max-Age to seconds since epoch
       
  1417                     # XXX Strictly you're supposed to follow RFC 2616
       
  1418                     #   age-calculation rules.  Remember that zero Max-Age is a
       
  1419                     #   is a request to discard (old and new) cookie, though.
       
  1420                     k = "expires"
       
  1421                     v = self._now + v
       
  1422                 if (k in value_attrs) or (k in boolean_attrs):
       
  1423                     if (v is None and
       
  1424                         k not in ("port", "comment", "commenturl")):
       
  1425                         _debug("   missing value for %s attribute" % k)
       
  1426                         bad_cookie = True
       
  1427                         break
       
  1428                     standard[k] = v
       
  1429                 else:
       
  1430                     rest[k] = v
       
  1431 
       
  1432             if bad_cookie:
       
  1433                 continue
       
  1434 
       
  1435             cookie_tuples.append((name, value, standard, rest))
       
  1436 
       
  1437         return cookie_tuples
       
  1438 
       
  1439     def _cookie_from_cookie_tuple(self, tup, request):
       
  1440         # standard is dict of standard cookie-attributes, rest is dict of the
       
  1441         # rest of them
       
  1442         name, value, standard, rest = tup
       
  1443 
       
  1444         domain = standard.get("domain", Absent)
       
  1445         path = standard.get("path", Absent)
       
  1446         port = standard.get("port", Absent)
       
  1447         expires = standard.get("expires", Absent)
       
  1448 
       
  1449         # set the easy defaults
       
  1450         version = standard.get("version", None)
       
  1451         if version is not None: version = int(version)
       
  1452         secure = standard.get("secure", False)
       
  1453         # (discard is also set if expires is Absent)
       
  1454         discard = standard.get("discard", False)
       
  1455         comment = standard.get("comment", None)
       
  1456         comment_url = standard.get("commenturl", None)
       
  1457 
       
  1458         # set default path
       
  1459         if path is not Absent and path != "":
       
  1460             path_specified = True
       
  1461             path = escape_path(path)
       
  1462         else:
       
  1463             path_specified = False
       
  1464             path = request_path(request)
       
  1465             i = path.rfind("/")
       
  1466             if i != -1:
       
  1467                 if version == 0:
       
  1468                     # Netscape spec parts company from reality here
       
  1469                     path = path[:i]
       
  1470                 else:
       
  1471                     path = path[:i+1]
       
  1472             if len(path) == 0: path = "/"
       
  1473 
       
  1474         # set default domain
       
  1475         domain_specified = domain is not Absent
       
  1476         # but first we have to remember whether it starts with a dot
       
  1477         domain_initial_dot = False
       
  1478         if domain_specified:
       
  1479             domain_initial_dot = bool(domain.startswith("."))
       
  1480         if domain is Absent:
       
  1481             req_host, erhn = eff_request_host(request)
       
  1482             domain = erhn
       
  1483         elif not domain.startswith("."):
       
  1484             domain = "."+domain
       
  1485 
       
  1486         # set default port
       
  1487         port_specified = False
       
  1488         if port is not Absent:
       
  1489             if port is None:
       
  1490                 # Port attr present, but has no value: default to request port.
       
  1491                 # Cookie should then only be sent back on that port.
       
  1492                 port = request_port(request)
       
  1493             else:
       
  1494                 port_specified = True
       
  1495                 port = re.sub(r"\s+", "", port)
       
  1496         else:
       
  1497             # No port attr present.  Cookie can be sent back on any port.
       
  1498             port = None
       
  1499 
       
  1500         # set default expires and discard
       
  1501         if expires is Absent:
       
  1502             expires = None
       
  1503             discard = True
       
  1504         elif expires <= self._now:
       
  1505             # Expiry date in past is request to delete cookie.  This can't be
       
  1506             # in DefaultCookiePolicy, because can't delete cookies there.
       
  1507             try:
       
  1508                 self.clear(domain, path, name)
       
  1509             except KeyError:
       
  1510                 pass
       
  1511             _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
       
  1512                    domain, path, name)
       
  1513             return None
       
  1514 
       
  1515         return Cookie(version,
       
  1516                       name, value,
       
  1517                       port, port_specified,
       
  1518                       domain, domain_specified, domain_initial_dot,
       
  1519                       path, path_specified,
       
  1520                       secure,
       
  1521                       expires,
       
  1522                       discard,
       
  1523                       comment,
       
  1524                       comment_url,
       
  1525                       rest)
       
  1526 
       
  1527     def _cookies_from_attrs_set(self, attrs_set, request):
       
  1528         cookie_tuples = self._normalized_cookie_tuples(attrs_set)
       
  1529 
       
  1530         cookies = []
       
  1531         for tup in cookie_tuples:
       
  1532             cookie = self._cookie_from_cookie_tuple(tup, request)
       
  1533             if cookie: cookies.append(cookie)
       
  1534         return cookies
       
  1535 
       
  1536     def _process_rfc2109_cookies(self, cookies):
       
  1537         rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
       
  1538         if rfc2109_as_ns is None:
       
  1539             rfc2109_as_ns = not self._policy.rfc2965
       
  1540         for cookie in cookies:
       
  1541             if cookie.version == 1:
       
  1542                 cookie.rfc2109 = True
       
  1543                 if rfc2109_as_ns:
       
  1544                     # treat 2109 cookies as Netscape cookies rather than
       
  1545                     # as RFC2965 cookies
       
  1546                     cookie.version = 0
       
  1547 
       
  1548     def make_cookies(self, response, request):
       
  1549         """Return sequence of Cookie objects extracted from response object."""
       
  1550         # get cookie-attributes for RFC 2965 and Netscape protocols
       
  1551         headers = response.info()
       
  1552         rfc2965_hdrs = headers.getheaders("Set-Cookie2")
       
  1553         ns_hdrs = headers.getheaders("Set-Cookie")
       
  1554 
       
  1555         rfc2965 = self._policy.rfc2965
       
  1556         netscape = self._policy.netscape
       
  1557 
       
  1558         if ((not rfc2965_hdrs and not ns_hdrs) or
       
  1559             (not ns_hdrs and not rfc2965) or
       
  1560             (not rfc2965_hdrs and not netscape) or
       
  1561             (not netscape and not rfc2965)):
       
  1562             return []  # no relevant cookie headers: quick exit
       
  1563 
       
  1564         try:
       
  1565             cookies = self._cookies_from_attrs_set(
       
  1566                 split_header_words(rfc2965_hdrs), request)
       
  1567         except Exception:
       
  1568             _warn_unhandled_exception()
       
  1569             cookies = []
       
  1570 
       
  1571         if ns_hdrs and netscape:
       
  1572             try:
       
  1573                 # RFC 2109 and Netscape cookies
       
  1574                 ns_cookies = self._cookies_from_attrs_set(
       
  1575                     parse_ns_headers(ns_hdrs), request)
       
  1576             except Exception:
       
  1577                 _warn_unhandled_exception()
       
  1578                 ns_cookies = []
       
  1579             self._process_rfc2109_cookies(ns_cookies)
       
  1580 
       
  1581             # Look for Netscape cookies (from Set-Cookie headers) that match
       
  1582             # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
       
  1583             # For each match, keep the RFC 2965 cookie and ignore the Netscape
       
  1584             # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
       
  1585             # bundled in with the Netscape cookies for this purpose, which is
       
  1586             # reasonable behaviour.
       
  1587             if rfc2965:
       
  1588                 lookup = {}
       
  1589                 for cookie in cookies:
       
  1590                     lookup[(cookie.domain, cookie.path, cookie.name)] = None
       
  1591 
       
  1592                 def no_matching_rfc2965(ns_cookie, lookup=lookup):
       
  1593                     key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
       
  1594                     return key not in lookup
       
  1595                 ns_cookies = filter(no_matching_rfc2965, ns_cookies)
       
  1596 
       
  1597             if ns_cookies:
       
  1598                 cookies.extend(ns_cookies)
       
  1599 
       
  1600         return cookies
       
  1601 
       
  1602     def set_cookie_if_ok(self, cookie, request):
       
  1603         """Set a cookie if policy says it's OK to do so."""
       
  1604         self._cookies_lock.acquire()
       
  1605         self._policy._now = self._now = int(time.time())
       
  1606 
       
  1607         if self._policy.set_ok(cookie, request):
       
  1608             self.set_cookie(cookie)
       
  1609 
       
  1610         self._cookies_lock.release()
       
  1611 
       
  1612     def set_cookie(self, cookie):
       
  1613         """Set a cookie, without checking whether or not it should be set."""
       
  1614         c = self._cookies
       
  1615         self._cookies_lock.acquire()
       
  1616         try:
       
  1617             if cookie.domain not in c: c[cookie.domain] = {}
       
  1618             c2 = c[cookie.domain]
       
  1619             if cookie.path not in c2: c2[cookie.path] = {}
       
  1620             c3 = c2[cookie.path]
       
  1621             c3[cookie.name] = cookie
       
  1622         finally:
       
  1623             self._cookies_lock.release()
       
  1624 
       
  1625     def extract_cookies(self, response, request):
       
  1626         """Extract cookies from response, where allowable given the request."""
       
  1627         _debug("extract_cookies: %s", response.info())
       
  1628         self._cookies_lock.acquire()
       
  1629         self._policy._now = self._now = int(time.time())
       
  1630 
       
  1631         for cookie in self.make_cookies(response, request):
       
  1632             if self._policy.set_ok(cookie, request):
       
  1633                 _debug(" setting cookie: %s", cookie)
       
  1634                 self.set_cookie(cookie)
       
  1635         self._cookies_lock.release()
       
  1636 
       
  1637     def clear(self, domain=None, path=None, name=None):
       
  1638         """Clear some cookies.
       
  1639 
       
  1640         Invoking this method without arguments will clear all cookies.  If
       
  1641         given a single argument, only cookies belonging to that domain will be
       
  1642         removed.  If given two arguments, cookies belonging to the specified
       
  1643         path within that domain are removed.  If given three arguments, then
       
  1644         the cookie with the specified name, path and domain is removed.
       
  1645 
       
  1646         Raises KeyError if no matching cookie exists.
       
  1647 
       
  1648         """
       
  1649         if name is not None:
       
  1650             if (domain is None) or (path is None):
       
  1651                 raise ValueError(
       
  1652                     "domain and path must be given to remove a cookie by name")
       
  1653             del self._cookies[domain][path][name]
       
  1654         elif path is not None:
       
  1655             if domain is None:
       
  1656                 raise ValueError(
       
  1657                     "domain must be given to remove cookies by path")
       
  1658             del self._cookies[domain][path]
       
  1659         elif domain is not None:
       
  1660             del self._cookies[domain]
       
  1661         else:
       
  1662             self._cookies = {}
       
  1663 
       
  1664     def clear_session_cookies(self):
       
  1665         """Discard all session cookies.
       
  1666 
       
  1667         Note that the .save() method won't save session cookies anyway, unless
       
  1668         you ask otherwise by passing a true ignore_discard argument.
       
  1669 
       
  1670         """
       
  1671         self._cookies_lock.acquire()
       
  1672         for cookie in self:
       
  1673             if cookie.discard:
       
  1674                 self.clear(cookie.domain, cookie.path, cookie.name)
       
  1675         self._cookies_lock.release()
       
  1676 
       
  1677     def clear_expired_cookies(self):
       
  1678         """Discard all expired cookies.
       
  1679 
       
  1680         You probably don't need to call this method: expired cookies are never
       
  1681         sent back to the server (provided you're using DefaultCookiePolicy),
       
  1682         this method is called by CookieJar itself every so often, and the
       
  1683         .save() method won't save expired cookies anyway (unless you ask
       
  1684         otherwise by passing a true ignore_expires argument).
       
  1685 
       
  1686         """
       
  1687         self._cookies_lock.acquire()
       
  1688         now = time.time()
       
  1689         for cookie in self:
       
  1690             if cookie.is_expired(now):
       
  1691                 self.clear(cookie.domain, cookie.path, cookie.name)
       
  1692         self._cookies_lock.release()
       
  1693 
       
  1694     def __iter__(self):
       
  1695         return deepvalues(self._cookies)
       
  1696 
       
  1697     def __len__(self):
       
  1698         """Return number of contained cookies."""
       
  1699         i = 0
       
  1700         for cookie in self: i = i + 1
       
  1701         return i
       
  1702 
       
  1703     def __repr__(self):
       
  1704         r = []
       
  1705         for cookie in self: r.append(repr(cookie))
       
  1706         return "<%s[%s]>" % (self.__class__, ", ".join(r))
       
  1707 
       
  1708     def __str__(self):
       
  1709         r = []
       
  1710         for cookie in self: r.append(str(cookie))
       
  1711         return "<%s[%s]>" % (self.__class__, ", ".join(r))
       
  1712 
       
  1713 
       
  1714 # derives from IOError for backwards-compatibility with Python 2.4.0
       
  1715 class LoadError(IOError): pass
       
  1716 
       
  1717 class FileCookieJar(CookieJar):
       
  1718     """CookieJar that can be loaded from and saved to a file."""
       
  1719 
       
  1720     def __init__(self, filename=None, delayload=False, policy=None):
       
  1721         """
       
  1722         Cookies are NOT loaded from the named file until either the .load() or
       
  1723         .revert() method is called.
       
  1724 
       
  1725         """
       
  1726         CookieJar.__init__(self, policy)
       
  1727         if filename is not None:
       
  1728             try:
       
  1729                 filename+""
       
  1730             except:
       
  1731                 raise ValueError("filename must be string-like")
       
  1732         self.filename = filename
       
  1733         self.delayload = bool(delayload)
       
  1734 
       
  1735     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
       
  1736         """Save cookies to a file."""
       
  1737         raise NotImplementedError()
       
  1738 
       
  1739     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
       
  1740         """Load cookies from a file."""
       
  1741         if filename is None:
       
  1742             if self.filename is not None: filename = self.filename
       
  1743             else: raise ValueError(MISSING_FILENAME_TEXT)
       
  1744 
       
  1745         f = open(filename)
       
  1746         try:
       
  1747             self._really_load(f, filename, ignore_discard, ignore_expires)
       
  1748         finally:
       
  1749             f.close()
       
  1750 
       
  1751     def revert(self, filename=None,
       
  1752                ignore_discard=False, ignore_expires=False):
       
  1753         """Clear all cookies and reload cookies from a saved file.
       
  1754 
       
  1755         Raises LoadError (or IOError) if reversion is not successful; the
       
  1756         object's state will not be altered if this happens.
       
  1757 
       
  1758         """
       
  1759         if filename is None:
       
  1760             if self.filename is not None: filename = self.filename
       
  1761             else: raise ValueError(MISSING_FILENAME_TEXT)
       
  1762 
       
  1763         self._cookies_lock.acquire()
       
  1764 
       
  1765         old_state = copy.deepcopy(self._cookies)
       
  1766         self._cookies = {}
       
  1767         try:
       
  1768             self.load(filename, ignore_discard, ignore_expires)
       
  1769         except (LoadError, IOError):
       
  1770             self._cookies = old_state
       
  1771             raise
       
  1772 
       
  1773         self._cookies_lock.release()
       
  1774 
       
  1775 from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
       
  1776 from _MozillaCookieJar import MozillaCookieJar