"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""

__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']

import re, urlparse, copy, time, urllib
try:
    import threading as _threading
except ImportError:
    import dummy_threading as _threading
import httplib  # only for the default HTTP port
from calendar import timegm

debug = False   # set to True to enable debugging via the logging module
logger = None

def _debug(*args):
    if not debug:
        return
    global logger
    if not logger:
        import logging
        logger = logging.getLogger("cookielib")
    return logger.debug(*args)


DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")

def _warn_unhandled_exception():
    # There are a few catch-all except: statements in this module, for
    # catching input that's bad in unexpected ways.  Warn if any
    # exceptions are caught there.
    import warnings, traceback, StringIO
    f = StringIO.StringIO()
    traceback.print_exc(None, f)
    msg = f.getvalue()
    warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2)


# Date/time conversion
# -----------------------------------------------------------------------------

EPOCH_YEAR = 1970
def _timegm(tt):
    year, month, mday, hour, min, sec = tt[:6]
    if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
        (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
        return timegm(tt)
    else:
        return None

DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
MONTHS_LOWER = []
for month in MONTHS: MONTHS_LOWER.append(month.lower())

def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    if t is None: t = time.time()
    year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        year, mon, mday, hour, min, sec)

def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None: t = time.time()
    year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
    return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec)


UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    offset = None
    if tz in UTC_ZONES:
        offset = 0
    else:
        m = TIMEZONE_RE.search(tz)
        if m:
            offset = 3600 * int(m.group(2))
            if m.group(3):
                offset = offset + 60 * int(m.group(3))
            if m.group(1) == '-':
                offset = -offset
    return offset

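# Illustrative example (not part of the original module): offset_from_tz_string
# returns an offset in seconds east of UTC, or None for an unrecognised
# timezone string (only UTC-equivalent names and numeric offsets are known).
#
#   >>> from cookielib import offset_from_tz_string
#   >>> offset_from_tz_string("GMT")
#   0
#   >>> offset_from_tz_string("+0100")
#   3600
#   >>> offset_from_tz_string("-0800")
#   -28800
#   >>> print offset_from_tz_string("PST")
#   None
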
def _str2time(day, mon, yr, hr, min, sec, tz):
    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    yr = int(yr)
    day = int(day)
    hr = int(hr)
    min = int(min)
    sec = int(sec)

    if yr < 1000:
        # find "obvious" year
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t

STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of the string is unrecognized, the time
    is outside the representable range, or the timezone string is not
    recognized.  If the string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        mon = MONTHS_LOWER.index(g[1].lower()) + 1
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), float(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = [None]*7

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is not None:
        day, mon, yr, hr, min, sec, tz = m.groups()
    else:
        return None  # bad format

    return _str2time(day, mon, yr, hr, min, sec, tz)

ISO_DATE_RE = re.compile(
    """^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    # clean up
    text = text.lstrip()

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = [None]*7

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is not None:
        # XXX there's an extra bit of the timezone I'm ignoring here: is
        #   this the right thing to do?
        yr, mon, day, hr, min, sec, tz, _ = m.groups()
    else:
        return None  # bad format

    return _str2time(day, mon, yr, hr, min, sec, tz)

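# Illustrative examples (not part of the original module): http2time and
# iso2time both return seconds since the epoch (or None on failure), so the
# results round-trip cleanly through time2isoz.
#
#   >>> from cookielib import http2time, iso2time, time2isoz
#   >>> time2isoz(http2time("Wed, 09 Feb 1994 22:23:32 GMT"))
#   '1994-02-09 22:23:32Z'
#   >>> time2isoz(iso2time("1994-02-09 22:23:32Z"))
#   '1994-02-09 22:23:32Z'
#   >>> print http2time("not a date")
#   None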

# Header parsing
# -----------------------------------------------------------------------------

def unmatched(match):
    """Return unmatched part of re.Match object."""
    start, end = match.span(0)
    return match.string[:start]+match.string[end:]

HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space-separated tokens is parsed as if the
    tokens were separated by ";".

    If the header_values passed as argument contains multiple values, then
    they are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers       = #header
      header        = (token | parameter) *( [";"] (token | parameter))

      token         = 1*<any CHAR except CTLs or separators>
      separators    = "(" | ")" | "<" | ">" | "@"
                    | "," | ";" | ":" | "\" | <">
                    | "/" | "[" | "]" | "?" | "="
                    | "{" | "}" | SP | HT

      quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext        = <any TEXT except <">>
      quoted-pair   = "\" CHAR

      parameter     = attribute "=" value
      attribute     = token
      value         = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, basestring)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    value = HEADER_ESCAPE_RE.sub(r"\1", value)
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result

HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        attr = []
        for k, v in pairs:
            if v is not None:
                if not re.search(r"^\w+$", v):
                    v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
                    v = '"%s"' % v
                k = "%s=%s" % (k, v)
            attr.append(k)
        if attr: headers.append("; ".join(attr))
    return ", ".join(headers)

def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so we could do worse than following it if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "": continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    version_set = True
                if k == "expires":
                    # convert expires date to seconds since epoch
                    if v.startswith('"'): v = v[1:]
                    if v.endswith('"'): v = v[:-1]
                    v = http2time(v)  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result

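# Illustrative example (not part of the original module): a Netscape-style
# Set-Cookie header is parsed into (name, value) pairs, with known attribute
# names lowercased and a default "version" of "0" appended.
#
#   >>> from cookielib import parse_ns_headers
#   >>> parse_ns_headers(['SID=abc123; Path=/; Secure'])
#   [[('SID', 'abc123'), ('path', '/'), ('secure', None), ('version', '0')]]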

IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if you change this.
    if IPV4_RE.search(text):
        return False
    if text == "":
        return False
    if text[0] == "." or text[-1] == ".":
        return False
    return True

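# Illustrative example (not part of the original module): is_HDN rejects the
# empty string, names with a leading or trailing dot, and anything that looks
# like an IPv4 address (i.e. ends in a dotted number).
#
#   >>> from cookielib import is_HDN
#   >>> is_HDN("www.example.com")
#   True
#   >>> is_HDN("192.168.1.1")
#   False
#   >>> is_HDN(".example.com")
#   False
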
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    i = A.rfind(B)
    if i == -1 or i == 0:
        # A does not have form NB, or N is the empty string
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True

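# Illustrative example (not part of the original module): domain-matching is
# case-insensitive and, as noted above, not commutative.
#
#   >>> from cookielib import domain_match
#   >>> domain_match("x.y.com", ".Y.com")
#   True
#   >>> domain_match("x.y.com", "y.com")
#   False
#   >>> domain_match(".y.com", "x.y.com")
#   False
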
def liberal_is_HDN(text):
    """Return True if text is sort-of like a host domain name.

    For accepting/blocking domains.

    """
    if IPV4_RE.search(text):
        return False
    return True

def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        if A == B:
            # equal IP addresses
            return True
        return False
    initial_dot = B.startswith(".")
    if initial_dot and A.endswith(B):
        return True
    if not initial_dot and A == B:
        return True
    return False

cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    url = request.get_full_url()
    host = urlparse.urlparse(url)[1]
    if host == "":
        host = request.get_header("Host", "")

    # remove port, if present
    host = cut_port_re.sub("", host, 1)
    return host.lower()

def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.

    """
    erhn = req_host = request_host(request)
    if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
        erhn = req_host + ".local"
    return req_host, erhn

def request_path(request):
    """request-URI, as defined by RFC 2965."""
    url = request.get_full_url()
    #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
    #req_path = escape_path("".join(urlparse.urlparse(url)[2:]))
    path, parameters, query, frag = urlparse.urlparse(url)[2:]
    if parameters:
        path = "%s;%s" % (path, parameters)
    path = escape_path(path)
    req_path = urlparse.urlunparse(("", "", path, "", query, frag))
    if not req_path.startswith("/"):
        # fix bad RFC 2396 absoluteURI
        req_path = "/"+req_path
    return req_path

def request_port(request):
    host = request.get_host()
    i = host.find(':')
    if i >= 0:
        port = host[i+1:]
        try:
            int(port)
        except ValueError:
            _debug("nonnumeric port: '%s'", port)
            return None
    else:
        port = DEFAULT_HTTP_PORT
    return port

# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    return "%%%s" % match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    path = urllib.quote(path, HTTP_PATH_SAFE)
    path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
    return path

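# Illustrative example (not part of the original module): characters that are
# not safe in an HTTP URL path are %-escaped (as UTF-8), and existing escapes
# are uppercased.
#
#   >>> from cookielib import escape_path
#   >>> escape_path("/acme/a b")
#   '/acme/a%20b'
#   >>> escape_path("/acme/%7ejoe")
#   '/acme/%7Ejoe'
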
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
          then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    i = h.find(".")
    if i >= 0:
        #a = h[:i]  # this line is only here to show what a is
        b = h[i+1:]
        i = b.find(".")
        if is_HDN(h) and (i >= 0 or b == "local"):
            return "."+b
    return h

def is_third_party(request):
    """

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host(request)
    if not domain_match(req_host, reach(request.get_origin_req_host())):
        return True
    else:
        return False


class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):

        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        return name in self._rest
    def get_nonstandard_attr(self, name, default=None):
        return self._rest.get(name, default)
    def set_nonstandard_attr(self, name, value):
        self._rest[name] = value

    def is_expired(self, now=None):
        if now is None: now = time.time()
        if (self.expires is not None) and (self.expires <= now):
            return True
        return False

    def __str__(self):
        if self.port is None: p = ""
        else: p = ":"+self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        args = []
        for name in ("version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ):
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)

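# Illustrative example (not part of the original module): Cookie instances are
# normally built by CookieJar.make_cookies, but one can be constructed by hand
# if all positional arguments are supplied.  The values below are arbitrary.
#
#   >>> from cookielib import Cookie
#   >>> c = Cookie(0, "SID", "abc123",
#   ...            None, False,
#   ...            "www.example.com", False, False,
#   ...            "/", False,
#   ...            False, None, True, None, None, {})
#   >>> str(c)
#   '<Cookie SID=abc123 for www.example.com/>'
#   >>> c.is_expired()
#   False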

class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server."""
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.
        """
        return True

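# Illustrative sketch (not part of the original module): a minimal custom
# policy, building on DefaultCookiePolicy (defined below), that refuses to
# return cookies to any host outside an assumed internal domain while still
# accepting cookies as usual.  The ".internal.test" suffix is a placeholder.
#
#   >>> import cookielib
#   >>> class InternalOnlyPolicy(cookielib.DefaultCookiePolicy):
#   ...     def return_ok(self, cookie, request):
#   ...         host = cookielib.request_host(request)
#   ...         if not host.endswith(".internal.test"):
#   ...             return False
#   ...         return cookielib.DefaultCookiePolicy.return_ok(
#   ...             self, cookie, request)
#   ...
#   >>> jar = cookielib.CookieJar(policy=InternalOnlyPolicy())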

class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies."""

    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 ):
        """Constructor arguments should be passed as keyword arguments only."""
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path

        if blocked_domains is not None:
            self._blocked_domains = tuple(blocked_domains)
        else:
            self._blocked_domains = ()

        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains
    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains."""
        self._blocked_domains = tuple(blocked_domains)

    def is_blocked(self, domain):
        for blocked_domain in self._blocked_domains:
            if user_domain_match(domain, blocked_domain):
                return True
        return False

    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple)."""
        return self._allowed_domains
    def set_allowed_domains(self, allowed_domains):
        """Set the sequence of allowed domains, or None."""
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def is_not_allowed(self, domain):
        if self._allowed_domains is None:
            return False
        for allowed_domain in self._allowed_domains:
            if user_domain_match(domain, allowed_domain):
                return False
        return True

    def set_ok(self, cookie, request):
        """
        If you override .set_ok(), be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to accept).

        """
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        assert cookie.name is not None

        for n in "version", "verifiability", "name", "path", "domain", "port":
            fn_name = "set_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False

        return True

    def set_ok_version(self, cookie, request):
        if cookie.version is None:
            # Version is always set to 0 by parse_ns_headers if it's a Netscape
            # cookie, so this must be an invalid RFC 2965 cookie.
            _debug("   Set-Cookie2 without version attribute (%s=%s)",
                   cookie.name, cookie.value)
            return False
        if cookie.version > 0 and not self.rfc2965:
            _debug("   RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug("   Netscape cookies are switched off")
            return False
        return True

    def set_ok_verifiability(self, cookie, request):
        if request.is_unverifiable() and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug("   third-party RFC 2965 cookie during "
                       "unverifiable transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug("   third-party Netscape cookie during "
                       "unverifiable transaction")
                return False
        return True

    def set_ok_name(self, cookie, request):
        # Try and stop servers setting V0 cookies designed to hack other
        # servers that know both V0 and V1 protocols.
        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
            cookie.name.startswith("$")):
            _debug("   illegal name (starts with '$'): '%s'", cookie.name)
            return False
        return True

    def set_ok_path(self, cookie, request):
        if cookie.path_specified:
            req_path = request_path(request)
            if ((cookie.version > 0 or
                 (cookie.version == 0 and self.strict_ns_set_path)) and
                not req_path.startswith(cookie.path)):
                _debug("   path attribute %s is not a prefix of request "
                       "path %s", cookie.path, req_path)
                return False
        return True

    def set_ok_domain(self, cookie, request):
        if self.is_blocked(cookie.domain):
            _debug("   domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            _debug("   domain %s is not in user allow-list", cookie.domain)
            return False
        if cookie.domain_specified:
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            if self.strict_domain and (domain.count(".") >= 2):
                # XXX This should probably be compared with the Konqueror
                # (kcookiejar.cpp) and Mozilla implementations, but it's a
                # losing battle.
                i = domain.rfind(".")
                j = domain.rfind(".", 0, i)
                if j == 0:  # domain like .foo.bar
                    tld = domain[i+1:]
                    sld = domain[j+1:i]
                    if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                       "gov", "mil", "int", "aero", "biz", "cat", "coop",
                       "info", "jobs", "mobi", "museum", "name", "pro",
                       "travel", "eu") and len(tld) == 2:
                        # domain like .co.uk
                        _debug("   country-code second level domain %s", domain)
                        return False
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            if not embedded_dots and domain != ".local":
                _debug("   non-local domain %s contains no embedded dot",
                       domain)
                return False
            if cookie.version == 0:
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    _debug("   effective request-host %s (even with added "
                           "initial dot) does not end with %s",
                           erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    _debug("   effective request-host %s does not domain-match "
                           "%s", erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    _debug("   host prefix %s for domain %s contains a dot",
                           host_prefix, domain)
                    return False
        return True

    def set_ok_port(self, cookie, request):
        if cookie.port_specified:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                req_port = str(req_port)
            for p in cookie.port.split(","):
                try:
                    int(p)
                except ValueError:
                    _debug("   bad port %s (not numeric)", p)
                    return False
                if p == req_port:
                    break
            else:
                _debug("   request port (%s) not found in %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok(self, cookie, request):
        """
        If you override .return_ok(), be sure to call this method.  If it
        returns false, so should your subclass (assuming your subclass wants to
        be more strict about which cookies to return).

        """
        # Path has already been checked by .path_return_ok(), and domain
        # blocking done by .domain_return_ok().
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        for n in "version", "verifiability", "secure", "expires", "port", "domain":
            fn_name = "return_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False
        return True

    def return_ok_version(self, cookie, request):
        if cookie.version > 0 and not self.rfc2965:
            _debug("   RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug("   Netscape cookies are switched off")
            return False
        return True

    def return_ok_verifiability(self, cookie, request):
        if request.is_unverifiable() and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug("   third-party RFC 2965 cookie during unverifiable "
                       "transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug("   third-party Netscape cookie during unverifiable "
                       "transaction")
                return False
        return True

    def return_ok_secure(self, cookie, request):
        if cookie.secure and request.get_type() != "https":
            _debug("   secure cookie with non-secure request")
            return False
        return True

    def return_ok_expires(self, cookie, request):
        if cookie.is_expired(self._now):
            _debug("   cookie expired")
            return False
        return True

    def return_ok_port(self, cookie, request):
        if cookie.port:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            for p in cookie.port.split(","):
                if p == req_port:
                    break
            else:
                _debug("   request port %s does not match cookie port %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok_domain(self, cookie, request):
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain

        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            _debug("   cookie with unspecified domain does not string-compare "
                   "equal to request domain")
            return False

        if cookie.version > 0 and not domain_match(erhn, domain):
            _debug("   effective request-host name %s does not domain-match "
                   "RFC 2965 cookie domain %s", erhn, domain)
            return False
        if cookie.version == 0 and not ("."+erhn).endswith(domain):
            _debug("   request-host %s does not match Netscape cookie domain "
                   "%s", req_host, domain)
            return False
        return True

    def domain_return_ok(self, domain, request):
        # Liberal check of domain.  This is here as an optimization to avoid
        # having to load lots of MSIE cookie files unless necessary.
        req_host, erhn = eff_request_host(request)
        if not req_host.startswith("."):
            req_host = "."+req_host
        if not erhn.startswith("."):
            erhn = "."+erhn
        if not (req_host.endswith(domain) or erhn.endswith(domain)):
            #_debug("   request domain %s does not match cookie domain %s",
            #       req_host, domain)
            return False

        if self.is_blocked(domain):
            _debug("   domain %s is in user block-list", domain)
            return False
        if self.is_not_allowed(domain):
            _debug("   domain %s is not in user allow-list", domain)
            return False

        return True

    def path_return_ok(self, path, request):
        _debug("- checking cookie path=%s", path)
        req_path = request_path(request)
        if not req_path.startswith(path):
            _debug("  %s does not path-match %s", req_path, path)
            return False
        return True

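# Illustrative sketch (not part of the original module): DefaultCookiePolicy
# is configured entirely through keyword arguments; the domain names below are
# arbitrary placeholders.
#
#   >>> import cookielib
#   >>> policy = cookielib.DefaultCookiePolicy(
#   ...     rfc2965=True,                      # also accept RFC 2965 cookies
#   ...     blocked_domains=[".ads.example"],  # never accept/return for these
#   ...     strict_ns_domain=cookielib.DefaultCookiePolicy.DomainStrict)
#   >>> jar = cookielib.CookieJar(policy)
#   >>> policy.is_blocked("tracker.ads.example")
#   True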

def vals_sorted_by_key(adict):
    keys = adict.keys()
    keys.sort()
    return map(adict.get, keys)

def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key."""
    values = vals_sorted_by_key(mapping)
    for obj in values:
        mapping = False
        try:
            obj.items
        except AttributeError:
            pass
        else:
            mapping = True
            for subobj in deepvalues(obj):
                yield subobj
        if not mapping:
            yield obj


# Used as second parameter to dict.get() method, to distinguish absent
# dict key from one with a None value.
class Absent: pass

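# Typical usage (illustrative sketch, not part of the original module): wire a
# CookieJar into a urllib2 opener so that cookies set by responses are stored
# and sent back automatically.  The URL below is a placeholder.
#
#   >>> import urllib2, cookielib
#   >>> jar = cookielib.CookieJar()
#   >>> opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
#   >>> response = opener.open("http://www.example.com/")  # doctest: +SKIP
#   >>> for cookie in jar:
#   ...     print cookie.name, cookie.value, cookie.domain
#
# The same jar can also be driven by hand with jar.add_cookie_header(request)
# and jar.extract_cookies(response, request), for any request and response
# objects that provide the urllib2-style interface these methods expect.
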
class CookieJar:
    """Collection of HTTP cookies.

    You may not need to know about this class: try
    urllib2.build_opener(HTTPCookieProcessor).open(url).

    """

    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"

    def __init__(self, policy=None):
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

        self._cookies_lock = _threading.RLock()
        self._cookies = {}

    def set_policy(self, policy):
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        cookies = []
        if not self._policy.domain_return_ok(domain, request):
            return []
        _debug("Checking %s for cookies to return", domain)
        cookies_by_path = self._cookies[domain]
        for path in cookies_by_path.keys():
            if not self._policy.path_return_ok(path, request):
                continue
            cookies_by_name = cookies_by_path[path]
            for cookie in cookies_by_name.values():
                if not self._policy.return_ok(cookie, request):
                    _debug("   not returning cookie")
                    continue
                _debug("   it's a match")
                cookies.append(cookie)
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        cookies = []
        for domain in self._cookies.keys():
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        like ['foo="bar"; $Path="/"', ...]

        The $Version attribute is also added when appropriate (currently only
        once per request).

        """
        # add cookies in order of most specific (ie. longest) path first
        def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
        cookies.sort(decreasing_size)

        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            #  different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            #  RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            #  intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    if (not cookie.domain_initial_dot and
                        domain.startswith(".")):
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (urllib2.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        """
        _debug("add_cookie_header")
        self._cookies_lock.acquire()

        self._policy._now = self._now = int(time.time())

        cookies = self._cookies_for_request(request)

        attrs = self._cookie_attrs(cookies)
        if attrs:
            if not request.has_header("Cookie"):
                request.add_unredirected_header(
                    "Cookie", "; ".join(attrs))

        # if necessary, advertise that we know RFC 2965
        if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
            not request.has_header("Cookie2")):
            for cookie in cookies:
                if cookie.version != 1:
                    request.add_unredirected_header("Cookie2", '$Version="1"')
                    break

        self._cookies_lock.release()

        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch.  V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        _debug("   missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        _debug("   missing or invalid value for expires "
                               "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    try:
                        v = int(v)
                    except ValueError:
                        _debug("   missing or invalid (non-numeric) value for "
                               "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    #   age-calculation rules.  Remember that zero Max-Age is a
                    #   request to discard (old and new) cookie, though.
                    k = "expires"
                    v = self._now + v
                if (k in value_attrs) or (k in boolean_attrs):
                    if (v is None and
                        k not in ("port", "comment", "commenturl")):
                        _debug("   missing value for %s attribute" % k)
                        bad_cookie = True
                        break
                    standard[k] = v
                else:
                    rest[k] = v

            if bad_cookie:
                continue

            cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples

    def _cookie_from_cookie_tuple(self, tup, request):
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        version = standard.get("version", None)
        if version is not None: version = int(version)
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        if path is not Absent and path != "":
            path_specified = True
            path = escape_path(path)
        else:
            path_specified = False
            path = request_path(request)
            i = path.rfind("/")
            if i != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:i]
                else:
                    path = path[:i+1]
            if len(path) == 0: path = "/"

        # set default domain
        domain_specified = domain is not Absent
        # but first we have to remember whether it starts with a dot
        domain_initial_dot = False
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
        if domain is Absent:
            req_host, erhn = eff_request_host(request)
            domain = erhn
        elif not domain.startswith("."):
            domain = "."+domain

        # set default port
        port_specified = False
        if port is not Absent:
            if port is None:
                # Port attr present, but has no value: default to request port.
                # Cookie should then only be sent back on that port.
                port = request_port(request)
            else:
                port_specified = True
                port = re.sub(r"\s+", "", port)
        else:
            # No port attr present.  Cookie can be sent back on any port.
            port = None

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True
        elif expires <= self._now:
            # Expiry date in past is request to delete cookie.  This can't be
            # in DefaultCookiePolicy, because we can't delete cookies there.
            try:
                self.clear(domain, path, name)
            except KeyError:
                pass
            _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                   domain, path, name)
            return None

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)

1527 def _cookies_from_attrs_set(self, attrs_set, request): |
|
1528 cookie_tuples = self._normalized_cookie_tuples(attrs_set) |
|
1529 |
|
1530 cookies = [] |
|
1531 for tup in cookie_tuples: |
|
1532 cookie = self._cookie_from_cookie_tuple(tup, request) |
|
1533 if cookie: cookies.append(cookie) |
|
1534 return cookies |
|
1535 |
|
1536 def _process_rfc2109_cookies(self, cookies): |
|
1537 rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) |
|
1538 if rfc2109_as_ns is None: |
|
1539 rfc2109_as_ns = not self._policy.rfc2965 |
|
1540 for cookie in cookies: |
|
1541 if cookie.version == 1: |
|
1542 cookie.rfc2109 = True |
|
1543 if rfc2109_as_ns: |
|
1544 # treat 2109 cookies as Netscape cookies rather than |
|
1545 # as RFC2965 cookies |
|
1546 cookie.version = 0 |
|
1547 |
|
    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.getheaders("Set-Cookie2")
        ns_hdrs = headers.getheaders("Set-Cookie")

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        if ((not rfc2965_hdrs and not ns_hdrs) or
            (not ns_hdrs and not rfc2965) or
            (not rfc2965_hdrs and not netscape) or
            (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except Exception:
            _warn_unhandled_exception()
            cookies = []

        if ns_hdrs and netscape:
            try:
                # RFC 2109 and Netscape cookies
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except Exception:
                _warn_unhandled_exception()
                ns_cookies = []
            self._process_rfc2109_cookies(ns_cookies)

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                lookup = {}
                for cookie in cookies:
                    lookup[(cookie.domain, cookie.path, cookie.name)] = None

                def no_matching_rfc2965(ns_cookie, lookup=lookup):
                    key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
                    return key not in lookup
                ns_cookies = filter(no_matching_rfc2965, ns_cookies)

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies
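
    # Illustrative sketch (not part of the original module): make_cookies()
    # only parses and filters; nothing is stored in the jar.  It works with
    # any response/request pair that follows the urllib2 interfaces
    # (response.info() returning the headers, request.get_full_url(), and so
    # on), so both objects usually come from urllib2; the URL below is made
    # up:
    #
    #   import cookielib, urllib2
    #   jar = cookielib.CookieJar()
    #   request = urllib2.Request("http://example.com/")
    #   response = urllib2.urlopen(request)
    #   for cookie in jar.make_cookies(response, request):
    #       jar.set_cookie_if_ok(cookie, request)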
|
    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so."""
        self._cookies_lock.acquire()
        try:
            self._policy._now = self._now = int(time.time())

            if self._policy.set_ok(cookie, request):
                self.set_cookie(cookie)
        finally:
            self._cookies_lock.release()
|
    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set."""
        c = self._cookies
        self._cookies_lock.acquire()
        try:
            if cookie.domain not in c: c[cookie.domain] = {}
            c2 = c[cookie.domain]
            if cookie.path not in c2: c2[cookie.path] = {}
            c3 = c2[cookie.path]
            c3[cookie.name] = cookie
        finally:
            self._cookies_lock.release()
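
    # Illustrative sketch (not part of the original module): set_cookie()
    # takes a ready-made Cookie, so a cookie can be injected without any HTTP
    # exchange.  The positional arguments mirror the Cookie constructor
    # (version, name, value, port, port_specified, domain, domain_specified,
    # domain_initial_dot, path, path_specified, secure, expires, discard,
    # comment, comment_url, rest); the values below are made up:
    #
    #   import cookielib
    #   jar = cookielib.CookieJar()
    #   cookie = cookielib.Cookie(0, "session", "abc123", None, False,
    #                             "example.com", True, False, "/", True,
    #                             False, None, True, None, None, {})
    #   jar.set_cookie(cookie)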
|
    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request."""
        _debug("extract_cookies: %s", response.info())
        self._cookies_lock.acquire()
        try:
            self._policy._now = self._now = int(time.time())

            for cookie in self.make_cookies(response, request):
                if self._policy.set_ok(cookie, request):
                    _debug(" setting cookie: %s", cookie)
                    self.set_cookie(cookie)
        finally:
            self._cookies_lock.release()
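
    # Illustrative sketch (not part of the original module): extract_cookies()
    # is what urllib2.HTTPCookieProcessor calls on each response, so most code
    # never invokes it directly; the URL below is made up:
    #
    #   import cookielib, urllib2
    #   jar = cookielib.CookieJar()
    #   opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    #   opener.open("http://example.com/")   # any cookies set end up in jar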
|
    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies.  If
        given a single argument, only cookies belonging to that domain will be
        removed.  If given two arguments, cookies belonging to the specified
        path within that domain are removed.  If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
        elif path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
        elif domain is not None:
            del self._cookies[domain]
        else:
            self._cookies = {}
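
    # Illustrative sketch (not part of the original module): the four ways of
    # calling clear(), with made-up domain/path/name values:
    #
    #   jar.clear()                                  # remove every cookie
    #   jar.clear("example.com")                     # one domain
    #   jar.clear("example.com", "/app")             # one path within it
    #   jar.clear("example.com", "/app", "session")  # one named cookie
    #
    # The last three raise KeyError if nothing matches.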
|
    def clear_session_cookies(self):
        """Discard all session cookies.

        Note that the .save() method won't save session cookies anyway, unless
        you ask otherwise by passing a true ignore_discard argument.

        """
        self._cookies_lock.acquire()
        try:
            for cookie in self:
                if cookie.discard:
                    self.clear(cookie.domain, cookie.path, cookie.name)
        finally:
            self._cookies_lock.release()
|
    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the
        .save() method won't save expired cookies anyway (unless you ask
        otherwise by passing a true ignore_expires argument).

        """
        self._cookies_lock.acquire()
        try:
            now = time.time()
            for cookie in self:
                if cookie.is_expired(now):
                    self.clear(cookie.domain, cookie.path, cookie.name)
        finally:
            self._cookies_lock.release()
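
    # Illustrative sketch (not part of the original module): a long-running
    # client that wants to mimic closing and reopening the browser can combine
    # the two housekeeping calls above:
    #
    #   jar.clear_expired_cookies()   # drop cookies whose expiry has passed
    #   jar.clear_session_cookies()   # drop cookies flagged with discard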
|
    def __iter__(self):
        return deepvalues(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        i = 0
        for cookie in self: i = i + 1
        return i
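
    # Illustrative sketch (not part of the original module): since the jar is
    # iterable and sized, inspection needs no special API; "session" is a
    # made-up cookie name:
    #
    #   names = [cookie.name for cookie in jar]
    #   have_session = any(cookie.name == "session" for cookie in jar)
    #   count = len(jar)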
|
    def __repr__(self):
        r = []
        for cookie in self: r.append(repr(cookie))
        return "<%s[%s]>" % (self.__class__, ", ".join(r))

    def __str__(self):
        r = []
        for cookie in self: r.append(str(cookie))
        return "<%s[%s]>" % (self.__class__, ", ".join(r))

|
# derives from IOError for backwards-compatibility with Python 2.4.0
class LoadError(IOError): pass
|
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file."""

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            try:
                # filename must behave like a string
                filename+""
            except TypeError:
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)
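
    # Illustrative sketch (not part of the original module): because nothing
    # is read at construction time, a FileCookieJar subclass is normally
    # created and then loaded explicitly; the filename is made up:
    #
    #   import cookielib
    #   jar = cookielib.MozillaCookieJar("cookies.txt")
    #   try:
    #       jar.load(ignore_discard=True)
    #   except (cookielib.LoadError, IOError):
    #       pass    # missing or malformed file: start with an empty jar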
|
    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file."""
        raise NotImplementedError()
|
    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()
|
    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        self._cookies_lock.acquire()
        try:
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except (LoadError, IOError):
                self._cookies = old_state
                raise
        finally:
            self._cookies_lock.release()
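
    # Illustrative sketch (not part of the original module): save() and
    # revert() form the natural pair for persisting cookies across runs; the
    # filename is made up:
    #
    #   jar = LWPCookieJar("cookies.lwp")
    #   jar.save(ignore_discard=True)     # write current cookies to disk
    #   jar.revert(ignore_discard=True)   # discard in-memory changes and
    #                                     # reload the saved state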
|
from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
from _MozillaCookieJar import MozillaCookieJar