|
1 # Copyright (C) 2001-2006 Python Software Foundation |
|
2 # Author: Barry Warsaw |
|
3 # Contact: email-sig@python.org |
|
4 |
|
5 """Miscellaneous utilities.""" |
|
6 |
|
7 __all__ = [ |
|
8 'collapse_rfc2231_value', |
|
9 'decode_params', |
|
10 'decode_rfc2231', |
|
11 'encode_rfc2231', |
|
12 'formataddr', |
|
13 'formatdate', |
|
14 'getaddresses', |
|
15 'make_msgid', |
|
16 'parseaddr', |
|
17 'parsedate', |
|
18 'parsedate_tz', |
|
19 'unquote', |
|
20 ] |
|
21 |
|
22 import os |
|
23 import re |
|
24 import time |
|
25 import base64 |
|
26 import random |
|
27 import socket |
|
28 import urllib |
|
29 import warnings |
|
30 from cStringIO import StringIO |
|
31 |
|
32 from email._parseaddr import quote |
|
33 from email._parseaddr import AddressList as _AddressList |
|
34 from email._parseaddr import mktime_tz |
|
35 |
|
36 # We need workarounds for bugs in these methods in older Pythons (see below) |
|
37 from email._parseaddr import parsedate as _parsedate |
|
38 from email._parseaddr import parsedate_tz as _parsedate_tz |
|
39 |
|
40 from quopri import decodestring as _qdecode |
|
41 |
|
42 # Intrapackage imports |
|
43 from email.encoders import _bencode, _qencode |
|
44 |
|
# Small string constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# specialsre: characters whose presence in a display name means the name
# has to be wrapped in double quotes (see formataddr()).
# escapesre: characters that get a backslash put in front of them.
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[][\\()"]')
|
53 |
|
54 |
|
55 |
|
56 # Helpers |
|
57 |
|
58 def _identity(s): |
|
59 return s |
|
60 |
|
61 |
|
62 def _bdecode(s): |
|
63 # We can't quite use base64.encodestring() since it tacks on a "courtesy |
|
64 # newline". Blech! |
|
65 if not s: |
|
66 return s |
|
67 value = base64.decodestring(s) |
|
68 if not s.endswith('\n') and value.endswith('\n'): |
|
69 return value[:-1] |
|
70 return value |
|
71 |
|
72 |
|
73 |
|
def fix_eols(s):
    """Return s with every line ending normalized to CRLF."""
    # First pass: a linefeed that is not already preceded by a carriage
    # return becomes CRLF.
    without_bare_lf = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: a carriage return that is not followed by a linefeed
    # becomes CRLF.
    return re.sub(r'\r(?!\n)', CRLF, without_bare_lf)
|
81 |
|
82 |
|
83 |
|
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is a string suitable
    for an RFC 2822 From, To or Cc header.

    When the real name is false the email address is returned unmodified.
    Otherwise the result is 'realname <email_address>'.  Characters matched
    by escapesre are backslash-escaped in the name, and the name is wrapped
    in double quotes if it contains any character matched by specialsre.
    """
    realname, address = pair
    if not realname:
        return address
    # The quoting decision is made on the name as given, before escaping.
    if specialsre.search(realname):
        quotes = '"'
    else:
        quotes = ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, address)
|
100 |
|
101 |
|
102 |
|
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with ', ' and parsed together as a single
    address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
|
108 |
|
109 |
|
110 |
|
# Pattern for an encoded word of the form =?charset?encoding?atom?=
# with named groups for each of the three fields.
ecre = re.compile(
    r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''',
    re.VERBOSE | re.IGNORECASE)
|
120 |
|
121 |
|
122 |
|
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in RFC 2822 format, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, when given, is a floating point time value of the kind accepted
    by time.gmtime() and time.localtime(); the current time is used when it
    is None.

    localtime, when true, makes the result relative to the local timezone
    (taking daylight savings time into account) instead of UTC.

    usegmt, when true, writes the zone as the string "GMT" rather than a
    numeric offset.  This is needed for HTTP and only has an effect when
    localtime is false.
    """
    # strftime() is deliberately not used for the names: it honors the
    # locale, while RFC 2822 requires the English abbreviations.
    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timeval is None:
        timeval = time.time()
    if not localtime:
        now = time.gmtime(timeval)
        # The offset is always zero here; only its spelling varies.
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    else:
        now = time.localtime(timeval)
        # Use the DST offset only if the local zone defines DST and the DST
        # flag (last field of the time tuple) is set for this moment.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        hours, leftover_seconds = divmod(abs(offset), 3600)
        # offset counts seconds *west* of UTC whereas the header expresses
        # the zone *east* of UTC, hence the inverted sign.
        if offset > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    year, month, day, hour, minute, second, weekday = now[:7]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[weekday],
        day,
        month_names[month - 1],
        year, hour, minute, second,
        zone)
|
173 |
|
174 |
|
175 |
|
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is assembled from the current UTC timestamp, the process id, a
    random integer below 100000 and the fully qualified host name.  If
    idstring is given it is appended, after a dot, to the part before the
    '@' to strengthen the uniqueness of the message id.
    """
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    nonce = random.randrange(100000)
    suffix = ''
    if idstring is not None:
        suffix = '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, pid, nonce, suffix, host)
|
195 |
|
196 |
|
197 |
|
198 # These functions are in the standalone mimelib version only because they've |
|
199 # subsequently been fixed in the latest Python versions. We use this to work |
|
200 # around broken older Pythons. |
|
def parsedate(data):
    """Parse a date string with the underlying _parsedate() helper.

    A false value (None, empty string) short-circuits to None instead of
    being handed to the parser.
    """
    if data:
        return _parsedate(data)
    return None
|
205 |
|
206 |
|
def parsedate_tz(data):
    """Parse a date string with the underlying _parsedate_tz() helper.

    A false value (None, empty string) short-circuits to None instead of
    being handed to the parser.
    """
    if data:
        return _parsedate_tz(data)
    return None
|
211 |
|
212 |
|
def parseaddr(addr):
    """Parse addr and return its first address entry.

    The entry is whatever _AddressList yields for the first address found;
    if no address is found the pair ('', '') is returned.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
|
218 |
|
219 |
|
220 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. |
|
def unquote(str):
    """Remove one level of surrounding quotes from a string.

    A string wrapped in double quotes loses them, and inside it every
    doubled backslash becomes a single backslash and every backslash-quote
    becomes a plain quote.  A string wrapped in angle brackets just loses
    the brackets.  Anything else, including any string shorter than two
    characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
|
229 |
|
230 |
|
231 |
|
232 # RFC2231-related functions - parameter encoding and decoding |
|
def decode_rfc2231(s):
    """Split an RFC 2231 value of the form charset'language'text.

    s is split on the first two tick (') characters.  If that gives three
    pieces they are returned as a list; otherwise the 3-tuple
    (None, None, s) is returned.
    """
    pieces = s.split(TICK, 2)
    if len(pieces) > 2:
        return pieces
    return None, None, s
|
239 |
|
240 |
|
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always %-quoted, with no characters treated as safe.  If neither
    charset nor language is given, the quoted string is returned on its
    own.  Otherwise the result has the form charset'language'quoted-string,
    where whichever of charset or language was not given is written as the
    empty string.
    """
    s = urllib.quote(s, safe='')
    if charset is None and language is None:
        return s
    # Both fields are optional in the RFC 2231 syntax.  A missing one has to
    # be left empty; formatting None directly would put the literal text
    # "None" into the header.
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
|
255 |
|
256 |
|
# Matches a parameter name of the form NAME*, NAME*N or NAME*N*, capturing
# NAME as group 'name' and the digits N (if present) as group 'num'.
rfc2231_continuation = re.compile(
    r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
|
258 |
|
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    It is not modified.

    Returns a new list of 2-tuples.  The first entry of params is copied
    through as-is.  Each later entry has its value unquoted; if its name
    does not match the RFC 2231 pattern (name*, name*N or name*N*) the value
    is passed through quote() and wrapped in double quotes.  Entries that do
    match are grouped under the bare name and appended after all plain
    parameters: their segments are sorted, %-decoded where the name ended
    in '*', and joined.  If any segment was %-encoded the value is a
    (charset, language, '"text"') 3-tuple, otherwise it is the joined text
    wrapped in double quotes.

    An empty params yields an empty list.
    """
    if not params:
        return []
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # Iterating over a slice leaves the caller's sequence alone and avoids
    # repeatedly popping from the front of a list.
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        segments = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                s = urllib.unquote(s)
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
|
311 |
|
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single unquoted string.

    If value is not a tuple it is simply unquoted and returned.

    If value is a tuple, its first item is taken as the charset (us-ascii
    when that item is false) and its third item as the text.  The text is
    unquoted and converted to unicode using that charset and the given
    errors policy.  Should the charset be unknown to Python, the conversion
    is retried with fallback_charset.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        decoded = unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        decoded = unicode(rawval, fallback_charset, errors)
    return decoded