|
1 # Copyright (C) 2001-2006 Python Software Foundation |
|
2 # Author: Barry Warsaw |
|
3 # Contact: email-sig@python.org |
|
4 |
|
5 """Classes to generate plain text from a message object tree.""" |
|
6 |
|
7 __all__ = ['Generator', 'DecodedGenerator'] |
|
8 |
|
9 import re |
|
10 import sys |
|
11 import time |
|
12 import random |
|
13 import warnings |
|
14 |
|
15 from cStringIO import StringIO |
|
16 from email.header import Header |
|
17 |
|
# Glue used when building handler method names from MIME main/sub types.
UNDERSCORE = '_'
# Line separator used when joining flattened chunks of text.
NL = '\n'

# Matches the literal "From " at the start of any line of a body, i.e. a
# line that could be mistaken for a Unix mailbox From_ delimiter.
fcre = re.compile(r'^From ', re.M)
|
22 |
|
23 def _is8bitstring(s): |
|
24 if isinstance(s, str): |
|
25 try: |
|
26 unicode(s, 'us-ascii') |
|
27 except UnicodeError: |
|
28 return True |
|
29 return False |
|
30 |
|
31 |
|
32 |
|
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object.  Having a write() method also
        # lets a generator itself be the target of `print >> generator'.
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options.

        fp is the output file-like object the clone writes to.
        """
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in method names, so map them to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    def _mangle_from_lines(self, text):
        # Escape From_ lines in text with a leading `>' when From_ mangling
        # was requested; otherwise hand the text back untouched.
        if self._mangle_from_:
            return fcre.sub('>From ', text)
        return text

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        # Write every header of msg as a "Name: value" line, followed by the
        # blank line that ends the header block.
        for h, v in msg.items():
            # The trailing comma keeps the line open so that the value
            # printed next lands on the same line, after a single space.
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen,
                    header_name=h, continuation_ws='\t').encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        # Write a string payload, escaping From_ lines if requested.  A
        # missing payload writes nothing; any other payload type is an error.
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        self._fp.write(self._mangle_from_lines(payload))

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # The trick here is to write out each part separately, and only then
        # settle on the boundary, so that a freshly generated boundary can be
        # checked against the flattened parts.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        #
        # Keep the boundary the message already declares.  Only when it has
        # none do we craft one, making sure it doesn't appear in any of the
        # message texts, and record it on the message.  We don't call
        # set_boundary() unconditionally because, while it preserves order,
        # it doesn't preserve newlines/continuations in headers.  This is no
        # big deal in practice, but turns out to be inconvenient for the
        # unittest suite.
        boundary = msg.get_boundary()
        if boundary is None:
            boundary = _make_boundary(NL.join(msgtexts))
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, followed by a newline.  It is
        # body text, so it gets the same From_ escaping as any other body.
        if msg.preamble is not None:
            print >> self._fp, self._mangle_from_lines(msg.preamble)
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print >> self._fp
            self._fp.write(self._mangle_from_lines(msg.epilogue))

    def _handle_message_delivery_status(self, msg):
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        #
        # Other message/* parts may carry a plain string payload instead;
        # that is written through as is rather than being indexed like a
        # list, which would raise a TypeError.
        payload = msg.get_payload()
        if payload is None:
            # Nothing to write, same as _handle_text() does.
            return
        if isinstance(payload, list):
            s = StringIO()
            g = self.clone(s)
            g.flatten(payload[0], unixfrom=False)
            payload = s.getvalue()
        self._fp.write(payload)
|
268 |
|
269 |
|
270 |
|
# Default replacement text for non-text parts; see DecodedGenerator.__init__().
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def clone(self, fp):
        """Clone this generator with the exact same options, fmt included.

        fp is the output file-like object the clone writes to.
        """
        # The base class rebuilds the instance from three constructor
        # arguments only, which would reset a custom fmt to the default.
        # Copy the format across afterwards instead of passing a fourth
        # constructor argument, so subclasses that keep the three-argument
        # signature continue to work.
        twin = Generator.clone(self, fp)
        twin._fmt = self._fmt
        return twin

    def _dispatch(self, msg):
        # Visit every part of the tree: text parts are printed decoded,
        # multipart containers are skipped (their children are visited by
        # walk() anyway), and everything else is replaced by the format
        # string.
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print >> self, self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : maintype,
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }
|
326 |
|
327 |
|
328 |
|
329 # Helper |
|
330 _width = len(repr(sys.maxint-1)) |
|
331 _fmt = '%%0%dd' % _width |
|
332 |
|
333 def _make_boundary(text=None): |
|
334 # Craft a random boundary. If text is given, ensure that the chosen |
|
335 # boundary doesn't appear in the text. |
|
336 token = random.randrange(sys.maxint) |
|
337 boundary = ('=' * 15) + (_fmt % token) + '==' |
|
338 if text is None: |
|
339 return boundary |
|
340 b = boundary |
|
341 counter = 0 |
|
342 while True: |
|
343 cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE) |
|
344 if not cre.search(text): |
|
345 break |
|
346 b = boundary + '.' + str(counter) |
|
347 counter += 1 |
|
348 return b |