python-2.5.2/win32/Lib/email/utils.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 # Copyright (C) 2001-2006 Python Software Foundation
       
     2 # Author: Barry Warsaw
       
     3 # Contact: email-sig@python.org
       
     4 
       
     5 """Miscellaneous utilities."""
       
     6 
       
# Public names exported by a star-import of this module.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
       
    21 
       
    22 import os
       
    23 import re
       
    24 import time
       
    25 import base64
       
    26 import random
       
    27 import socket
       
    28 import urllib
       
    29 import warnings
       
    30 from cStringIO import StringIO
       
    31 
       
    32 from email._parseaddr import quote
       
    33 from email._parseaddr import AddressList as _AddressList
       
    34 from email._parseaddr import mktime_tz
       
    35 
       
# We need workarounds for bugs in these methods in older Pythons (see below)
       
    37 from email._parseaddr import parsedate as _parsedate
       
    38 from email._parseaddr import parsedate_tz as _parsedate_tz
       
    39 
       
    40 from quopri import decodestring as _qdecode
       
    41 
       
    42 # Intrapackage imports
       
    43 from email.encoders import _bencode, _qencode
       
    44 
       
# String constants shared by the functions in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
# Delimiter that decode_rfc2231() splits on.
TICK = "'"

# Characters whose presence makes formataddr() wrap the real name in quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash in the real name.
escapesre = re.compile(r'[][\\()"]')
       
    53 
       
    54 
       
    55 
       
    56 # Helpers
       
    57 
       
    58 def _identity(s):
       
    59     return s
       
    60 
       
    61 
       
    62 def _bdecode(s):
       
    63     # We can't quite use base64.encodestring() since it tacks on a "courtesy
       
    64     # newline".  Blech!
       
    65     if not s:
       
    66         return s
       
    67     value = base64.decodestring(s)
       
    68     if not s.endswith('\n') and value.endswith('\n'):
       
    69         return value[:-1]
       
    70     return value
       
    71 
       
    72 
       
    73 
       
    74 def fix_eols(s):
       
    75     """Replace all line-ending characters with \r\n."""
       
    76     # Fix newlines with no preceding carriage return
       
    77     s = re.sub(r'(?<!\r)\n', CRLF, s)
       
    78     # Fix carriage returns with no following newline
       
    79     s = re.sub(r'\r(?!\n)', CRLF, s)
       
    80     return s
       
    81 
       
    82 
       
    83 
       
    84 def formataddr(pair):
       
    85     """The inverse of parseaddr(), this takes a 2-tuple of the form
       
    86     (realname, email_address) and returns the string value suitable
       
    87     for an RFC 2822 From, To or Cc header.
       
    88 
       
    89     If the first element of pair is false, then the second element is
       
    90     returned unmodified.
       
    91     """
       
    92     name, address = pair
       
    93     if name:
       
    94         quotes = ''
       
    95         if specialsre.search(name):
       
    96             quotes = '"'
       
    97         name = escapesre.sub(r'\\\g<0>', name)
       
    98         return '%s%s%s <%s>' % (quotes, name, quotes, address)
       
    99     return address
       
   100 
       
   101 
       
   102 
       
   103 def getaddresses(fieldvalues):
       
   104     """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
       
   105     all = COMMASPACE.join(fieldvalues)
       
   106     a = _AddressList(all)
       
   107     return a.addresslist
       
   108 
       
   109 
       
   110 
       
# Matches an encoded word of the form =?charset?encoding?atom?= where the
# encoding is "q" or "b" in either case; the three parts are exposed as the
# named groups charset, encoding and atom.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
       
   120 
       
   121 
       
   122 
       
   123 def formatdate(timeval=None, localtime=False, usegmt=False):
       
   124     """Returns a date string as specified by RFC 2822, e.g.:
       
   125 
       
   126     Fri, 09 Nov 2001 01:08:47 -0000
       
   127 
       
   128     Optional timeval if given is a floating point time value as accepted by
       
   129     gmtime() and localtime(), otherwise the current time is used.
       
   130 
       
   131     Optional localtime is a flag that when True, interprets timeval, and
       
   132     returns a date relative to the local timezone instead of UTC, properly
       
   133     taking daylight savings time into account.
       
   134 
       
   135     Optional argument usegmt means that the timezone is written out as
       
   136     an ascii string, not numeric one (so "GMT" instead of "+0000"). This
       
   137     is needed for HTTP, and is only used when localtime==False.
       
   138     """
       
   139     # Note: we cannot use strftime() because that honors the locale and RFC
       
   140     # 2822 requires that day and month names be the English abbreviations.
       
   141     if timeval is None:
       
   142         timeval = time.time()
       
   143     if localtime:
       
   144         now = time.localtime(timeval)
       
   145         # Calculate timezone offset, based on whether the local zone has
       
   146         # daylight savings time, and whether DST is in effect.
       
   147         if time.daylight and now[-1]:
       
   148             offset = time.altzone
       
   149         else:
       
   150             offset = time.timezone
       
   151         hours, minutes = divmod(abs(offset), 3600)
       
   152         # Remember offset is in seconds west of UTC, but the timezone is in
       
   153         # minutes east of UTC, so the signs differ.
       
   154         if offset > 0:
       
   155             sign = '-'
       
   156         else:
       
   157             sign = '+'
       
   158         zone = '%s%02d%02d' % (sign, hours, minutes // 60)
       
   159     else:
       
   160         now = time.gmtime(timeval)
       
   161         # Timezone offset is always -0000
       
   162         if usegmt:
       
   163             zone = 'GMT'
       
   164         else:
       
   165             zone = '-0000'
       
   166     return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
       
   167         ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
       
   168         now[2],
       
   169         ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
       
   170          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
       
   171         now[0], now[3], now[4], now[5],
       
   172         zone)
       
   173 
       
   174 
       
   175 
       
   176 def make_msgid(idstring=None):
       
   177     """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
       
   178 
       
   179     <20020201195627.33539.96671@nightshade.la.mastaler.com>
       
   180 
       
   181     Optional idstring if given is a string used to strengthen the
       
   182     uniqueness of the message id.
       
   183     """
       
   184     timeval = time.time()
       
   185     utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
       
   186     pid = os.getpid()
       
   187     randint = random.randrange(100000)
       
   188     if idstring is None:
       
   189         idstring = ''
       
   190     else:
       
   191         idstring = '.' + idstring
       
   192     idhost = socket.getfqdn()
       
   193     msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
       
   194     return msgid
       
   195 
       
   196 
       
   197 
       
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
       
   201 def parsedate(data):
       
   202     if not data:
       
   203         return None
       
   204     return _parsedate(data)
       
   205 
       
   206 
       
   207 def parsedate_tz(data):
       
   208     if not data:
       
   209         return None
       
   210     return _parsedate_tz(data)
       
   211 
       
   212 
       
   213 def parseaddr(addr):
       
   214     addrs = _AddressList(addr).addresslist
       
   215     if not addrs:
       
   216         return '', ''
       
   217     return addrs[0]
       
   218 
       
   219 
       
   220 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
       
   221 def unquote(str):
       
   222     """Remove quotes from a string."""
       
   223     if len(str) > 1:
       
   224         if str.startswith('"') and str.endswith('"'):
       
   225             return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
       
   226         if str.startswith('<') and str.endswith('>'):
       
   227             return str[1:-1]
       
   228     return str
       
   229 
       
   230 
       
   231 
       
   232 # RFC2231-related functions - parameter encoding and decoding
       
   233 def decode_rfc2231(s):
       
   234     """Decode string according to RFC 2231"""
       
   235     parts = s.split(TICK, 2)
       
   236     if len(parts) <= 2:
       
   237         return None, None, s
       
   238     return parts
       
   239 
       
   240 
       
   241 def encode_rfc2231(s, charset=None, language=None):
       
   242     """Encode string according to RFC 2231.
       
   243 
       
   244     If neither charset nor language is given, then s is returned as-is.  If
       
   245     charset is given but not language, the string is encoded using the empty
       
   246     string for language.
       
   247     """
       
   248     import urllib
       
   249     s = urllib.quote(s, safe='')
       
   250     if charset is None and language is None:
       
   251         return s
       
   252     if language is None:
       
   253         language = ''
       
   254     return "%s'%s'%s" % (charset, language, s)
       
   255 
       
   256 
       
# Matches a whole parameter name of the form name*, name*N or name*N*, where
# N is a decimal number; the groups 'name' and 'num' capture the two parts
# ('num' is None when no number is present).
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
       
   258 
       
   259 def decode_params(params):
       
   260     """Decode parameters list according to RFC 2231.
       
   261 
       
   262     params is a sequence of 2-tuples containing (param name, string value).
       
   263     """
       
   264     # Copy params so we don't mess with the original
       
   265     params = params[:]
       
   266     new_params = []
       
   267     # Map parameter's name to a list of continuations.  The values are a
       
   268     # 3-tuple of the continuation number, the string value, and a flag
       
   269     # specifying whether a particular segment is %-encoded.
       
   270     rfc2231_params = {}
       
   271     name, value = params.pop(0)
       
   272     new_params.append((name, value))
       
   273     while params:
       
   274         name, value = params.pop(0)
       
   275         if name.endswith('*'):
       
   276             encoded = True
       
   277         else:
       
   278             encoded = False
       
   279         value = unquote(value)
       
   280         mo = rfc2231_continuation.match(name)
       
   281         if mo:
       
   282             name, num = mo.group('name', 'num')
       
   283             if num is not None:
       
   284                 num = int(num)
       
   285             rfc2231_params.setdefault(name, []).append((num, value, encoded))
       
   286         else:
       
   287             new_params.append((name, '"%s"' % quote(value)))
       
   288     if rfc2231_params:
       
   289         for name, continuations in rfc2231_params.items():
       
   290             value = []
       
   291             extended = False
       
   292             # Sort by number
       
   293             continuations.sort()
       
   294             # And now append all values in numerical order, converting
       
   295             # %-encodings for the encoded segments.  If any of the
       
   296             # continuation names ends in a *, then the entire string, after
       
   297             # decoding segments and concatenating, must have the charset and
       
   298             # language specifiers at the beginning of the string.
       
   299             for num, s, encoded in continuations:
       
   300                 if encoded:
       
   301                     s = urllib.unquote(s)
       
   302                     extended = True
       
   303                 value.append(s)
       
   304             value = quote(EMPTYSTRING.join(value))
       
   305             if extended:
       
   306                 charset, language, value = decode_rfc2231(value)
       
   307                 new_params.append((name, (charset, language, '"%s"' % value)))
       
   308             else:
       
   309                 new_params.append((name, '"%s"' % value))
       
   310     return new_params
       
   311 
       
   312 def collapse_rfc2231_value(value, errors='replace',
       
   313                            fallback_charset='us-ascii'):
       
   314     if isinstance(value, tuple):
       
   315         rawval = unquote(value[2])
       
   316         charset = value[0] or 'us-ascii'
       
   317         try:
       
   318             return unicode(rawval, charset, errors)
       
   319         except LookupError:
       
   320             # XXX charset is unknown to Python.
       
   321             return unicode(rawval, fallback_charset, errors)
       
   322     else:
       
   323         return unquote(value)