python-2.5.2/win32/Lib/test/test_codeccallbacks.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 import test.test_support, unittest
       
     2 import sys, codecs, htmlentitydefs, unicodedata
       
     3 
       
     4 class PosReturn:
       
     5     # this can be used for configurable callbacks
       
     6 
       
     7     def __init__(self):
       
     8         self.pos = 0
       
     9 
       
    10     def handle(self, exc):
       
    11         oldpos = self.pos
       
    12         realpos = oldpos
       
    13         if realpos<0:
       
    14             realpos = len(exc.object) + realpos
       
    15         # if we don't advance this time, terminate on the next call
       
    16         # otherwise we'd get an endless loop
       
    17         if realpos <= exc.start:
       
    18             self.pos = len(exc.object)
       
    19         return (u"<?>", oldpos)
       
    20 
       
    21 # A UnicodeEncodeError object with a bad start attribute
       
    22 class BadStartUnicodeEncodeError(UnicodeEncodeError):
       
    23     def __init__(self):
       
    24         UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
       
    25         self.start = []
       
    26 
       
    27 # A UnicodeEncodeError object with a bad object attribute
       
    28 class BadObjectUnicodeEncodeError(UnicodeEncodeError):
       
    29     def __init__(self):
       
    30         UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
       
    31         self.object = []
       
    32 
       
    33 # A UnicodeDecodeError object without an end attribute
       
    34 class NoEndUnicodeDecodeError(UnicodeDecodeError):
       
    35     def __init__(self):
       
    36         UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
       
    37         del self.end
       
    38 
       
    39 # A UnicodeDecodeError object with a bad object attribute
       
    40 class BadObjectUnicodeDecodeError(UnicodeDecodeError):
       
    41     def __init__(self):
       
    42         UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
       
    43         self.object = []
       
    44 
       
    45 # A UnicodeTranslateError object without a start attribute
       
    46 class NoStartUnicodeTranslateError(UnicodeTranslateError):
       
    47     def __init__(self):
       
    48         UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
       
    49         del self.start
       
    50 
       
    51 # A UnicodeTranslateError object without an end attribute
       
    52 class NoEndUnicodeTranslateError(UnicodeTranslateError):
       
    53     def __init__(self):
       
    54         UnicodeTranslateError.__init__(self,  u"", 0, 1, "bad")
       
    55         del self.end
       
    56 
       
    57 # A UnicodeTranslateError object without an object attribute
       
    58 class NoObjectUnicodeTranslateError(UnicodeTranslateError):
       
    59     def __init__(self):
       
    60         UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
       
    61         del self.object
       
    62 
       
    63 class CodecCallbackTest(unittest.TestCase):
       
    64 
       
    65     def test_xmlcharrefreplace(self):
       
    66         # replace unencodable characters which numeric character entities.
       
    67         # For ascii, latin-1 and charmaps this is completely implemented
       
    68         # in C and should be reasonably fast.
       
    69         s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
       
    70         self.assertEqual(
       
    71             s.encode("ascii", "xmlcharrefreplace"),
       
    72             "&#12473;&#12497;&#12514; &#228;nd eggs"
       
    73         )
       
    74         self.assertEqual(
       
    75             s.encode("latin-1", "xmlcharrefreplace"),
       
    76             "&#12473;&#12497;&#12514; \xe4nd eggs"
       
    77         )
       
    78 
       
    79     def test_xmlcharnamereplace(self):
       
    80         # This time use a named character entity for unencodable
       
    81         # characters, if one is available.
       
    82 
       
    83         def xmlcharnamereplace(exc):
       
    84             if not isinstance(exc, UnicodeEncodeError):
       
    85                 raise TypeError("don't know how to handle %r" % exc)
       
    86             l = []
       
    87             for c in exc.object[exc.start:exc.end]:
       
    88                 try:
       
    89                     l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
       
    90                 except KeyError:
       
    91                     l.append(u"&#%d;" % ord(c))
       
    92             return (u"".join(l), exc.end)
       
    93 
       
    94         codecs.register_error(
       
    95             "test.xmlcharnamereplace", xmlcharnamereplace)
       
    96 
       
    97         sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
       
    98         sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
       
    99         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
       
   100         sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
       
   101         self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
       
   102         sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
       
   103         self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
       
   104 
       
   105     def test_uninamereplace(self):
       
   106         # We're using the names from the unicode database this time,
       
   107         # and we're doing "syntax highlighting" here, i.e. we include
       
   108         # the replaced text in ANSI escape sequences. For this it is
       
   109         # useful that the error handler is not called for every single
       
   110         # unencodable character, but for a complete sequence of
       
   111         # unencodable characters, otherwise we would output many
       
   112         # unneccessary escape sequences.
       
   113 
       
   114         def uninamereplace(exc):
       
   115             if not isinstance(exc, UnicodeEncodeError):
       
   116                 raise TypeError("don't know how to handle %r" % exc)
       
   117             l = []
       
   118             for c in exc.object[exc.start:exc.end]:
       
   119                 l.append(unicodedata.name(c, u"0x%x" % ord(c)))
       
   120             return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
       
   121 
       
   122         codecs.register_error(
       
   123             "test.uninamereplace", uninamereplace)
       
   124 
       
   125         sin = u"\xac\u1234\u20ac\u8000"
       
   126         sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
       
   127         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
       
   128 
       
   129         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
       
   130         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
       
   131 
       
   132         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
       
   133         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
       
   134 
       
   135     def test_backslashescape(self):
       
   136         # Does the same as the "unicode-escape" encoding, but with different
       
   137         # base encodings.
       
   138         sin = u"a\xac\u1234\u20ac\u8000"
       
   139         if sys.maxunicode > 0xffff:
       
   140             sin += unichr(sys.maxunicode)
       
   141         sout = "a\\xac\\u1234\\u20ac\\u8000"
       
   142         if sys.maxunicode > 0xffff:
       
   143             sout += "\\U%08x" % sys.maxunicode
       
   144         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
       
   145 
       
   146         sout = "a\xac\\u1234\\u20ac\\u8000"
       
   147         if sys.maxunicode > 0xffff:
       
   148             sout += "\\U%08x" % sys.maxunicode
       
   149         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
       
   150 
       
   151         sout = "a\xac\\u1234\xa4\\u8000"
       
   152         if sys.maxunicode > 0xffff:
       
   153             sout += "\\U%08x" % sys.maxunicode
       
   154         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
       
   155 
       
   156     def test_decoderelaxedutf8(self):
       
   157         # This is the test for a decoding callback handler,
       
   158         # that relaxes the UTF-8 minimal encoding restriction.
       
   159         # A null byte that is encoded as "\xc0\x80" will be
       
   160         # decoded as a null byte. All other illegal sequences
       
   161         # will be handled strictly.
       
   162         def relaxedutf8(exc):
       
   163             if not isinstance(exc, UnicodeDecodeError):
       
   164                 raise TypeError("don't know how to handle %r" % exc)
       
   165             if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
       
   166                 return (u"\x00", exc.start+2) # retry after two bytes
       
   167             else:
       
   168                 raise exc
       
   169 
       
   170         codecs.register_error(
       
   171             "test.relaxedutf8", relaxedutf8)
       
   172 
       
   173         sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
       
   174         sout = u"a\x00b\x00c\xfc\x00\x00"
       
   175         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
       
   176         sin = "\xc0\x80\xc0\x81"
       
   177         self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
       
   178 
       
   179     def test_charmapencode(self):
       
   180         # For charmap encodings the replacement string will be
       
   181         # mapped through the encoding again. This means, that
       
   182         # to be able to use e.g. the "replace" handler, the
       
   183         # charmap has to have a mapping for "?".
       
   184         charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
       
   185         sin = u"abc"
       
   186         sout = "AABBCC"
       
   187         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
       
   188 
       
   189         sin = u"abcA"
       
   190         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
       
   191 
       
   192         charmap[ord("?")] = "XYZ"
       
   193         sin = u"abcDEF"
       
   194         sout = "AABBCCXYZXYZXYZ"
       
   195         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
       
   196 
       
   197         charmap[ord("?")] = u"XYZ"
       
   198         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
       
   199 
       
   200         charmap[ord("?")] = u"XYZ"
       
   201         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
       
   202 
       
   203     def test_decodeunicodeinternal(self):
       
   204         self.assertRaises(
       
   205             UnicodeDecodeError,
       
   206             "\x00\x00\x00\x00\x00".decode,
       
   207             "unicode-internal",
       
   208         )
       
   209         if sys.maxunicode > 0xffff:
       
   210             def handler_unicodeinternal(exc):
       
   211                 if not isinstance(exc, UnicodeDecodeError):
       
   212                     raise TypeError("don't know how to handle %r" % exc)
       
   213                 return (u"\x01", 1)
       
   214 
       
   215             self.assertEqual(
       
   216                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
       
   217                 u"\u0000"
       
   218             )
       
   219 
       
   220             self.assertEqual(
       
   221                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
       
   222                 u"\u0000\ufffd"
       
   223             )
       
   224 
       
   225             codecs.register_error("test.hui", handler_unicodeinternal)
       
   226 
       
   227             self.assertEqual(
       
   228                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
       
   229                 u"\u0000\u0001\u0000"
       
   230             )
       
   231 
       
   232     def test_callbacks(self):
       
   233         def handler1(exc):
       
   234             if not isinstance(exc, UnicodeEncodeError) \
       
   235                and not isinstance(exc, UnicodeDecodeError):
       
   236                 raise TypeError("don't know how to handle %r" % exc)
       
   237             l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
       
   238             return (u"[%s]" % u"".join(l), exc.end)
       
   239 
       
   240         codecs.register_error("test.handler1", handler1)
       
   241 
       
   242         def handler2(exc):
       
   243             if not isinstance(exc, UnicodeDecodeError):
       
   244                 raise TypeError("don't know how to handle %r" % exc)
       
   245             l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
       
   246             return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
       
   247 
       
   248         codecs.register_error("test.handler2", handler2)
       
   249 
       
   250         s = "\x00\x81\x7f\x80\xff"
       
   251 
       
   252         self.assertEqual(
       
   253             s.decode("ascii", "test.handler1"),
       
   254             u"\x00[<129>]\x7f[<128>][<255>]"
       
   255         )
       
   256         self.assertEqual(
       
   257             s.decode("ascii", "test.handler2"),
       
   258             u"\x00[<129>][<128>]"
       
   259         )
       
   260 
       
   261         self.assertEqual(
       
   262             "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
       
   263             u"\u3042[<92><117><51><120>]xx"
       
   264         )
       
   265 
       
   266         self.assertEqual(
       
   267             "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
       
   268             u"\u3042[<92><117><51><120><120>]"
       
   269         )
       
   270 
       
   271         self.assertEqual(
       
   272             codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
       
   273             u"z[<98>][<99>]"
       
   274         )
       
   275 
       
   276         self.assertEqual(
       
   277             u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
       
   278             u"g[<252><223>]rk"
       
   279         )
       
   280 
       
   281         self.assertEqual(
       
   282             u"g\xfc\xdf".encode("ascii", "test.handler1"),
       
   283             u"g[<252><223>]"
       
   284         )
       
   285 
       
   286     def test_longstrings(self):
       
   287         # test long strings to check for memory overflow problems
       
   288         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"]
       
   289         # register the handlers under different names,
       
   290         # to prevent the codec from recognizing the name
       
   291         for err in errors:
       
   292             codecs.register_error("test." + err, codecs.lookup_error(err))
       
   293         l = 1000
       
   294         errors += [ "test." + err for err in errors ]
       
   295         for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
       
   296             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"):
       
   297                 for err in errors:
       
   298                     try:
       
   299                         uni.encode(enc, err)
       
   300                     except UnicodeError:
       
   301                         pass
       
   302 
       
   303     def check_exceptionobjectargs(self, exctype, args, msg):
       
   304         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
       
   305         # check with one missing argument
       
   306         self.assertRaises(TypeError, exctype, *args[:-1])
       
   307         # check with one argument too much
       
   308         self.assertRaises(TypeError, exctype, *(args + ["too much"]))
       
   309         # check with one argument of the wrong type
       
   310         wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
       
   311         for i in xrange(len(args)):
       
   312             for wrongarg in wrongargs:
       
   313                 if type(wrongarg) is type(args[i]):
       
   314                     continue
       
   315                 # build argument array
       
   316                 callargs = []
       
   317                 for j in xrange(len(args)):
       
   318                     if i==j:
       
   319                         callargs.append(wrongarg)
       
   320                     else:
       
   321                         callargs.append(args[i])
       
   322                 self.assertRaises(TypeError, exctype, *callargs)
       
   323 
       
   324         # check with the correct number and type of arguments
       
   325         exc = exctype(*args)
       
   326         self.assertEquals(str(exc), msg)
       
   327 
       
   328     def test_unicodeencodeerror(self):
       
   329         self.check_exceptionobjectargs(
       
   330             UnicodeEncodeError,
       
   331             ["ascii", u"g\xfcrk", 1, 2, "ouch"],
       
   332             "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
       
   333         )
       
   334         self.check_exceptionobjectargs(
       
   335             UnicodeEncodeError,
       
   336             ["ascii", u"g\xfcrk", 1, 4, "ouch"],
       
   337             "'ascii' codec can't encode characters in position 1-3: ouch"
       
   338         )
       
   339         self.check_exceptionobjectargs(
       
   340             UnicodeEncodeError,
       
   341             ["ascii", u"\xfcx", 0, 1, "ouch"],
       
   342             "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
       
   343         )
       
   344         self.check_exceptionobjectargs(
       
   345             UnicodeEncodeError,
       
   346             ["ascii", u"\u0100x", 0, 1, "ouch"],
       
   347             "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
       
   348         )
       
   349         self.check_exceptionobjectargs(
       
   350             UnicodeEncodeError,
       
   351             ["ascii", u"\uffffx", 0, 1, "ouch"],
       
   352             "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
       
   353         )
       
   354         if sys.maxunicode > 0xffff:
       
   355             self.check_exceptionobjectargs(
       
   356                 UnicodeEncodeError,
       
   357                 ["ascii", u"\U00010000x", 0, 1, "ouch"],
       
   358                 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
       
   359             )
       
   360 
       
   361     def test_unicodedecodeerror(self):
       
   362         self.check_exceptionobjectargs(
       
   363             UnicodeDecodeError,
       
   364             ["ascii", "g\xfcrk", 1, 2, "ouch"],
       
   365             "'ascii' codec can't decode byte 0xfc in position 1: ouch"
       
   366         )
       
   367         self.check_exceptionobjectargs(
       
   368             UnicodeDecodeError,
       
   369             ["ascii", "g\xfcrk", 1, 3, "ouch"],
       
   370             "'ascii' codec can't decode bytes in position 1-2: ouch"
       
   371         )
       
   372 
       
   373     def test_unicodetranslateerror(self):
       
   374         self.check_exceptionobjectargs(
       
   375             UnicodeTranslateError,
       
   376             [u"g\xfcrk", 1, 2, "ouch"],
       
   377             "can't translate character u'\\xfc' in position 1: ouch"
       
   378         )
       
   379         self.check_exceptionobjectargs(
       
   380             UnicodeTranslateError,
       
   381             [u"g\u0100rk", 1, 2, "ouch"],
       
   382             "can't translate character u'\\u0100' in position 1: ouch"
       
   383         )
       
   384         self.check_exceptionobjectargs(
       
   385             UnicodeTranslateError,
       
   386             [u"g\uffffrk", 1, 2, "ouch"],
       
   387             "can't translate character u'\\uffff' in position 1: ouch"
       
   388         )
       
   389         if sys.maxunicode > 0xffff:
       
   390             self.check_exceptionobjectargs(
       
   391                 UnicodeTranslateError,
       
   392                 [u"g\U00010000rk", 1, 2, "ouch"],
       
   393                 "can't translate character u'\\U00010000' in position 1: ouch"
       
   394             )
       
   395         self.check_exceptionobjectargs(
       
   396             UnicodeTranslateError,
       
   397             [u"g\xfcrk", 1, 3, "ouch"],
       
   398             "can't translate characters in position 1-2: ouch"
       
   399         )
       
   400 
       
   401     def test_badandgoodstrictexceptions(self):
       
   402         # "strict" complains about a non-exception passed in
       
   403         self.assertRaises(
       
   404             TypeError,
       
   405             codecs.strict_errors,
       
   406             42
       
   407         )
       
   408         # "strict" complains about the wrong exception type
       
   409         self.assertRaises(
       
   410             Exception,
       
   411             codecs.strict_errors,
       
   412             Exception("ouch")
       
   413         )
       
   414 
       
   415         # If the correct exception is passed in, "strict" raises it
       
   416         self.assertRaises(
       
   417             UnicodeEncodeError,
       
   418             codecs.strict_errors,
       
   419             UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
       
   420         )
       
   421 
       
   422     def test_badandgoodignoreexceptions(self):
       
   423         # "ignore" complains about a non-exception passed in
       
   424         self.assertRaises(
       
   425            TypeError,
       
   426            codecs.ignore_errors,
       
   427            42
       
   428         )
       
   429         # "ignore" complains about the wrong exception type
       
   430         self.assertRaises(
       
   431            TypeError,
       
   432            codecs.ignore_errors,
       
   433            UnicodeError("ouch")
       
   434         )
       
   435         # If the correct exception is passed in, "ignore" returns an empty replacement
       
   436         self.assertEquals(
       
   437             codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
       
   438             (u"", 1)
       
   439         )
       
   440         self.assertEquals(
       
   441             codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
       
   442             (u"", 1)
       
   443         )
       
   444         self.assertEquals(
       
   445             codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
       
   446             (u"", 1)
       
   447         )
       
   448 
       
   449     def test_badandgoodreplaceexceptions(self):
       
   450         # "replace" complains about a non-exception passed in
       
   451         self.assertRaises(
       
   452            TypeError,
       
   453            codecs.replace_errors,
       
   454            42
       
   455         )
       
   456         # "replace" complains about the wrong exception type
       
   457         self.assertRaises(
       
   458            TypeError,
       
   459            codecs.replace_errors,
       
   460            UnicodeError("ouch")
       
   461         )
       
   462         self.assertRaises(
       
   463             TypeError,
       
   464             codecs.replace_errors,
       
   465             BadObjectUnicodeEncodeError()
       
   466         )
       
   467         self.assertRaises(
       
   468             TypeError,
       
   469             codecs.replace_errors,
       
   470             BadObjectUnicodeDecodeError()
       
   471         )
       
   472         # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement
       
   473         self.assertEquals(
       
   474             codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
       
   475             (u"?", 1)
       
   476         )
       
   477         self.assertEquals(
       
   478             codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
       
   479             (u"\ufffd", 1)
       
   480         )
       
   481         self.assertEquals(
       
   482             codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
       
   483             (u"\ufffd", 1)
       
   484         )
       
   485 
       
   486     def test_badandgoodxmlcharrefreplaceexceptions(self):
       
   487         # "xmlcharrefreplace" complains about a non-exception passed in
       
   488         self.assertRaises(
       
   489            TypeError,
       
   490            codecs.xmlcharrefreplace_errors,
       
   491            42
       
   492         )
       
   493         # "xmlcharrefreplace" complains about the wrong exception types
       
   494         self.assertRaises(
       
   495            TypeError,
       
   496            codecs.xmlcharrefreplace_errors,
       
   497            UnicodeError("ouch")
       
   498         )
       
   499         # "xmlcharrefreplace" can only be used for encoding
       
   500         self.assertRaises(
       
   501             TypeError,
       
   502             codecs.xmlcharrefreplace_errors,
       
   503             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
       
   504         )
       
   505         self.assertRaises(
       
   506             TypeError,
       
   507             codecs.xmlcharrefreplace_errors,
       
   508             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
       
   509         )
       
   510         # Use the correct exception
       
   511         cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
       
   512         s = "".join(unichr(c) for c in cs)
       
   513         self.assertEquals(
       
   514             codecs.xmlcharrefreplace_errors(
       
   515                 UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
       
   516             ),
       
   517             (u"".join(u"&#%d;" % ord(c) for c in s), len(s))
       
   518         )
       
   519 
       
   520     def test_badandgoodbackslashreplaceexceptions(self):
       
   521         # "backslashreplace" complains about a non-exception passed in
       
   522         self.assertRaises(
       
   523            TypeError,
       
   524            codecs.backslashreplace_errors,
       
   525            42
       
   526         )
       
   527         # "backslashreplace" complains about the wrong exception types
       
   528         self.assertRaises(
       
   529            TypeError,
       
   530            codecs.backslashreplace_errors,
       
   531            UnicodeError("ouch")
       
   532         )
       
   533         # "backslashreplace" can only be used for encoding
       
   534         self.assertRaises(
       
   535             TypeError,
       
   536             codecs.backslashreplace_errors,
       
   537             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
       
   538         )
       
   539         self.assertRaises(
       
   540             TypeError,
       
   541             codecs.backslashreplace_errors,
       
   542             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
       
   543         )
       
   544         # Use the correct exception
       
   545         self.assertEquals(
       
   546             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
       
   547             (u"\\u3042", 1)
       
   548         )
       
   549         self.assertEquals(
       
   550             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
       
   551             (u"\\x00", 1)
       
   552         )
       
   553         self.assertEquals(
       
   554             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
       
   555             (u"\\xff", 1)
       
   556         )
       
   557         self.assertEquals(
       
   558             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
       
   559             (u"\\u0100", 1)
       
   560         )
       
   561         self.assertEquals(
       
   562             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
       
   563             (u"\\uffff", 1)
       
   564         )
       
   565         if sys.maxunicode>0xffff:
       
   566             self.assertEquals(
       
   567                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
       
   568                 (u"\\U00010000", 1)
       
   569             )
       
   570             self.assertEquals(
       
   571                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
       
   572                 (u"\\U0010ffff", 1)
       
   573             )
       
   574 
       
   def test_badhandlerresults(self):
       # Every value below is an invalid result for an error handler
       # (a handler is documented to return a (replacement, position)
       # tuple), so each encode/decode call must fail with TypeError.
       results = (
           42,
           u"foo",
           (1, 2, 3),
           (u"foo", 1, 3),
           (u"foo", None),
           (u"foo",),
           ("foo", 1, 3),
           ("foo", None),
           ("foo",),
       )
       encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
       # (encoding, input that this encoding cannot decode)
       undecodable = (
           ("ascii", "\xff"),
           ("utf-8", "\xff"),
           ("utf-7", "+x-"),
           ("unicode-internal", "\x00"),
       )

       for res in results:
           # The handler has to accept the exception object it is called
           # with.  A handler taking no argument raises TypeError on the
           # call itself, and the codec would never get to look at res.
           codecs.register_error("test.badhandler", lambda exc: res)
           for enc in encs:
               self.assertRaises(
                   TypeError,
                   u"\u3042".encode,
                   enc,
                   "test.badhandler"
               )
           for (enc, data) in undecodable:
               self.assertRaises(
                   TypeError,
                   data.decode,
                   enc,
                   "test.badhandler"
               )
       
   600 
       
   601     def test_lookup(self):
       
   602         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
       
   603         self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
       
   604         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
       
   605         self.assertEquals(
       
   606             codecs.xmlcharrefreplace_errors,
       
   607             codecs.lookup_error("xmlcharrefreplace")
       
   608         )
       
   609         self.assertEquals(
       
   610             codecs.backslashreplace_errors,
       
   611             codecs.lookup_error("backslashreplace")
       
   612         )
       
   613 
       
   614     def test_unencodablereplacement(self):
       
   615         def unencrepl(exc):
       
   616             if isinstance(exc, UnicodeEncodeError):
       
   617                 return (u"\u4242", exc.end)
       
   618             else:
       
   619                 raise TypeError("don't know how to handle %r" % exc)
       
   620         codecs.register_error("test.unencreplhandler", unencrepl)
       
   621         for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
       
   622             self.assertRaises(
       
   623                 UnicodeEncodeError,
       
   624                 u"\u4242".encode,
       
   625                 enc,
       
   626                 "test.unencreplhandler"
       
   627             )
       
   628 
       
   629     def test_badregistercall(self):
       
   630         # enhance coverage of:
       
   631         # Modules/_codecsmodule.c::register_error()
       
   632         # Python/codecs.c::PyCodec_RegisterError()
       
   633         self.assertRaises(TypeError, codecs.register_error, 42)
       
   634         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
       
   635 
       
   636     def test_badlookupcall(self):
       
   637         # enhance coverage of:
       
   638         # Modules/_codecsmodule.c::lookup_error()
       
   639         self.assertRaises(TypeError, codecs.lookup_error)
       
   640 
       
   641     def test_unknownhandler(self):
       
   642         # enhance coverage of:
       
   643         # Modules/_codecsmodule.c::lookup_error()
       
   644         self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
       
   645 
       
   def test_xmlcharrefvalues(self):
       # Coverage for Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
       # and the inline implementations.  The encoded output is not
       # inspected; the calls only have to complete without raising.
       codepoints = [1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000]
       if sys.maxunicode >= 100000:
           # these code points are only available when unichr() can
           # reach them
           codepoints.extend((100000, 500000, 1000000))
       text = u"".join(map(unichr, codepoints))
       codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
       combinations = [
           (enc, err)
           for enc in ("ascii", "iso-8859-15")
           for err in ("xmlcharrefreplace", "test.xmlcharrefreplace")
       ]
       for (enc, err) in combinations:
           text.encode(enc, err)
       
   658 
       
   def test_decodehelper(self):
       # Coverage for
       #   Objects/unicodeobject.c::unicode_decode_call_errorhandler()
       # and its callers.

       # Looking up the handler name "test.unknown" must fail.
       self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")

       # A handler whose result is not a tuple at all.
       codecs.register_error("test.baddecodereturn1", lambda exc: 42)
       for (data, enc) in (
           ("\xff", "ascii"),
           ("\\", "unicode-escape"),
           ("\\x0", "unicode-escape"),
           ("\\x0y", "unicode-escape"),
           ("\\Uffffeeee", "unicode-escape"),
           ("\\uyyyy", "raw-unicode-escape"),
       ):
           self.assertRaises(TypeError, data.decode, enc, "test.baddecodereturn1")

       # A handler whose result tuple holds None instead of a position.
       codecs.register_error("test.baddecodereturn2", lambda exc: (u"?", None))
       self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")

       # From here on the restart position is configured through
       # handler.pos before every call.
       handler = PosReturn()
       codecs.register_error("test.posreturn", handler.handle)

       # (position to return, expected text); None as the expected text
       # means that the call has to raise IndexError.
       for (pos, expected) in (
           (-1, u"<?>0"),       # valid negative position
           (-2, u"<?><?>"),     # valid negative position
           (-3, None),          # negative position out of bounds
           (1, u"<?>0"),        # valid positive position
           (2, u"<?>"),         # largest valid positive position
                                # (one beyond end of input)
           (3, None),           # invalid positive position
       ):
           handler.pos = pos
           if expected is None:
               self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
           else:
               self.assertEqual("\xff0".decode("ascii", "test.posreturn"), expected)

       # Restart at the "0"
       handler.pos = 6
       self.assertEqual("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")

       # Mapping whose item lookup always fails with ValueError.
       class RaisingDict(dict):
           def __getitem__(self, key):
               raise ValueError

       for (exc_type, mapping) in (
           (UnicodeError, {0xff: None}),
           (ValueError, RaisingDict()),
           (TypeError, {0xff: sys.maxunicode+1}),
       ):
           self.assertRaises(exc_type, codecs.charmap_decode, "\xff", "strict", mapping)
       
   717 
       
   def test_encodehelper(self):
       # Coverage for
       #   Objects/unicodeobject.c::unicode_encode_call_errorhandler()
       # and its callers.

       # Looking up the handler name "test.unknown" must fail.
       self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")

       # A handler whose result is not a tuple at all.
       codecs.register_error("test.badencodereturn1", lambda exc: 42)
       self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")

       # A handler whose result tuple holds None instead of a position.
       codecs.register_error("test.badencodereturn2", lambda exc: (u"?", None))
       self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")

       # From here on the restart position is configured through
       # handler.pos before every call.
       handler = PosReturn()
       codecs.register_error("test.posreturn", handler.handle)

       # (position to return, expected bytes); None as the expected
       # bytes means that the call has to raise IndexError.
       for (pos, expected) in (
           (-1, "<?>0"),        # valid negative position
           (-2, "<?><?>"),      # valid negative position
           (-3, None),          # negative position out of bounds
           (1, "<?>0"),         # valid positive position
           (2, "<?>"),          # largest valid positive position
                                # (one beyond end of input)
           (3, None),           # invalid positive position
       ):
           handler.pos = pos
           if expected is None:
               self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
           else:
               self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), expected)

       handler.pos = 0

       # Mapping whose item lookup always fails with ValueError.
       class RaisingDict(dict):
           def __getitem__(self, key):
               raise ValueError

       for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
           # fresh mappings for every error handler, checked in this order
           for (exc_type, mapping) in (
               (UnicodeError, {0xff: None}),
               (ValueError, RaisingDict()),
               (TypeError, {0xff: 300}),
           ):
               self.assertRaises(exc_type, codecs.charmap_encode, u"\xff", err, mapping)
       
   770 
       
   def test_translatehelper(self):
       # Coverage for
       #   Objects/unicodeobject.c::unicode_encode_call_errorhandler()
       # and its callers.  The errors argument is not directly
       # accessible from Python, so not much can be tested here.

       # Mapping whose item lookup always fails with ValueError.
       class RaisingDict(dict):
           def __getitem__(self, key):
               raise ValueError

       translate = u"\xff".translate
       for (exc_type, table) in (
           (ValueError, RaisingDict()),
           (TypeError, {0xff: sys.maxunicode+1}),
           (TypeError, {0xff: ()}),
       ):
           self.assertRaises(exc_type, translate, table)
       
   783 
       
   784     def test_bug828737(self):
       
   785         charmap = {
       
   786             ord("&"): u"&amp;",
       
   787             ord("<"): u"&lt;",
       
   788             ord(">"): u"&gt;",
       
   789             ord('"'): u"&quot;",
       
   790         }
       
   791 
       
   792         for n in (1, 10, 100, 1000):
       
   793             text = u'abc<def>ghi'*n
       
   794             text.translate(charmap)
       
   795 
       
   def test_main():
       """Run the CodecCallbackTest cases via test.test_support.run_unittest()."""
       run = test.test_support.run_unittest
       run(CodecCallbackTest)
       
   798 
       
   if __name__ == "__main__":
       # executed as a script: run the tests of this module
       test_main()