python-2.5.2/win32/Lib/test/test_ucn.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 """ Test script for the Unicode implementation.
       
     2 
       
     3 Written by Bill Tutt.
       
     4 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
       
     5 
       
     6 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
       
     7 
       
     8 """#"
       
     9 
       
    10 import unittest
       
    11 
       
    12 from test import test_support
       
    13 
       
    14 class UnicodeNamesTest(unittest.TestCase):
       
    15 
       
    16     def checkletter(self, name, code):
       
    17         # Helper that put all \N escapes inside eval'd raw strings,
       
    18         # to make sure this script runs even if the compiler
       
    19         # chokes on \N escapes
       
    20         res = eval(ur'u"\N{%s}"' % name)
       
    21         self.assertEqual(res, code)
       
    22         return res
       
    23 
       
    24     def test_general(self):
       
    25         # General and case insensitivity test:
       
    26         chars = [
       
    27             "LATIN CAPITAL LETTER T",
       
    28             "LATIN SMALL LETTER H",
       
    29             "LATIN SMALL LETTER E",
       
    30             "SPACE",
       
    31             "LATIN SMALL LETTER R",
       
    32             "LATIN CAPITAL LETTER E",
       
    33             "LATIN SMALL LETTER D",
       
    34             "SPACE",
       
    35             "LATIN SMALL LETTER f",
       
    36             "LATIN CAPITAL LeTtEr o",
       
    37             "LATIN SMaLl LETTER x",
       
    38             "SPACE",
       
    39             "LATIN SMALL LETTER A",
       
    40             "LATIN SMALL LETTER T",
       
    41             "LATIN SMALL LETTER E",
       
    42             "SPACE",
       
    43             "LATIN SMALL LETTER T",
       
    44             "LATIN SMALL LETTER H",
       
    45             "LATIN SMALL LETTER E",
       
    46             "SpAcE",
       
    47             "LATIN SMALL LETTER S",
       
    48             "LATIN SMALL LETTER H",
       
    49             "LATIN small LETTER e",
       
    50             "LATIN small LETTER e",
       
    51             "LATIN SMALL LETTER P",
       
    52             "FULL STOP"
       
    53         ]
       
    54         string = u"The rEd fOx ate the sheep."
       
    55 
       
    56         self.assertEqual(
       
    57             u"".join([self.checkletter(*args) for args in zip(chars, string)]),
       
    58             string
       
    59         )
       
    60 
       
    61     def test_ascii_letters(self):
       
    62         import unicodedata
       
    63 
       
    64         for char in "".join(map(chr, xrange(ord("a"), ord("z")))):
       
    65             name = "LATIN SMALL LETTER %s" % char.upper()
       
    66             code = unicodedata.lookup(name)
       
    67             self.assertEqual(unicodedata.name(code), name)
       
    68 
       
    69     def test_hangul_syllables(self):
       
    70         self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
       
    71         self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
       
    72         self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
       
    73         self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
       
    74         self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
       
    75         self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
       
    76         self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
       
    77         self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
       
    78         self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
       
    79         self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
       
    80         self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
       
    81         self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
       
    82         self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")
       
    83 
       
    84         import unicodedata
       
    85         self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
       
    86 
       
    87     def test_cjk_unified_ideographs(self):
       
    88         self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400")
       
    89         self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5")
       
    90         self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00")
       
    91         self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5")
       
    92         self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000")
       
    93         self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6")
       
    94 
       
    95     def test_bmp_characters(self):
       
    96         import unicodedata
       
    97         count = 0
       
    98         for code in xrange(0x10000):
       
    99             char = unichr(code)
       
   100             name = unicodedata.name(char, None)
       
   101             if name is not None:
       
   102                 self.assertEqual(unicodedata.lookup(name), char)
       
   103                 count += 1
       
   104 
       
   105     def test_misc_symbols(self):
       
   106         self.checkletter("PILCROW SIGN", u"\u00b6")
       
   107         self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD")
       
   108         self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F")
       
   109         self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41")
       
   110 
       
   111     def test_errors(self):
       
   112         import unicodedata
       
   113         self.assertRaises(TypeError, unicodedata.name)
       
   114         self.assertRaises(TypeError, unicodedata.name, u'xx')
       
   115         self.assertRaises(TypeError, unicodedata.lookup)
       
   116         self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
       
   117 
       
   118     def test_strict_eror_handling(self):
       
   119         # bogus character name
       
   120         self.assertRaises(
       
   121             UnicodeError,
       
   122             unicode, "\\N{blah}", 'unicode-escape', 'strict'
       
   123         )
       
   124         # long bogus character name
       
   125         self.assertRaises(
       
   126             UnicodeError,
       
   127             unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
       
   128         )
       
   129         # missing closing brace
       
   130         self.assertRaises(
       
   131             UnicodeError,
       
   132             unicode, "\\N{SPACE", 'unicode-escape', 'strict'
       
   133         )
       
   134         # missing opening brace
       
   135         self.assertRaises(
       
   136             UnicodeError,
       
   137             unicode, "\\NSPACE", 'unicode-escape', 'strict'
       
   138         )
       
   139 
       
   140 def test_main():
       
   141     test_support.run_unittest(UnicodeNamesTest)
       
   142 
       
   143 if __name__ == "__main__":
       
   144     test_main()