python-2.5.2/win32/include/unicodeobject.h
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 #ifndef Py_UNICODEOBJECT_H
       
     2 #define Py_UNICODEOBJECT_H
       
     3 
       
     4 /*
       
     5 
       
     6 Unicode implementation based on original code by Fredrik Lundh,
       
     7 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
       
     8 Unicode Integration Proposal (see file Misc/unicode.txt).
       
     9 
       
    10 Copyright (c) Corporation for National Research Initiatives.
       
    11 
       
    12 
       
    13  Original header:
       
    14  --------------------------------------------------------------------
       
    15 
       
    16  * Yet another Unicode string type for Python.  This type supports the
       
    17  * 16-bit Basic Multilingual Plane (BMP) only.
       
    18  *
       
    19  * Written by Fredrik Lundh, January 1999.
       
    20  *
       
    21  * Copyright (c) 1999 by Secret Labs AB.
       
    22  * Copyright (c) 1999 by Fredrik Lundh.
       
    23  *
       
    24  * fredrik@pythonware.com
       
    25  * http://www.pythonware.com
       
    26  *
       
    27  * --------------------------------------------------------------------
       
    28  * This Unicode String Type is
       
    29  * 
       
    30  * Copyright (c) 1999 by Secret Labs AB
       
    31  * Copyright (c) 1999 by Fredrik Lundh
       
    32  * 
       
    33  * By obtaining, using, and/or copying this software and/or its
       
    34  * associated documentation, you agree that you have read, understood,
       
    35  * and will comply with the following terms and conditions:
       
    36  * 
       
    37  * Permission to use, copy, modify, and distribute this software and its
       
    38  * associated documentation for any purpose and without fee is hereby
       
    39  * granted, provided that the above copyright notice appears in all
       
    40  * copies, and that both that copyright notice and this permission notice
       
    41  * appear in supporting documentation, and that the name of Secret Labs
       
    42  * AB or the author not be used in advertising or publicity pertaining to
       
    43  * distribution of the software without specific, written prior
       
    44  * permission.
       
    45  * 
       
    46  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
       
    47  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
       
    48  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
       
    49  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       
    50  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       
    51  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
       
    52  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
       
    53  * -------------------------------------------------------------------- */
       
    54 
       
    55 #include <ctype.h>
       
    56 
       
    57 /* === Internal API ======================================================= */
       
    58 
       
    59 /* --- Internal Unicode Format -------------------------------------------- */
       
    60 
       
    61 #ifndef Py_USING_UNICODE
       
    62 
       
    63 #define PyUnicode_Check(op)                 0
       
    64 #define PyUnicode_CheckExact(op)            0
       
    65 
       
    66 #else
       
    67 
       
    68 /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
       
    69    properly set, but the default rules below don't set it.  I'll
       
    70    sort this out some other day -- fredrik@pythonware.com */
       
    71 
       
    72 #ifndef Py_UNICODE_SIZE
       
    73 #error Must define Py_UNICODE_SIZE
       
    74 #endif
       
    75 
       
    76 /* Setting Py_UNICODE_WIDE enables UCS-4 storage.  Otherwise, Unicode
       
    77    strings are stored as UCS-2 (with limited support for UTF-16) */
       
    78 
       
    79 #if Py_UNICODE_SIZE >= 4
       
    80 #define Py_UNICODE_WIDE
       
    81 #endif
       
    82 
       
    83 /* Set these flags if the platform has "wchar.h", "wctype.h" and the
       
    84    wchar_t type is a 16-bit unsigned type */
       
    85 /* #define HAVE_WCHAR_H */
       
    86 /* #define HAVE_USABLE_WCHAR_T */
       
    87 
       
    88 /* Defaults for various platforms */
       
    89 #ifndef PY_UNICODE_TYPE
       
    90 
       
    91 /* Windows has a usable wchar_t type (unless we're using UCS-4) */
       
    92 # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
       
    93 #  define HAVE_USABLE_WCHAR_T
       
    94 #  define PY_UNICODE_TYPE wchar_t
       
    95 # endif
       
    96 
       
    97 # if defined(Py_UNICODE_WIDE)
       
    98 #  define PY_UNICODE_TYPE Py_UCS4
       
    99 # endif
       
   100 
       
   101 #endif
       
   102 
       
   103 /* If the compiler provides a wchar_t type we try to support it
       
   104    through the interface functions PyUnicode_FromWideChar() and
       
   105    PyUnicode_AsWideChar(). */
       
   106 
       
   107 #ifdef HAVE_USABLE_WCHAR_T
       
   108 # ifndef HAVE_WCHAR_H
       
   109 #  define HAVE_WCHAR_H
       
   110 # endif
       
   111 #endif
       
   112 
       
   113 #ifdef HAVE_WCHAR_H
       
   114 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
       
   115 # ifdef _HAVE_BSDI
       
   116 #  include <time.h>
       
   117 # endif
       
   118 #  include <wchar.h>
       
   119 #endif
       
   120 
       
   121 /*
       
   122  * Use this typedef when you need to represent a UTF-16 surrogate pair
       
   123  * as a single unsigned integer.
       
   124  */
       
   125 #if SIZEOF_INT >= 4 
       
   126 typedef unsigned int Py_UCS4; 
       
   127 #elif SIZEOF_LONG >= 4
       
   128 typedef unsigned long Py_UCS4; 
       
   129 #endif
       
   130 
       
   131 typedef PY_UNICODE_TYPE Py_UNICODE;
       
   132 
       
   133 /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
       
   134 
       
   135 /* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
       
   136    produce different external names and thus cause import errors in
       
   137    case Python interpreters and extensions with mixed compiled in
       
   138    Unicode width assumptions are combined. */
       
   139 
       
   140 #ifndef Py_UNICODE_WIDE
       
   141 
       
   142 # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
       
   143 # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
       
   144 # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
       
   145 # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
       
   146 # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
       
   147 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
       
   148 # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
       
   149 # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
       
   150 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
       
   151 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
       
   152 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
       
   153 # define PyUnicode_Compare PyUnicodeUCS2_Compare
       
   154 # define PyUnicode_Concat PyUnicodeUCS2_Concat
       
   155 # define PyUnicode_Contains PyUnicodeUCS2_Contains
       
   156 # define PyUnicode_Count PyUnicodeUCS2_Count
       
   157 # define PyUnicode_Decode PyUnicodeUCS2_Decode
       
   158 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
       
   159 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
       
   160 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
       
   161 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
       
   162 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
       
   163 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
       
   164 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
       
   165 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
       
   166 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
       
   167 # define PyUnicode_Encode PyUnicodeUCS2_Encode
       
   168 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
       
   169 # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
       
   170 # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
       
   171 # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
       
   172 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
       
   173 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
       
   174 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
       
   175 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
       
   176 # define PyUnicode_Find PyUnicodeUCS2_Find
       
   177 # define PyUnicode_Format PyUnicodeUCS2_Format
       
   178 # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
       
   179 # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
       
   180 # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
       
   181 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
       
   182 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
       
   183 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
       
   184 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
       
   185 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
       
   186 # define PyUnicode_Join PyUnicodeUCS2_Join
       
   187 # define PyUnicode_Partition PyUnicodeUCS2_Partition
       
   188 # define PyUnicode_RPartition PyUnicodeUCS2_RPartition
       
   189 # define PyUnicode_RSplit PyUnicodeUCS2_RSplit
       
   190 # define PyUnicode_Replace PyUnicodeUCS2_Replace
       
   191 # define PyUnicode_Resize PyUnicodeUCS2_Resize
       
   192 # define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
       
   193 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
       
   194 # define PyUnicode_Split PyUnicodeUCS2_Split
       
   195 # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
       
   196 # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
       
   197 # define PyUnicode_Translate PyUnicodeUCS2_Translate
       
   198 # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
       
   199 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
       
   200 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
       
   201 # define _PyUnicode_Init _PyUnicodeUCS2_Init
       
   202 # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
       
   203 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
       
   204 # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
       
   205 # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
       
   206 # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
       
   207 # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
       
   208 # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
       
   209 # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
       
   210 # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
       
   211 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
       
   212 # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
       
   213 # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
       
   214 # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
       
   215 # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
       
   216 # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
       
   217 
       
   218 #else
       
   219 
       
   220 # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
       
   221 # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
       
   222 # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
       
   223 # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
       
   224 # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
       
   225 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
       
   226 # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
       
   227 # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
       
   228 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
       
   229 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
       
   230 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
       
   231 # define PyUnicode_Compare PyUnicodeUCS4_Compare
       
   232 # define PyUnicode_Concat PyUnicodeUCS4_Concat
       
   233 # define PyUnicode_Contains PyUnicodeUCS4_Contains
       
   234 # define PyUnicode_Count PyUnicodeUCS4_Count
       
   235 # define PyUnicode_Decode PyUnicodeUCS4_Decode
       
   236 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
       
   237 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
       
   238 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
       
   239 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
       
   240 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
       
   241 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
       
   242 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
       
   243 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
       
   244 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
       
   245 # define PyUnicode_Encode PyUnicodeUCS4_Encode
       
   246 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
       
   247 # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
       
   248 # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
       
   249 # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
       
   250 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
       
   251 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
       
   252 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
       
   253 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
       
   254 # define PyUnicode_Find PyUnicodeUCS4_Find
       
   255 # define PyUnicode_Format PyUnicodeUCS4_Format
       
   256 # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
       
   257 # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
       
   258 # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
       
   259 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
       
   260 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
       
   261 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
       
   262 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
       
   263 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
       
   264 # define PyUnicode_Join PyUnicodeUCS4_Join
       
   265 # define PyUnicode_Partition PyUnicodeUCS4_Partition
       
   266 # define PyUnicode_RPartition PyUnicodeUCS4_RPartition
       
   267 # define PyUnicode_RSplit PyUnicodeUCS4_RSplit
       
   268 # define PyUnicode_Replace PyUnicodeUCS4_Replace
       
   269 # define PyUnicode_Resize PyUnicodeUCS4_Resize
       
   270 # define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
       
   271 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
       
   272 # define PyUnicode_Split PyUnicodeUCS4_Split
       
   273 # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
       
   274 # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
       
   275 # define PyUnicode_Translate PyUnicodeUCS4_Translate
       
   276 # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
       
   277 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
       
   278 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
       
   279 # define _PyUnicode_Init _PyUnicodeUCS4_Init
       
   280 # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
       
   281 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
       
   282 # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
       
   283 # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
       
   284 # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
       
   285 # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
       
   286 # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
       
   287 # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
       
   288 # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
       
   289 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
       
   290 # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
       
   291 # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
       
   292 # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
       
   293 # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
       
   294 # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
       
   295 
       
   296 
       
   297 #endif
       
   298 
       
   299 /* --- Internal Unicode Operations ---------------------------------------- */
       
   300 
       
   301 /* If you want Python to use the compiler's wctype.h functions instead
       
   302    of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
       
   303    configure Python using --with-wctype-functions.  This reduces the
       
   304    interpreter's code size. */
       
   305 
       
   306 #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
       
   307 
       
   308 #include <wctype.h>
       
   309 
       
   310 #define Py_UNICODE_ISSPACE(ch) iswspace(ch)
       
   311 
       
   312 #define Py_UNICODE_ISLOWER(ch) iswlower(ch)
       
   313 #define Py_UNICODE_ISUPPER(ch) iswupper(ch)
       
   314 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
       
   315 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
       
   316 
       
   317 #define Py_UNICODE_TOLOWER(ch) towlower(ch)
       
   318 #define Py_UNICODE_TOUPPER(ch) towupper(ch)
       
   319 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
       
   320 
       
   321 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
       
   322 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
       
   323 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
       
   324 
       
   325 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
       
   326 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
       
   327 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
       
   328 
       
   329 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
       
   330 
       
   331 #else
       
   332 
       
   333 #define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
       
   334 
       
   335 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
       
   336 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
       
   337 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
       
   338 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
       
   339 
       
   340 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
       
   341 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
       
   342 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
       
   343 
       
   344 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
       
   345 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
       
   346 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
       
   347 
       
   348 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
       
   349 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
       
   350 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
       
   351 
       
   352 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
       
   353 
       
   354 #endif
       
   355 /* Nonzero if ch passes any of the ISALPHA/ISDECIMAL/ISDIGIT/ISNUMERIC tests; ch is expanded four times, so avoid side effects. */
       
   356 #define Py_UNICODE_ISALNUM(ch) \
       
   357        (Py_UNICODE_ISALPHA(ch) || \
       
   358         Py_UNICODE_ISDECIMAL(ch) || \
       
   359         Py_UNICODE_ISDIGIT(ch) || \
       
   360         Py_UNICODE_ISNUMERIC(ch))
       
   361 /* Copy length Py_UNICODE items (not bytes) from source to target using Py_MEMCPY. */
       
   362 #define Py_UNICODE_COPY(target, source, length)				\
       
   363 	Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
       
   364 /* Store value into the first length items of target; each argument is evaluated exactly once, in the order target, value, length. */
       
   365 #define Py_UNICODE_FILL(target, value, length) do\
       
   366     {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value); Py_ssize_t n_ = (length);\
       
   367         for (i_ = 0; i_ < n_; i_++) t_[i_] = v_;\
       
   368     } while (0)
       
   369 
       
   370 /* check if substring matches at given offset.  the offset must be
       
   371    valid, and the substring must not be empty */
       
   372 #define Py_UNICODE_MATCH(string, offset, substring) \
       
   373     ((*((string)->str + (offset)) == *((substring)->str)) && \
       
   374     ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
       
   375      !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
       
   376 
       
   377 #ifdef __cplusplus
       
   378 extern "C" {
       
   379 #endif
       
   380 
       
   381 /* --- Unicode Type ------------------------------------------------------- */
       
   382 
       
   383 typedef struct {
       
   384     PyObject_HEAD
       
   385     Py_ssize_t length;		/* Length of raw Unicode data in buffer */
       
   386     Py_UNICODE *str;		/* Raw Unicode buffer */
       
   387     long hash;			/* Hash value; -1 if not set */
       
   388     PyObject *defenc;		/* (Default) Encoded version as Python
       
   389 				   string, or NULL; this is used for
       
   390 				   implementing the buffer protocol */
       
   391 } PyUnicodeObject;
       
   392 
       
   393 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
       
   394 
       
   395 #define PyUnicode_Check(op) PyObject_TypeCheck(op, &PyUnicode_Type)
       
   396 #define PyUnicode_CheckExact(op) ((op)->ob_type == &PyUnicode_Type)
       
   397 
       
   398 /* Fast access macros */
       
   399 #define PyUnicode_GET_SIZE(op) \
       
   400         (((PyUnicodeObject *)(op))->length)
       
   401 #define PyUnicode_GET_DATA_SIZE(op) \
       
   402         (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
       
   403 #define PyUnicode_AS_UNICODE(op) \
       
   404         (((PyUnicodeObject *)(op))->str)
       
   405 #define PyUnicode_AS_DATA(op) \
       
   406         ((const char *)((PyUnicodeObject *)(op))->str)
       
   407 
       
   408 /* --- Constants ---------------------------------------------------------- */
       
   409 
       
   410 /* This Unicode character will be used as replacement character during
       
   411    decoding if the errors argument is set to "replace". Note: the
       
   412    Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
       
   413    Unicode 3.0. */
       
   414 
       
   415 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
       
   416 
       
   417 /* === Public API ========================================================= */
       
   418 
       
   419 /* --- Plain Py_UNICODE --------------------------------------------------- */
       
   420 
       
   421 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
       
   422    size. 
       
   423 
       
   424    u may be NULL which causes the contents to be undefined. It is the
       
   425    user's responsibility to fill in the needed data afterwards. Note
       
   426    that modifying the Unicode object contents after construction is
       
   427    only allowed if u was set to NULL.
       
   428 
       
   429    The buffer is copied into the new object. */
       
   430 
       
   431 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
       
   432     const Py_UNICODE *u,        /* Unicode buffer */
       
   433     Py_ssize_t size             /* size of buffer */
       
   434     );
       
   435 
       
   436 /* Return a read-only pointer to the Unicode object's internal
       
   437    Py_UNICODE buffer. */
       
   438 
       
   439 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
       
   440     PyObject *unicode	 	/* Unicode object */
       
   441     );
       
   442 
       
   443 /* Get the length of the Unicode object. */
       
   444 
       
   445 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
       
   446     PyObject *unicode	 	/* Unicode object */
       
   447     );
       
   448 
       
   449 /* Get the maximum ordinal for a Unicode character. */
       
   450 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
       
   451 
       
   452 /* Resize an already allocated Unicode object to the new size length.
       
   453 
       
   454    *unicode is modified to point to the new (resized) object and 0
       
   455    returned on success.
       
   456 
       
   457    This API may only be called by the function which also called the
       
   458    Unicode constructor. The refcount on the object must be 1. Otherwise,
       
   459    an error is returned.
       
   460 
       
   461    Error handling is implemented as follows: an exception is set, -1
       
   462    is returned and *unicode left untouched.
       
   463 
       
   464 */
       
   465 
       
   466 PyAPI_FUNC(int) PyUnicode_Resize(
       
   467     PyObject **unicode,		/* Pointer to the Unicode object */
       
   468     Py_ssize_t length		/* New length */
       
   469     );
       
   470 
       
   471 /* Coerce obj to a Unicode object and return a reference with
       
   472    *incremented* refcount.
       
   473 
       
   474    Coercion is done in the following way:
       
   475 
       
   476    1. String and other char buffer compatible objects are decoded
       
   477       under the assumptions that they contain data using the current
       
   478       default encoding. Decoding is done in "strict" mode.
       
   479 
       
   480    2. All other objects (including Unicode objects) raise an
       
   481       exception.
       
   482 
       
   483    The API returns NULL in case of an error. The caller is responsible
       
   484    for decref'ing the returned objects.
       
   485 
       
   486 */
       
   487 
       
   488 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
       
   489     PyObject *obj,              /* Object */
       
   490     const char *encoding,       /* encoding */
       
   491     const char *errors          /* error handling */
       
   492     );
       
   493 
       
   494 /* Coerce obj to a Unicode object and return a reference with
       
   495    *incremented* refcount.
       
   496    
       
   497    Unicode objects are passed back as-is (subclasses are converted to
       
   498    true Unicode objects), all other objects are delegated to
       
   499    PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
       
   500    using the default encoding as basis for decoding the object.
       
   501 
       
   502    The API returns NULL in case of an error. The caller is responsible
       
   503    for decref'ing the returned objects.
       
   504 
       
   505 */
       
   506 
       
   507 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
       
   508     PyObject *obj               /* Object */
       
   509     );
       
   510 
       
   511 /* --- wchar_t support for platforms which support it --------------------- */
       
   512 
       
   513 #ifdef HAVE_WCHAR_H
       
   514 
       
   515 /* Create a Unicode Object from the wchar_t buffer w of the given
       
   516    size.
       
   517 
       
   518    The buffer is copied into the new object. */
       
   519 
       
   520 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
       
   521     const wchar_t *w,           /* wchar_t buffer */
       
   522     Py_ssize_t size             /* size of buffer */
       
   523     );
       
   524 
       
   525 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
       
   526    most size wchar_t characters are copied.
       
   527 
       
   528    Note that the resulting wchar_t string may or may not be
       
   529    0-terminated.  It is the responsibility of the caller to make sure
       
   530    that the wchar_t string is 0-terminated in case this is required by
       
   531    the application.
       
   532 
       
   533    Returns the number of wchar_t characters copied (excluding a
       
   534    possibly trailing 0-termination character) or -1 in case of an
       
   535    error. */
       
   536 
       
   537 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
       
   538     PyUnicodeObject *unicode,   /* Unicode object */
       
   539     wchar_t *w,                 /* wchar_t buffer */
       
   540     Py_ssize_t size             /* size of buffer */
       
   541     );
       
   542 
       
   543 #endif
       
   544 
       
   545 /* --- Unicode ordinals --------------------------------------------------- */
       
   546 
       
   547 /* Create a Unicode Object from the given Unicode code point ordinal. 
       
   548  
       
   549    The ordinal must be in range(0x10000) on narrow Python builds
       
   550    (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
       
   551    raised in case it is not.
       
   552 
       
   553 */
       
   554 
       
   555 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
       
   556 
       
   557 /* === Builtin Codecs ===================================================== 
       
   558 
       
   559    Many of these APIs take two arguments encoding and errors. These
       
   560    parameters encoding and errors have the same semantics as the ones
       
   561    of the builtin unicode() API. 
       
   562 
       
   563    Setting encoding to NULL causes the default encoding to be used.
       
   564 
       
   565    Error handling is set by errors which may also be set to NULL
       
   566    meaning to use the default handling defined for the codec. Default
       
   567    error handling for all builtin codecs is "strict" (ValueErrors are
       
   568    raised).
       
   569 
       
   570    The codecs all use a similar interface. Only deviations from the
       
   571    generic ones are documented.
       
   572 
       
   573 */
       
   574 
       
   575 /* --- Manage the default encoding ---------------------------------------- */
       
   576 
       
   577 /* Return a Python string holding the default encoded value of the
       
   578    Unicode object. 
       
   579 
       
   580    The resulting string is cached in the Unicode object for subsequent
       
   581    usage by this function. The cached version is needed to implement
       
   582    the character buffer interface and will live (at least) as long as
       
   583    the Unicode object itself.
       
   584 
       
   585    The refcount of the string is *not* incremented.
       
   586 
       
   587    *** Exported for internal use by the interpreter only !!! ***
       
   588 
       
   589 */
       
   590 
       
   591 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
       
   592     PyObject *, const char *);
       
   593 
       
   594 /* Returns the currently active default encoding.
       
   595 
       
   596    The default encoding is currently implemented as a run-time settable
       
   597    process global.  This may change in future versions of the
       
   598    interpreter to become a parameter which is managed on a per-thread
       
   599    basis.
       
   600    
       
   601  */
       
   602 
       
   603 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
       
   604 
       
   605 /* Sets the currently active default encoding.
       
   606 
       
   607    Returns 0 on success, -1 in case of an error.
       
   608    
       
   609  */
       
   610 
       
   611 PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
       
   612     const char *encoding	/* Encoding name in standard form */
       
   613     );
       
   614 
       
   615 /* --- Generic Codecs ----------------------------------------------------- */
       
   616 
       
   617 /* Create a Unicode object by decoding the encoded string s of the
       
   618    given size. */
       
   619 
       
   620 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
       
   621     const char *s,              /* encoded string */
       
   622     Py_ssize_t size,            /* size of buffer */
       
   623     const char *encoding,       /* encoding */
       
   624     const char *errors          /* error handling */
       
   625     );
       
   626 
       
   627 /* Encodes a Py_UNICODE buffer of the given size and returns a 
       
   628    Python string object. */
       
   629 
       
   630 PyAPI_FUNC(PyObject*) PyUnicode_Encode(
       
   631     const Py_UNICODE *s,        /* Unicode char buffer */
       
   632     Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
       
   633     const char *encoding,       /* encoding */
       
   634     const char *errors          /* error handling */
       
   635     );
       
   636 
       
   637 /* Encodes a Unicode object and returns the result as Python
       
   638    object. */
       
   639 
       
   640 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
       
   641     PyObject *unicode,	 	/* Unicode object */
       
   642     const char *encoding,	/* encoding */
       
   643     const char *errors		/* error handling */
       
   644     );
       
   645 
       
   646 /* Encodes a Unicode object and returns the result as Python string
       
   647    object. */
       
   648 
       
   649 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
       
   650     PyObject *unicode,	 	/* Unicode object */
       
   651     const char *encoding,	/* encoding */
       
   652     const char *errors		/* error handling */
       
   653     );
       
   654 
       
   655 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
       
   656     PyObject* string            /* 256 character map */
       
   657    );
       
   658 
       
   659 
       
   660 /* --- UTF-7 Codecs ------------------------------------------------------- */
       
   661 
       
   662 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
       
   663     const char *string, 	/* UTF-7 encoded string */
       
   664     Py_ssize_t length,	 	/* size of string */
       
   665     const char *errors		/* error handling */
       
   666     );
       
   667 
       
   668 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
       
   669     const Py_UNICODE *data, 	/* Unicode char buffer */
       
   670     Py_ssize_t length,	 		/* number of Py_UNICODE chars to encode */
       
   671     int encodeSetO,             /* force the encoder to encode characters in
       
   672                                    Set O, as described in RFC2152 */
       
   673     int encodeWhiteSpace,       /* force the encoder to encode space, tab,
       
   674                                    carriage return and linefeed characters */
       
   675     const char *errors		/* error handling */
       
   676     );
       
   677 
       
   678 /* --- UTF-8 Codecs ------------------------------------------------------- */
       
   679 
       
   680 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
       
   681     const char *string, 	/* UTF-8 encoded string */
       
   682     Py_ssize_t length,	 	/* size of string */
       
   683     const char *errors		/* error handling */
       
   684     );
       
   685 
       
   686 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
       
   687     const char *string, 	/* UTF-8 encoded string */
       
   688     Py_ssize_t length,	 	/* size of string */
       
   689     const char *errors,		/* error handling */
       
   690     Py_ssize_t *consumed		/* bytes consumed */
       
   691     );
       
   692 
       
   693 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
       
   694     PyObject *unicode	 	/* Unicode object */
       
   695     );
       
   696 
       
   697 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
       
   698     const Py_UNICODE *data, 	/* Unicode char buffer */
       
   699     Py_ssize_t length,	 		/* number of Py_UNICODE chars to encode */
       
   700     const char *errors		/* error handling */
       
   701     );
       
   702 
       
   703 /* --- UTF-16 Codecs ------------------------------------------------------ */
       
   704 
       
   705 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
       
   706    the corresponding Unicode object.
       
   707 
       
   708    errors (if non-NULL) defines the error handling. It defaults
       
   709    to "strict". 
       
   710 
       
   711    If byteorder is non-NULL, the decoder starts decoding using the
       
   712    given byte order:
       
   713 
       
   714 	*byteorder == -1: little endian
       
   715 	*byteorder == 0:  native order
       
   716 	*byteorder == 1:  big endian
       
   717 
       
   718    In native mode, the first two bytes of the stream are checked for a
       
   719    BOM mark. If found, the BOM mark is analysed, the byte order
       
   720    adjusted and the BOM skipped.  In the other modes, no BOM mark
       
   721    interpretation is done. After completion, *byteorder is set to the
       
   722    current byte order at the end of input data.
       
   723 
       
   724    If byteorder is NULL, the codec starts in native order mode.
       
   725 
       
   726 */
       
   727 
       
   728 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
       
   729     const char *string, 	/* UTF-16 encoded string */
       
   730     Py_ssize_t length,	 	/* size of string */
       
   731     const char *errors,		/* error handling */
       
   732     int *byteorder		/* pointer to byteorder to use
       
   733 				   0=native;-1=LE,1=BE; updated on
       
   734 				   exit */
       
   735     );
       
   736 
       
   737 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
       
   738     const char *string, 	/* UTF-16 encoded string */
       
   739     Py_ssize_t length,	 	/* size of string */
       
   740     const char *errors,		/* error handling */
       
   741     int *byteorder,		/* pointer to byteorder to use
       
   742 				   0=native;-1=LE,1=BE; updated on
       
   743 				   exit */
       
   744     Py_ssize_t *consumed		/* bytes consumed */
       
   745     );
       
   746 
       
   747 /* Returns a Python string using the UTF-16 encoding in native byte
       
   748    order. The string always starts with a BOM mark.  */
       
   749 
       
   750 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
       
   751     PyObject *unicode	 	/* Unicode object */
       
   752     );
       
   753 
       
   754 /* Returns a Python string object holding the UTF-16 encoded value of
       
   755    the Unicode data.
       
   756 
       
   757    If byteorder is not 0, output is written according to the following
       
   758    byte order:
       
   759 
       
   760    byteorder == -1: little endian
       
   761    byteorder == 0:  native byte order (writes a BOM mark)
       
   762    byteorder == 1:  big endian
       
   763 
       
   764    If byteorder is 0, the output string will always start with the
       
   765    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
       
   766    prepended.
       
   767 
       
   768    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
       
   769    UCS-2. This trick makes it possible to add full UTF-16 capabilities
       
   770    at a later point without compromising the APIs.
       
   771 
       
   772 */
       
   773 
       
   774 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
       
   775     const Py_UNICODE *data, 	/* Unicode char buffer */
       
   776     Py_ssize_t length,	 		/* number of Py_UNICODE chars to encode */
       
   777     const char *errors,		/* error handling */
       
   778     int byteorder		/* byteorder to use 0=BOM+native;-1=LE,1=BE */
       
   779     );
       
   780 
       
   781 /* --- Unicode-Escape Codecs ---------------------------------------------- */
       
   782 
       
   783 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
       
   784     const char *string, 	/* Unicode-Escape encoded string */
       
   785     Py_ssize_t length,	 	/* size of string */
       
   786     const char *errors		/* error handling */
       
   787     );
       
   788 
       
   789 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
       
   790     PyObject *unicode	 	/* Unicode object */
       
   791     );
       
   792 
       
   793 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
       
   794     const Py_UNICODE *data, 	/* Unicode char buffer */
       
   795     Py_ssize_t length	 		/* Number of Py_UNICODE chars to encode */
       
   796     );
       
   797 
       
   798 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
       
   799 
       
   800 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
       
   801     const char *string, 	/* Raw-Unicode-Escape encoded string */
       
   802     Py_ssize_t length,	 	/* size of string */
       
   803     const char *errors		/* error handling */
       
   804     );
       
   805 
       
   806 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
       
   807     PyObject *unicode	 	/* Unicode object */
       
   808     );
       
   809 
       
   810 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
       
   811     const Py_UNICODE *data, 	/* Unicode char buffer */
       
   812     Py_ssize_t length	 		/* Number of Py_UNICODE chars to encode */
       
   813     );
       
   814 
       
   815 /* --- Unicode Internal Codec ---------------------------------------------
       
   816 
       
   817     Only for internal use in _codecsmodule.c */
       
   818 
       
   819 PyObject *_PyUnicode_DecodeUnicodeInternal(
       
   820     const char *string,
       
   821     Py_ssize_t length,
       
   822     const char *errors
       
   823     );
       
   824 
       
   825 /* --- Latin-1 Codecs ----------------------------------------------------- 
       
   826 
       
   827    Note: Latin-1 corresponds to the first 256 Unicode ordinals.
       
   828 
       
   829 */
       
   830 
       
   831 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
       
   832     const char *string, 	/* Latin-1 encoded string */
       
   833     Py_ssize_t length,	 	/* size of string */
       
   834     const char *errors		/* error handling */
       
   835     );
       
   836 
       
   837 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
       
   838     PyObject *unicode	 	/* Unicode object */
       
   839     );
       
   840 
       
   841 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
       
   842     const Py_UNICODE *data, 	/* Unicode char buffer */
       
   843     Py_ssize_t length,	 		/* Number of Py_UNICODE chars to encode */
       
   844     const char *errors		/* error handling */
       
   845     );
       
   846 
       
   847 /* --- ASCII Codecs ------------------------------------------------------- 
       
   848 
       
   849    Only 7-bit ASCII data is accepted. All other codes generate errors.
       
   850 
       
   851 */
       
   852 
       
   853 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
       
   854     const char *string, 	/* ASCII encoded string */
       
   855     Py_ssize_t length,	 	/* size of string */
       
   856     const char *errors		/* error handling */
       
   857     );
       
   858 
       
   859 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
       
   860     PyObject *unicode	 	/* Unicode object */
       
   861     );
       
   862 
       
   863 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
       
   864     const Py_UNICODE *data, 	/* Unicode char buffer */
       
   865     Py_ssize_t length,	 		/* Number of Py_UNICODE chars to encode */
       
   866     const char *errors		/* error handling */
       
   867     );
       
   868 
       
   869 /* --- Character Map Codecs ----------------------------------------------- 
       
   870 
       
   871    This codec uses mappings to encode and decode characters. 
       
   872 
       
   873    Decoding mappings must map single string characters to single
       
   874    Unicode characters, integers (which are then interpreted as Unicode
       
   875    ordinals) or None (meaning "undefined mapping" and causing an
       
   876    error).
       
   877 
       
   878    Encoding mappings must map single Unicode characters to single
       
   879    string characters, integers (which are then interpreted as Latin-1
       
   880    ordinals) or None (meaning "undefined mapping" and causing an
       
   881    error).
       
   882 
       
   883    If a character lookup fails with a LookupError, the character is
       
   884    copied as-is meaning that its ordinal value will be interpreted as
       
   885    Unicode or Latin-1 ordinal resp. Because of this, mappings only need
       
   886    to contain those mappings which map characters to different code
       
   887    points.
       
   888 
       
   889 */
       
   890 
       
   891 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
       
   892     const char *string, 	/* Encoded string */
       
   893     Py_ssize_t length,	 	/* size of string */
       
   894     PyObject *mapping,		/* character mapping 
       
   895 				   (char ordinal -> unicode ordinal) */
       
   896     const char *errors		/* error handling */
       
   897     );
       
   898 
       
   899 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
       
   900     PyObject *unicode,	 	/* Unicode object */
       
   901     PyObject *mapping		/* character mapping 
       
   902 				   (unicode ordinal -> char ordinal) */
       
   903     );
       
   904 
       
   905 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
       
   906     const Py_UNICODE *data, 	/* Unicode char buffer */
       
   907     Py_ssize_t length,	 	/* Number of Py_UNICODE chars to encode */
       
   908     PyObject *mapping,		/* character mapping 
       
   909 				   (unicode ordinal -> char ordinal) */
       
   910     const char *errors		/* error handling */
       
   911     );
       
   912 
       
   913 /* Translate a Py_UNICODE buffer of the given length by applying a
       
   914    character mapping table to it and return the resulting Unicode
       
   915    object.
       
   916 
       
   917    The mapping table must map Unicode ordinal integers to Unicode
       
   918    ordinal integers or None (causing deletion of the character). 
       
   919 
       
   920    Mapping tables may be dictionaries or sequences. Unmapped character
       
   921    ordinals (ones which cause a LookupError) are left untouched and
       
   922    are copied as-is.
       
   923 
       
   924 */
       
   925 
       
   926 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
       
   927     const Py_UNICODE *data, 	/* Unicode char buffer */
       
   928     Py_ssize_t length,	 		/* Number of Py_UNICODE chars to encode */
       
   929     PyObject *table,		/* Translate table */
       
   930     const char *errors		/* error handling */
       
   931     );
       
   932 
       
   933 #ifdef MS_WIN32
       
   934 
       
   935 /* --- MBCS codecs for Windows -------------------------------------------- */
       
   936 
       
   937 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
       
   938     const char *string,         /* MBCS encoded string */
       
   939     Py_ssize_t length,              /* size of string */
       
   940     const char *errors          /* error handling */
       
   941     );
       
   942 
       
   943 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
       
   944     const char *string,         /* MBCS encoded string */
       
   945     Py_ssize_t length,          /* size of string */
       
   946     const char *errors,         /* error handling */
       
   947     Py_ssize_t *consumed        /* bytes consumed */
       
   948     );
       
   949 
       
   950 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
       
   951     PyObject *unicode           /* Unicode object */
       
   952     );
       
   953 
       
   954 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
       
   955     const Py_UNICODE *data,     /* Unicode char buffer */
       
   956     Py_ssize_t length,              /* Number of Py_UNICODE chars to encode */
       
   957     const char *errors          /* error handling */
       
   958     );
       
   959 
       
   960 #endif /* MS_WIN32 */
       
   961 
       
   962 /* --- Decimal Encoder ---------------------------------------------------- */
       
   963 
       
   964 /* Takes a Unicode string holding a decimal value and writes it into
       
   965    an output buffer using standard ASCII digit codes.
       
   966 
       
   967    The output buffer has to provide at least length+1 bytes of storage
       
   968    area. The output string is 0-terminated.
       
   969 
       
   970    The encoder converts whitespace to ' ', decimal characters to their
       
   971    corresponding ASCII digit and all other Latin-1 characters except
       
   972    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
       
   973    are treated as errors. This includes embedded NULL bytes.
       
   974 
       
   975    Error handling is defined by the errors argument:
       
   976 
       
   977       NULL or "strict": raise a ValueError
       
   978       "ignore": ignore the wrong characters (these are not copied to the
       
   979 		output buffer)
       
   980       "replace": replaces illegal characters with '?'
       
   981 
       
   982    Returns 0 on success, -1 on failure.
       
   983 
       
   984 */
       
   985 
       
   986 PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
       
   987     Py_UNICODE *s,		/* Unicode buffer */
       
   988     Py_ssize_t length,			/* Number of Py_UNICODE chars to encode */
       
   989     char *output,		/* Output buffer; must have size >= length+1 */
       
   990     const char *errors		/* error handling */
       
   991     );
       
   992 
       
   993 /* --- Methods & Slots ----------------------------------------------------
       
   994 
       
   995    These are capable of handling Unicode objects and strings on input
       
   996    (we refer to them as strings in the descriptions) and return
       
   997    Unicode objects or integers as appropriate. */
       
   998 
       
   999 /* Concat two strings giving a new Unicode string. */
       
  1000 
       
  1001 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
       
  1002     PyObject *left,	 	/* Left string */
       
  1003     PyObject *right	 	/* Right string */
       
  1004     );
       
  1005 
       
  1006 /* Split a string giving a list of Unicode strings.
       
  1007 
       
  1008    If sep is NULL, splitting will be done at all whitespace
       
  1009    substrings. Otherwise, splits occur at the given separator.
       
  1010 
       
  1011    At most maxsplit splits will be done. If negative, no limit is set.
       
  1012 
       
  1013    Separators are not included in the resulting list.
       
  1014 
       
  1015 */
       
  1016 
       
  1017 PyAPI_FUNC(PyObject*) PyUnicode_Split(
       
  1018     PyObject *s,		/* String to split */
       
  1019     PyObject *sep,		/* String separator */
       
  1020     Py_ssize_t maxsplit		/* Maxsplit count */
       
  1021     );		
       
  1022 
       
  1023 /* Ditto, but split at line breaks.
       
  1024 
       
  1025    CRLF is considered to be one line break. Line breaks are not
       
  1026    included in the resulting list. */
       
  1027     
       
  1028 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
       
  1029     PyObject *s,		/* String to split */
       
  1030     int keepends		/* If true, line end markers are included */
       
  1031     );		
       
  1032 
       
  1033 /* Partition a string using a given separator. */
       
  1034 
       
  1035 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
       
  1036     PyObject *s,		/* String to partition */
       
  1037     PyObject *sep		/* String separator */
       
  1038     );		
       
  1039 
       
  1040 /* Partition a string using a given separator, searching from the end of the
       
  1041    string. */
       
  1042 
       
  1043 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
       
  1044     PyObject *s,		/* String to partition */
       
  1045     PyObject *sep		/* String separator */
       
  1046     );		
       
  1047 
       
  1048 /* Split a string giving a list of Unicode strings.
       
  1049 
       
  1050    If sep is NULL, splitting will be done at all whitespace
       
  1051    substrings. Otherwise, splits occur at the given separator.
       
  1052 
       
  1053    At most maxsplit splits will be done. But unlike PyUnicode_Split
       
  1054    PyUnicode_RSplit splits from the end of the string. If negative,
       
  1055    no limit is set.
       
  1056 
       
  1057    Separators are not included in the resulting list.
       
  1058 
       
  1059 */
       
  1060 
       
  1061 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
       
  1062     PyObject *s,		/* String to split */
       
  1063     PyObject *sep,		/* String separator */
       
  1064     Py_ssize_t maxsplit		/* Maxsplit count */
       
  1065     );		
       
  1066 
       
  1067 /* Translate a string by applying a character mapping table to it and
       
  1068    return the resulting Unicode object.
       
  1069 
       
  1070    The mapping table must map Unicode ordinal integers to Unicode
       
  1071    ordinal integers or None (causing deletion of the character). 
       
  1072 
       
  1073    Mapping tables may be dictionaries or sequences. Unmapped character
       
  1074    ordinals (ones which cause a LookupError) are left untouched and
       
  1075    are copied as-is.
       
  1076 
       
  1077 */
       
  1078 
       
  1079 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
       
  1080     PyObject *str,		/* String */ 
       
  1081     PyObject *table,		/* Translate table */
       
  1082     const char *errors		/* error handling */
       
  1083     );
       
  1084 
       
  1085 /* Join a sequence of strings using the given separator and return
       
  1086    the resulting Unicode string. */
       
  1087     
       
  1088 PyAPI_FUNC(PyObject*) PyUnicode_Join(
       
  1089     PyObject *separator, 	/* Separator string */
       
  1090     PyObject *seq	 	/* Sequence object */
       
  1091     );
       
  1092 
       
  1093 /* Return 1 if substr matches str[start:end] at the given tail end, 0
       
  1094    otherwise. */
       
  1095 
       
  1096 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
       
  1097     PyObject *str,		/* String */ 
       
  1098     PyObject *substr,		/* Prefix or Suffix string */
       
  1099     Py_ssize_t start,		/* Start index */
       
  1100     Py_ssize_t end,		/* Stop index */
       
  1101     int direction		/* Tail end: -1 prefix, +1 suffix */
       
  1102     );
       
  1103 
       
  1104 /* Return the first position of substr in str[start:end] using the
       
  1105    given search direction or -1 if not found. -2 is returned in case
       
  1106    an error occurred and an exception is set. */
       
  1107 
       
  1108 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
       
  1109     PyObject *str,		/* String */ 
       
  1110     PyObject *substr,		/* Substring to find */
       
  1111     Py_ssize_t start,		/* Start index */
       
  1112     Py_ssize_t end,		/* Stop index */
       
  1113     int direction		/* Find direction: +1 forward, -1 backward */
       
  1114     );
       
  1115 
       
  1116 /* Count the number of occurrences of substr in str[start:end]. */
       
  1117 
       
  1118 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
       
  1119     PyObject *str,		/* String */ 
       
  1120     PyObject *substr,		/* Substring to count */
       
  1121     Py_ssize_t start,		/* Start index */
       
  1122     Py_ssize_t end		/* Stop index */
       
  1123     );
       
  1124 
       
  1125 /* Replace at most maxcount occurrences of substr in str with replstr
       
  1126    and return the resulting Unicode object. */
       
  1127 
       
  1128 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
       
  1129     PyObject *str,		/* String */ 
       
  1130     PyObject *substr,		/* Substring to find */
       
  1131     PyObject *replstr,		/* Substring to replace */
       
  1132     Py_ssize_t maxcount		/* Max. number of replacements to apply;
       
  1133 				   -1 = all */
       
  1134     );
       
  1135 
       
  1136 /* Compare two strings and return -1, 0, 1 for less than, equal,
       
  1137    greater than resp. */
       
  1138 
       
  1139 PyAPI_FUNC(int) PyUnicode_Compare(
       
  1140     PyObject *left,		/* Left string */ 
       
  1141     PyObject *right		/* Right string */
       
  1142     );
       
  1143 
       
  1144 /* Rich compare two strings and return one of the following:
       
  1145 
       
  1146    - NULL in case an exception was raised
       
  1147    - Py_True or Py_False for successful comparisons
       
  1148    - Py_NotImplemented in case the type combination is unknown
       
  1149 
       
  1150    Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
       
  1151    case the conversion of the arguments to Unicode fails with a
       
  1152    UnicodeDecodeError.
       
  1153 
       
  1154    Possible values for op:
       
  1155 
       
  1156      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
       
  1157 
       
  1158 */
       
  1159 
       
  1160 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
       
  1161     PyObject *left,		/* Left string */ 
       
  1162     PyObject *right,		/* Right string */
       
  1163     int op			/* Operation: Py_EQ, Py_NE, Py_GT, etc. */
       
  1164     );
       
  1165 
       
  1166 /* Apply an argument tuple or dictionary to a format string and return
       
  1167    the resulting Unicode string. */
       
  1168 
       
  1169 PyAPI_FUNC(PyObject *) PyUnicode_Format(
       
  1170     PyObject *format,		/* Format string */ 
       
  1171     PyObject *args		/* Argument tuple or dictionary */
       
  1172     );
       
  1173 
       
  1174 /* Checks whether element is contained in container and return 1/0
       
  1175    accordingly.
       
  1176 
       
  1177    element has to coerce to a one-element Unicode string. -1 is
       
  1178    returned in case of an error. */
       
  1179 
       
  1180 PyAPI_FUNC(int) PyUnicode_Contains(
       
  1181     PyObject *container,	/* Container string */ 
       
  1182     PyObject *element		/* Element string */
       
  1183     );
       
  1184 
       
  1185 /* Externally visible for str.strip(unicode) */
       
  1186 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
       
  1187     PyUnicodeObject *self,
       
  1188     int striptype,
       
  1189     PyObject *sepobj
       
  1190     );
       
  1191 
       
  1192 /* === Characters Type APIs =============================================== */
       
  1193 
       
  1194 /* These should not be used directly. Use the Py_UNICODE_IS* and
       
  1195    Py_UNICODE_TO* macros instead. 
       
  1196 
       
  1197    These APIs are implemented in Objects/unicodectype.c.
       
  1198 
       
  1199 */
       
  1200 
       
  1201 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
       
  1202     Py_UNICODE ch 	/* Unicode character */
       
  1203     );
       
  1204 
       
  1205 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
       
  1206     Py_UNICODE ch 	/* Unicode character */
       
  1207     );
       
  1208 
       
  1209 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
       
  1210     Py_UNICODE ch 	/* Unicode character */
       
  1211     );
       
  1212 
       
  1213 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
       
  1214     const Py_UNICODE ch 	/* Unicode character */
       
  1215     );
       
  1216 
       
  1217 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
       
  1218     const Py_UNICODE ch 	/* Unicode character */
       
  1219     );
       
  1220 
       
  1221 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
       
  1222     Py_UNICODE ch 	/* Unicode character */
       
  1223     );
       
  1224 
       
  1225 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
       
  1226     Py_UNICODE ch 	/* Unicode character */
       
  1227     );
       
  1228 
       
  1229 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
       
  1230     Py_UNICODE ch 	/* Unicode character */
       
  1231     );
       
  1232 
       
  1233 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
       
  1234     Py_UNICODE ch 	/* Unicode character */
       
  1235     );
       
  1236 
       
  1237 PyAPI_FUNC(int) _PyUnicode_ToDigit(
       
  1238     Py_UNICODE ch 	/* Unicode character */
       
  1239     );
       
  1240 
       
  1241 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
       
  1242     Py_UNICODE ch 	/* Unicode character */
       
  1243     );
       
  1244 
       
  1245 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
       
  1246     Py_UNICODE ch 	/* Unicode character */
       
  1247     );
       
  1248 
       
  1249 PyAPI_FUNC(int) _PyUnicode_IsDigit(
       
  1250     Py_UNICODE ch 	/* Unicode character */
       
  1251     );
       
  1252 
       
  1253 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
       
  1254     Py_UNICODE ch 	/* Unicode character */
       
  1255     );
       
  1256 
       
  1257 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
       
  1258     Py_UNICODE ch 	/* Unicode character */
       
  1259     );
       
  1260 
       
  1261 #ifdef __cplusplus
       
  1262 }
       
  1263 #endif
       
  1264 #endif /* Py_USING_UNICODE */
       
  1265 #endif /* !Py_UNICODEOBJECT_H */