Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:06:51

0001 #ifndef Py_UNICODEOBJECT_H
0002 #define Py_UNICODEOBJECT_H
0003 
0004 #include <stdarg.h>               // va_list
0005 
0006 /*
0007 
0008 Unicode implementation based on original code by Fredrik Lundh,
0009 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
0010 Unicode Integration Proposal. (See
0011 http://www.egenix.com/files/python/unicode-proposal.txt).
0012 
0013 Copyright (c) Corporation for National Research Initiatives.
0014 
0015 
0016  Original header:
0017  --------------------------------------------------------------------
0018 
0019  * Yet another Unicode string type for Python.  This type supports the
0020  * 16-bit Basic Multilingual Plane (BMP) only.
0021  *
0022  * Written by Fredrik Lundh, January 1999.
0023  *
0024  * Copyright (c) 1999 by Secret Labs AB.
0025  * Copyright (c) 1999 by Fredrik Lundh.
0026  *
0027  * fredrik@pythonware.com
0028  * http://www.pythonware.com
0029  *
0030  * --------------------------------------------------------------------
0031  * This Unicode String Type is
0032  *
0033  * Copyright (c) 1999 by Secret Labs AB
0034  * Copyright (c) 1999 by Fredrik Lundh
0035  *
0036  * By obtaining, using, and/or copying this software and/or its
0037  * associated documentation, you agree that you have read, understood,
0038  * and will comply with the following terms and conditions:
0039  *
0040  * Permission to use, copy, modify, and distribute this software and its
0041  * associated documentation for any purpose and without fee is hereby
0042  * granted, provided that the above copyright notice appears in all
0043  * copies, and that both that copyright notice and this permission notice
0044  * appear in supporting documentation, and that the name of Secret Labs
0045  * AB or the author not be used in advertising or publicity pertaining to
0046  * distribution of the software without specific, written prior
0047  * permission.
0048  *
0049  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
0050  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
0051  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
0052  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
0053  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
0054  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
0055  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
0056  * -------------------------------------------------------------------- */
0057 
0058 #include <ctype.h>
0059 
0060 /* === Internal API ======================================================= */
0061 
0062 /* --- Internal Unicode Format -------------------------------------------- */
0063 
0064 /* Python 3.x requires unicode */
0065 #define Py_USING_UNICODE
0066 
0067 #ifndef SIZEOF_WCHAR_T
0068 #error Must define SIZEOF_WCHAR_T
0069 #endif
0070 
0071 #define Py_UNICODE_SIZE SIZEOF_WCHAR_T
0072 
0073 /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
0074    Otherwise, Unicode strings are stored as UCS-2 (with limited support
0075    for UTF-16) */
0076 
0077 #if Py_UNICODE_SIZE >= 4
0078 #define Py_UNICODE_WIDE
0079 #endif
0080 
0081 /* Set these flags if the platform has "wchar.h" and the
0082    wchar_t type is a 16-bit unsigned type */
0083 /* #define HAVE_WCHAR_H */
0084 /* #define HAVE_USABLE_WCHAR_T */
0085 
0086 /* If the compiler provides a wchar_t type we try to support it
0087    through the interface functions PyUnicode_FromWideChar(),
0088    PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
0089 
0090 #ifdef HAVE_USABLE_WCHAR_T
0091 # ifndef HAVE_WCHAR_H
0092 #  define HAVE_WCHAR_H
0093 # endif
0094 #endif
0095 
0096 #ifdef HAVE_WCHAR_H
0097 #  include <wchar.h>
0098 #endif
0099 
0100 /* Py_UCS4, Py_UCS2 and Py_UCS1 are typedefs for the respective
0101    unicode representations (32-, 16- and 8-bit unsigned integers). */
0102 typedef uint32_t Py_UCS4;
0103 typedef uint16_t Py_UCS2;
0104 typedef uint8_t Py_UCS1;
0105 
0106 #ifdef __cplusplus
0107 extern "C" {
0108 #endif
0109 
0110 
0111 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
0112 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
0113 /* Type-check macros: subclass-flag test and exact PyUnicode_Type match. */
0114 #define PyUnicode_Check(op) \
0115     PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
0116 #define PyUnicode_CheckExact(op) Py_IS_TYPE((op), &PyUnicode_Type)
0117 
0118 /* --- Constants ---------------------------------------------------------- */
0119 
0120 /* This Unicode character will be used as replacement character during
0121    decoding if the errors argument is set to "replace". Note: the
0122    Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
0123    Unicode 3.0. */
0124 
0125 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
0126 
0127 /* === Public API ========================================================= */
0128 
0129 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
0130 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
0131     const char *u,             /* UTF-8 encoded string */
0132     Py_ssize_t size            /* size of buffer */
0133     );
0134 
0135 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
0136    UTF-8 encoded bytes.  The size is determined with strlen(). */
0137 PyAPI_FUNC(PyObject*) PyUnicode_FromString(
0138     const char *u              /* UTF-8 encoded string */
0139     );
0140 
0141 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0142 PyAPI_FUNC(PyObject*) PyUnicode_Substring(
0143     PyObject *str,
0144     Py_ssize_t start,
0145     Py_ssize_t end);
0146 #endif
0147 
0148 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0149 /* Copy the string into a UCS4 buffer including the null character if copy_null
0150    is set. Return NULL and raise an exception on error. Raise a SystemError if
0151    the buffer is smaller than the string. Return buffer on success.
0152 
0153    buflen is the length of the buffer in (Py_UCS4) characters. */
0154 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
0155     PyObject *unicode,
0156     Py_UCS4* buffer,
0157     Py_ssize_t buflen,
0158     int copy_null);
0159 
0160 /* Copy the string into a UCS4 buffer. A new buffer is allocated using
0161    PyMem_Malloc; if this fails, NULL is returned with a memory error
0162    exception set. */
0163 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
0164 #endif
0165 
0166 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0167 /* Get the length of the Unicode object. */
0168 
0169 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
0170     PyObject *unicode
0171 );
0172 #endif
0173 
0174 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0175 /* Read a character from the string. */
0176 
0177 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
0178     PyObject *unicode,
0179     Py_ssize_t index
0180     );
0181 
0182 /* Write a character to the string. The string must have been created through
0183    PyUnicode_New, must not be shared, and must not have been hashed yet.
0184 
0185    Return 0 on success, -1 on error. */
0186 
0187 PyAPI_FUNC(int) PyUnicode_WriteChar(
0188     PyObject *unicode,
0189     Py_ssize_t index,
0190     Py_UCS4 character
0191     );
0192 #endif
0193 
0194 /* Resize a Unicode object. The length is the number of codepoints.
0195 
0196    *unicode is modified to point to the new (resized) object and 0
0197    returned on success.
0198 
0199    Try to resize the string in place (which is usually faster than allocating
0200    a new string and copying characters), or create a new string.
0201 
0202    Error handling is implemented as follows: an exception is set, -1
0203    is returned and *unicode left untouched.
0204 
0205    WARNING: The function doesn't check string content, the result may not be a
0206             string in canonical representation. */
0207 
0208 PyAPI_FUNC(int) PyUnicode_Resize(
0209     PyObject **unicode,         /* Pointer to the Unicode object */
0210     Py_ssize_t length           /* New length */
0211     );
0212 
0213 /* Decode obj to a Unicode object.
0214 
0215    bytes, bytearray and other bytes-like objects are decoded according to the
0216    given encoding and error handler. The encoding and error handler can be
0217    NULL to have the interface use UTF-8 and "strict".
0218 
0219    All other objects (including Unicode objects) raise an exception.
0220 
0221    The API returns NULL in case of an error. The caller is responsible
0222    for decref'ing the returned objects.
0223 
0224 */
0225 
0226 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
0227     PyObject *obj,              /* Object */
0228     const char *encoding,       /* encoding */
0229     const char *errors          /* error handling */
0230     );
0231 
0232 /* Copy an instance of a Unicode subtype to a new true Unicode object if
0233    necessary. If obj is already a true Unicode object (not a subtype), return
0234    the reference with *incremented* refcount.
0235 
0236    The API returns NULL in case of an error. The caller is responsible
0237    for decref'ing the returned objects.
0238 
0239 */
0240 
0241 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
0242     PyObject *obj      /* Object */
0243     );
0244 
0245 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
0246     const char *format,   /* ASCII-encoded string  */
0247     va_list vargs
0248     );
0249 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
0250     const char *format,   /* ASCII-encoded string  */
0251     ...
0252     );
0253 
0254 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
0255 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
0256     const char *u              /* UTF-8 encoded string */
0257     );
0258 
0259 /* --- wchar_t support for platforms which support it --------------------- */
0260 
0261 #ifdef HAVE_WCHAR_H
0262 
0263 /* Create a Unicode Object from the wchar_t buffer w of the given
0264    size.
0265 
0266    The buffer is copied into the new object. */
0267 
0268 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
0269     const wchar_t *w,           /* wchar_t buffer */
0270     Py_ssize_t size             /* size of buffer */
0271     );
0272 
0273 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
0274    most size wchar_t characters are copied.
0275 
0276    Note that the resulting wchar_t string may or may not be
0277    0-terminated.  It is the responsibility of the caller to make sure
0278    that the wchar_t string is 0-terminated in case this is required by
0279    the application.
0280 
0281    Returns the number of wchar_t characters copied (excluding a
0282    possibly trailing 0-termination character) or -1 in case of an
0283    error. */
0284 
0285 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
0286     PyObject *unicode,          /* Unicode object */
0287     wchar_t *w,                 /* wchar_t buffer */
0288     Py_ssize_t size             /* size of buffer */
0289     );
0290 
0291 /* Convert the Unicode object to a wide character string. The output string
0292    always ends with a nul character. If size is not NULL, write the number of
0293    wide characters (excluding the null character) into *size.
0294 
0295    Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
0296    on success. On error, returns NULL, *size is undefined and raises a
0297    MemoryError. */
0298 
0299 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
0300     PyObject *unicode,          /* Unicode object */
0301     Py_ssize_t *size            /* number of characters of the result */
0302     );
0303 
0304 #endif
0305 
0306 /* --- Unicode ordinals --------------------------------------------------- */
0307 
0308 /* Create a Unicode Object from the given Unicode code point ordinal.
0309 
0310    The ordinal must be in range(0x110000). A ValueError is
0311    raised in case it is not.
0312 
0313 */
0314 
0315 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
0316 
0317 /* === Builtin Codecs =====================================================
0318 
0319    Many of these APIs take two arguments encoding and errors. These
0320    parameters encoding and errors have the same semantics as the ones
0321    of the builtin str() API.
0322 
0323    Setting encoding to NULL causes the default encoding (UTF-8) to be used.
0324 
0325    Error handling is set by errors which may also be set to NULL
0326    meaning to use the default handling defined for the codec. Default
0327    error handling for all builtin codecs is "strict" (ValueErrors are
0328    raised).
0329 
0330    The codecs all use a similar interface. Only deviations from the
0331    generic ones are documented.
0332 
0333 */
0334 
0335 /* --- Manage the default encoding ---------------------------------------- */
0336 
0337 /* Returns "utf-8".  */
0338 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
0339 
0340 /* --- Generic Codecs ----------------------------------------------------- */
0341 
0342 /* Create a Unicode object by decoding the encoded string s of the
0343    given size. */
0344 
0345 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
0346     const char *s,              /* encoded string */
0347     Py_ssize_t size,            /* size of buffer */
0348     const char *encoding,       /* encoding */
0349     const char *errors          /* error handling */
0350     );
0351 
0352 /* Decode a Unicode object unicode and return the result as Python
0353    object.
0354 
0355    This API is DEPRECATED. The only supported standard encoding is rot13.
0356    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
0357    that decode from str. */
0358 
0359 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
0360     PyObject *unicode,          /* Unicode object */
0361     const char *encoding,       /* encoding */
0362     const char *errors          /* error handling */
0363     );
0364 
0365 /* Decode a Unicode object unicode and return the result as Unicode
0366    object.
0367 
0368    This API is DEPRECATED. The only supported standard encoding is rot13.
0369    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
0370    that decode from str to str. */
0371 
0372 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
0373     PyObject *unicode,          /* Unicode object */
0374     const char *encoding,       /* encoding */
0375     const char *errors          /* error handling */
0376     );
0377 
0378 /* Encodes a Unicode object and returns the result as Python
0379    object.
0380 
0381    This API is DEPRECATED.  It is superseded by PyUnicode_AsEncodedString()
0382    since all standard encodings (except rot13) encode str to bytes.
0383    Use PyCodec_Encode() for encoding with rot13 and non-standard codecs
0384    that encode from str to non-bytes. */
0385 
0386 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
0387     PyObject *unicode,          /* Unicode object */
0388     const char *encoding,       /* encoding */
0389     const char *errors          /* error handling */
0390     );
0391 
0392 /* Encodes a Unicode object and returns the result as Python string
0393    object. */
0394 
0395 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
0396     PyObject *unicode,          /* Unicode object */
0397     const char *encoding,       /* encoding */
0398     const char *errors          /* error handling */
0399     );
0400 
0401 /* Encodes a Unicode object and returns the result as Unicode
0402    object.
0403 
0404    This API is DEPRECATED.  The only supported standard encoding is rot13.
0405    Use PyCodec_Encode() to encode with rot13 and non-standard codecs
0406    that encode from str to str. */
0407 
0408 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
0409     PyObject *unicode,          /* Unicode object */
0410     const char *encoding,       /* encoding */
0411     const char *errors          /* error handling */
0412     );
0413 
0414 /* Build an encoding map. */
0415 
0416 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
0417     PyObject* string            /* 256 character map */
0418    );
0419 
0420 /* --- UTF-7 Codecs ------------------------------------------------------- */
0421 
0422 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
0423     const char *string,         /* UTF-7 encoded string */
0424     Py_ssize_t length,          /* size of string */
0425     const char *errors          /* error handling */
0426     );
0427 
0428 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
0429     const char *string,         /* UTF-7 encoded string */
0430     Py_ssize_t length,          /* size of string */
0431     const char *errors,         /* error handling */
0432     Py_ssize_t *consumed        /* bytes consumed */
0433     );
0434 
0435 /* --- UTF-8 Codecs ------------------------------------------------------- */
0436 
0437 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
0438     const char *string,         /* UTF-8 encoded string */
0439     Py_ssize_t length,          /* size of string */
0440     const char *errors          /* error handling */
0441     );
0442 
0443 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
0444     const char *string,         /* UTF-8 encoded string */
0445     Py_ssize_t length,          /* size of string */
0446     const char *errors,         /* error handling */
0447     Py_ssize_t *consumed        /* bytes consumed */
0448     );
0449 
0450 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
0451     PyObject *unicode           /* Unicode object */
0452     );
0453 
0454 /* Returns a pointer to the default encoding (UTF-8) of the
0455    Unicode object unicode and stores the size of the encoded
0456    representation, in bytes, in *size.
0457 
0458    In case of an error, *size is not set.
0459 
0460    This function caches the UTF-8 encoded string in the unicodeobject
0461    and subsequent calls will return the same string.  The memory is released
0462    when the unicodeobject is deallocated.
0463 */
0464 
0465 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000
0466 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
0467     PyObject *unicode,
0468     Py_ssize_t *size);
0469 #endif
0470 
0471 /* --- UTF-32 Codecs ------------------------------------------------------ */
0472 
0473 /* Decodes length bytes from a UTF-32 encoded buffer string and returns
0474    the corresponding Unicode object.
0475 
0476    errors (if non-NULL) defines the error handling. It defaults
0477    to "strict".
0478 
0479    If byteorder is non-NULL, the decoder starts decoding using the
0480    given byte order:
0481 
0482     *byteorder == -1: little endian
0483     *byteorder == 0:  native order
0484     *byteorder == 1:  big endian
0485 
0486    In native mode, the first four bytes of the stream are checked for a
0487    BOM mark. If found, the BOM mark is analysed, the byte order
0488    adjusted and the BOM skipped.  In the other modes, no BOM mark
0489    interpretation is done. After completion, *byteorder is set to the
0490    current byte order at the end of input data.
0491 
0492    If byteorder is NULL, the codec starts in native order mode.
0493 
0494 */
0495 
0496 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
0497     const char *string,         /* UTF-32 encoded string */
0498     Py_ssize_t length,          /* size of string */
0499     const char *errors,         /* error handling */
0500     int *byteorder              /* pointer to byteorder to use
0501                                    0=native;-1=LE,1=BE; updated on
0502                                    exit */
0503     );
0504 
0505 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
0506     const char *string,         /* UTF-32 encoded string */
0507     Py_ssize_t length,          /* size of string */
0508     const char *errors,         /* error handling */
0509     int *byteorder,             /* pointer to byteorder to use
0510                                    0=native;-1=LE,1=BE; updated on
0511                                    exit */
0512     Py_ssize_t *consumed        /* bytes consumed */
0513     );
0514 
0515 /* Returns a Python string using the UTF-32 encoding in native byte
0516    order. The string always starts with a BOM mark.  */
0517 
0518 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
0519     PyObject *unicode           /* Unicode object */
0520     );
0521 
0522 /* Returns a Python string object holding the UTF-32 encoded value of
0523    the Unicode data.
0524 
0525    If byteorder is not 0, output is written according to the following
0526    byte order:
0527 
0528    byteorder == -1: little endian
0529    byteorder == 0:  native byte order (writes a BOM mark)
0530    byteorder == 1:  big endian
0531 
0532    If byteorder is 0, the output string will always start with the
0533    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
0534    prepended.
0535 
0536 */
0537 
0538 /* --- UTF-16 Codecs ------------------------------------------------------ */
0539 
0540 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
0541    the corresponding Unicode object.
0542 
0543    errors (if non-NULL) defines the error handling. It defaults
0544    to "strict".
0545 
0546    If byteorder is non-NULL, the decoder starts decoding using the
0547    given byte order:
0548 
0549     *byteorder == -1: little endian
0550     *byteorder == 0:  native order
0551     *byteorder == 1:  big endian
0552 
0553    In native mode, the first two bytes of the stream are checked for a
0554    BOM mark. If found, the BOM mark is analysed, the byte order
0555    adjusted and the BOM skipped.  In the other modes, no BOM mark
0556    interpretation is done. After completion, *byteorder is set to the
0557    current byte order at the end of input data.
0558 
0559    If byteorder is NULL, the codec starts in native order mode.
0560 
0561 */
0562 
0563 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
0564     const char *string,         /* UTF-16 encoded string */
0565     Py_ssize_t length,          /* size of string */
0566     const char *errors,         /* error handling */
0567     int *byteorder              /* pointer to byteorder to use
0568                                    0=native;-1=LE,1=BE; updated on
0569                                    exit */
0570     );
0571 
0572 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
0573     const char *string,         /* UTF-16 encoded string */
0574     Py_ssize_t length,          /* size of string */
0575     const char *errors,         /* error handling */
0576     int *byteorder,             /* pointer to byteorder to use
0577                                    0=native;-1=LE,1=BE; updated on
0578                                    exit */
0579     Py_ssize_t *consumed        /* bytes consumed */
0580     );
0581 
0582 /* Returns a Python string using the UTF-16 encoding in native byte
0583    order. The string always starts with a BOM mark.  */
0584 
0585 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
0586     PyObject *unicode           /* Unicode object */
0587     );
0588 
0589 /* --- Unicode-Escape Codecs ---------------------------------------------- */
0590 
0591 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
0592     const char *string,         /* Unicode-Escape encoded string */
0593     Py_ssize_t length,          /* size of string */
0594     const char *errors          /* error handling */
0595     );
0596 
0597 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
0598     PyObject *unicode           /* Unicode object */
0599     );
0600 
0601 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
0602 
0603 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
0604     const char *string,         /* Raw-Unicode-Escape encoded string */
0605     Py_ssize_t length,          /* size of string */
0606     const char *errors          /* error handling */
0607     );
0608 
0609 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
0610     PyObject *unicode           /* Unicode object */
0611     );
0612 
0613 /* --- Latin-1 Codecs -----------------------------------------------------
0614 
0615    Note: Latin-1 corresponds to the first 256 Unicode ordinals. */
0616 
0617 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
0618     const char *string,         /* Latin-1 encoded string */
0619     Py_ssize_t length,          /* size of string */
0620     const char *errors          /* error handling */
0621     );
0622 
0623 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
0624     PyObject *unicode           /* Unicode object */
0625     );
0626 
0627 /* --- ASCII Codecs -------------------------------------------------------
0628 
0629    Only 7-bit ASCII data is expected. All other codes generate errors.
0630 
0631 */
0632 
0633 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
0634     const char *string,         /* ASCII encoded string */
0635     Py_ssize_t length,          /* size of string */
0636     const char *errors          /* error handling */
0637     );
0638 
0639 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
0640     PyObject *unicode           /* Unicode object */
0641     );
0642 
0643 /* --- Character Map Codecs -----------------------------------------------
0644 
0645    This codec uses mappings to encode and decode characters.
0646 
0647    Decoding mappings must map byte ordinals (integers in the range from 0 to
0648    255) to Unicode strings, integers (which are then interpreted as Unicode
0649    ordinals) or None.  Unmapped data bytes (ones which cause a LookupError)
0650    as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined
0651    mapping" and cause an error.
0652 
0653    Encoding mappings must map Unicode ordinal integers to bytes objects,
0654    integers in the range from 0 to 255 or None.  Unmapped character
0655    ordinals (ones which cause a LookupError) as well as mapped to
0656    None are treated as "undefined mapping" and cause an error.
0657 
0658 */
0659 
0660 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
0661     const char *string,         /* Encoded string */
0662     Py_ssize_t length,          /* size of string */
0663     PyObject *mapping,          /* decoding mapping */
0664     const char *errors          /* error handling */
0665     );
0666 
0667 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
0668     PyObject *unicode,          /* Unicode object */
0669     PyObject *mapping           /* encoding mapping */
0670     );
0671 
0672 /* --- MBCS codecs for Windows -------------------------------------------- */
0673 
0674 #ifdef MS_WINDOWS
0675 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
0676     const char *string,         /* MBCS encoded string */
0677     Py_ssize_t length,          /* size of string */
0678     const char *errors          /* error handling */
0679     );
0680 
0681 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
0682     const char *string,         /* MBCS encoded string */
0683     Py_ssize_t length,          /* size of string */
0684     const char *errors,         /* error handling */
0685     Py_ssize_t *consumed        /* bytes consumed */
0686     );
0687 
0688 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0689 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
0690     int code_page,              /* code page number */
0691     const char *string,         /* encoded string */
0692     Py_ssize_t length,          /* size of string */
0693     const char *errors,         /* error handling */
0694     Py_ssize_t *consumed        /* bytes consumed */
0695     );
0696 #endif
0697 
0698 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
0699     PyObject *unicode           /* Unicode object */
0700     );
0701 
0702 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0703 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
0704     int code_page,              /* code page number */
0705     PyObject *unicode,          /* Unicode object */
0706     const char *errors          /* error handling */
0707     );
0708 #endif
0709 
0710 #endif /* MS_WINDOWS */
0711 
0712 /* --- Locale encoding --------------------------------------------------- */
0713 
0714 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0715 /* Decode a string from the current locale encoding. The decoder is strict if
0716    *surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape'
0717    error handler (PEP 383) to escape undecodable bytes. If a byte sequence can
0718    be decoded as a surrogate character and *surrogateescape* is not equal to
0719    zero, the byte sequence is escaped using the 'surrogateescape' error handler
0720    instead of being decoded. *str* must end with a null character but cannot
0721    contain embedded null characters. */
0722 
0723 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
0724     const char *str,
0725     Py_ssize_t len,
0726     const char *errors);
0727 
0728 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
0729    length using strlen(). */
0730 
0731 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
0732     const char *str,
0733     const char *errors);
0734 
0735 /* Encode a Unicode object to the current locale encoding. The encoder is
0736    strict if *surrogateescape* is equal to zero, otherwise the
0737    "surrogateescape" error handler is used. Return a bytes object. The string
0738    cannot contain embedded null characters. */
0739 
0740 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
0741     PyObject *unicode,
0742     const char *errors
0743     );
0744 #endif
0745 
0746 /* --- File system encoding ---------------------------------------------- */
0747 
0748 /* ParseTuple converter: encode str objects to bytes using
0749    PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
0750 
0751 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
0752 
0753 /* ParseTuple converter: decode bytes objects to unicode using
0754    PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
0755 
0756 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
0757 
0758 /* Decode a null-terminated string from the Python filesystem encoding
0759    and error handler.
0760 
0761    If the string length is known, use PyUnicode_DecodeFSDefaultAndSize(). */
0762 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
0763     const char *s               /* encoded string */
0764     );
0765 
0766 /* Decode a string from the Python filesystem encoding and error handler. */
0767 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
0768     const char *s,               /* encoded string */
0769     Py_ssize_t size              /* size */
0770     );
0771 
0772 /* Encode a Unicode object to the Python filesystem encoding and error handler.
0773    Return bytes. */
0774 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
0775     PyObject *unicode
0776     );
0777 
0778 /* --- Methods & Slots ----------------------------------------------------
0779 
0780    These are capable of handling Unicode objects and strings on input
0781    (we refer to them as strings in the descriptions) and return
0782    Unicode objects or integers as appropriate. */
0783 
0784 /* Concat two strings giving a new Unicode string. */
0785 
0786 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
0787     PyObject *left,             /* Left string */
0788     PyObject *right             /* Right string */
0789     );
0790 
0791 /* Concat two strings and put the result in *pleft
0792    (sets *pleft to NULL on error) */
0793 
0794 PyAPI_FUNC(void) PyUnicode_Append(
0795     PyObject **pleft,           /* Pointer to left string */
0796     PyObject *right             /* Right string */
0797     );
0798 
0799 /* Concatenate two strings, store the result in *pleft and drop the
0800    reference to right (sets *pleft to NULL on error). */
0801 
0802 PyAPI_FUNC(void) PyUnicode_AppendAndDel(
0803     PyObject **pleft,           /* Pointer to left string */
0804     PyObject *right             /* Right string; its reference is dropped */
0805     );
0806 
0807 /* Split a string giving a list of Unicode strings.
0808 
0809    If sep is NULL, splitting will be done at all whitespace
0810    substrings. Otherwise, splits occur at the given separator.
0811 
0812    At most maxsplit splits will be done. If maxsplit is negative, no limit
0813    is set.
0814    Separators are not included in the resulting list.
0815 
0816 */
0817 
0818 PyAPI_FUNC(PyObject*) PyUnicode_Split(
0819     PyObject *s,                /* String to split */
0820     PyObject *sep,              /* String separator, or NULL for whitespace */
0821     Py_ssize_t maxsplit         /* Max. number of splits; negative = no limit */
0822     );
0823 
0824 /* Split a string at line breaks giving a list of Unicode strings.
0825 
0826    CRLF is considered to be one line break. Line breaks are not
0827    included in the resulting strings unless keepends is true. */
0828 
0829 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
0830     PyObject *s,                /* String to split */
0831     int keepends                /* If true, line end markers are included */
0832     );
0833 
0834 /* Partition a string using a given separator. */
0835 
0836 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
0837     PyObject *s,                /* String to partition */
0838     PyObject *sep               /* String separator */
0839     );
0840 
0841 /* Partition a string using a given separator, searching from the end of the
0842    string. */
0843 
0844 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
0845     PyObject *s,                /* String to partition */
0846     PyObject *sep               /* String separator */
0847     );
0848 
0849 /* Split a string giving a list of Unicode strings, working from the end
0850    of the string (the right-to-left counterpart of PyUnicode_Split).
0851 
0852    If sep is NULL, splitting will be done at all whitespace substrings.
0853    Otherwise, splits occur at the given separator.
0854 
0855    At most maxsplit splits will be done, counted from the end of the
0856    string. If maxsplit is negative, no limit is set.
0857 
0858    Separators are not included in the resulting list.
0859 
0860 */
0861 
0862 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
0863     PyObject *s,                /* String to split */
0864     PyObject *sep,              /* String separator, or NULL for whitespace */
0865     Py_ssize_t maxsplit         /* Max. number of splits; negative = no limit */
0866     );
0867 
0868 /* Translate a string by applying a character mapping table to it and
0869    return the resulting Unicode object.
0870 
0871    The mapping table must map Unicode ordinal integers to Unicode strings,
0872    Unicode ordinal integers or None (causing deletion of the character).
0873 
0874    Mapping tables may be dictionaries or sequences. Unmapped character
0875    ordinals (ones which cause a LookupError) are left untouched and
0876    are copied as-is.
0877 
0878 */
0879 
0880 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
0881     PyObject *str,              /* String */
0882     PyObject *table,            /* Translate table */
0883     const char *errors          /* error handling */
0884     );
0885 
0886 /* Join a sequence of strings using the given separator and return
0887    the resulting Unicode string. */
0888 
0889 PyAPI_FUNC(PyObject*) PyUnicode_Join(
0890     PyObject *separator,        /* Separator string */
0891     PyObject *seq               /* Sequence object */
0892     );
0893 
0894 /* Return 1 if substr matches str[start:end] at the given tail end, 0
0895    otherwise. Return -1 if an error occurred. */
0896 
0897 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
0898     PyObject *str,              /* String */
0899     PyObject *substr,           /* Prefix or Suffix string */
0900     Py_ssize_t start,           /* Start index */
0901     Py_ssize_t end,             /* Stop index */
0902     int direction               /* Tail end: -1 prefix, +1 suffix */
0903     );
0904 
0905 /* Return the first position of substr in str[start:end] using the
0906    given search direction or -1 if not found. -2 is returned in case
0907    an error occurred and an exception is set. */
0908 
0909 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
0910     PyObject *str,              /* String */
0911     PyObject *substr,           /* Substring to find */
0912     Py_ssize_t start,           /* Start index */
0913     Py_ssize_t end,             /* Stop index */
0914     int direction               /* Find direction: +1 forward, -1 backward */
0915     );
0916 
0917 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0918 /* Like PyUnicode_Find, but search for a single character only. */
0919 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
0920     PyObject *str,              /* String */
0921     Py_UCS4 ch,                 /* Character to find */
0922     Py_ssize_t start,           /* Start index */
0923     Py_ssize_t end,             /* Stop index */
0924     int direction               /* Find direction: +1 forward, -1 backward */
0925     );
0926 #endif
0927 
0928 /* Count the number of occurrences of substr in str[start:end]. */
0929 
0930 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
0931     PyObject *str,              /* String */
0932     PyObject *substr,           /* Substring to count */
0933     Py_ssize_t start,           /* Start index */
0934     Py_ssize_t end              /* Stop index */
0935     );
0936 
0937 /* Replace at most maxcount occurrences of substr in str with replstr
0938    and return the resulting Unicode object. */
0939 
0940 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
0941     PyObject *str,              /* String */
0942     PyObject *substr,           /* Substring to find */
0943     PyObject *replstr,          /* Replacement string */
0944     Py_ssize_t maxcount         /* Max. number of replacements to apply;
0945                                    -1 = all */
0946     );
0947 
0948 /* Compare two strings and return -1, 0, 1 for less than, equal,
0949    greater than resp.
0950    Raise an exception and return -1 on error. */
0951 
0952 PyAPI_FUNC(int) PyUnicode_Compare(
0953     PyObject *left,             /* Left string */
0954     PyObject *right             /* Right string */
0955     );
0956 
0957 /* Compare a Unicode object with a C string and return -1, 0, 1 for less
0958    than, equal, and greater than, respectively.  It is best to pass only
0959    ASCII-encoded strings, but the function interprets the input string as
0960    ISO-8859-1 if it contains non-ASCII characters.
0961    This function does not raise exceptions. */
0962 
0963 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
0964     PyObject *left,             /* Unicode object */
0965     const char *right           /* C string, ideally ASCII-encoded */
0966     );
0967 
0968 /* Rich compare two strings and return one of the following:
0969 
0970    - NULL in case an exception was raised
0971    - Py_True or Py_False for successful comparisons
0972    - Py_NotImplemented in case the type combination is unknown
0973 
0974    Possible values for op:
0975 
0976      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
0977 
0978 */
0979 
0980 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
0981     PyObject *left,             /* Left string */
0982     PyObject *right,            /* Right string */
0983     int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
0984     );
0985 
0986 /* Apply an argument tuple or dictionary to a format string and return
0987    the resulting Unicode string. */
0988 
0989 PyAPI_FUNC(PyObject *) PyUnicode_Format(
0990     PyObject *format,           /* Format string */
0991     PyObject *args              /* Argument tuple or dictionary */
0992     );
0993 
0994 /* Check whether element is contained in container and return 1/0
0995    accordingly.
0996 
0997    element has to coerce to a one element Unicode string. -1 is
0998    returned in case of an error. */
0999 
1000 PyAPI_FUNC(int) PyUnicode_Contains(
1001     PyObject *container,        /* Container string */
1002     PyObject *element           /* Element string */
1003     );
1004 
1005 /* Check whether s is a valid identifier: return 1 if it is, 0 otherwise. */
1006 
1007 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
1008 
1009 /* === Characters Type APIs =============================================== */
1010 
1011 #ifndef Py_LIMITED_API
1012 #  define Py_CPYTHON_UNICODEOBJECT_H
1013 #  include "cpython/unicodeobject.h"
1014 #  undef Py_CPYTHON_UNICODEOBJECT_H
1015 #endif
1016 
1017 #ifdef __cplusplus
1018 }
1019 #endif
1020 #endif /* !Py_UNICODEOBJECT_H */