Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-11-19 09:50:55

0001 #ifndef Py_UNICODEOBJECT_H
0002 #define Py_UNICODEOBJECT_H
0003 
0004 /*
0005 
0006 Unicode implementation based on original code by Fredrik Lundh,
0007 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
0008 Unicode Integration Proposal. (See
0009 http://www.egenix.com/files/python/unicode-proposal.txt).
0010 
0011 Copyright (c) Corporation for National Research Initiatives.
0012 
0013 
0014  Original header:
0015  --------------------------------------------------------------------
0016 
0017  * Yet another Unicode string type for Python.  This type supports the
0018  * 16-bit Basic Multilingual Plane (BMP) only.
0019  *
0020  * Written by Fredrik Lundh, January 1999.
0021  *
0022  * Copyright (c) 1999 by Secret Labs AB.
0023  * Copyright (c) 1999 by Fredrik Lundh.
0024  *
0025  * fredrik@pythonware.com
0026  * http://www.pythonware.com
0027  *
0028  * --------------------------------------------------------------------
0029  * This Unicode String Type is
0030  *
0031  * Copyright (c) 1999 by Secret Labs AB
0032  * Copyright (c) 1999 by Fredrik Lundh
0033  *
0034  * By obtaining, using, and/or copying this software and/or its
0035  * associated documentation, you agree that you have read, understood,
0036  * and will comply with the following terms and conditions:
0037  *
0038  * Permission to use, copy, modify, and distribute this software and its
0039  * associated documentation for any purpose and without fee is hereby
0040  * granted, provided that the above copyright notice appears in all
0041  * copies, and that both that copyright notice and this permission notice
0042  * appear in supporting documentation, and that the name of Secret Labs
0043  * AB or the author not be used in advertising or publicity pertaining to
0044  * distribution of the software without specific, written prior
0045  * permission.
0046  *
0047  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
0048  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
0049  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
0050  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
0051  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
0052  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
0053  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
0054  * -------------------------------------------------------------------- */
0055 
0056 /* === Internal API ======================================================= */
0057 
0058 /* --- Internal Unicode Format -------------------------------------------- */
0059 
0060 /* Python 3.x requires unicode */
0061 #define Py_USING_UNICODE
0062 
0063 #ifndef SIZEOF_WCHAR_T
0064 #error Must define SIZEOF_WCHAR_T
0065 #endif
0066 
0067 #define Py_UNICODE_SIZE SIZEOF_WCHAR_T
0068 
0069 /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
0070    Otherwise, Unicode strings are stored as UCS-2 (with limited support
0071    for UTF-16) */
0072 
0073 #if Py_UNICODE_SIZE >= 4
0074 #define Py_UNICODE_WIDE
0075 #endif
0076 
0077 /* Set these flags if the platform has "wchar.h" and the
0078    wchar_t type is a 16-bit unsigned type */
0079 /* #define HAVE_WCHAR_H */
0080 /* #define HAVE_USABLE_WCHAR_T */
0081 
0082 /* If the compiler provides a wchar_t type we try to support it
0083    through the interface functions PyUnicode_FromWideChar(),
0084    PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
0085 
0086 #ifdef HAVE_USABLE_WCHAR_T
0087 # ifndef HAVE_WCHAR_H
0088 #  define HAVE_WCHAR_H
0089 # endif
0090 #endif
0091 
0092 /* Py_UCS4, Py_UCS2 and Py_UCS1 are typedefs for the respective
0093    unicode representations. */
0094 typedef uint32_t Py_UCS4;   /* 32-bit unit */
0095 typedef uint16_t Py_UCS2;   /* 16-bit unit */
0096 typedef uint8_t Py_UCS1;    /* 8-bit unit */
0097 
0098 #ifdef __cplusplus
0099 extern "C" {
0100 #endif
0101 
0102 
0103 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
0104 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
0105 
0106 #define PyUnicode_Check(op) \
0107     PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) /* checks the UNICODE_SUBCLASS type flag of op's type */
0108 #define PyUnicode_CheckExact(op) Py_IS_TYPE((op), &PyUnicode_Type) /* checks op's type against PyUnicode_Type itself */
0109 
0110 /* --- Constants ---------------------------------------------------------- */
0111 
0112 /* This Unicode character will be used as replacement character during
0113    decoding if the errors argument is set to "replace". Note: the
0114    Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
0115    Unicode 3.0. */
0116 
0117 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
0118 
0119 /* === Public API ========================================================= */
0120 
0121 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
0122 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
0123     const char *u,             /* UTF-8 encoded string */
0124     Py_ssize_t size            /* size of buffer */
0125     );
0126 
0127 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
0128    UTF-8 encoded bytes.  The size is determined with strlen(). */
0129 PyAPI_FUNC(PyObject*) PyUnicode_FromString(
0130     const char *u              /* UTF-8 encoded string */
0131     );
0132 
0133 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0134 PyAPI_FUNC(PyObject*) PyUnicode_Substring(
0135     PyObject *str,
0136     Py_ssize_t start,
0137     Py_ssize_t end);
0138 #endif
0139 
0140 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0141 /* Copy the string into a UCS4 buffer including the null character if copy_null
0142    is set. Return NULL and raise an exception on error. Raise a SystemError if
0143    the buffer is smaller than the string. Return buffer on success.
0144 
0145    buflen is the length of the buffer in (Py_UCS4) characters. */
0146 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
0147     PyObject *unicode,          /* Unicode object */
0148     Py_UCS4* buffer,            /* destination buffer */
0149     Py_ssize_t buflen,          /* buffer length in Py_UCS4 characters */
0150     int copy_null);             /* if set, also copy the null character */
0151 
0152 /* Copy the string into a UCS4 buffer. A new buffer is allocated using
0153    PyMem_Malloc; if this fails, NULL is returned with a memory error
0154    exception set. */
0155 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
0156 #endif
0157 
0158 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0159 /* Get the length of the Unicode object. */
0160 
0161 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
0162     PyObject *unicode
0163 );
0164 #endif
0165 
0166 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0167 /* Read a character from the string. */
0168 
0169 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
0170     PyObject *unicode,
0171     Py_ssize_t index
0172     );
0173 
0174 /* Write a character to the string. The string must have been created through
0175    PyUnicode_New, must not be shared, and must not have been hashed yet.
0176 
0177    Return 0 on success, -1 on error. */
0178 
0179 PyAPI_FUNC(int) PyUnicode_WriteChar(
0180     PyObject *unicode,
0181     Py_ssize_t index,
0182     Py_UCS4 character
0183     );
0184 #endif
0185 
0186 /* Resize a Unicode object. The length is the number of codepoints.
0187 
0188    *unicode is modified to point to the new (resized) object and 0
0189    returned on success.
0190 
0191    Try to resize the string in place (which is usually faster than allocating
0192    a new string and copying characters), or create a new string.
0193 
0194    Error handling is implemented as follows: an exception is set, -1
0195    is returned and *unicode left untouched.
0196 
0197    WARNING: The function doesn't check string content, the result may not be a
0198             string in canonical representation. */
0199 
0200 PyAPI_FUNC(int) PyUnicode_Resize(
0201     PyObject **unicode,         /* Pointer to the Unicode object */
0202     Py_ssize_t length           /* New length */
0203     );
0204 
0205 /* Decode obj to a Unicode object.
0206 
0207    bytes, bytearray and other bytes-like objects are decoded according to the
0208    given encoding and error handler. The encoding and error handler can be
0209    NULL to have the interface use UTF-8 and "strict".
0210 
0211    All other objects (including Unicode objects) raise an exception.
0212 
0213    The API returns NULL in case of an error. The caller is responsible
0214    for decref'ing the returned objects.
0215 
0216 */
0217 
0218 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
0219     PyObject *obj,              /* Object */
0220     const char *encoding,       /* encoding */
0221     const char *errors          /* error handling */
0222     );
0223 
0224 /* Copy an instance of a Unicode subtype to a new true Unicode object if
0225    necessary. If obj is already a true Unicode object (not a subtype), return
0226    the reference with *incremented* refcount.
0227 
0228    The API returns NULL in case of an error. The caller is responsible
0229    for decref'ing the returned objects.
0230 
0231 */
0232 
0233 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
0234     PyObject *obj      /* Object */
0235     );
0236 
0237 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
0238     const char *format,   /* ASCII-encoded string  */
0239     va_list vargs
0240     );
0241 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
0242     const char *format,   /* ASCII-encoded string  */
0243     ...
0244     );
0245 
0246 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
0247 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
0248     const char *u              /* UTF-8 encoded string */
0249     );
0250 
0251 /* --- wchar_t support for platforms which support it --------------------- */
0252 
0253 #ifdef HAVE_WCHAR_H
0254 
0255 /* Create a Unicode Object from the wchar_t buffer w of the given
0256    size.
0257 
0258    The buffer is copied into the new object. */
0259 
0260 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
0261     const wchar_t *w,           /* wchar_t buffer */
0262     Py_ssize_t size             /* size of buffer */
0263     );
0264 
0265 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
0266    most size wchar_t characters are copied.
0267 
0268    Note that the resulting wchar_t string may or may not be
0269    0-terminated.  It is the responsibility of the caller to make sure
0270    that the wchar_t string is 0-terminated in case this is required by
0271    the application.
0272 
0273    Returns the number of wchar_t characters copied (excluding a
0274    possibly trailing 0-termination character) or -1 in case of an
0275    error. */
0276 
0277 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
0278     PyObject *unicode,          /* Unicode object */
0279     wchar_t *w,                 /* wchar_t buffer */
0280     Py_ssize_t size             /* size of buffer */
0281     );
0282 
0283 /* Convert the Unicode object to a wide character string. The output string
0284    always ends with a nul character. If size is not NULL, write the number of
0285    wide characters (excluding the null character) into *size.
0286 
0287    Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
0288    on success. On error, returns NULL, *size is undefined and raises a
0289    MemoryError. */
0290 
0291 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
0292     PyObject *unicode,          /* Unicode object */
0293     Py_ssize_t *size            /* number of characters of the result */
0294     );
0295 
0296 #endif
0297 
0298 /* --- Unicode ordinals --------------------------------------------------- */
0299 
0300 /* Create a Unicode Object from the given Unicode code point ordinal.
0301 
0302    The ordinal must be in range(0x110000). A ValueError is
0303    raised in case it is not.
0304 
0305 */
0306 
0307 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
0308 
0309 /* === Builtin Codecs =====================================================
0310 
0311    Many of these APIs take two arguments encoding and errors. These
0312    parameters encoding and errors have the same semantics as the ones
0313    of the builtin str() API.
0314 
0315    Setting encoding to NULL causes the default encoding (UTF-8) to be used.
0316 
0317    Error handling is set by errors which may also be set to NULL
0318    meaning to use the default handling defined for the codec. Default
0319    error handling for all builtin codecs is "strict" (ValueErrors are
0320    raised).
0321 
0322    The codecs all use a similar interface. Only deviations from the
0323    generic ones are documented.
0324 
0325 */
0326 
0327 /* --- Manage the default encoding ---------------------------------------- */
0328 
0329 /* Returns "utf-8".  */
0330 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
0331 
0332 /* --- Generic Codecs ----------------------------------------------------- */
0333 
0334 /* Create a Unicode object by decoding the encoded string s of the
0335    given size. */
0336 
0337 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
0338     const char *s,              /* encoded string */
0339     Py_ssize_t size,            /* size of buffer */
0340     const char *encoding,       /* encoding */
0341     const char *errors          /* error handling */
0342     );
0343 
0344 /* Decode a Unicode object unicode and return the result as Python
0345    object.
0346 
0347    This API is DEPRECATED. The only supported standard encoding is rot13.
0348    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
0349    that decode from str. */
0350 
0351 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
0352     PyObject *unicode,          /* Unicode object */
0353     const char *encoding,       /* encoding */
0354     const char *errors          /* error handling */
0355     );
0356 
0357 /* Decode a Unicode object unicode and return the result as Unicode
0358    object.
0359 
0360    This API is DEPRECATED. The only supported standard encoding is rot13.
0361    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
0362    that decode from str to str. */
0363 
0364 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
0365     PyObject *unicode,          /* Unicode object */
0366     const char *encoding,       /* encoding */
0367     const char *errors          /* error handling */
0368     );
0369 
0370 /* Encodes a Unicode object and returns the result as Python
0371    object.
0372 
0373    This API is DEPRECATED.  It is superseded by PyUnicode_AsEncodedString()
0374    since all standard encodings (except rot13) encode str to bytes.
0375    Use PyCodec_Encode() for encoding with rot13 and non-standard codecs
0376    that encode from str to non-bytes. */
0377 
0378 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
0379     PyObject *unicode,          /* Unicode object */
0380     const char *encoding,       /* encoding */
0381     const char *errors          /* error handling */
0382     );
0383 
0384 /* Encodes a Unicode object and returns the result as Python bytes
0385    object. */
0386 
0387 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
0388     PyObject *unicode,          /* Unicode object */
0389     const char *encoding,       /* encoding */
0390     const char *errors          /* error handling */
0391     );
0392 
0393 /* Encodes a Unicode object and returns the result as Unicode
0394    object.
0395 
0396    This API is DEPRECATED.  The only supported standard encoding is rot13.
0397    Use PyCodec_Encode() to encode with rot13 and non-standard codecs
0398    that encode from str to str. */
0399 
0400 Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
0401     PyObject *unicode,          /* Unicode object */
0402     const char *encoding,       /* encoding */
0403     const char *errors          /* error handling */
0404     );
0405 
0406 /* Build an encoding map. */
0407 
0408 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
0409     PyObject* string            /* 256 character map */
0410    );
0411 
0412 /* --- UTF-7 Codecs ------------------------------------------------------- */
0413 
0414 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
0415     const char *string,         /* UTF-7 encoded string */
0416     Py_ssize_t length,          /* size of string */
0417     const char *errors          /* error handling */
0418     );
0419 
0420 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
0421     const char *string,         /* UTF-7 encoded string */
0422     Py_ssize_t length,          /* size of string */
0423     const char *errors,         /* error handling */
0424     Py_ssize_t *consumed        /* bytes consumed */
0425     );
0426 
0427 /* --- UTF-8 Codecs ------------------------------------------------------- */
0428 
0429 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
0430     const char *string,         /* UTF-8 encoded string */
0431     Py_ssize_t length,          /* size of string */
0432     const char *errors          /* error handling */
0433     );
0434 
0435 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
0436     const char *string,         /* UTF-8 encoded string */
0437     Py_ssize_t length,          /* size of string */
0438     const char *errors,         /* error handling */
0439     Py_ssize_t *consumed        /* bytes consumed */
0440     );
0441 
0442 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
0443     PyObject *unicode           /* Unicode object */
0444     );
0445 
0446 /* Returns a pointer to the default encoding (UTF-8) of the
0447    Unicode object unicode and stores the size of the encoded
0448    representation, in bytes, in *size.
0449 
0450    In case of an error, *size is not set.
0451 
0452    This function caches the UTF-8 encoded string in the unicodeobject
0453    and subsequent calls will return the same string.  The memory is released
0454    when the unicodeobject is deallocated.
0455 */
0456 
0457 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000
0458 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
0459     PyObject *unicode,          /* Unicode object */
0460     Py_ssize_t *size);          /* receives the encoded size in bytes */
0461 #endif
0462 
0463 /* --- UTF-32 Codecs ------------------------------------------------------ */
0464 
0465 /* Decodes length bytes from a UTF-32 encoded buffer string and returns
0466    the corresponding Unicode object.
0467 
0468    errors (if non-NULL) defines the error handling. It defaults
0469    to "strict".
0470 
0471    If byteorder is non-NULL, the decoder starts decoding using the
0472    given byte order:
0473 
0474     *byteorder == -1: little endian
0475     *byteorder == 0:  native order
0476     *byteorder == 1:  big endian
0477 
0478    In native mode, the first four bytes of the stream are checked for a
0479    BOM mark. If found, the BOM mark is analysed, the byte order
0480    adjusted and the BOM skipped.  In the other modes, no BOM mark
0481    interpretation is done. After completion, *byteorder is set to the
0482    current byte order at the end of input data.
0483 
0484    If byteorder is NULL, the codec starts in native order mode.
0485 
0486 */
0487 
0488 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
0489     const char *string,         /* UTF-32 encoded string */
0490     Py_ssize_t length,          /* size of string */
0491     const char *errors,         /* error handling */
0492     int *byteorder              /* pointer to byteorder to use
0493                                    0=native;-1=LE,1=BE; updated on
0494                                    exit */
0495     );
0496 
0497 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
0498     const char *string,         /* UTF-32 encoded string */
0499     Py_ssize_t length,          /* size of string */
0500     const char *errors,         /* error handling */
0501     int *byteorder,             /* pointer to byteorder to use
0502                                    0=native;-1=LE,1=BE; updated on
0503                                    exit */
0504     Py_ssize_t *consumed        /* bytes consumed */
0505     );
0506 
0507 /* Returns a Python string using the UTF-32 encoding in native byte
0508    order. The string always starts with a BOM mark.  */
0509 
0510 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
0511     PyObject *unicode           /* Unicode object */
0512     );
0513 
0514 /* Returns a Python string object holding the UTF-32 encoded value of
0515    the Unicode data.
0516 
0517    If byteorder is not 0, output is written according to the following
0518    byte order:
0519 
0520    byteorder == -1: little endian
0521    byteorder == 0:  native byte order (writes a BOM mark)
0522    byteorder == 1:  big endian
0523 
0524    If byteorder is 0, the output string will always start with the
0525    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
0526    prepended.
0527 
0528 */
0529 
0530 /* --- UTF-16 Codecs ------------------------------------------------------ */
0531 
0532 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
0533    the corresponding Unicode object.
0534 
0535    errors (if non-NULL) defines the error handling. It defaults
0536    to "strict".
0537 
0538    If byteorder is non-NULL, the decoder starts decoding using the
0539    given byte order:
0540 
0541     *byteorder == -1: little endian
0542     *byteorder == 0:  native order
0543     *byteorder == 1:  big endian
0544 
0545    In native mode, the first two bytes of the stream are checked for a
0546    BOM mark. If found, the BOM mark is analysed, the byte order
0547    adjusted and the BOM skipped.  In the other modes, no BOM mark
0548    interpretation is done. After completion, *byteorder is set to the
0549    current byte order at the end of input data.
0550 
0551    If byteorder is NULL, the codec starts in native order mode.
0552 
0553 */
0554 
0555 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
0556     const char *string,         /* UTF-16 encoded string */
0557     Py_ssize_t length,          /* size of string */
0558     const char *errors,         /* error handling */
0559     int *byteorder              /* pointer to byteorder to use
0560                                    0=native;-1=LE,1=BE; updated on
0561                                    exit */
0562     );
0563 
0564 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
0565     const char *string,         /* UTF-16 encoded string */
0566     Py_ssize_t length,          /* size of string */
0567     const char *errors,         /* error handling */
0568     int *byteorder,             /* pointer to byteorder to use
0569                                    0=native;-1=LE,1=BE; updated on
0570                                    exit */
0571     Py_ssize_t *consumed        /* bytes consumed */
0572     );
0573 
0574 /* Returns a Python string using the UTF-16 encoding in native byte
0575    order. The string always starts with a BOM mark.  */
0576 
0577 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
0578     PyObject *unicode           /* Unicode object */
0579     );
0580 
0581 /* --- Unicode-Escape Codecs ---------------------------------------------- */
0582 
0583 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
0584     const char *string,         /* Unicode-Escape encoded string */
0585     Py_ssize_t length,          /* size of string */
0586     const char *errors          /* error handling */
0587     );
0588 
0589 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
0590     PyObject *unicode           /* Unicode object */
0591     );
0592 
0593 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
0594 
0595 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
0596     const char *string,         /* Raw-Unicode-Escape encoded string */
0597     Py_ssize_t length,          /* size of string */
0598     const char *errors          /* error handling */
0599     );
0600 
0601 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
0602     PyObject *unicode           /* Unicode object */
0603     );
0604 
0605 /* --- Latin-1 Codecs -----------------------------------------------------
0606 
0607    Note: Latin-1 corresponds to the first 256 Unicode ordinals. */
0608 
0609 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
0610     const char *string,         /* Latin-1 encoded string */
0611     Py_ssize_t length,          /* size of string */
0612     const char *errors          /* error handling */
0613     );
0614 
0615 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
0616     PyObject *unicode           /* Unicode object */
0617     );
0618 
0619 /* --- ASCII Codecs -------------------------------------------------------
0620 
0621    Only 7-bit ASCII data is expected. All other codes generate errors.
0622 
0623 */
0624 
0625 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
0626     const char *string,         /* ASCII encoded string */
0627     Py_ssize_t length,          /* size of string */
0628     const char *errors          /* error handling */
0629     );
0630 
0631 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
0632     PyObject *unicode           /* Unicode object */
0633     );
0634 
0635 /* --- Character Map Codecs -----------------------------------------------
0636 
0637    This codec uses mappings to encode and decode characters.
0638 
0639    Decoding mappings must map byte ordinals (integers in the range from 0 to
0640    255) to Unicode strings, integers (which are then interpreted as Unicode
0641    ordinals) or None.  Unmapped data bytes (ones which cause a LookupError)
0642    as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined
0643    mapping" and cause an error.
0644 
0645    Encoding mappings must map Unicode ordinal integers to bytes objects,
0646    integers in the range from 0 to 255 or None.  Unmapped character
0647    ordinals (ones which cause a LookupError) as well as mapped to
0648    None are treated as "undefined mapping" and cause an error.
0649 
0650 */
0651 
0652 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
0653     const char *string,         /* Encoded string */
0654     Py_ssize_t length,          /* size of string */
0655     PyObject *mapping,          /* decoding mapping */
0656     const char *errors          /* error handling */
0657     );
0658 
0659 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
0660     PyObject *unicode,          /* Unicode object */
0661     PyObject *mapping           /* encoding mapping */
0662     );
0663 
0664 /* --- MBCS codecs for Windows -------------------------------------------- */
0665 
0666 #ifdef MS_WINDOWS
0667 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
0668     const char *string,         /* MBCS encoded string */
0669     Py_ssize_t length,          /* size of string */
0670     const char *errors          /* error handling */
0671     );
0672 
0673 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
0674     const char *string,         /* MBCS encoded string */
0675     Py_ssize_t length,          /* size of string */
0676     const char *errors,         /* error handling */
0677     Py_ssize_t *consumed        /* bytes consumed */
0678     );
0679 
0680 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0681 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
0682     int code_page,              /* code page number */
0683     const char *string,         /* encoded string */
0684     Py_ssize_t length,          /* size of string */
0685     const char *errors,         /* error handling */
0686     Py_ssize_t *consumed        /* bytes consumed */
0687     );
0688 #endif
0689 
0690 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
0691     PyObject *unicode           /* Unicode object */
0692     );
0693 
0694 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0695 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
0696     int code_page,              /* code page number */
0697     PyObject *unicode,          /* Unicode object */
0698     const char *errors          /* error handling */
0699     );
0700 #endif
0701 
0702 #endif /* MS_WINDOWS */
0703 
0704 /* --- Locale encoding --------------------------------------------------- */
0705 
0706 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0707 /* Decode a string from the current locale encoding. NOTE(review): the text
0708    below describes an integer *surrogateescape* flag, but the declared
0709    parameter is the string *errors* -- presumably a handler name such as
0710    "strict" or "surrogateescape"; confirm against the C API docs. Strict if
0711    the flag is zero; otherwise undecodable bytes, and byte sequences that can
0712    be decoded as a surrogate character, are escaped with 'surrogateescape'
0713    (PEP 383). *str* must end with a null character, with no embedded nulls. */
0714 
0715 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
0716     const char *str,            /* locale-encoded string */
0717     Py_ssize_t len,             /* length of str */
0718     const char *errors);        /* error handling */
0719 
0720 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
0721    length using strlen(). */
0722 
0723 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
0724     const char *str,
0725     const char *errors);
0726 
0727 /* Encode a Unicode object to the current locale encoding. The encoder is
0728    strict if *surrogateescape* is equal to zero, otherwise the "surrogateescape"
0729    error handler is used. Return a bytes object. The string cannot contain
0730    embedded null characters. NOTE(review): *surrogateescape* is presumably *errors*. */
0731 
0732 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
0733     PyObject *unicode,          /* Unicode object */
0734     const char *errors          /* error handling */
0735     );
0736 #endif
0737 
0738 /* --- File system encoding ---------------------------------------------- */
0739 
0740 /* ParseTuple converter: encode str objects to bytes using
0741    PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
0742 
0743 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
0744 
0745 /* ParseTuple converter: decode bytes objects to unicode using
0746    PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
0747 
0748 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
0749 
0750 /* Decode a null-terminated string from the Python filesystem encoding
0751    and error handler.
0752 
0753    If the string length is known, use PyUnicode_DecodeFSDefaultAndSize(). */
0754 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
0755     const char *s               /* encoded string */
0756     );
0757 
0758 /* Decode a string from the Python filesystem encoding and error handler. */
0759 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
0760     const char *s,               /* encoded string */
0761     Py_ssize_t size              /* size */
0762     );
0763 
0764 /* Encode a Unicode object to the Python filesystem encoding and error handler.
0765    Return bytes. */
0766 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
0767     PyObject *unicode
0768     );
0769 
0770 /* --- Methods & Slots ----------------------------------------------------
0771 
0772    These are capable of handling Unicode objects and strings on input
0773    (we refer to them as strings in the descriptions) and return
0774    Unicode objects or integers as appropriate. */
0775 
0776 /* Concat two strings giving a new Unicode string. */
0777 
0778 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
0779     PyObject *left,             /* Left string */
0780     PyObject *right             /* Right string */
0781     );
0782 
0783 /* Concat two strings and put the result in *pleft
0784    (sets *pleft to NULL on error) */
0785 
0786 PyAPI_FUNC(void) PyUnicode_Append(
0787     PyObject **pleft,           /* Pointer to left string */
0788     PyObject *right             /* Right string */
0789     );
0790 
0791 /* Concat two strings, put the result in *pleft and drop the right object
0792    (sets *pleft to NULL on error) */
0793 
0794 PyAPI_FUNC(void) PyUnicode_AppendAndDel(
0795     PyObject **pleft,           /* Pointer to left string */
0796     PyObject *right             /* Right string */
0797     );
0798 
0799 /* Split a string giving a list of Unicode strings.
0800 
0801    If sep is NULL, splitting will be done at all whitespace
0802    substrings. Otherwise, splits occur at the given separator.
0803 
0804    At most maxsplit splits will be done. If negative, no limit is set.
0805 
0806    Separators are not included in the resulting list.
0807 
0808 */
0809 
0810 PyAPI_FUNC(PyObject*) PyUnicode_Split(
0811     PyObject *s,                /* String to split */
0812     PyObject *sep,              /* String separator */
0813     Py_ssize_t maxsplit         /* Maxsplit count */
0814     );
0815 
0816 /* Ditto, but split at line breaks.
0817 
0818    CRLF is considered to be one line break. Line breaks are not
0819    included in the resulting list unless keepends is true. */
0820 
0821 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
0822     PyObject *s,                /* String to split */
0823     int keepends                /* If true, line end markers are included */
0824     );
0825 
0826 /* Partition a string using a given separator. */
0827 
0828 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
0829     PyObject *s,                /* String to partition */
0830     PyObject *sep               /* String separator */
0831     );
0832 
0833 /* Partition a string using a given separator, searching from the end of the
0834    string. */
0835 
0836 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
0837     PyObject *s,                /* String to partition */
0838     PyObject *sep               /* String separator */
0839     );
0840 
0841 /* Split a string giving a list of Unicode strings.
0842 
0843    If sep is NULL, splitting will be done at all whitespace
0844    substrings. Otherwise, splits occur at the given separator.
0845 
0846    At most maxsplit splits will be done; if maxsplit is negative, no
0847    limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from
0848    the end of the string.
0849 
0850    Separators are not included in the resulting list.
0851 
0852 */
0853 
0854 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
0855     PyObject *s,                /* String to split */
0856     PyObject *sep,              /* String separator */
0857     Py_ssize_t maxsplit         /* Maxsplit count */
0858     );
0859 
0860 /* Translate a string by applying a character mapping table to it and
0861    return the resulting Unicode object.
0862 
0863    The mapping table must map Unicode ordinal integers to Unicode strings,
0864    Unicode ordinal integers or None (causing deletion of the character).
0865 
0866    Mapping tables may be dictionaries or sequences. Unmapped character
0867    ordinals (ones which cause a LookupError) are left untouched and
0868    are copied as-is.
0869 
0870 */
0871 
0872 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
0873     PyObject *str,              /* String */
0874     PyObject *table,            /* Translate table */
0875     const char *errors          /* error handling */
0876     );
0877 
0878 /* Join a sequence of strings using the given separator and return
0879    the resulting Unicode string. */
0880 
0881 PyAPI_FUNC(PyObject*) PyUnicode_Join(
0882     PyObject *separator,        /* Separator string */
0883     PyObject *seq               /* Sequence object */
0884     );
0885 
0886 /* Return 1 if substr matches str[start:end] at the given tail end, 0
0887    otherwise. Return -1 if an error occurred. */
0888 
0889 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
0890     PyObject *str,              /* String */
0891     PyObject *substr,           /* Prefix or Suffix string */
0892     Py_ssize_t start,           /* Start index */
0893     Py_ssize_t end,             /* Stop index */
0894     int direction               /* Tail end: -1 prefix, +1 suffix */
0895     );
0896 
0897 /* Return the first position of substr in str[start:end] using the
0898    given search direction or -1 if not found. -2 is returned in case
0899    an error occurred and an exception is set. */
0900 
0901 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
0902     PyObject *str,              /* String */
0903     PyObject *substr,           /* Substring to find */
0904     Py_ssize_t start,           /* Start index */
0905     Py_ssize_t end,             /* Stop index */
0906     int direction               /* Find direction: +1 forward, -1 backward */
0907     );
0908 
0909 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
0910 /* Like PyUnicode_Find, but search for a single character only. */
0911 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
0912     PyObject *str,              /* String */
0913     Py_UCS4 ch,                 /* Character to find */
0914     Py_ssize_t start,           /* Start index */
0915     Py_ssize_t end,             /* Stop index */
0916     int direction               /* Find direction: +1 forward, -1 backward */
0917     );
0918 #endif
0919 
0920 /* Count the number of occurrences of substr in str[start:end]. */
0921 
0922 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
0923     PyObject *str,              /* String */
0924     PyObject *substr,           /* Substring to count */
0925     Py_ssize_t start,           /* Start index */
0926     Py_ssize_t end              /* Stop index */
0927     );
0928 
0929 /* Replace at most maxcount occurrences of substr in str with replstr
0930    and return the resulting Unicode object. */
0931 
0932 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
0933     PyObject *str,              /* String */
0934     PyObject *substr,           /* Substring to find */
0935     PyObject *replstr,          /* Substring to replace */
0936     Py_ssize_t maxcount         /* Max. number of replacements to apply;
0937                                    -1 = all */
0938     );
0939 
0940 /* Compare two strings and return -1, 0, 1 for less than, equal,
0941    greater than resp.
0942    Raise an exception and return -1 on error. */
0943 
0944 PyAPI_FUNC(int) PyUnicode_Compare(
0945     PyObject *left,             /* Left string */
0946     PyObject *right             /* Right string */
0947     );
0948 
0949 /* Compare a Unicode object with a C string and return -1, 0, 1 for less than,
0950    equal, and greater than, respectively.  It is best to pass only
0951    ASCII-encoded strings, but the function interprets the input string as
0952    ISO-8859-1 if it contains non-ASCII characters.
0953    This function does not raise exceptions. */
0954 
0955 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
0956     PyObject *left,             /* Unicode object */
0957     const char *right           /* ASCII-encoded string */
0958     );
0959 
0960 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030D0000
0961 /* Compare a Unicode object with a UTF-8 encoded C string.
0962    Return 1 if they are equal, or 0 otherwise.
0963    These functions do not raise exceptions. */
0964 
0965 PyAPI_FUNC(int) PyUnicode_EqualToUTF8(PyObject *, const char *);
0966 PyAPI_FUNC(int) PyUnicode_EqualToUTF8AndSize(PyObject *, const char *, Py_ssize_t);
0967 #endif
0968 
0969 /* Rich compare two strings and return one of the following:
0970 
0971    - NULL in case an exception was raised
0972    - Py_True or Py_False for successful comparisons
0973    - Py_NotImplemented in case the type combination is unknown
0974 
0975    Possible values for op:
0976 
0977      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
0978 
0979 */
0980 
0981 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
0982     PyObject *left,             /* Left string */
0983     PyObject *right,            /* Right string */
0984     int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
0985     );
0986 
0987 /* Apply an argument tuple or dictionary to a format string and return
0988    the resulting Unicode string. */
0989 
0990 PyAPI_FUNC(PyObject *) PyUnicode_Format(
0991     PyObject *format,           /* Format string */
0992     PyObject *args              /* Argument tuple or dictionary */
0993     );
0994 
0995 /* Check whether element is contained in container and return 1/0
0996    accordingly.
0997 
0998    element has to coerce to a one element Unicode string. -1 is
0999    returned in case of an error. */
1000 
1001 PyAPI_FUNC(int) PyUnicode_Contains(
1002     PyObject *container,        /* Container string */
1003     PyObject *element           /* Element string */
1004     );
1005 
1006 /* Checks whether argument is a valid identifier. */
1007 
1008 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
1009 
1010 /* === Characters Type APIs =============================================== */
1011 
1012 #ifndef Py_LIMITED_API
1013 #  define Py_CPYTHON_UNICODEOBJECT_H
1014 #  include "cpython/unicodeobject.h"
1015 #  undef Py_CPYTHON_UNICODEOBJECT_H
1016 #endif
1017 
1018 #ifdef __cplusplus
1019 }
1020 #endif
1021 #endif /* !Py_UNICODEOBJECT_H */