Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:13:10

0001 // © 2016 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 /*
0004 **********************************************************************
0005 *   Copyright (C) 1999-2009, International Business Machines
0006 *   Corporation and others.  All Rights Reserved.
0007 **********************************************************************
0008  *
0009  *
0010  *   ucnv_err.h:
0011  */
0012 
0013 /**
0014  * \file
0015  * \brief C API: UConverter predefined error callbacks
0016  *
0017  *  <h2>Error Behaviour Functions</h2>
0018  *  Defines some error behaviour functions called by ucnv_{from,to}Unicode.
0019  *  These are provided as part of ICU and many are stable, but they
0020  *  can also be considered only as an example of what can be done with
0021  *  callbacks.  You may of course write your own.
0022  *
0023  *  If you want to write your own, you may also find the functions from
0024  *  ucnv_cb.h useful when writing your own callbacks.
0025  *
0026  *  These functions, although public, should NEVER be called directly.
0027  *  They should be used as parameters to the ucnv_setFromUCallBack
0028  *  and ucnv_setToUCallBack functions, to set the behaviour of a converter
0029  *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
0030  *
0031  *  usage example:  'STOP' doesn't need any context, but newContext
0032  *    could be set to something other than 'NULL' if needed. The available
0033  *    contexts in this header can modify the default behavior of the callback.
0034  *
0035  *  \code
0036  *  UErrorCode err = U_ZERO_ERROR;
0037  *  UConverter *myConverter = ucnv_open("ibm-949", &err);
0038  *  const void *oldContext;
0039  *  UConverterFromUCallback oldAction;
0040  *
0041  *
0042  *  if (U_SUCCESS(err))
0043  *  {
0044  *      ucnv_setFromUCallBack(myConverter,
0045  *                       UCNV_FROM_U_CALLBACK_STOP,
0046  *                       NULL,
0047  *                       &oldAction,
0048  *                       &oldContext,
0049  *                       &err);
0050  *  }
0051  *  \endcode
0052  *
0053  *  The code above tells "myConverter" to stop when it encounters
0054  *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
0055  *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
0056  *  and ucnv_setToUCallBack would need to be called in order to change
0057  *  that behavior too.
0058  *
0059  *  Here is an example with a context:
0060  *
0061  *  \code
0062  *  UErrorCode err = U_ZERO_ERROR;
0063  *  UConverter *myConverter = ucnv_open("ibm-949", &err);
0064  *  const void *oldContext;
0065  *  UConverterToUCallback oldAction;
0066  *
0067  *
0068  *  if (U_SUCCESS(err))
0069  *  {
0070  *      ucnv_setToUCallBack(myConverter,
0071  *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
0072  *                       UCNV_SUB_STOP_ON_ILLEGAL,
0073  *                       &oldAction,
0074  *                       &oldContext,
0075  *                       &err);
0076  *  }
0077  *  \endcode
0078  *
0079  *  The code above tells "myConverter" to stop when it encounters
0080  *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
0081  *  Codepage -> Unicode. Any unmapped but legal characters will be
0082  *  replaced with the default substitution character.
0083  */
0084 
0085 #ifndef UCNV_ERR_H
0086 #define UCNV_ERR_H
0087 
0088 #include "unicode/utypes.h"
0089 
0090 #if !UCONFIG_NO_CONVERSION
0091 
0092 /** Forward declaration of the UConverter structure (no members are declared in this header). @stable ICU 2.0 */
0093 struct UConverter;
0094 
0095 /** Typedef so the converter type can be written as UConverter without the struct keyword. @stable ICU 2.0 */
0096 typedef struct UConverter UConverter;
0097 
0098 /**
0099  * Context option for the FROM_U and TO_U substitute callbacks: stop at an ILLEGAL_SEQUENCE instead of substituting it.
0100  * @stable ICU 2.0
0101  */
0102 #define UCNV_SUB_STOP_ON_ILLEGAL "i"
0103 
0104 /**
0105  * Context option for the FROM_U and TO_U skip callbacks: stop at an ILLEGAL_SEQUENCE instead of skipping it.
0106  * @stable ICU 2.0
0107  */
0108 #define UCNV_SKIP_STOP_ON_ILLEGAL "i"
0109 
0110 /**
0111  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX); defined as NULL, so a NULL context selects this format.
0112  * @stable ICU 2.0
0113  */
0114 #define UCNV_ESCAPE_ICU       NULL
0115 /**
0116  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
0117  * @stable ICU 2.0
0118  */
0119 #define UCNV_ESCAPE_JAVA      "J"
0120 /**
0121  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
0122  * TO_U_CALLBACK_ESCAPE context option to escape the character value according to C (\\xXXXX)
0123  * @stable ICU 2.0
0124  */
0125 #define UCNV_ESCAPE_C         "C"
0126 /**
0127  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
0128  * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
0129  * @stable ICU 2.0
0130  */
0131 #define UCNV_ESCAPE_XML_DEC   "D"
0132 /**
0133  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
0134  * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
0135  * @stable ICU 2.0
0136  */
0137 #define UCNV_ESCAPE_XML_HEX   "X"
0138 /**
0139  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
0140  * @stable ICU 2.0
0141  */
0142 #define UCNV_ESCAPE_UNICODE   "U"
0143 
0144 /**
0145  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
0146  * a backslash, 1..6 hex digits, and a space)
0147  * @stable ICU 4.0
0148  */
0149 #define UCNV_ESCAPE_CSS2   "S"
0150 
0151 /** 
0152  * The process condition code (the reason) passed to the callbacks.
0153  * Codes which are greater than UCNV_IRREGULAR (UCNV_RESET, UCNV_CLOSE and UCNV_CLONE) should be
0154  * passed on to any chained callbacks.
0155  * @stable ICU 2.0
0156  */
0157 typedef enum {
0158     UCNV_UNASSIGNED = 0,  /**< The code point is unassigned.
0159                              The error code U_INVALID_CHAR_FOUND will be set. */
0160     UCNV_ILLEGAL = 1,     /**< The code point is illegal. For example,
0161                              \\x81\\x2E is illegal in SJIS because \\x2E
0162                              is not a valid trail byte for the \\x81
0163                              lead byte.
0164                              Also, starting with Unicode 3.0.1, non-shortest byte sequences
0165                              in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
0166                              are also illegal, not just irregular.
0167                              The error code U_ILLEGAL_CHAR_FOUND will be set. */
0168     UCNV_IRREGULAR = 2,   /**< The code point is not a regular sequence in
0169                              the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
0170                              are irregular UTF-8 byte sequences for single surrogate
0171                              code points.
0172                              The error code U_INVALID_CHAR_FOUND will be set. */
0173     UCNV_RESET = 3,       /**< The callback is called with this reason when a
0174                              'reset' has occurred. The callback should reset all
0175                              state. */
0176     UCNV_CLOSE = 4,        /**< Called when the converter is closed. The
0177                              callback should release any allocated memory. */
0178     UCNV_CLONE = 5         /**< Called when ucnv_safeClone() is called on the
0179                               converter. The pointer available as the
0180                               'context' is an alias to the original converter's
0181                               context pointer. If the context must be owned
0182                               by the new converter, the callback must clone
0183                               the data and call ucnv_setFromUCallBack
0184                               (or ucnv_setToUCallBack) with the correct pointer.
0185                               @stable ICU 2.2
0186                            */
0187 } UConverterCallbackReason;
0188 
0189 
0190 /**
0191  * The argument structure passed to fromUnicode callback functions (the fromUArgs parameter of the UCNV_FROM_U_CALLBACK_* functions).
0192  * @stable ICU 2.0
0193  */
0194 typedef struct {
0195     uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
0196     UBool flush;                /**< The internal state of the converter will be reset and data flushed if set to true. @stable ICU 2.0    */
0197     UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0  */
0198     const UChar *source;        /**< Pointer to the source buffer. @stable ICU 2.0    */
0199     const UChar *sourceLimit;   /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
0200     char *target;               /**< Pointer to the target buffer. @stable ICU 2.0    */
0201     const char *targetLimit;    /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
0202     int32_t *offsets;           /**< Pointer to the buffer that receives the offsets; filled sequentially, as in: *offsets = value; offsets++; @stable ICU 2.0  */
0203 } UConverterFromUnicodeArgs;
0204 
0205 
0206 /**
0207  * The argument structure passed to toUnicode callback functions (the toUArgs parameter of the UCNV_TO_U_CALLBACK_* functions).
0208  * @stable ICU 2.0
0209  */
0210 typedef struct {
0211     uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
0212     UBool flush;                /**< The internal state of the converter will be reset and data flushed if set to true. @stable ICU 2.0   */
0213     UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
0214     const char *source;         /**< Pointer to the source buffer. @stable ICU 2.0    */
0215     const char *sourceLimit;    /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
0216     UChar *target;              /**< Pointer to the target buffer. @stable ICU 2.0    */
0217     const UChar *targetLimit;   /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
0218     int32_t *offsets;           /**< Pointer to the buffer that receives the offsets; filled sequentially, as in: *offsets = value; offsets++; @stable ICU 2.0  */
0219 } UConverterToUnicodeArgs;
0220 
0221 
0222 /**
0223  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
0224  * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
0225  * returning the error code back to the caller immediately.
0226  *
0227  * @param context Pointer to the callback's private data
0228  * @param fromUArgs Information about the conversion in progress
0229  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
0230  * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
0231  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
0232  * @param reason Defines the reason the callback was invoked
0233  * @param err This should always be set to a failure status prior to calling.
0234  * @stable ICU 2.0
0235  */
0236 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
0237                   const void *context,
0238                   UConverterFromUnicodeArgs *fromUArgs,
0239                   const UChar* codeUnits,
0240                   int32_t length,
0241                   UChar32 codePoint,
0242                   UConverterCallbackReason reason,
0243                   UErrorCode * err);
0244 
0245 
0246 
0247 /**
0248  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
0249  * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
0250  * returning the error code back to the caller immediately.
0251  *
0252  * @param context Pointer to the callback's private data
0253  * @param toUArgs Information about the conversion in progress
0254  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
0255  * @param length Size (in bytes) of the concerned codepage sequence (see codeUnits)
0256  * @param reason Defines the reason the callback was invoked
0257  * @param err This should always be set to a failure status prior to calling.
0258  * @stable ICU 2.0
0259  */
0260 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
0261                   const void *context,
0262                   UConverterToUnicodeArgs *toUArgs,
0263                   const char* codeUnits,
0264                   int32_t length,
0265                   UConverterCallbackReason reason,
0266                   UErrorCode * err);
0267 
0268 /**
0269  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
0270  * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
0271  * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
0272  * simply ignoring those characters.
0273  *
0274  * @param context  The function currently recognizes the callback options:
0275  *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
0276  *                      returning the error code back to the caller immediately.
0277  *                 NULL: Skips any ILLEGAL_SEQUENCE
0278  * @param fromUArgs Information about the conversion in progress
0279  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
0280  * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
0281  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
0282  * @param reason Defines the reason the callback was invoked
0283  * @param err Return value will be set to success if the callback was handled,
0284  *      otherwise this value will be set to a failure status.
0285  * @stable ICU 2.0
0286  */
0287 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
0288                   const void *context,
0289                   UConverterFromUnicodeArgs *fromUArgs,
0290                   const UChar* codeUnits,
0291                   int32_t length,
0292                   UChar32 codePoint,
0293                   UConverterCallbackReason reason,
0294                   UErrorCode * err);
0295 
0296 /**
0297  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
0298  * This From Unicode callback will substitute the ILLEGAL_SEQUENCE, or
0299  * UNASSIGNED_SEQUENCE depending on the context parameter, with the
0300  * current substitution string for the converter. This is the default
0301  * callback.
0302  *
0303  * @param context The function currently recognizes the callback options:
0304  *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
0305  *                      returning the error code back to the caller immediately.
0306  *                 NULL: Substitutes any ILLEGAL_SEQUENCE
0307  * @param fromUArgs Information about the conversion in progress
0308  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
0309  * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
0310  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
0311  * @param reason Defines the reason the callback was invoked
0312  * @param err Return value will be set to success if the callback was handled,
0313  *      otherwise this value will be set to a failure status.
0314  * @see ucnv_setSubstChars
0315  * @stable ICU 2.0
0316  */
0317 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
0318                   const void *context,
0319                   UConverterFromUnicodeArgs *fromUArgs,
0320                   const UChar* codeUnits,
0321                   int32_t length,
0322                   UChar32 codePoint,
0323                   UConverterCallbackReason reason,
0324                   UErrorCode * err);
0325 
0326 /**
0327  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
0328  * This From Unicode callback will substitute the ILLEGAL SEQUENCE with an
0329  * escaped numeric representation of the illegal code points, in the format selected by the context.
0330  *
0331  * @param context The function currently recognizes the callback options:
0332  *        <ul>
0333  *        <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
0334  *          representation in the format %UXXXX (e.g. "%UFFFE%U00AC%UC8FE").
0335  *          In the event the converter doesn't support the characters {%,U}[A-F][0-9],
0336  *          it will substitute the illegal sequence with the substitution characters.
0337  *          Note that a supplementary code point (32-bit value, i.e. a surrogate pair in UTF-16) is represented as
0338  *          %UD84D%UDC56</li>
0339  *        <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
0340  *          representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
0341  *          In the event the converter doesn't support the characters {\,u}[A-F][0-9],
0342  *          it will substitute the illegal sequence with the substitution characters.
0343  *          Note that a supplementary code point (32-bit value, i.e. a surrogate pair in UTF-16) is represented as
0344  *          \\uD84D\\uDC56</li>
0345  *        <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
0346  *          representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
0347  *          In the event the converter doesn't support the characters {\,u,U}[A-F][0-9],
0348  *          it will substitute the illegal sequence with the substitution characters.
0349  *          Note that a supplementary code point (32-bit value, i.e. a surrogate pair in UTF-16) is represented as
0350  *          \\U00023456</li>
0351  *        <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
0352  *          representation in the format \htmlonly&amp;#DDDDDDDD; (e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly.
0353  *          In the event the converter doesn't support the characters {&amp;,#}[0-9],
0354  *          it will substitute the illegal sequence with the substitution characters.
0355  *          Note that a supplementary code point (32-bit value, i.e. a surrogate pair in UTF-16) is represented as
0356  *          &amp;#144470; and zero padding is ignored.</li>
0357  *        <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
0358  *          representation in the format \htmlonly&amp;#xXXXX; (e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly.
0359  *          In the event the converter doesn't support the characters {&amp;,#,x}[A-F][0-9],
0360  *          it will substitute the illegal sequence with the substitution characters.
0361  *          Note that a supplementary code point (32-bit value, i.e. a surrogate pair in UTF-16) is represented as
0362  *          \htmlonly&amp;#x23456;\endhtmlonly</li>
0363  *        </ul> UCNV_ESCAPE_UNICODE (U+XXXXX) and UCNV_ESCAPE_CSS2 are also documented in this header as context options for this callback.
0364  * @param fromUArgs Information about the conversion in progress
0365  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
0366  * @param length Number of UChars in the concerned Unicode sequence (see codeUnits)
0367  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
0368  * @param reason Defines the reason the callback was invoked
0369  * @param err Return value will be set to success if the callback was handled,
0370  *      otherwise this value will be set to a failure status.
0371  * @stable ICU 2.0
0372  */
0373 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
0374                   const void *context,
0375                   UConverterFromUnicodeArgs *fromUArgs,
0376                   const UChar* codeUnits,
0377                   int32_t length,
0378                   UChar32 codePoint,
0379                   UConverterCallbackReason reason,
0380                   UErrorCode * err);
0381 
0382 
0383 /**
0384  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
0385  * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
0386  * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
0387  * simply ignoring those characters.
0388  *
0389  * @param context  The function currently recognizes the callback options:
0390  *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
0391  *                      returning the error code back to the caller immediately.
0392  *                 NULL: Skips any ILLEGAL_SEQUENCE
0393  * @param toUArgs Information about the conversion in progress
0394  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
0395  * @param length Size (in bytes) of the concerned codepage sequence (see codeUnits)
0396  * @param reason Defines the reason the callback was invoked
0397  * @param err Return value will be set to success if the callback was handled,
0398  *      otherwise this value will be set to a failure status.
0399  * @stable ICU 2.0
0400  */
0401 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
0402                   const void *context,
0403                   UConverterToUnicodeArgs *toUArgs,
0404                   const char* codeUnits,
0405                   int32_t length,
0406                   UConverterCallbackReason reason,
0407                   UErrorCode * err);
0408 
0409 /**
0410  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
0411  * This To Unicode callback will substitute the ILLEGAL_SEQUENCE, or
0412  * UNASSIGNED_SEQUENCE depending on the context parameter, with the
0413  * Unicode substitution character, U+FFFD.
0414  *
0415  * @param context  The function currently recognizes the callback options:
0416  *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
0417  *                      returning the error code back to the caller immediately.
0418  *                 NULL: Substitutes any ILLEGAL_SEQUENCE
0419  * @param toUArgs Information about the conversion in progress
0420  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
0421  * @param length Size (in bytes) of the concerned codepage sequence (see codeUnits)
0422  * @param reason Defines the reason the callback was invoked
0423  * @param err Return value will be set to success if the callback was handled,
0424  *      otherwise this value will be set to a failure status.
0425  * @stable ICU 2.0
0426  */
0427 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
0428                   const void *context,
0429                   UConverterToUnicodeArgs *toUArgs,
0430                   const char* codeUnits,
0431                   int32_t length,
0432                   UConverterCallbackReason reason,
0433                   UErrorCode * err);
0434 
0435 /**
0436  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
0437  * This To Unicode callback will substitute the ILLEGAL SEQUENCE with an
0438  * escaped numeric representation of the illegal bytes
0439  *  (for example in the format %XNN, e.g. "%XFF%X0A%XC8%X03"; other context options select other formats such as \\xXXXX for UCNV_ESCAPE_C).
0440  *
0441  * @param context This function currently recognizes the callback options:
0442  *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
0443  *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
0444  * @param toUArgs Information about the conversion in progress
0445  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
0446  * @param length Size (in bytes) of the concerned codepage sequence (see codeUnits)
0447  * @param reason Defines the reason the callback was invoked
0448  * @param err Return value will be set to success if the callback was handled,
0449  *      otherwise this value will be set to a failure status.
0450  * @stable ICU 2.0
0451  */
0452 
0453 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
0454                   const void *context,
0455                   UConverterToUnicodeArgs *toUArgs,
0456                   const char* codeUnits,
0457                   int32_t length,
0458                   UConverterCallbackReason reason,
0459                   UErrorCode * err);
0460 
0461 #endif
0462 
0463 #endif
0464 
0465 /*UCNV_ERR_H*/