|
||||
File indexing completed on 2025-01-18 10:13:10
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ********************************************************************** 0005 * Copyright (C) 1999-2009, International Business Machines 0006 * Corporation and others. All Rights Reserved. 0007 ********************************************************************** 0008 * 0009 * 0010 * ucnv_err.h: 0011 */ 0012 0013 /** 0014 * \file 0015 * \brief C API: UConverter predefined error callbacks 0016 * 0017 * <h2>Error Behaviour Functions</h2> 0018 * Defines some error behaviour functions called by ucnv_{from,to}Unicode 0019 * These are provided as part of ICU and many are stable, but they 0020 * can also be considered only as an example of what can be done with 0021 * callbacks. You may of course write your own. 0022 * 0023 * If you want to write your own, you may also find the functions from 0024 * ucnv_cb.h useful when writing your own callbacks. 0025 * 0026 * These functions, although public, should NEVER be called directly. 0027 * They should be used as parameters to the ucnv_setFromUCallback 0028 * and ucnv_setToUCallback functions, to set the behaviour of a converter 0029 * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. 0030 * 0031 * usage example: 'STOP' doesn't need any context, but newContext 0032 * could be set to something other than 'NULL' if needed. The available 0033 * contexts in this header can modify the default behavior of the callback. 
0034 * 0035 * \code 0036 * UErrorCode err = U_ZERO_ERROR; 0037 * UConverter *myConverter = ucnv_open("ibm-949", &err); 0038 * const void *oldContext; 0039 * UConverterFromUCallback oldAction; 0040 * 0041 * 0042 * if (U_SUCCESS(err)) 0043 * { 0044 * ucnv_setFromUCallBack(myConverter, 0045 * UCNV_FROM_U_CALLBACK_STOP, 0046 * NULL, 0047 * &oldAction, 0048 * &oldContext, 0049 * &err); 0050 * } 0051 * \endcode 0052 * 0053 * The code above tells "myConverter" to stop when it encounters an 0054 * ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from 0055 * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, 0056 * and ucnv_setToUCallBack would need to be called in order to change 0057 * that behavior too. 0058 * 0059 * Here is an example with a context: 0060 * 0061 * \code 0062 * UErrorCode err = U_ZERO_ERROR; 0063 * UConverter *myConverter = ucnv_open("ibm-949", &err); 0064 * const void *oldContext; 0065 * UConverterToUCallback oldAction; 0066 * 0067 * 0068 * if (U_SUCCESS(err)) 0069 * { 0070 * ucnv_setToUCallBack(myConverter, 0071 * UCNV_TO_U_CALLBACK_SUBSTITUTE, 0072 * UCNV_SUB_STOP_ON_ILLEGAL, 0073 * &oldAction, 0074 * &oldContext, 0075 * &err); 0076 * } 0077 * \endcode 0078 * 0079 * The code above tells "myConverter" to stop when it encounters an 0080 * ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from 0081 * Codepage -> Unicode. Any unmapped and legal characters will be 0082 * replaced with the default substitution character. 0083 */ 0084 0085 #ifndef UCNV_ERR_H 0086 #define UCNV_ERR_H 0087 0088 #include "unicode/utypes.h" 0089 0090 #if !UCONFIG_NO_CONVERSION 0091 0092 /** Forward declaring the UConverter structure. 
@stable ICU 2.0 */ 0093 struct UConverter; 0094 0095 /** @stable ICU 2.0 */ 0096 typedef struct UConverter UConverter; 0097 0098 /** 0099 * FROM_U, TO_U context options for sub callback 0100 * @stable ICU 2.0 0101 */ 0102 #define UCNV_SUB_STOP_ON_ILLEGAL "i" 0103 0104 /** 0105 * FROM_U, TO_U context options for skip callback 0106 * @stable ICU 2.0 0107 */ 0108 #define UCNV_SKIP_STOP_ON_ILLEGAL "i" 0109 0110 /** 0111 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) 0112 * @stable ICU 2.0 0113 */ 0114 #define UCNV_ESCAPE_ICU NULL 0115 /** 0116 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) 0117 * @stable ICU 2.0 0118 */ 0119 #define UCNV_ESCAPE_JAVA "J" 0120 /** 0121 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) 0122 * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX) 0123 * @stable ICU 2.0 0124 */ 0125 #define UCNV_ESCAPE_C "C" 0126 /** 0127 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly 0128 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly 0129 * @stable ICU 2.0 0130 */ 0131 #define UCNV_ESCAPE_XML_DEC "D" 0132 /** 0133 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly 0134 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly 0135 * @stable ICU 2.0 0136 */ 0137 #define UCNV_ESCAPE_XML_HEX "X" 0138 /** 0139 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) 0140 * @stable ICU 2.0 0141 */ 0142 #define UCNV_ESCAPE_UNICODE "U" 0143 0144 /** 0145 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions 
(\\HH..H<space>, that is, 0146 * a backslash, 1..6 hex digits, and a space) 0147 * @stable ICU 4.0 0148 */ 0149 #define UCNV_ESCAPE_CSS2 "S" 0150 0151 /** 0152 * The process condition code to be used with the callbacks. 0153 * Codes which are greater than UCNV_IRREGULAR should be 0154 * passed on to any chained callbacks. 0155 * @stable ICU 2.0 0156 */ 0157 typedef enum { 0158 UCNV_UNASSIGNED = 0, /**< The code point is unassigned. 0159 The error code U_INVALID_CHAR_FOUND will be set. */ 0160 UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, 0161 \\x81\\x2E is illegal in SJIS because \\x2E 0162 is not a valid trail byte for the \\x81 0163 lead byte. 0164 Also, starting with Unicode 3.0.1, non-shortest byte sequences 0165 in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) 0166 are also illegal, not just irregular. 0167 The error code U_ILLEGAL_CHAR_FOUND will be set. */ 0168 UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in 0169 the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF 0170 are irregular UTF-8 byte sequences for single surrogate 0171 code points. 0172 The error code U_INVALID_CHAR_FOUND will be set. */ 0173 UCNV_RESET = 3, /**< The callback is called with this reason when a 0174 'reset' has occurred. Callback should reset all 0175 state. */ 0176 UCNV_CLOSE = 4, /**< Called when the converter is closed. The 0177 callback should release any allocated memory.*/ 0178 UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the 0179 converter. the pointer available as the 0180 'context' is an alias to the original converters' 0181 context pointer. If the context must be owned 0182 by the new converter, the callback must clone 0183 the data and call ucnv_setFromUCallback 0184 (or setToUCallback) with the correct pointer. 0185 @stable ICU 2.2 0186 */ 0187 } UConverterCallbackReason; 0188 0189 0190 /** 0191 * The structure for the fromUnicode callback function parameter. 
0192 * @stable ICU 2.0 0193 */ 0194 typedef struct { 0195 uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ 0196 UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */ 0197 UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ 0198 const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */ 0199 const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ 0200 char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ 0201 const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ 0202 int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ 0203 } UConverterFromUnicodeArgs; 0204 0205 0206 /** 0207 * The structure for the toUnicode callback function parameter. 0208 * @stable ICU 2.0 0209 */ 0210 typedef struct { 0211 uint16_t size; /**< The size of this struct @stable ICU 2.0 */ 0212 UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */ 0213 UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ 0214 const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */ 0215 const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ 0216 UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ 0217 const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ 0218 int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ 0219 } UConverterToUnicodeArgs; 0220 0221 0222 /** 0223 * DO NOT CALL THIS FUNCTION DIRECTLY! 
0224 * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, 0225 * returning the error code back to the caller immediately. 0226 * 0227 * @param context Pointer to the callback's private data 0228 * @param fromUArgs Information about the conversion in progress 0229 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 0230 * @param length Number of UChars in the concerned Unicode sequence 0231 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. 0232 * @param reason Defines the reason the callback was invoked 0233 * @param err This should always be set to a failure status prior to calling. 0234 * @stable ICU 2.0 0235 */ 0236 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( 0237 const void *context, 0238 UConverterFromUnicodeArgs *fromUArgs, 0239 const UChar* codeUnits, 0240 int32_t length, 0241 UChar32 codePoint, 0242 UConverterCallbackReason reason, 0243 UErrorCode * err); 0244 0245 0246 0247 /** 0248 * DO NOT CALL THIS FUNCTION DIRECTLY! 0249 * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, 0250 * returning the error code back to the caller immediately. 0251 * 0252 * @param context Pointer to the callback's private data 0253 * @param toUArgs Information about the conversion in progress 0254 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 0255 * @param length Size (in bytes) of the concerned codepage sequence 0256 * @param reason Defines the reason the callback was invoked 0257 * @param err This should always be set to a failure status prior to calling. 0258 * @stable ICU 2.0 0259 */ 0260 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( 0261 const void *context, 0262 UConverterToUnicodeArgs *toUArgs, 0263 const char* codeUnits, 0264 int32_t length, 0265 UConverterCallbackReason reason, 0266 UErrorCode * err); 0267 0268 /** 0269 * DO NOT CALL THIS FUNCTION DIRECTLY! 
0270 * This From Unicode callback skips any ILLEGAL_SEQUENCE, or 0271 * skips only UNASSIGNED_SEQUENCE depending on the context parameter 0272 * simply ignoring those characters. 0273 * 0274 * @param context The function currently recognizes the callback options: 0275 * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 0276 * returning the error code back to the caller immediately. 0277 * NULL: Skips any ILLEGAL_SEQUENCE 0278 * @param fromUArgs Information about the conversion in progress 0279 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 0280 * @param length Number of UChars in the concerned Unicode sequence 0281 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. 0282 * @param reason Defines the reason the callback was invoked 0283 * @param err Return value will be set to success if the callback was handled, 0284 * otherwise this value will be set to a failure status. 0285 * @stable ICU 2.0 0286 */ 0287 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( 0288 const void *context, 0289 UConverterFromUnicodeArgs *fromUArgs, 0290 const UChar* codeUnits, 0291 int32_t length, 0292 UChar32 codePoint, 0293 UConverterCallbackReason reason, 0294 UErrorCode * err); 0295 0296 /** 0297 * DO NOT CALL THIS FUNCTION DIRECTLY! 0298 * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or 0299 * UNASSIGNED_SEQUENCE depending on context parameter, with the 0300 * current substitution string for the converter. This is the default 0301 * callback. 0302 * 0303 * @param context The function currently recognizes the callback options: 0304 * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 0305 * returning the error code back to the caller immediately. 
0306 * NULL: Substitutes any ILLEGAL_SEQUENCE 0307 * @param fromUArgs Information about the conversion in progress 0308 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 0309 * @param length Number of UChars in the concerned Unicode sequence 0310 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. 0311 * @param reason Defines the reason the callback was invoked 0312 * @param err Return value will be set to success if the callback was handled, 0313 * otherwise this value will be set to a failure status. 0314 * @see ucnv_setSubstChars 0315 * @stable ICU 2.0 0316 */ 0317 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( 0318 const void *context, 0319 UConverterFromUnicodeArgs *fromUArgs, 0320 const UChar* codeUnits, 0321 int32_t length, 0322 UChar32 codePoint, 0323 UConverterCallbackReason reason, 0324 UErrorCode * err); 0325 0326 /** 0327 * DO NOT CALL THIS FUNCTION DIRECTLY! 0328 * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the 0329 * hexadecimal representation of the illegal codepoints 0330 * 0331 * @param context The function currently recognizes the callback options: 0332 * <ul> 0333 * <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal 0334 * representation in the format %UXXXX (e.g. "%UFFFE%U00AC%UC8FE"). 0335 * In the Event the converter doesn't support the characters {%,U}[A-F][0-9], 0336 * it will substitute the illegal sequence with the substitution characters. 0337 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 0338 * %UD84D%UDC56</li> 0339 * <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal 0340 * representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE"). 0341 * In the Event the converter doesn't support the characters {\,u}[A-F][0-9], 0342 * it will substitute the illegal sequence with the substitution characters. 
0343 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 0344 * \\uD84D\\uDC56</li> 0345 * <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal 0346 * representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE"). 0347 * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], 0348 * it will substitute the illegal sequence with the substitution characters. 0349 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 0350 * \\U00023456</li> 0351 * <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal 0352 * representation in the format \htmlonly&#DDDDDDDD; (e.g. "&#65534;&#172;&#51454;")\endhtmlonly. 0353 * In the Event the converter doesn't support the characters {&,#}[0-9], 0354 * it will substitute the illegal sequence with the substitution characters. 0355 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 0356 * &#144470; and Zero padding is ignored.</li> 0357 * <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal 0358 * representation in the format \htmlonly&#xXXXX; (e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly. 0359 * In the Event the converter doesn't support the characters {&,#,x}[0-9], 0360 * it will substitute the illegal sequence with the substitution characters. 0361 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 0362 * \htmlonly&#x23456;\endhtmlonly</li> 0363 * </ul> 0364 * @param fromUArgs Information about the conversion in progress 0365 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 0366 * @param length Number of UChars in the concerned Unicode sequence 0367 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. 
0368 * @param reason Defines the reason the callback was invoked 0369 * @param err Return value will be set to success if the callback was handled, 0370 * otherwise this value will be set to a failure status. 0371 * @stable ICU 2.0 0372 */ 0373 U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( 0374 const void *context, 0375 UConverterFromUnicodeArgs *fromUArgs, 0376 const UChar* codeUnits, 0377 int32_t length, 0378 UChar32 codePoint, 0379 UConverterCallbackReason reason, 0380 UErrorCode * err); 0381 0382 0383 /** 0384 * DO NOT CALL THIS FUNCTION DIRECTLY! 0385 * This To Unicode callback skips any ILLEGAL_SEQUENCE, or 0386 * skips only UNASSIGNED_SEQUENCE depending on the context parameter 0387 * simply ignoring those characters. 0388 * 0389 * @param context The function currently recognizes the callback options: 0390 * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 0391 * returning the error code back to the caller immediately. 0392 * NULL: Skips any ILLEGAL_SEQUENCE 0393 * @param toUArgs Information about the conversion in progress 0394 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 0395 * @param length Size (in bytes) of the concerned codepage sequence 0396 * @param reason Defines the reason the callback was invoked 0397 * @param err Return value will be set to success if the callback was handled, 0398 * otherwise this value will be set to a failure status. 0399 * @stable ICU 2.0 0400 */ 0401 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( 0402 const void *context, 0403 UConverterToUnicodeArgs *toUArgs, 0404 const char* codeUnits, 0405 int32_t length, 0406 UConverterCallbackReason reason, 0407 UErrorCode * err); 0408 0409 /** 0410 * DO NOT CALL THIS FUNCTION DIRECTLY! 0411 * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or 0412 * UNASSIGNED_SEQUENCE depending on context parameter, with the 0413 * Unicode substitution character, U+FFFD. 
0414 * 0415 * @param context The function currently recognizes the callback options: 0416 * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 0417 * returning the error code back to the caller immediately. 0418 * NULL: Substitutes any ILLEGAL_SEQUENCE 0419 * @param toUArgs Information about the conversion in progress 0420 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 0421 * @param length Size (in bytes) of the concerned codepage sequence 0422 * @param reason Defines the reason the callback was invoked 0423 * @param err Return value will be set to success if the callback was handled, 0424 * otherwise this value will be set to a failure status. 0425 * @stable ICU 2.0 0426 */ 0427 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( 0428 const void *context, 0429 UConverterToUnicodeArgs *toUArgs, 0430 const char* codeUnits, 0431 int32_t length, 0432 UConverterCallbackReason reason, 0433 UErrorCode * err); 0434 0435 /** 0436 * DO NOT CALL THIS FUNCTION DIRECTLY! 0437 * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the 0438 * hexadecimal representation of the illegal bytes 0439 * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). 0440 * 0441 * @param context This function currently recognizes the callback options: 0442 * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, 0443 * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. 0444 * @param toUArgs Information about the conversion in progress 0445 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 0446 * @param length Size (in bytes) of the concerned codepage sequence 0447 * @param reason Defines the reason the callback was invoked 0448 * @param err Return value will be set to success if the callback was handled, 0449 * otherwise this value will be set to a failure status. 
0450 * @stable ICU 2.0 0451 */ 0452 0453 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( 0454 const void *context, 0455 UConverterToUnicodeArgs *toUArgs, 0456 const char* codeUnits, 0457 int32_t length, 0458 UConverterCallbackReason reason, 0459 UErrorCode * err); 0460 0461 #endif 0462 0463 #endif 0464 0465 /*UCNV_ERR_H*/
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |