|
|
|||
File indexing completed on 2026-03-28 08:32:44
0001 // © 2016 and later: Unicode, Inc. and others. 0002 // License & terms of use: http://www.unicode.org/copyright.html 0003 /* 0004 ******************************************************************************* 0005 * Copyright (C) 2010-2012, International Business Machines 0006 * Corporation and others. All Rights Reserved. 0007 ******************************************************************************* 0008 * file name: idna.h 0009 * encoding: UTF-8 0010 * tab size: 8 (not used) 0011 * indentation:4 0012 * 0013 * created on: 2010mar05 0014 * created by: Markus W. Scherer 0015 */ 0016 0017 #ifndef __IDNA_H__ 0018 #define __IDNA_H__ 0019 0020 /** 0021 * \file 0022 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) 0023 */ 0024 0025 #include "unicode/utypes.h" 0026 0027 #if U_SHOW_CPLUSPLUS_API 0028 0029 #if !UCONFIG_NO_IDNA 0030 0031 #include "unicode/bytestream.h" 0032 #include "unicode/stringpiece.h" 0033 #include "unicode/uidna.h" 0034 #include "unicode/unistr.h" 0035 0036 U_NAMESPACE_BEGIN 0037 0038 class IDNAInfo; 0039 0040 /** 0041 * Abstract base class for IDNA processing. 0042 * See http://www.unicode.org/reports/tr46/ 0043 * and http://www.ietf.org/rfc/rfc3490.txt 0044 * 0045 * The IDNA class is not intended for public subclassing. 0046 * 0047 * This C++ API currently only implements UTS #46. 0048 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) 0049 * and IDNA2003 (functions that do not use a service object). 0050 * @stable ICU 4.6 0051 */ 0052 class U_COMMON_API IDNA : public UObject { 0053 public: 0054 /** 0055 * Destructor. 0056 * @stable ICU 4.6 0057 */ 0058 ~IDNA(); 0059 0060 /** 0061 * Returns an IDNA instance which implements UTS #46. 0062 * Returns an unmodifiable instance, owned by the caller. 0063 * Cache it for multiple operations, and delete it when done. 0064 * The instance is thread-safe, that is, it can be used concurrently. 0065 * 0066 * UTS #46 defines Unicode IDNA Compatibility Processing, 0067 * updated to the latest version of Unicode and compatible with both 0068 * IDNA2003 and IDNA2008. 0069 * 0070 * The worker functions use transitional processing, including deviation mappings, 0071 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE 0072 * is used in which case the deviation characters are passed through without change. 0073 * <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b> 0074 * 0075 * Disallowed characters are mapped to U+FFFD. 0076 * 0077 * For available options see the uidna.h header. 0078 * Operations with the UTS #46 instance do not support the 0079 * UIDNA_ALLOW_UNASSIGNED option. 0080 * 0081 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 0082 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than 0083 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. 0084 * 0085 * @param options Bit set to modify the processing and error checking. 0086 * These should include UIDNA_DEFAULT, or 0087 * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE. 0088 * See option bit set values in uidna.h. 0089 * @param errorCode Standard ICU error code. Its input value must 0090 * pass the U_SUCCESS() test, or else the function returns 0091 * immediately. Check for U_FAILURE() on output or use with 0092 * function chaining. (See User Guide for details.) 0093 * @return the UTS #46 IDNA instance, if successful 0094 * @stable ICU 4.6 0095 */ 0096 static IDNA * 0097 createUTS46Instance(uint32_t options, UErrorCode &errorCode); 0098 0099 /** 0100 * Converts a single domain name label into its ASCII form for DNS lookup. 0101 * If any processing step fails, then info.hasErrors() will be true and 0102 * the result might not be an ASCII string. 0103 * The label might be modified according to the types of errors. 0104 * Labels with severe errors will be left in (or turned into) their Unicode form. 0105 * 0106 * The UErrorCode indicates an error only in exceptional cases, 0107 * such as a U_MEMORY_ALLOCATION_ERROR. 0108 * 0109 * @param label Input domain name label 0110 * @param dest Destination string object 0111 * @param info Output container of IDNA processing details. 0112 * @param errorCode Standard ICU error code. Its input value must 0113 * pass the U_SUCCESS() test, or else the function returns 0114 * immediately. Check for U_FAILURE() on output or use with 0115 * function chaining. (See User Guide for details.) 0116 * @return dest 0117 * @stable ICU 4.6 0118 */ 0119 virtual UnicodeString & 0120 labelToASCII(const UnicodeString &label, UnicodeString &dest, 0121 IDNAInfo &info, UErrorCode &errorCode) const = 0; 0122 0123 /** 0124 * Converts a single domain name label into its Unicode form for human-readable display. 0125 * If any processing step fails, then info.hasErrors() will be true. 0126 * The label might be modified according to the types of errors. 0127 * 0128 * The UErrorCode indicates an error only in exceptional cases, 0129 * such as a U_MEMORY_ALLOCATION_ERROR. 0130 * 0131 * @param label Input domain name label 0132 * @param dest Destination string object 0133 * @param info Output container of IDNA processing details. 0134 * @param errorCode Standard ICU error code. Its input value must 0135 * pass the U_SUCCESS() test, or else the function returns 0136 * immediately. Check for U_FAILURE() on output or use with 0137 * function chaining. (See User Guide for details.) 0138 * @return dest 0139 * @stable ICU 4.6 0140 */ 0141 virtual UnicodeString & 0142 labelToUnicode(const UnicodeString &label, UnicodeString &dest, 0143 IDNAInfo &info, UErrorCode &errorCode) const = 0; 0144 0145 /** 0146 * Converts a whole domain name into its ASCII form for DNS lookup. 0147 * If any processing step fails, then info.hasErrors() will be true and 0148 * the result might not be an ASCII string. 0149 * The domain name might be modified according to the types of errors. 0150 * Labels with severe errors will be left in (or turned into) their Unicode form. 0151 * 0152 * The UErrorCode indicates an error only in exceptional cases, 0153 * such as a U_MEMORY_ALLOCATION_ERROR. 0154 * 0155 * @param name Input domain name 0156 * @param dest Destination string object 0157 * @param info Output container of IDNA processing details. 0158 * @param errorCode Standard ICU error code. Its input value must 0159 * pass the U_SUCCESS() test, or else the function returns 0160 * immediately. Check for U_FAILURE() on output or use with 0161 * function chaining. (See User Guide for details.) 0162 * @return dest 0163 * @stable ICU 4.6 0164 */ 0165 virtual UnicodeString & 0166 nameToASCII(const UnicodeString &name, UnicodeString &dest, 0167 IDNAInfo &info, UErrorCode &errorCode) const = 0; 0168 0169 /** 0170 * Converts a whole domain name into its Unicode form for human-readable display. 0171 * If any processing step fails, then info.hasErrors() will be true. 0172 * The domain name might be modified according to the types of errors. 0173 * 0174 * The UErrorCode indicates an error only in exceptional cases, 0175 * such as a U_MEMORY_ALLOCATION_ERROR. 0176 * 0177 * @param name Input domain name 0178 * @param dest Destination string object 0179 * @param info Output container of IDNA processing details. 0180 * @param errorCode Standard ICU error code. Its input value must 0181 * pass the U_SUCCESS() test, or else the function returns 0182 * immediately. Check for U_FAILURE() on output or use with 0183 * function chaining. (See User Guide for details.) 0184 * @return dest 0185 * @stable ICU 4.6 0186 */ 0187 virtual UnicodeString & 0188 nameToUnicode(const UnicodeString &name, UnicodeString &dest, 0189 IDNAInfo &info, UErrorCode &errorCode) const = 0; 0190 0191 // UTF-8 versions of the processing methods ---------------------------- *** 0192 0193 /** 0194 * Converts a single domain name label into its ASCII form for DNS lookup. 0195 * UTF-8 version of labelToASCII(), same behavior. 0196 * 0197 * @param label Input domain name label 0198 * @param dest Destination byte sink; Flush()ed if successful 0199 * @param info Output container of IDNA processing details. 0200 * @param errorCode Standard ICU error code. Its input value must 0201 * pass the U_SUCCESS() test, or else the function returns 0202 * immediately. Check for U_FAILURE() on output or use with 0203 * function chaining. (See User Guide for details.) 0204 * @return dest 0205 * @stable ICU 4.6 0206 */ 0207 virtual void 0208 labelToASCII_UTF8(StringPiece label, ByteSink &dest, 0209 IDNAInfo &info, UErrorCode &errorCode) const; 0210 0211 /** 0212 * Converts a single domain name label into its Unicode form for human-readable display. 0213 * UTF-8 version of labelToUnicode(), same behavior. 0214 * 0215 * @param label Input domain name label 0216 * @param dest Destination byte sink; Flush()ed if successful 0217 * @param info Output container of IDNA processing details. 0218 * @param errorCode Standard ICU error code. Its input value must 0219 * pass the U_SUCCESS() test, or else the function returns 0220 * immediately. Check for U_FAILURE() on output or use with 0221 * function chaining. (See User Guide for details.) 0222 * @return dest 0223 * @stable ICU 4.6 0224 */ 0225 virtual void 0226 labelToUnicodeUTF8(StringPiece label, ByteSink &dest, 0227 IDNAInfo &info, UErrorCode &errorCode) const; 0228 0229 /** 0230 * Converts a whole domain name into its ASCII form for DNS lookup. 0231 * UTF-8 version of nameToASCII(), same behavior. 0232 * 0233 * @param name Input domain name 0234 * @param dest Destination byte sink; Flush()ed if successful 0235 * @param info Output container of IDNA processing details. 0236 * @param errorCode Standard ICU error code. Its input value must 0237 * pass the U_SUCCESS() test, or else the function returns 0238 * immediately. Check for U_FAILURE() on output or use with 0239 * function chaining. (See User Guide for details.) 0240 * @return dest 0241 * @stable ICU 4.6 0242 */ 0243 virtual void 0244 nameToASCII_UTF8(StringPiece name, ByteSink &dest, 0245 IDNAInfo &info, UErrorCode &errorCode) const; 0246 0247 /** 0248 * Converts a whole domain name into its Unicode form for human-readable display. 0249 * UTF-8 version of nameToUnicode(), same behavior. 0250 * 0251 * @param name Input domain name 0252 * @param dest Destination byte sink; Flush()ed if successful 0253 * @param info Output container of IDNA processing details. 0254 * @param errorCode Standard ICU error code. Its input value must 0255 * pass the U_SUCCESS() test, or else the function returns 0256 * immediately. Check for U_FAILURE() on output or use with 0257 * function chaining. (See User Guide for details.) 0258 * @return dest 0259 * @stable ICU 4.6 0260 */ 0261 virtual void 0262 nameToUnicodeUTF8(StringPiece name, ByteSink &dest, 0263 IDNAInfo &info, UErrorCode &errorCode) const; 0264 }; 0265 0266 class UTS46; 0267 0268 /** 0269 * Output container for IDNA processing errors. 0270 * The IDNAInfo class is not suitable for subclassing. 0271 * @stable ICU 4.6 0272 */ 0273 class U_COMMON_API IDNAInfo : public UMemory { 0274 public: 0275 /** 0276 * Constructor for stack allocation. 0277 * @stable ICU 4.6 0278 */ 0279 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {} 0280 /** 0281 * Were there IDNA processing errors? 0282 * @return true if there were processing errors 0283 * @stable ICU 4.6 0284 */ 0285 UBool hasErrors() const { return errors!=0; } 0286 /** 0287 * Returns a bit set indicating IDNA processing errors. 0288 * See UIDNA_ERROR_... constants in uidna.h. 0289 * @return bit set of processing errors 0290 * @stable ICU 4.6 0291 */ 0292 uint32_t getErrors() const { return errors; } 0293 /** 0294 * Returns true if transitional and nontransitional processing produce different results. 0295 * This is the case when the input label or domain name contains 0296 * one or more deviation characters outside a Punycode label (see UTS #46). 0297 * <ul> 0298 * <li>With nontransitional processing, such characters are 0299 * copied to the destination string. 0300 * <li>With transitional processing, such characters are 0301 * mapped (sharp s/sigma) or removed (joiner/nonjoiner). 0302 * </ul> 0303 * @return true if transitional and nontransitional processing produce different results 0304 * @stable ICU 4.6 0305 */ 0306 UBool isTransitionalDifferent() const { return isTransDiff; } 0307 0308 private: 0309 friend class UTS46; 0310 0311 IDNAInfo(const IDNAInfo &other) = delete; // no copying 0312 IDNAInfo &operator=(const IDNAInfo &other) = delete; // no copying 0313 0314 void reset() { 0315 errors=labelErrors=0; 0316 isTransDiff=false; 0317 isBiDi=false; 0318 isOkBiDi=true; 0319 } 0320 0321 uint32_t errors, labelErrors; 0322 UBool isTransDiff; 0323 UBool isBiDi; 0324 UBool isOkBiDi; 0325 }; 0326 0327 U_NAMESPACE_END 0328 0329 #endif // UCONFIG_NO_IDNA 0330 0331 #endif /* U_SHOW_CPLUSPLUS_API */ 0332 0333 #endif // __IDNA_H__
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|