Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-03-28 08:32:44

0001 // © 2016 and later: Unicode, Inc. and others.
0002 // License & terms of use: http://www.unicode.org/copyright.html
0003 /*
0004 *******************************************************************************
0005 *   Copyright (C) 2010-2012, International Business Machines
0006 *   Corporation and others.  All Rights Reserved.
0007 *******************************************************************************
0008 *   file name:  idna.h
0009 *   encoding:   UTF-8
0010 *   tab size:   8 (not used)
0011 *   indentation:4
0012 *
0013 *   created on: 2010mar05
0014 *   created by: Markus W. Scherer
0015 */
0016 
0017 #ifndef __IDNA_H__
0018 #define __IDNA_H__
0019 
0020 /**
0021  * \file
0022  * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
0023  */
0024 
0025 #include "unicode/utypes.h"
0026 
0027 #if U_SHOW_CPLUSPLUS_API
0028 
0029 #if !UCONFIG_NO_IDNA
0030 
0031 #include "unicode/bytestream.h"
0032 #include "unicode/stringpiece.h"
0033 #include "unicode/uidna.h"
0034 #include "unicode/unistr.h"
0035 
0036 U_NAMESPACE_BEGIN
0037 
0038 class IDNAInfo;  // forward declaration; IDNAInfo is taken by reference in the IDNA methods
0039 
0040 /**
0041  * Abstract base class for IDNA processing.
0042  * See http://www.unicode.org/reports/tr46/
0043  * and http://www.ietf.org/rfc/rfc3490.txt
0044  *
0045  * The IDNA class is not intended for public subclassing.
0046  *
0047  * This C++ API currently only implements UTS #46.
0048  * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
0049  * and IDNA2003 (functions that do not use a service object).
0050  * @stable ICU 4.6
0051  */
0052 class U_COMMON_API IDNA : public UObject {
0053 public:
0054     /**
0055      * Destructor.
0056      * @stable ICU 4.6
0057      */
0058     ~IDNA();
0059 
0060     /**
0061      * Returns an IDNA instance which implements UTS #46.
0062      * Returns an unmodifiable instance, owned by the caller.
0063      * Cache it for multiple operations, and delete it when done.
0064      * The instance is thread-safe, that is, it can be used concurrently.
0065      *
0066      * UTS #46 defines Unicode IDNA Compatibility Processing,
0067      * updated to the latest version of Unicode and compatible with both
0068      * IDNA2003 and IDNA2008.
0069      *
0070      * The worker functions use transitional processing, including deviation mappings,
0071      * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
0072      * is used in which case the deviation characters are passed through without change.
0073      * <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
0074      *
0075      * Disallowed characters are mapped to U+FFFD.
0076      *
0077      * For available options see the uidna.h header.
0078      * Operations with the UTS #46 instance do not support the
0079      * UIDNA_ALLOW_UNASSIGNED option.
0080      *
0081      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
0082      * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
0083      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
0084      *
0085      * @param options Bit set to modify the processing and error checking.
0086      *                These should include UIDNA_DEFAULT, or
0087      *                UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
0088      *                See option bit set values in uidna.h.
0089      * @param errorCode Standard ICU error code. Its input value must
0090      *                  pass the U_SUCCESS() test, or else the function returns
0091      *                  immediately. Check for U_FAILURE() on output or use with
0092      *                  function chaining. (See User Guide for details.)
0093      * @return the UTS #46 IDNA instance, if successful
0094      * @stable ICU 4.6
0095      */
0096     static IDNA *
0097     createUTS46Instance(uint32_t options, UErrorCode &errorCode);
0098 
0099     /**
0100      * Converts a single domain name label into its ASCII form for DNS lookup.
0101      * If any processing step fails, then info.hasErrors() will be true and
0102      * the result might not be an ASCII string.
0103      * The label might be modified according to the types of errors.
0104      * Labels with severe errors will be left in (or turned into) their Unicode form.
0105      *
0106      * The UErrorCode indicates an error only in exceptional cases,
0107      * such as a U_MEMORY_ALLOCATION_ERROR.
0108      *
0109      * @param label Input domain name label
0110      * @param dest Destination string object
0111      * @param info Output container of IDNA processing details.
0112      * @param errorCode Standard ICU error code. Its input value must
0113      *                  pass the U_SUCCESS() test, or else the function returns
0114      *                  immediately. Check for U_FAILURE() on output or use with
0115      *                  function chaining. (See User Guide for details.)
0116      * @return dest
0117      * @stable ICU 4.6
0118      */
0119     virtual UnicodeString &
0120     labelToASCII(const UnicodeString &label, UnicodeString &dest,
0121                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
0122 
0123     /**
0124      * Converts a single domain name label into its Unicode form for human-readable display.
0125      * If any processing step fails, then info.hasErrors() will be true.
0126      * The label might be modified according to the types of errors.
0127      *
0128      * The UErrorCode indicates an error only in exceptional cases,
0129      * such as a U_MEMORY_ALLOCATION_ERROR.
0130      *
0131      * @param label Input domain name label
0132      * @param dest Destination string object
0133      * @param info Output container of IDNA processing details.
0134      * @param errorCode Standard ICU error code. Its input value must
0135      *                  pass the U_SUCCESS() test, or else the function returns
0136      *                  immediately. Check for U_FAILURE() on output or use with
0137      *                  function chaining. (See User Guide for details.)
0138      * @return dest
0139      * @stable ICU 4.6
0140      */
0141     virtual UnicodeString &
0142     labelToUnicode(const UnicodeString &label, UnicodeString &dest,
0143                    IDNAInfo &info, UErrorCode &errorCode) const = 0;
0144 
0145     /**
0146      * Converts a whole domain name into its ASCII form for DNS lookup.
0147      * If any processing step fails, then info.hasErrors() will be true and
0148      * the result might not be an ASCII string.
0149      * The domain name might be modified according to the types of errors.
0150      * Labels with severe errors will be left in (or turned into) their Unicode form.
0151      *
0152      * The UErrorCode indicates an error only in exceptional cases,
0153      * such as a U_MEMORY_ALLOCATION_ERROR.
0154      *
0155      * @param name Input domain name
0156      * @param dest Destination string object
0157      * @param info Output container of IDNA processing details.
0158      * @param errorCode Standard ICU error code. Its input value must
0159      *                  pass the U_SUCCESS() test, or else the function returns
0160      *                  immediately. Check for U_FAILURE() on output or use with
0161      *                  function chaining. (See User Guide for details.)
0162      * @return dest
0163      * @stable ICU 4.6
0164      */
0165     virtual UnicodeString &
0166     nameToASCII(const UnicodeString &name, UnicodeString &dest,
0167                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
0168 
0169     /**
0170      * Converts a whole domain name into its Unicode form for human-readable display.
0171      * If any processing step fails, then info.hasErrors() will be true.
0172      * The domain name might be modified according to the types of errors.
0173      *
0174      * The UErrorCode indicates an error only in exceptional cases,
0175      * such as a U_MEMORY_ALLOCATION_ERROR.
0176      *
0177      * @param name Input domain name
0178      * @param dest Destination string object
0179      * @param info Output container of IDNA processing details.
0180      * @param errorCode Standard ICU error code. Its input value must
0181      *                  pass the U_SUCCESS() test, or else the function returns
0182      *                  immediately. Check for U_FAILURE() on output or use with
0183      *                  function chaining. (See User Guide for details.)
0184      * @return dest
0185      * @stable ICU 4.6
0186      */
0187     virtual UnicodeString &
0188     nameToUnicode(const UnicodeString &name, UnicodeString &dest,
0189                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
0190 
0191     // UTF-8 versions of the processing methods ---------------------------- ***
0192 
0193     /**
0194      * Converts a single domain name label into its ASCII form for DNS lookup.
0195      * UTF-8 version of labelToASCII(), same behavior.
0196      *
0197      * @param label Input domain name label
0198      * @param dest Destination byte sink; Flush()ed if successful
0199      * @param info Output container of IDNA processing details.
0200      * @param errorCode Standard ICU error code. Its input value must
0201      *                  pass the U_SUCCESS() test, or else the function returns
0202      *                  immediately. Check for U_FAILURE() on output or use with
0203      *                  function chaining. (See User Guide for details.)
0204      * Returns nothing (void): the output goes to dest, processing details to info.
0205      * @stable ICU 4.6
0206      */
0207     virtual void
0208     labelToASCII_UTF8(StringPiece label, ByteSink &dest,
0209                       IDNAInfo &info, UErrorCode &errorCode) const;
0210 
0211     /**
0212      * Converts a single domain name label into its Unicode form for human-readable display.
0213      * UTF-8 version of labelToUnicode(), same behavior.
0214      *
0215      * @param label Input domain name label
0216      * @param dest Destination byte sink; Flush()ed if successful
0217      * @param info Output container of IDNA processing details.
0218      * @param errorCode Standard ICU error code. Its input value must
0219      *                  pass the U_SUCCESS() test, or else the function returns
0220      *                  immediately. Check for U_FAILURE() on output or use with
0221      *                  function chaining. (See User Guide for details.)
0222      * Returns nothing (void): the output goes to dest, processing details to info.
0223      * @stable ICU 4.6
0224      */
0225     virtual void
0226     labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
0227                        IDNAInfo &info, UErrorCode &errorCode) const;
0228 
0229     /**
0230      * Converts a whole domain name into its ASCII form for DNS lookup.
0231      * UTF-8 version of nameToASCII(), same behavior.
0232      *
0233      * @param name Input domain name
0234      * @param dest Destination byte sink; Flush()ed if successful
0235      * @param info Output container of IDNA processing details.
0236      * @param errorCode Standard ICU error code. Its input value must
0237      *                  pass the U_SUCCESS() test, or else the function returns
0238      *                  immediately. Check for U_FAILURE() on output or use with
0239      *                  function chaining. (See User Guide for details.)
0240      * Returns nothing (void): the output goes to dest, processing details to info.
0241      * @stable ICU 4.6
0242      */
0243     virtual void
0244     nameToASCII_UTF8(StringPiece name, ByteSink &dest,
0245                      IDNAInfo &info, UErrorCode &errorCode) const;
0246 
0247     /**
0248      * Converts a whole domain name into its Unicode form for human-readable display.
0249      * UTF-8 version of nameToUnicode(), same behavior.
0250      *
0251      * @param name Input domain name
0252      * @param dest Destination byte sink; Flush()ed if successful
0253      * @param info Output container of IDNA processing details.
0254      * @param errorCode Standard ICU error code. Its input value must
0255      *                  pass the U_SUCCESS() test, or else the function returns
0256      *                  immediately. Check for U_FAILURE() on output or use with
0257      *                  function chaining. (See User Guide for details.)
0258      * Returns nothing (void): the output goes to dest, processing details to info.
0259      * @stable ICU 4.6
0260      */
0261     virtual void
0262     nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
0263                       IDNAInfo &info, UErrorCode &errorCode) const;
0264 };
0265 
0266 class UTS46;  // forward declaration for the friend declaration inside IDNAInfo
0267 
0268 /**
0269  * Output container for IDNA processing errors.
0270  * The IDNAInfo class is not suitable for subclassing.
0271  * @stable ICU 4.6
0272  */
0273 class U_COMMON_API IDNAInfo : public UMemory {
0274 public:
0275     /**
0276      * Constructor for stack allocation.
0277      * Starts with no errors recorded (errors and labelErrors are 0).
0278      * @stable ICU 4.6
0279      */
0280     IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {}
0281     /**
0282      * Were there IDNA processing errors?
0283      * @return true if there were processing errors, that is, if getErrors() is nonzero
0284      * @stable ICU 4.6
0285      */
0286     UBool hasErrors() const { return errors!=0; }
0287     /**
0288      * Returns a bit set indicating IDNA processing errors.
0289      * See UIDNA_ERROR_... constants in uidna.h.
0290      * @return bit set of processing errors
0291      * @stable ICU 4.6
0292      */
0293     uint32_t getErrors() const { return errors; }
0294     /**
0295      * Returns true if transitional and nontransitional processing produce different results.
0296      * This is the case when the input label or domain name contains
0297      * one or more deviation characters outside a Punycode label (see UTS #46).
0298      * <ul>
0299      * <li>With nontransitional processing, such characters are
0300      * copied to the destination string.
0301      * <li>With transitional processing, such characters are
0302      * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
0303      * </ul>
0304      * @return true if transitional and nontransitional processing produce different results
0305      * @stable ICU 4.6
0306      */
0307     UBool isTransitionalDifferent() const { return isTransDiff; }
0308 
0309 private:
0310     friend class UTS46;  // gives UTS46 access to reset() and the private fields below
0311 
0312     IDNAInfo(const IDNAInfo &other) = delete;  // no copying
0313     IDNAInfo &operator=(const IDNAInfo &other) = delete;  // no copying
0314 
0315     void reset() {  // restores every field to the value the constructor assigns
0316         errors=labelErrors=0;
0317         isTransDiff=false;
0318         isBiDi=false;
0319         isOkBiDi=true;
0320     }
0321 
0322     uint32_t errors, labelErrors;  // errors backs hasErrors()/getErrors(); labelErrors is not read in this header (presumably per-label state for UTS46 -- TODO confirm)
0323     UBool isTransDiff;  // value returned by isTransitionalDifferent()
0324     UBool isBiDi;  // starts false; not read in this header -- NOTE(review): meaning is defined by UTS46's use, confirm there
0325     UBool isOkBiDi;  // starts true; not read in this header -- NOTE(review): meaning is defined by UTS46's use, confirm there
0326 };
0326 
0327 U_NAMESPACE_END
0328 
0329 #endif  // UCONFIG_NO_IDNA
0330 
0331 #endif /* U_SHOW_CPLUSPLUS_API */
0332 
0333 #endif  // __IDNA_H__