michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2010-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * file name: idna.h michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2010mar05 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #ifndef __IDNA_H__ michael@0: #define __IDNA_H__ michael@0: michael@0: /** michael@0: * \file michael@0: * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_IDNA michael@0: michael@0: #include "unicode/bytestream.h" michael@0: #include "unicode/stringpiece.h" michael@0: #include "unicode/uidna.h" michael@0: #include "unicode/unistr.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class IDNAInfo; michael@0: michael@0: /** michael@0: * Abstract base class for IDNA processing. michael@0: * See http://www.unicode.org/reports/tr46/ michael@0: * and http://www.ietf.org/rfc/rfc3490.txt michael@0: * michael@0: * The IDNA class is not intended for public subclassing. michael@0: * michael@0: * This C++ API currently only implements UTS #46. michael@0: * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) michael@0: * and IDNA2003 (functions that do not use a service object). michael@0: * @stable ICU 4.6 michael@0: */ michael@0: class U_COMMON_API IDNA : public UObject { michael@0: public: michael@0: /** michael@0: * Destructor. michael@0: * @stable ICU 4.6 michael@0: */ michael@0: ~IDNA(); michael@0: michael@0: /** michael@0: * Returns an IDNA instance which implements UTS #46. michael@0: * Returns an unmodifiable instance, owned by the caller. 
michael@0: * Cache it for multiple operations, and delete it when done. michael@0: * The instance is thread-safe, that is, it can be used concurrently. michael@0: * michael@0: * UTS #46 defines Unicode IDNA Compatibility Processing, michael@0: * updated to the latest version of Unicode and compatible with both michael@0: * IDNA2003 and IDNA2008. michael@0: * michael@0: * The worker functions use transitional processing, including deviation mappings, michael@0: * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE michael@0: * is used in which case the deviation characters are passed through without change. michael@0: * michael@0: * Disallowed characters are mapped to U+FFFD. michael@0: * michael@0: * For available options see the uidna.h header. michael@0: * Operations with the UTS #46 instance do not support the michael@0: * UIDNA_ALLOW_UNASSIGNED option. michael@0: * michael@0: * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). michael@0: * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than michael@0: * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. michael@0: * michael@0: * @param options Bit set to modify the processing and error checking. michael@0: * See option bit set values in uidna.h. michael@0: * @param errorCode Standard ICU error code. Its input value must michael@0: * pass the U_SUCCESS() test, or else the function returns michael@0: * immediately. Check for U_FAILURE() on output or use with michael@0: * function chaining. (See User Guide for details.) michael@0: * @return the UTS #46 IDNA instance, if successful michael@0: * @stable ICU 4.6 michael@0: */ michael@0: static IDNA * michael@0: createUTS46Instance(uint32_t options, UErrorCode &errorCode); michael@0: michael@0: /** michael@0: * Converts a single domain name label into its ASCII form for DNS lookup. 
michael@0: * If any processing step fails, then info.hasErrors() will be TRUE and michael@0: * the result might not be an ASCII string. michael@0: * The label might be modified according to the types of errors. michael@0: * Labels with severe errors will be left in (or turned into) their Unicode form. michael@0: * michael@0: * The UErrorCode indicates an error only in exceptional cases, michael@0: * such as a U_MEMORY_ALLOCATION_ERROR. michael@0: * michael@0: * @param label Input domain name label michael@0: * @param dest Destination string object michael@0: * @param info Output container of IDNA processing details. michael@0: * @param errorCode Standard ICU error code. Its input value must michael@0: * pass the U_SUCCESS() test, or else the function returns michael@0: * immediately. Check for U_FAILURE() on output or use with michael@0: * function chaining. (See User Guide for details.) michael@0: * @return dest michael@0: * @stable ICU 4.6 michael@0: */ michael@0: virtual UnicodeString & michael@0: labelToASCII(const UnicodeString &label, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const = 0; michael@0: michael@0: /** michael@0: * Converts a single domain name label into its Unicode form for human-readable display. michael@0: * If any processing step fails, then info.hasErrors() will be TRUE. michael@0: * The label might be modified according to the types of errors. michael@0: * michael@0: * The UErrorCode indicates an error only in exceptional cases, michael@0: * such as a U_MEMORY_ALLOCATION_ERROR. michael@0: * michael@0: * @param label Input domain name label michael@0: * @param dest Destination string object michael@0: * @param info Output container of IDNA processing details. michael@0: * @param errorCode Standard ICU error code. Its input value must michael@0: * pass the U_SUCCESS() test, or else the function returns michael@0: * immediately. Check for U_FAILURE() on output or use with michael@0: * function chaining. 
(See User Guide for details.) michael@0: * @return dest michael@0: * @stable ICU 4.6 michael@0: */ michael@0: virtual UnicodeString & michael@0: labelToUnicode(const UnicodeString &label, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const = 0; michael@0: michael@0: /** michael@0: * Converts a whole domain name into its ASCII form for DNS lookup. michael@0: * If any processing step fails, then info.hasErrors() will be TRUE and michael@0: * the result might not be an ASCII string. michael@0: * The domain name might be modified according to the types of errors. michael@0: * Labels with severe errors will be left in (or turned into) their Unicode form. michael@0: * michael@0: * The UErrorCode indicates an error only in exceptional cases, michael@0: * such as a U_MEMORY_ALLOCATION_ERROR. michael@0: * michael@0: * @param name Input domain name michael@0: * @param dest Destination string object michael@0: * @param info Output container of IDNA processing details. michael@0: * @param errorCode Standard ICU error code. Its input value must michael@0: * pass the U_SUCCESS() test, or else the function returns michael@0: * immediately. Check for U_FAILURE() on output or use with michael@0: * function chaining. (See User Guide for details.) michael@0: * @return dest michael@0: * @stable ICU 4.6 michael@0: */ michael@0: virtual UnicodeString & michael@0: nameToASCII(const UnicodeString &name, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const = 0; michael@0: michael@0: /** michael@0: * Converts a whole domain name into its Unicode form for human-readable display. michael@0: * If any processing step fails, then info.hasErrors() will be TRUE. michael@0: * The domain name might be modified according to the types of errors. michael@0: * michael@0: * The UErrorCode indicates an error only in exceptional cases, michael@0: * such as a U_MEMORY_ALLOCATION_ERROR. 
michael@0: * michael@0: * @param name Input domain name michael@0: * @param dest Destination string object michael@0: * @param info Output container of IDNA processing details. michael@0: * @param errorCode Standard ICU error code. Its input value must michael@0: * pass the U_SUCCESS() test, or else the function returns michael@0: * immediately. Check for U_FAILURE() on output or use with michael@0: * function chaining. (See User Guide for details.) michael@0: * @return dest michael@0: * @stable ICU 4.6 michael@0: */ michael@0: virtual UnicodeString & michael@0: nameToUnicode(const UnicodeString &name, UnicodeString &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const = 0; michael@0: michael@0: // UTF-8 versions of the processing methods ---------------------------- *** michael@0: michael@0: /** michael@0: * Converts a single domain name label into its ASCII form for DNS lookup. michael@0: * UTF-8 version of labelToASCII(), same behavior. michael@0: * michael@0: * @param label Input domain name label michael@0: * @param dest Destination byte sink; Flush()ed if successful michael@0: * @param info Output container of IDNA processing details. michael@0: * @param errorCode Standard ICU error code. Its input value must michael@0: * pass the U_SUCCESS() test, or else the function returns michael@0: * immediately. Check for U_FAILURE() on output or use with michael@0: * function chaining. (See User Guide for details.) michael@0: * @stable ICU 4.6 michael@0: */ michael@0: virtual void michael@0: labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: /** michael@0: * Converts a single domain name label into its Unicode form for human-readable display. michael@0: * UTF-8 version of labelToUnicode(), same behavior. 
michael@0: * michael@0: * @param label Input domain name label michael@0: * @param dest Destination byte sink; Flush()ed if successful michael@0: * @param info Output container of IDNA processing details. michael@0: * @param errorCode Standard ICU error code. Its input value must michael@0: * pass the U_SUCCESS() test, or else the function returns michael@0: * immediately. Check for U_FAILURE() on output or use with michael@0: * function chaining. (See User Guide for details.) michael@0: * @stable ICU 4.6 michael@0: */ michael@0: virtual void michael@0: labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: /** michael@0: * Converts a whole domain name into its ASCII form for DNS lookup. michael@0: * UTF-8 version of nameToASCII(), same behavior. michael@0: * michael@0: * @param name Input domain name michael@0: * @param dest Destination byte sink; Flush()ed if successful michael@0: * @param info Output container of IDNA processing details. michael@0: * @param errorCode Standard ICU error code. Its input value must michael@0: * pass the U_SUCCESS() test, or else the function returns michael@0: * immediately. Check for U_FAILURE() on output or use with michael@0: * function chaining. (See User Guide for details.) michael@0: * @stable ICU 4.6 michael@0: */ michael@0: virtual void michael@0: nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: michael@0: /** michael@0: * Converts a whole domain name into its Unicode form for human-readable display. michael@0: * UTF-8 version of nameToUnicode(), same behavior. michael@0: * michael@0: * @param name Input domain name michael@0: * @param dest Destination byte sink; Flush()ed if successful michael@0: * @param info Output container of IDNA processing details. michael@0: * @param errorCode Standard ICU error code. 
Its input value must michael@0: * pass the U_SUCCESS() test, or else the function returns michael@0: * immediately. Check for U_FAILURE() on output or use with michael@0: * function chaining. (See User Guide for details.) michael@0: * @stable ICU 4.6 michael@0: */ michael@0: virtual void michael@0: nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, michael@0: IDNAInfo &info, UErrorCode &errorCode) const; michael@0: }; michael@0: michael@0: class UTS46; michael@0: michael@0: /** michael@0: * Output container for IDNA processing errors. michael@0: * The IDNAInfo class is not suitable for subclassing. michael@0: * @stable ICU 4.6 michael@0: */ michael@0: class U_COMMON_API IDNAInfo : public UMemory { michael@0: public: michael@0: /** michael@0: * Constructor for stack allocation. michael@0: * @stable ICU 4.6 michael@0: */ michael@0: IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} michael@0: /** michael@0: * Were there IDNA processing errors? michael@0: * @return TRUE if there were processing errors michael@0: * @stable ICU 4.6 michael@0: */ michael@0: UBool hasErrors() const { return errors!=0; } michael@0: /** michael@0: * Returns a bit set indicating IDNA processing errors. michael@0: * See UIDNA_ERROR_... constants in uidna.h. michael@0: * @return bit set of processing errors michael@0: * @stable ICU 4.6 michael@0: */ michael@0: uint32_t getErrors() const { return errors; } michael@0: /** michael@0: * Returns TRUE if transitional and nontransitional processing produce different results. michael@0: * This is the case when the input label or domain name contains michael@0: * one or more deviation characters outside a Punycode label (see UTS #46). michael@0: *