1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/idna.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,323 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 2010-2012, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* file name: idna.h 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2010mar05 1.15 +* created by: Markus W. Scherer 1.16 +*/ 1.17 + 1.18 +#ifndef __IDNA_H__ 1.19 +#define __IDNA_H__ 1.20 + 1.21 +/** 1.22 + * \file 1.23 + * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) 1.24 + */ 1.25 + 1.26 +#include "unicode/utypes.h" 1.27 + 1.28 +#if !UCONFIG_NO_IDNA 1.29 + 1.30 +#include "unicode/bytestream.h" 1.31 +#include "unicode/stringpiece.h" 1.32 +#include "unicode/uidna.h" 1.33 +#include "unicode/unistr.h" 1.34 + 1.35 +U_NAMESPACE_BEGIN 1.36 + 1.37 +class IDNAInfo; 1.38 + 1.39 +/** 1.40 + * Abstract base class for IDNA processing. 1.41 + * See http://www.unicode.org/reports/tr46/ 1.42 + * and http://www.ietf.org/rfc/rfc3490.txt 1.43 + * 1.44 + * The IDNA class is not intended for public subclassing. 1.45 + * 1.46 + * This C++ API currently only implements UTS #46. 1.47 + * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) 1.48 + * and IDNA2003 (functions that do not use a service object). 1.49 + * @stable ICU 4.6 1.50 + */ 1.51 +class U_COMMON_API IDNA : public UObject { 1.52 +public: 1.53 + /** 1.54 + * Destructor. 1.55 + * @stable ICU 4.6 1.56 + */ 1.57 + ~IDNA(); 1.58 + 1.59 + /** 1.60 + * Returns an IDNA instance which implements UTS #46. 1.61 + * Returns an unmodifiable instance, owned by the caller. 1.62 + * Cache it for multiple operations, and delete it when done. 1.63 + * The instance is thread-safe, that is, it can be used concurrently. 1.64 + * 1.65 + * UTS #46 defines Unicode IDNA Compatibility Processing, 1.66 + * updated to the latest version of Unicode and compatible with both 1.67 + * IDNA2003 and IDNA2008. 1.68 + * 1.69 + * The worker functions use transitional processing, including deviation mappings, 1.70 + * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE 1.71 + * is used in which case the deviation characters are passed through without change. 1.72 + * 1.73 + * Disallowed characters are mapped to U+FFFD. 1.74 + * 1.75 + * For available options see the uidna.h header. 1.76 + * Operations with the UTS #46 instance do not support the 1.77 + * UIDNA_ALLOW_UNASSIGNED option. 1.78 + * 1.79 + * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 1.80 + * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than 1.81 + * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. 1.82 + * 1.83 + * @param options Bit set to modify the processing and error checking. 1.84 + * See option bit set values in uidna.h. 1.85 + * @param errorCode Standard ICU error code. Its input value must 1.86 + * pass the U_SUCCESS() test, or else the function returns 1.87 + * immediately. Check for U_FAILURE() on output or use with 1.88 + * function chaining. (See User Guide for details.) 1.89 + * @return the UTS #46 IDNA instance, if successful 1.90 + * @stable ICU 4.6 1.91 + */ 1.92 + static IDNA * 1.93 + createUTS46Instance(uint32_t options, UErrorCode &errorCode); 1.94 + 1.95 + /** 1.96 + * Converts a single domain name label into its ASCII form for DNS lookup. 1.97 + * If any processing step fails, then info.hasErrors() will be TRUE and 1.98 + * the result might not be an ASCII string. 1.99 + * The label might be modified according to the types of errors. 1.100 + * Labels with severe errors will be left in (or turned into) their Unicode form. 1.101 + * 1.102 + * The UErrorCode indicates an error only in exceptional cases, 1.103 + * such as a U_MEMORY_ALLOCATION_ERROR. 1.104 + * 1.105 + * @param label Input domain name label 1.106 + * @param dest Destination string object 1.107 + * @param info Output container of IDNA processing details. 1.108 + * @param errorCode Standard ICU error code. Its input value must 1.109 + * pass the U_SUCCESS() test, or else the function returns 1.110 + * immediately. Check for U_FAILURE() on output or use with 1.111 + * function chaining. (See User Guide for details.) 1.112 + * @return dest 1.113 + * @stable ICU 4.6 1.114 + */ 1.115 + virtual UnicodeString & 1.116 + labelToASCII(const UnicodeString &label, UnicodeString &dest, 1.117 + IDNAInfo &info, UErrorCode &errorCode) const = 0; 1.118 + 1.119 + /** 1.120 + * Converts a single domain name label into its Unicode form for human-readable display. 1.121 + * If any processing step fails, then info.hasErrors() will be TRUE. 1.122 + * The label might be modified according to the types of errors. 1.123 + * 1.124 + * The UErrorCode indicates an error only in exceptional cases, 1.125 + * such as a U_MEMORY_ALLOCATION_ERROR. 1.126 + * 1.127 + * @param label Input domain name label 1.128 + * @param dest Destination string object 1.129 + * @param info Output container of IDNA processing details. 1.130 + * @param errorCode Standard ICU error code. Its input value must 1.131 + * pass the U_SUCCESS() test, or else the function returns 1.132 + * immediately. Check for U_FAILURE() on output or use with 1.133 + * function chaining. (See User Guide for details.) 1.134 + * @return dest 1.135 + * @stable ICU 4.6 1.136 + */ 1.137 + virtual UnicodeString & 1.138 + labelToUnicode(const UnicodeString &label, UnicodeString &dest, 1.139 + IDNAInfo &info, UErrorCode &errorCode) const = 0; 1.140 + 1.141 + /** 1.142 + * Converts a whole domain name into its ASCII form for DNS lookup. 1.143 + * If any processing step fails, then info.hasErrors() will be TRUE and 1.144 + * the result might not be an ASCII string. 1.145 + * The domain name might be modified according to the types of errors. 1.146 + * Labels with severe errors will be left in (or turned into) their Unicode form. 1.147 + * 1.148 + * The UErrorCode indicates an error only in exceptional cases, 1.149 + * such as a U_MEMORY_ALLOCATION_ERROR. 1.150 + * 1.151 + * @param name Input domain name 1.152 + * @param dest Destination string object 1.153 + * @param info Output container of IDNA processing details. 1.154 + * @param errorCode Standard ICU error code. Its input value must 1.155 + * pass the U_SUCCESS() test, or else the function returns 1.156 + * immediately. Check for U_FAILURE() on output or use with 1.157 + * function chaining. (See User Guide for details.) 1.158 + * @return dest 1.159 + * @stable ICU 4.6 1.160 + */ 1.161 + virtual UnicodeString & 1.162 + nameToASCII(const UnicodeString &name, UnicodeString &dest, 1.163 + IDNAInfo &info, UErrorCode &errorCode) const = 0; 1.164 + 1.165 + /** 1.166 + * Converts a whole domain name into its Unicode form for human-readable display. 1.167 + * If any processing step fails, then info.hasErrors() will be TRUE. 1.168 + * The domain name might be modified according to the types of errors. 1.169 + * 1.170 + * The UErrorCode indicates an error only in exceptional cases, 1.171 + * such as a U_MEMORY_ALLOCATION_ERROR. 1.172 + * 1.173 + * @param name Input domain name 1.174 + * @param dest Destination string object 1.175 + * @param info Output container of IDNA processing details. 1.176 + * @param errorCode Standard ICU error code. Its input value must 1.177 + * pass the U_SUCCESS() test, or else the function returns 1.178 + * immediately. Check for U_FAILURE() on output or use with 1.179 + * function chaining. (See User Guide for details.) 1.180 + * @return dest 1.181 + * @stable ICU 4.6 1.182 + */ 1.183 + virtual UnicodeString & 1.184 + nameToUnicode(const UnicodeString &name, UnicodeString &dest, 1.185 + IDNAInfo &info, UErrorCode &errorCode) const = 0; 1.186 + 1.187 + // UTF-8 versions of the processing methods ---------------------------- *** 1.188 + 1.189 + /** 1.190 + * Converts a single domain name label into its ASCII form for DNS lookup. 1.191 + * UTF-8 version of labelToASCII(), same behavior. 1.192 + * 1.193 + * @param label Input domain name label 1.194 + * @param dest Destination byte sink; Flush()ed if successful 1.195 + * @param info Output container of IDNA processing details. 1.196 + * @param errorCode Standard ICU error code. Its input value must 1.197 + * pass the U_SUCCESS() test, or else the function returns 1.198 + * immediately. Check for U_FAILURE() on output or use with 1.199 + * function chaining. (See User Guide for details.) 1.200 + * @return dest 1.201 + * @stable ICU 4.6 1.202 + */ 1.203 + virtual void 1.204 + labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, 1.205 + IDNAInfo &info, UErrorCode &errorCode) const; 1.206 + 1.207 + /** 1.208 + * Converts a single domain name label into its Unicode form for human-readable display. 1.209 + * UTF-8 version of labelToUnicode(), same behavior. 1.210 + * 1.211 + * @param label Input domain name label 1.212 + * @param dest Destination byte sink; Flush()ed if successful 1.213 + * @param info Output container of IDNA processing details. 1.214 + * @param errorCode Standard ICU error code. Its input value must 1.215 + * pass the U_SUCCESS() test, or else the function returns 1.216 + * immediately. Check for U_FAILURE() on output or use with 1.217 + * function chaining. (See User Guide for details.) 1.218 + * @return dest 1.219 + * @stable ICU 4.6 1.220 + */ 1.221 + virtual void 1.222 + labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, 1.223 + IDNAInfo &info, UErrorCode &errorCode) const; 1.224 + 1.225 + /** 1.226 + * Converts a whole domain name into its ASCII form for DNS lookup. 1.227 + * UTF-8 version of nameToASCII(), same behavior. 1.228 + * 1.229 + * @param name Input domain name 1.230 + * @param dest Destination byte sink; Flush()ed if successful 1.231 + * @param info Output container of IDNA processing details. 1.232 + * @param errorCode Standard ICU error code. Its input value must 1.233 + * pass the U_SUCCESS() test, or else the function returns 1.234 + * immediately. Check for U_FAILURE() on output or use with 1.235 + * function chaining. (See User Guide for details.) 1.236 + * @return dest 1.237 + * @stable ICU 4.6 1.238 + */ 1.239 + virtual void 1.240 + nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, 1.241 + IDNAInfo &info, UErrorCode &errorCode) const; 1.242 + 1.243 + /** 1.244 + * Converts a whole domain name into its Unicode form for human-readable display. 1.245 + * UTF-8 version of nameToUnicode(), same behavior. 1.246 + * 1.247 + * @param name Input domain name 1.248 + * @param dest Destination byte sink; Flush()ed if successful 1.249 + * @param info Output container of IDNA processing details. 1.250 + * @param errorCode Standard ICU error code. Its input value must 1.251 + * pass the U_SUCCESS() test, or else the function returns 1.252 + * immediately. Check for U_FAILURE() on output or use with 1.253 + * function chaining. (See User Guide for details.) 1.254 + * @return dest 1.255 + * @stable ICU 4.6 1.256 + */ 1.257 + virtual void 1.258 + nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, 1.259 + IDNAInfo &info, UErrorCode &errorCode) const; 1.260 +}; 1.261 + 1.262 +class UTS46; 1.263 + 1.264 +/** 1.265 + * Output container for IDNA processing errors. 1.266 + * The IDNAInfo class is not suitable for subclassing. 1.267 + * @stable ICU 4.6 1.268 + */ 1.269 +class U_COMMON_API IDNAInfo : public UMemory { 1.270 +public: 1.271 + /** 1.272 + * Constructor for stack allocation. 1.273 + * @stable ICU 4.6 1.274 + */ 1.275 + IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} 1.276 + /** 1.277 + * Were there IDNA processing errors? 1.278 + * @return TRUE if there were processing errors 1.279 + * @stable ICU 4.6 1.280 + */ 1.281 + UBool hasErrors() const { return errors!=0; } 1.282 + /** 1.283 + * Returns a bit set indicating IDNA processing errors. 1.284 + * See UIDNA_ERROR_... constants in uidna.h. 1.285 + * @return bit set of processing errors 1.286 + * @stable ICU 4.6 1.287 + */ 1.288 + uint32_t getErrors() const { return errors; } 1.289 + /** 1.290 + * Returns TRUE if transitional and nontransitional processing produce different results. 1.291 + * This is the case when the input label or domain name contains 1.292 + * one or more deviation characters outside a Punycode label (see UTS #46). 1.293 + * <ul> 1.294 + * <li>With nontransitional processing, such characters are 1.295 + * copied to the destination string. 1.296 + * <li>With transitional processing, such characters are 1.297 + * mapped (sharp s/sigma) or removed (joiner/nonjoiner). 1.298 + * </ul> 1.299 + * @return TRUE if transitional and nontransitional processing produce different results 1.300 + * @stable ICU 4.6 1.301 + */ 1.302 + UBool isTransitionalDifferent() const { return isTransDiff; } 1.303 + 1.304 +private: 1.305 + friend class UTS46; 1.306 + 1.307 + IDNAInfo(const IDNAInfo &other); // no copying 1.308 + IDNAInfo &operator=(const IDNAInfo &other); // no copying 1.309 + 1.310 + void reset() { 1.311 + errors=labelErrors=0; 1.312 + isTransDiff=FALSE; 1.313 + isBiDi=FALSE; 1.314 + isOkBiDi=TRUE; 1.315 + } 1.316 + 1.317 + uint32_t errors, labelErrors; 1.318 + UBool isTransDiff; 1.319 + UBool isBiDi; 1.320 + UBool isOkBiDi; 1.321 +}; 1.322 + 1.323 +U_NAMESPACE_END 1.324 + 1.325 +#endif // UCONFIG_NO_IDNA 1.326 +#endif // __IDNA_H__