Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * Copyright (C) 2010-2012, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ******************************************************************************* |
michael@0 | 6 | * file name: idna.h |
michael@0 | 7 | * encoding: US-ASCII |
michael@0 | 8 | * tab size: 8 (not used) |
michael@0 | 9 | * indentation:4 |
michael@0 | 10 | * |
michael@0 | 11 | * created on: 2010mar05 |
michael@0 | 12 | * created by: Markus W. Scherer |
michael@0 | 13 | */ |
michael@0 | 14 | |
michael@0 | 15 | #ifndef __IDNA_H__ |
michael@0 | 16 | #define __IDNA_H__ |
michael@0 | 17 | |
michael@0 | 18 | /** |
michael@0 | 19 | * \file |
michael@0 | 20 | * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) |
michael@0 | 21 | */ |
michael@0 | 22 | |
michael@0 | 23 | #include "unicode/utypes.h" |
michael@0 | 24 | |
michael@0 | 25 | #if !UCONFIG_NO_IDNA |
michael@0 | 26 | |
michael@0 | 27 | #include "unicode/bytestream.h" |
michael@0 | 28 | #include "unicode/stringpiece.h" |
michael@0 | 29 | #include "unicode/uidna.h" |
michael@0 | 30 | #include "unicode/unistr.h" |
michael@0 | 31 | |
michael@0 | 32 | U_NAMESPACE_BEGIN |
michael@0 | 33 | |
michael@0 | 34 | class IDNAInfo; |
michael@0 | 35 | |
michael@0 | 36 | /** |
michael@0 | 37 | * Abstract base class for IDNA processing. |
michael@0 | 38 | * See http://www.unicode.org/reports/tr46/ |
michael@0 | 39 | * and http://www.ietf.org/rfc/rfc3490.txt |
michael@0 | 40 | * |
michael@0 | 41 | * The IDNA class is not intended for public subclassing. |
michael@0 | 42 | * |
michael@0 | 43 | * This C++ API currently only implements UTS #46. |
michael@0 | 44 | * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) |
michael@0 | 45 | * and IDNA2003 (functions that do not use a service object). |
michael@0 | 46 | * @stable ICU 4.6 |
michael@0 | 47 | */ |
michael@0 | 48 | class U_COMMON_API IDNA : public UObject { |
michael@0 | 49 | public: |
michael@0 | 50 | /** |
michael@0 | 51 | * Destructor. |
michael@0 | 52 | * @stable ICU 4.6 |
michael@0 | 53 | */ |
michael@0 | 54 | ~IDNA(); |
michael@0 | 55 | |
michael@0 | 56 | /** |
michael@0 | 57 | * Returns an IDNA instance which implements UTS #46. |
michael@0 | 58 | * Returns an unmodifiable instance, owned by the caller. |
michael@0 | 59 | * Cache it for multiple operations, and delete it when done. |
michael@0 | 60 | * The instance is thread-safe, that is, it can be used concurrently. |
michael@0 | 61 | * |
michael@0 | 62 | * UTS #46 defines Unicode IDNA Compatibility Processing, |
michael@0 | 63 | * updated to the latest version of Unicode and compatible with both |
michael@0 | 64 | * IDNA2003 and IDNA2008. |
michael@0 | 65 | * |
michael@0 | 66 | * The worker functions use transitional processing, including deviation mappings, |
michael@0 | 67 | * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE |
michael@0 | 68 | * is used in which case the deviation characters are passed through without change. |
michael@0 | 69 | * |
michael@0 | 70 | * Disallowed characters are mapped to U+FFFD. |
michael@0 | 71 | * |
michael@0 | 72 | * For available options see the uidna.h header. |
michael@0 | 73 | * Operations with the UTS #46 instance do not support the |
michael@0 | 74 | * UIDNA_ALLOW_UNASSIGNED option. |
michael@0 | 75 | * |
michael@0 | 76 | * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). |
michael@0 | 77 | * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than |
michael@0 | 78 | * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. |
michael@0 | 79 | * |
michael@0 | 80 | * @param options Bit set to modify the processing and error checking. |
michael@0 | 81 | * See option bit set values in uidna.h. |
michael@0 | 82 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 83 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 84 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 85 | * function chaining. (See User Guide for details.) |
michael@0 | 86 | * @return the UTS #46 IDNA instance, if successful |
michael@0 | 87 | * @stable ICU 4.6 |
michael@0 | 88 | */ |
michael@0 | 89 | static IDNA * |
michael@0 | 90 | createUTS46Instance(uint32_t options, UErrorCode &errorCode); |
michael@0 | 91 | |
michael@0 | 92 | /** |
michael@0 | 93 | * Converts a single domain name label into its ASCII form for DNS lookup. |
michael@0 | 94 | * If any processing step fails, then info.hasErrors() will be TRUE and |
michael@0 | 95 | * the result might not be an ASCII string. |
michael@0 | 96 | * The label might be modified according to the types of errors. |
michael@0 | 97 | * Labels with severe errors will be left in (or turned into) their Unicode form. |
michael@0 | 98 | * |
michael@0 | 99 | * The UErrorCode indicates an error only in exceptional cases, |
michael@0 | 100 | * such as a U_MEMORY_ALLOCATION_ERROR. |
michael@0 | 101 | * |
michael@0 | 102 | * @param label Input domain name label |
michael@0 | 103 | * @param dest Destination string object |
michael@0 | 104 | * @param info Output container of IDNA processing details. |
michael@0 | 105 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 106 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 107 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 108 | * function chaining. (See User Guide for details.) |
michael@0 | 109 | * @return dest |
michael@0 | 110 | * @stable ICU 4.6 |
michael@0 | 111 | */ |
michael@0 | 112 | virtual UnicodeString & |
michael@0 | 113 | labelToASCII(const UnicodeString &label, UnicodeString &dest, |
michael@0 | 114 | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
michael@0 | 115 | |
michael@0 | 116 | /** |
michael@0 | 117 | * Converts a single domain name label into its Unicode form for human-readable display. |
michael@0 | 118 | * If any processing step fails, then info.hasErrors() will be TRUE. |
michael@0 | 119 | * The label might be modified according to the types of errors. |
michael@0 | 120 | * |
michael@0 | 121 | * The UErrorCode indicates an error only in exceptional cases, |
michael@0 | 122 | * such as a U_MEMORY_ALLOCATION_ERROR. |
michael@0 | 123 | * |
michael@0 | 124 | * @param label Input domain name label |
michael@0 | 125 | * @param dest Destination string object |
michael@0 | 126 | * @param info Output container of IDNA processing details. |
michael@0 | 127 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 128 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 129 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 130 | * function chaining. (See User Guide for details.) |
michael@0 | 131 | * @return dest |
michael@0 | 132 | * @stable ICU 4.6 |
michael@0 | 133 | */ |
michael@0 | 134 | virtual UnicodeString & |
michael@0 | 135 | labelToUnicode(const UnicodeString &label, UnicodeString &dest, |
michael@0 | 136 | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
michael@0 | 137 | |
michael@0 | 138 | /** |
michael@0 | 139 | * Converts a whole domain name into its ASCII form for DNS lookup. |
michael@0 | 140 | * If any processing step fails, then info.hasErrors() will be TRUE and |
michael@0 | 141 | * the result might not be an ASCII string. |
michael@0 | 142 | * The domain name might be modified according to the types of errors. |
michael@0 | 143 | * Labels with severe errors will be left in (or turned into) their Unicode form. |
michael@0 | 144 | * |
michael@0 | 145 | * The UErrorCode indicates an error only in exceptional cases, |
michael@0 | 146 | * such as a U_MEMORY_ALLOCATION_ERROR. |
michael@0 | 147 | * |
michael@0 | 148 | * @param name Input domain name |
michael@0 | 149 | * @param dest Destination string object |
michael@0 | 150 | * @param info Output container of IDNA processing details. |
michael@0 | 151 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 152 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 153 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 154 | * function chaining. (See User Guide for details.) |
michael@0 | 155 | * @return dest |
michael@0 | 156 | * @stable ICU 4.6 |
michael@0 | 157 | */ |
michael@0 | 158 | virtual UnicodeString & |
michael@0 | 159 | nameToASCII(const UnicodeString &name, UnicodeString &dest, |
michael@0 | 160 | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
michael@0 | 161 | |
michael@0 | 162 | /** |
michael@0 | 163 | * Converts a whole domain name into its Unicode form for human-readable display. |
michael@0 | 164 | * If any processing step fails, then info.hasErrors() will be TRUE. |
michael@0 | 165 | * The domain name might be modified according to the types of errors. |
michael@0 | 166 | * |
michael@0 | 167 | * The UErrorCode indicates an error only in exceptional cases, |
michael@0 | 168 | * such as a U_MEMORY_ALLOCATION_ERROR. |
michael@0 | 169 | * |
michael@0 | 170 | * @param name Input domain name |
michael@0 | 171 | * @param dest Destination string object |
michael@0 | 172 | * @param info Output container of IDNA processing details. |
michael@0 | 173 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 174 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 175 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 176 | * function chaining. (See User Guide for details.) |
michael@0 | 177 | * @return dest |
michael@0 | 178 | * @stable ICU 4.6 |
michael@0 | 179 | */ |
michael@0 | 180 | virtual UnicodeString & |
michael@0 | 181 | nameToUnicode(const UnicodeString &name, UnicodeString &dest, |
michael@0 | 182 | IDNAInfo &info, UErrorCode &errorCode) const = 0; |
michael@0 | 183 | |
michael@0 | 184 | // UTF-8 versions of the processing methods ---------------------------- *** |
michael@0 | 185 | |
michael@0 | 186 | /** |
michael@0 | 187 | * Converts a single domain name label into its ASCII form for DNS lookup. |
michael@0 | 188 | * UTF-8 version of labelToASCII(), same behavior. |
michael@0 | 189 | * |
michael@0 | 190 | * @param label Input domain name label |
michael@0 | 191 | * @param dest Destination byte sink; Flush()ed if successful |
michael@0 | 192 | * @param info Output container of IDNA processing details. |
michael@0 | 193 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 194 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 195 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 196 | * function chaining. (See User Guide for details.) |
michael@0 | 197 | * @note Returns void; the converted label is delivered through the dest byte sink. |
michael@0 | 198 | * @stable ICU 4.6 |
michael@0 | 199 | */ |
michael@0 | 200 | virtual void |
michael@0 | 201 | labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, |
michael@0 | 202 | IDNAInfo &info, UErrorCode &errorCode) const; |
michael@0 | 203 | |
michael@0 | 204 | /** |
michael@0 | 205 | * Converts a single domain name label into its Unicode form for human-readable display. |
michael@0 | 206 | * UTF-8 version of labelToUnicode(), same behavior. |
michael@0 | 207 | * |
michael@0 | 208 | * @param label Input domain name label |
michael@0 | 209 | * @param dest Destination byte sink; Flush()ed if successful |
michael@0 | 210 | * @param info Output container of IDNA processing details. |
michael@0 | 211 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 212 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 213 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 214 | * function chaining. (See User Guide for details.) |
michael@0 | 215 | * @note Returns void; the converted label is delivered through the dest byte sink. |
michael@0 | 216 | * @stable ICU 4.6 |
michael@0 | 217 | */ |
michael@0 | 218 | virtual void |
michael@0 | 219 | labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, |
michael@0 | 220 | IDNAInfo &info, UErrorCode &errorCode) const; |
michael@0 | 221 | |
michael@0 | 222 | /** |
michael@0 | 223 | * Converts a whole domain name into its ASCII form for DNS lookup. |
michael@0 | 224 | * UTF-8 version of nameToASCII(), same behavior. |
michael@0 | 225 | * |
michael@0 | 226 | * @param name Input domain name |
michael@0 | 227 | * @param dest Destination byte sink; Flush()ed if successful |
michael@0 | 228 | * @param info Output container of IDNA processing details. |
michael@0 | 229 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 230 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 231 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 232 | * function chaining. (See User Guide for details.) |
michael@0 | 233 | * @note Returns void; the converted domain name is delivered through the dest byte sink. |
michael@0 | 234 | * @stable ICU 4.6 |
michael@0 | 235 | */ |
michael@0 | 236 | virtual void |
michael@0 | 237 | nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, |
michael@0 | 238 | IDNAInfo &info, UErrorCode &errorCode) const; |
michael@0 | 239 | |
michael@0 | 240 | /** |
michael@0 | 241 | * Converts a whole domain name into its Unicode form for human-readable display. |
michael@0 | 242 | * UTF-8 version of nameToUnicode(), same behavior. |
michael@0 | 243 | * |
michael@0 | 244 | * @param name Input domain name |
michael@0 | 245 | * @param dest Destination byte sink; Flush()ed if successful |
michael@0 | 246 | * @param info Output container of IDNA processing details. |
michael@0 | 247 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 248 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 249 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 250 | * function chaining. (See User Guide for details.) |
michael@0 | 251 | * @note Returns void; the converted domain name is delivered through the dest byte sink. |
michael@0 | 252 | * @stable ICU 4.6 |
michael@0 | 253 | */ |
michael@0 | 254 | virtual void |
michael@0 | 255 | nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, |
michael@0 | 256 | IDNAInfo &info, UErrorCode &errorCode) const; |
michael@0 | 257 | }; |
michael@0 | 258 | |
michael@0 | 259 | class UTS46; |
michael@0 | 260 | |
michael@0 | 261 | /** |
michael@0 | 262 | * Output container for IDNA processing errors. |
michael@0 | 263 | * The IDNAInfo class is not suitable for subclassing. |
michael@0 | 264 | * @stable ICU 4.6 |
michael@0 | 265 | */ |
michael@0 | 266 | class U_COMMON_API IDNAInfo : public UMemory { |
michael@0 | 267 | public: |
michael@0 | 268 | /** |
michael@0 | 269 | * Constructor for stack allocation; starts with no errors recorded, the same state that reset() restores. |
michael@0 | 270 | * @stable ICU 4.6 |
michael@0 | 271 | */ |
michael@0 | 272 | IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} |
michael@0 | 273 | /** |
michael@0 | 274 | * Were there IDNA processing errors? |
michael@0 | 275 | * @return TRUE if there were processing errors, that is, if getErrors() is nonzero |
michael@0 | 276 | * @stable ICU 4.6 |
michael@0 | 277 | */ |
michael@0 | 278 | UBool hasErrors() const { return errors!=0; } |
michael@0 | 279 | /** |
michael@0 | 280 | * Returns a bit set indicating IDNA processing errors. |
michael@0 | 281 | * See UIDNA_ERROR_... constants in uidna.h. |
michael@0 | 282 | * @return bit set of processing errors; 0 if there were none |
michael@0 | 283 | * @stable ICU 4.6 |
michael@0 | 284 | */ |
michael@0 | 285 | uint32_t getErrors() const { return errors; } |
michael@0 | 286 | /** |
michael@0 | 287 | * Returns TRUE if transitional and nontransitional processing produce different results. |
michael@0 | 288 | * This is the case when the input label or domain name contains |
michael@0 | 289 | * one or more deviation characters outside a Punycode label (see UTS #46). |
michael@0 | 290 | * <ul> |
michael@0 | 291 | * <li>With nontransitional processing, such characters are |
michael@0 | 292 | * copied to the destination string. |
michael@0 | 293 | * <li>With transitional processing, such characters are |
michael@0 | 294 | * mapped (sharp s/sigma) or removed (joiner/nonjoiner). |
michael@0 | 295 | * </ul> |
michael@0 | 296 | * @return TRUE if transitional and nontransitional processing produce different results |
michael@0 | 297 | * @stable ICU 4.6 |
michael@0 | 298 | */ |
michael@0 | 299 | UBool isTransitionalDifferent() const { return isTransDiff; } |
michael@0 | 300 | |
michael@0 | 301 | private: |
michael@0 | 302 | friend class UTS46; /* gives UTS46 access to reset() and the private fields below */ |
michael@0 | 303 | |
michael@0 | 304 | IDNAInfo(const IDNAInfo &other); // private and not defined in this header: no copying |
michael@0 | 305 | IDNAInfo &operator=(const IDNAInfo &other); // private and not defined in this header: no copying |
michael@0 | 306 | |
michael@0 | 307 | void reset() { /* restores the same values the constructor sets */ |
michael@0 | 308 | errors=labelErrors=0; |
michael@0 | 309 | isTransDiff=FALSE; |
michael@0 | 310 | isBiDi=FALSE; |
michael@0 | 311 | isOkBiDi=TRUE; |
michael@0 | 312 | } |
michael@0 | 313 | |
michael@0 | 314 | uint32_t errors, labelErrors; /* errors: bit set returned by getErrors(); labelErrors: presumably per-label error bits -- TODO confirm in UTS46 */ |
michael@0 | 315 | UBool isTransDiff; /* value returned by isTransitionalDifferent() */ |
michael@0 | 316 | UBool isBiDi; |
michael@0 | 317 | UBool isOkBiDi; |
michael@0 | 318 | }; |
michael@0 | 319 | |
michael@0 | 320 | U_NAMESPACE_END |
michael@0 | 321 | |
michael@0 | 322 | #endif // UCONFIG_NO_IDNA |
michael@0 | 323 | #endif // __IDNA_H__ |