1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/uidna.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,762 @@ 1.4 +/* 1.5 + ******************************************************************************* 1.6 + * 1.7 + * Copyright (C) 2003-2013, International Business Machines 1.8 + * Corporation and others. All Rights Reserved. 1.9 + * 1.10 + ******************************************************************************* 1.11 + * file name: uidna.h 1.12 + * encoding: US-ASCII 1.13 + * tab size: 8 (not used) 1.14 + * indentation:4 1.15 + * 1.16 + * created on: 2003feb1 1.17 + * created by: Ram Viswanadha 1.18 + */ 1.19 + 1.20 +#ifndef __UIDNA_H__ 1.21 +#define __UIDNA_H__ 1.22 + 1.23 +#include "unicode/utypes.h" 1.24 + 1.25 +#if !UCONFIG_NO_IDNA 1.26 + 1.27 +#include "unicode/localpointer.h" 1.28 +#include "unicode/parseerr.h" 1.29 + 1.30 +/** 1.31 + * \file 1.32 + * \brief C API: Internationalizing Domain Names in Applications (IDNA) 1.33 + * 1.34 + * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h. 1.35 + * 1.36 + * The C API functions which do take a UIDNA * service object pointer 1.37 + * implement UTS #46 and IDNA2008. 1.38 + * The C API functions which do not take a service object pointer 1.39 + * implement IDNA2003. 1.40 + */ 1.41 + 1.42 +/* 1.43 + * IDNA option bit set values. 1.44 + */ 1.45 +enum { 1.46 + /** 1.47 + * Default options value: None of the other options are set. 1.48 + * For use in static worker and factory methods. 1.49 + * @stable ICU 2.6 1.50 + */ 1.51 + UIDNA_DEFAULT=0, 1.52 + /** 1.53 + * Option to allow unassigned code points in domain names and labels. 1.54 + * For use in static worker and factory methods. 1.55 + * <p>This option is ignored by the UTS46 implementation. 1.56 + * (UTS #46 disallows unassigned code points.) 
1.57 + * @stable ICU 2.6 1.58 + */ 1.59 + UIDNA_ALLOW_UNASSIGNED=1, 1.60 + /** 1.61 + * Option to check whether the input conforms to the STD3 ASCII rules, 1.62 + * for example the restriction of labels to LDH characters 1.63 + * (ASCII Letters, Digits and Hyphen-Minus). 1.64 + * For use in static worker and factory methods. 1.65 + * @stable ICU 2.6 1.66 + */ 1.67 + UIDNA_USE_STD3_RULES=2, 1.68 + /** 1.69 + * IDNA option to check for whether the input conforms to the BiDi rules. 1.70 + * For use in static worker and factory methods. 1.71 + * <p>This option is ignored by the IDNA2003 implementation. 1.72 + * (IDNA2003 always performs a BiDi check.) 1.73 + * @stable ICU 4.6 1.74 + */ 1.75 + UIDNA_CHECK_BIDI=4, 1.76 + /** 1.77 + * IDNA option to check for whether the input conforms to the CONTEXTJ rules. 1.78 + * For use in static worker and factory methods. 1.79 + * <p>This option is ignored by the IDNA2003 implementation. 1.80 + * (The CONTEXTJ check is new in IDNA2008.) 1.81 + * @stable ICU 4.6 1.82 + */ 1.83 + UIDNA_CHECK_CONTEXTJ=8, 1.84 + /** 1.85 + * IDNA option for nontransitional processing in ToASCII(). 1.86 + * For use in static worker and factory methods. 1.87 + * <p>By default, ToASCII() uses transitional processing. 1.88 + * <p>This option is ignored by the IDNA2003 implementation. 1.89 + * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) 1.90 + * @stable ICU 4.6 1.91 + */ 1.92 + UIDNA_NONTRANSITIONAL_TO_ASCII=0x10, 1.93 + /** 1.94 + * IDNA option for nontransitional processing in ToUnicode(). 1.95 + * For use in static worker and factory methods. 1.96 + * <p>By default, ToUnicode() uses transitional processing. 1.97 + * <p>This option is ignored by the IDNA2003 implementation. 1.98 + * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) 
1.99 + * @stable ICU 4.6 1.100 + */ 1.101 + UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20, 1.102 + /** 1.103 + * IDNA option to check for whether the input conforms to the CONTEXTO rules. 1.104 + * For use in static worker and factory methods. 1.105 + * <p>This option is ignored by the IDNA2003 implementation. 1.106 + * (The CONTEXTO check is new in IDNA2008.) 1.107 + * <p>This is for use by registries for IDNA2008 conformance. 1.108 + * UTS #46 does not require the CONTEXTO check. 1.109 + * @stable ICU 49 1.110 + */ 1.111 + UIDNA_CHECK_CONTEXTO=0x40 1.112 +}; 1.113 + 1.114 +/** 1.115 + * Opaque C service object type for the new IDNA API. 1.116 + * @stable ICU 4.6 1.117 + */ 1.118 +struct UIDNA; 1.119 +typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */ 1.120 + 1.121 +/** 1.122 + * Returns a UIDNA instance which implements UTS #46. 1.123 + * Returns an unmodifiable instance, owned by the caller. 1.124 + * Cache it for multiple operations, and uidna_close() it when done. 1.125 + * The instance is thread-safe, that is, it can be used concurrently. 1.126 + * 1.127 + * For details about the UTS #46 implementation see the IDNA C++ class in idna.h. 1.128 + * 1.129 + * @param options Bit set to modify the processing and error checking. 1.130 + * See option bit set values in uidna.h. 1.131 + * @param pErrorCode Standard ICU error code. Its input value must 1.132 + * pass the U_SUCCESS() test, or else the function returns 1.133 + * immediately. Check for U_FAILURE() on output or use with 1.134 + * function chaining. (See User Guide for details.) 1.135 + * @return the UTS #46 UIDNA instance, if successful 1.136 + * @stable ICU 4.6 1.137 + */ 1.138 +U_STABLE UIDNA * U_EXPORT2 1.139 +uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode); 1.140 + 1.141 +/** 1.142 + * Closes a UIDNA instance. 
1.143 + * @param idna UIDNA instance to be closed 1.144 + * @stable ICU 4.6 1.145 + */ 1.146 +U_STABLE void U_EXPORT2 1.147 +uidna_close(UIDNA *idna); 1.148 + 1.149 +#if U_SHOW_CPLUSPLUS_API 1.150 + 1.151 +U_NAMESPACE_BEGIN 1.152 + 1.153 +/** 1.154 + * \class LocalUIDNAPointer 1.155 + * "Smart pointer" class, closes a UIDNA via uidna_close(). 1.156 + * For most methods see the LocalPointerBase base class. 1.157 + * 1.158 + * @see LocalPointerBase 1.159 + * @see LocalPointer 1.160 + * @stable ICU 4.6 1.161 + */ 1.162 +U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close); 1.163 + 1.164 +U_NAMESPACE_END 1.165 + 1.166 +#endif 1.167 + 1.168 +/** 1.169 + * Output container for IDNA processing errors. 1.170 + * Initialize with UIDNA_INFO_INITIALIZER: 1.171 + * \code 1.172 + * UIDNAInfo info = UIDNA_INFO_INITIALIZER; 1.173 + * int32_t length = uidna_nameToASCII(..., &info, &errorCode); 1.174 + * if(U_SUCCESS(errorCode) && info.errors!=0) { ... } 1.175 + * \endcode 1.176 + * @stable ICU 4.6 1.177 + */ 1.178 +typedef struct UIDNAInfo { 1.179 + /** sizeof(UIDNAInfo) @stable ICU 4.6 */ 1.180 + int16_t size; 1.181 + /** 1.182 + * Set to TRUE if transitional and nontransitional processing produce different results. 1.183 + * For details see C++ IDNAInfo::isTransitionalDifferent(). 1.184 + * @stable ICU 4.6 1.185 + */ 1.186 + UBool isTransitionalDifferent; 1.187 + UBool reservedB3; /**< Reserved field, do not use. @internal */ 1.188 + /** 1.189 + * Bit set indicating IDNA processing errors. 0 if no errors. 1.190 + * See UIDNA_ERROR_... constants. 1.191 + * @stable ICU 4.6 1.192 + */ 1.193 + uint32_t errors; 1.194 + int32_t reservedI2; /**< Reserved field, do not use. @internal */ 1.195 + int32_t reservedI3; /**< Reserved field, do not use. @internal */ 1.196 +} UIDNAInfo; 1.197 + 1.198 +/** 1.199 + * Static initializer for a UIDNAInfo struct. 
1.200 + * @stable ICU 4.6 1.201 + */ 1.202 +#define UIDNA_INFO_INITIALIZER { \ 1.203 + (int16_t)sizeof(UIDNAInfo), \ 1.204 + FALSE, FALSE, \ 1.205 + 0, 0, 0 } 1.206 + 1.207 +/** 1.208 + * Converts a single domain name label into its ASCII form for DNS lookup. 1.209 + * If any processing step fails, then pInfo->errors will be non-zero and 1.210 + * the result might not be an ASCII string. 1.211 + * The label might be modified according to the types of errors. 1.212 + * Labels with severe errors will be left in (or turned into) their Unicode form. 1.213 + * 1.214 + * The UErrorCode indicates an error only in exceptional cases, 1.215 + * such as a U_MEMORY_ALLOCATION_ERROR. 1.216 + * 1.217 + * @param idna UIDNA instance 1.218 + * @param label Input domain name label 1.219 + * @param length Label length, or -1 if NUL-terminated 1.220 + * @param dest Destination string buffer 1.221 + * @param capacity Destination buffer capacity 1.222 + * @param pInfo Output container of IDNA processing details. 1.223 + * @param pErrorCode Standard ICU error code. Its input value must 1.224 + * pass the U_SUCCESS() test, or else the function returns 1.225 + * immediately. Check for U_FAILURE() on output or use with 1.226 + * function chaining. (See User Guide for details.) 1.227 + * @return destination string length 1.228 + * @stable ICU 4.6 1.229 + */ 1.230 +U_STABLE int32_t U_EXPORT2 1.231 +uidna_labelToASCII(const UIDNA *idna, 1.232 + const UChar *label, int32_t length, 1.233 + UChar *dest, int32_t capacity, 1.234 + UIDNAInfo *pInfo, UErrorCode *pErrorCode); 1.235 + 1.236 +/** 1.237 + * Converts a single domain name label into its Unicode form for human-readable display. 1.238 + * If any processing step fails, then pInfo->errors will be non-zero. 1.239 + * The label might be modified according to the types of errors. 1.240 + * 1.241 + * The UErrorCode indicates an error only in exceptional cases, 1.242 + * such as a U_MEMORY_ALLOCATION_ERROR. 
1.243 + * 1.244 + * @param idna UIDNA instance 1.245 + * @param label Input domain name label 1.246 + * @param length Label length, or -1 if NUL-terminated 1.247 + * @param dest Destination string buffer 1.248 + * @param capacity Destination buffer capacity 1.249 + * @param pInfo Output container of IDNA processing details. 1.250 + * @param pErrorCode Standard ICU error code. Its input value must 1.251 + * pass the U_SUCCESS() test, or else the function returns 1.252 + * immediately. Check for U_FAILURE() on output or use with 1.253 + * function chaining. (See User Guide for details.) 1.254 + * @return destination string length 1.255 + * @stable ICU 4.6 1.256 + */ 1.257 +U_STABLE int32_t U_EXPORT2 1.258 +uidna_labelToUnicode(const UIDNA *idna, 1.259 + const UChar *label, int32_t length, 1.260 + UChar *dest, int32_t capacity, 1.261 + UIDNAInfo *pInfo, UErrorCode *pErrorCode); 1.262 + 1.263 +/** 1.264 + * Converts a whole domain name into its ASCII form for DNS lookup. 1.265 + * If any processing step fails, then pInfo->errors will be non-zero and 1.266 + * the result might not be an ASCII string. 1.267 + * The domain name might be modified according to the types of errors. 1.268 + * Labels with severe errors will be left in (or turned into) their Unicode form. 1.269 + * 1.270 + * The UErrorCode indicates an error only in exceptional cases, 1.271 + * such as a U_MEMORY_ALLOCATION_ERROR. 1.272 + * 1.273 + * @param idna UIDNA instance 1.274 + * @param name Input domain name 1.275 + * @param length Domain name length, or -1 if NUL-terminated 1.276 + * @param dest Destination string buffer 1.277 + * @param capacity Destination buffer capacity 1.278 + * @param pInfo Output container of IDNA processing details. 1.279 + * @param pErrorCode Standard ICU error code. Its input value must 1.280 + * pass the U_SUCCESS() test, or else the function returns 1.281 + * immediately. Check for U_FAILURE() on output or use with 1.282 + * function chaining. (See User Guide for details.) 
1.283 + * @return destination string length 1.284 + * @stable ICU 4.6 1.285 + */ 1.286 +U_STABLE int32_t U_EXPORT2 1.287 +uidna_nameToASCII(const UIDNA *idna, 1.288 + const UChar *name, int32_t length, 1.289 + UChar *dest, int32_t capacity, 1.290 + UIDNAInfo *pInfo, UErrorCode *pErrorCode); 1.291 + 1.292 +/** 1.293 + * Converts a whole domain name into its Unicode form for human-readable display. 1.294 + * If any processing step fails, then pInfo->errors will be non-zero. 1.295 + * The domain name might be modified according to the types of errors. 1.296 + * 1.297 + * The UErrorCode indicates an error only in exceptional cases, 1.298 + * such as a U_MEMORY_ALLOCATION_ERROR. 1.299 + * 1.300 + * @param idna UIDNA instance 1.301 + * @param name Input domain name 1.302 + * @param length Domain name length, or -1 if NUL-terminated 1.303 + * @param dest Destination string buffer 1.304 + * @param capacity Destination buffer capacity 1.305 + * @param pInfo Output container of IDNA processing details. 1.306 + * @param pErrorCode Standard ICU error code. Its input value must 1.307 + * pass the U_SUCCESS() test, or else the function returns 1.308 + * immediately. Check for U_FAILURE() on output or use with 1.309 + * function chaining. (See User Guide for details.) 1.310 + * @return destination string length 1.311 + * @stable ICU 4.6 1.312 + */ 1.313 +U_STABLE int32_t U_EXPORT2 1.314 +uidna_nameToUnicode(const UIDNA *idna, 1.315 + const UChar *name, int32_t length, 1.316 + UChar *dest, int32_t capacity, 1.317 + UIDNAInfo *pInfo, UErrorCode *pErrorCode); 1.318 + 1.319 +/* UTF-8 versions of the processing methods --------------------------------- */ 1.320 + 1.321 +/** 1.322 + * Converts a single domain name label into its ASCII form for DNS lookup. 1.323 + * UTF-8 version of uidna_labelToASCII(), same behavior. 
1.324 + * 1.325 + * @param idna UIDNA instance 1.326 + * @param label Input domain name label 1.327 + * @param length Label length, or -1 if NUL-terminated 1.328 + * @param dest Destination string buffer 1.329 + * @param capacity Destination buffer capacity 1.330 + * @param pInfo Output container of IDNA processing details. 1.331 + * @param pErrorCode Standard ICU error code. Its input value must 1.332 + * pass the U_SUCCESS() test, or else the function returns 1.333 + * immediately. Check for U_FAILURE() on output or use with 1.334 + * function chaining. (See User Guide for details.) 1.335 + * @return destination string length 1.336 + * @stable ICU 4.6 1.337 + */ 1.338 +U_STABLE int32_t U_EXPORT2 1.339 +uidna_labelToASCII_UTF8(const UIDNA *idna, 1.340 + const char *label, int32_t length, 1.341 + char *dest, int32_t capacity, 1.342 + UIDNAInfo *pInfo, UErrorCode *pErrorCode); 1.343 + 1.344 +/** 1.345 + * Converts a single domain name label into its Unicode form for human-readable display. 1.346 + * UTF-8 version of uidna_labelToUnicode(), same behavior. 1.347 + * 1.348 + * @param idna UIDNA instance 1.349 + * @param label Input domain name label 1.350 + * @param length Label length, or -1 if NUL-terminated 1.351 + * @param dest Destination string buffer 1.352 + * @param capacity Destination buffer capacity 1.353 + * @param pInfo Output container of IDNA processing details. 1.354 + * @param pErrorCode Standard ICU error code. Its input value must 1.355 + * pass the U_SUCCESS() test, or else the function returns 1.356 + * immediately. Check for U_FAILURE() on output or use with 1.357 + * function chaining. (See User Guide for details.) 
1.358 + * @return destination string length 1.359 + * @stable ICU 4.6 1.360 + */ 1.361 +U_STABLE int32_t U_EXPORT2 1.362 +uidna_labelToUnicodeUTF8(const UIDNA *idna, 1.363 + const char *label, int32_t length, 1.364 + char *dest, int32_t capacity, 1.365 + UIDNAInfo *pInfo, UErrorCode *pErrorCode); 1.366 + 1.367 +/** 1.368 + * Converts a whole domain name into its ASCII form for DNS lookup. 1.369 + * UTF-8 version of uidna_nameToASCII(), same behavior. 1.370 + * 1.371 + * @param idna UIDNA instance 1.372 + * @param name Input domain name 1.373 + * @param length Domain name length, or -1 if NUL-terminated 1.374 + * @param dest Destination string buffer 1.375 + * @param capacity Destination buffer capacity 1.376 + * @param pInfo Output container of IDNA processing details. 1.377 + * @param pErrorCode Standard ICU error code. Its input value must 1.378 + * pass the U_SUCCESS() test, or else the function returns 1.379 + * immediately. Check for U_FAILURE() on output or use with 1.380 + * function chaining. (See User Guide for details.) 1.381 + * @return destination string length 1.382 + * @stable ICU 4.6 1.383 + */ 1.384 +U_STABLE int32_t U_EXPORT2 1.385 +uidna_nameToASCII_UTF8(const UIDNA *idna, 1.386 + const char *name, int32_t length, 1.387 + char *dest, int32_t capacity, 1.388 + UIDNAInfo *pInfo, UErrorCode *pErrorCode); 1.389 + 1.390 +/** 1.391 + * Converts a whole domain name into its Unicode form for human-readable display. 1.392 + * UTF-8 version of uidna_nameToUnicode(), same behavior. 1.393 + * 1.394 + * @param idna UIDNA instance 1.395 + * @param name Input domain name 1.396 + * @param length Domain name length, or -1 if NUL-terminated 1.397 + * @param dest Destination string buffer 1.398 + * @param capacity Destination buffer capacity 1.399 + * @param pInfo Output container of IDNA processing details. 1.400 + * @param pErrorCode Standard ICU error code. 
Its input value must 1.401 + * pass the U_SUCCESS() test, or else the function returns 1.402 + * immediately. Check for U_FAILURE() on output or use with 1.403 + * function chaining. (See User Guide for details.) 1.404 + * @return destination string length 1.405 + * @stable ICU 4.6 1.406 + */ 1.407 +U_STABLE int32_t U_EXPORT2 1.408 +uidna_nameToUnicodeUTF8(const UIDNA *idna, 1.409 + const char *name, int32_t length, 1.410 + char *dest, int32_t capacity, 1.411 + UIDNAInfo *pInfo, UErrorCode *pErrorCode); 1.412 + 1.413 +/* 1.414 + * IDNA error bit set values. 1.415 + * When a domain name or label fails a processing step or does not meet the 1.416 + * validity criteria, then one or more of these error bits are set. 1.417 + */ 1.418 +enum { 1.419 + /** 1.420 + * A non-final domain name label (or the whole domain name) is empty. 1.421 + * @stable ICU 4.6 1.422 + */ 1.423 + UIDNA_ERROR_EMPTY_LABEL=1, 1.424 + /** 1.425 + * A domain name label is longer than 63 bytes. 1.426 + * (See STD13/RFC1034 3.1. Name space specifications and terminology.) 1.427 + * This is only checked in ToASCII operations, and only if the output label is all-ASCII. 1.428 + * @stable ICU 4.6 1.429 + */ 1.430 + UIDNA_ERROR_LABEL_TOO_LONG=2, 1.431 + /** 1.432 + * A domain name is longer than 255 bytes in its storage form. 1.433 + * (See STD13/RFC1034 3.1. Name space specifications and terminology.) 1.434 + * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII. 1.435 + * @stable ICU 4.6 1.436 + */ 1.437 + UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4, 1.438 + /** 1.439 + * A label starts with a hyphen-minus ('-'). 1.440 + * @stable ICU 4.6 1.441 + */ 1.442 + UIDNA_ERROR_LEADING_HYPHEN=8, 1.443 + /** 1.444 + * A label ends with a hyphen-minus ('-'). 1.445 + * @stable ICU 4.6 1.446 + */ 1.447 + UIDNA_ERROR_TRAILING_HYPHEN=0x10, 1.448 + /** 1.449 + * A label contains hyphen-minus ('-') in the third and fourth positions. 
1.450 + * @stable ICU 4.6 1.451 + */ 1.452 + UIDNA_ERROR_HYPHEN_3_4=0x20, 1.453 + /** 1.454 + * A label starts with a combining mark. 1.455 + * @stable ICU 4.6 1.456 + */ 1.457 + UIDNA_ERROR_LEADING_COMBINING_MARK=0x40, 1.458 + /** 1.459 + * A label or domain name contains disallowed characters. 1.460 + * @stable ICU 4.6 1.461 + */ 1.462 + UIDNA_ERROR_DISALLOWED=0x80, 1.463 + /** 1.464 + * A label starts with "xn--" but does not contain valid Punycode. 1.465 + * That is, an xn-- label failed Punycode decoding. 1.466 + * @stable ICU 4.6 1.467 + */ 1.468 + UIDNA_ERROR_PUNYCODE=0x100, 1.469 + /** 1.470 + * A label contains a dot=full stop. 1.471 + * This can occur in an input string for a single-label function. 1.472 + * @stable ICU 4.6 1.473 + */ 1.474 + UIDNA_ERROR_LABEL_HAS_DOT=0x200, 1.475 + /** 1.476 + * An ACE label does not contain a valid label string. 1.477 + * The label was successfully ACE (Punycode) decoded but the resulting 1.478 + * string had severe validation errors. For example, 1.479 + * it might contain characters that are not allowed in ACE labels, 1.480 + * or it might not be normalized. 1.481 + * @stable ICU 4.6 1.482 + */ 1.483 + UIDNA_ERROR_INVALID_ACE_LABEL=0x400, 1.484 + /** 1.485 + * A label does not meet the IDNA BiDi requirements (for right-to-left characters). 1.486 + * @stable ICU 4.6 1.487 + */ 1.488 + UIDNA_ERROR_BIDI=0x800, 1.489 + /** 1.490 + * A label does not meet the IDNA CONTEXTJ requirements. 1.491 + * @stable ICU 4.6 1.492 + */ 1.493 + UIDNA_ERROR_CONTEXTJ=0x1000, 1.494 + /** 1.495 + * A label does not meet the IDNA CONTEXTO requirements for punctuation characters. 1.496 + * Some punctuation characters "Would otherwise have been DISALLOWED" 1.497 + * but are allowed in certain contexts. (RFC 5892) 1.498 + * @stable ICU 49 1.499 + */ 1.500 + UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000, 1.501 + /** 1.502 + * A label does not meet the IDNA CONTEXTO requirements for digits. 
1.503 + * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx). 1.504 + * @stable ICU 49 1.505 + */ 1.506 + UIDNA_ERROR_CONTEXTO_DIGITS=0x4000 1.507 +}; 1.508 + 1.509 +/* IDNA2003 API ------------------------------------------------------------- */ 1.510 + 1.511 +/** 1.512 + * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. 1.513 + * This operation is done on <b>single labels</b> before sending it to something that expects 1.514 + * ASCII names. A label is an individual part of a domain name. Labels are usually 1.515 + * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com". 1.516 + * 1.517 + * IDNA2003 API Overview: 1.518 + * 1.519 + * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC 1.520 + * (http://www.ietf.org/rfc/rfc3490.txt). 1.521 + * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels 1.522 + * containing non-ASCII code points are processed by the 1.523 + * ToASCII operation before passing it to resolver libraries. Domain names 1.524 + * that are obtained from resolver libraries are processed by the 1.525 + * ToUnicode operation before displaying the domain name to the user. 1.526 + * IDNA requires that implementations process input strings with Nameprep 1.527 + * (http://www.ietf.org/rfc/rfc3491.txt), 1.528 + * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt), 1.529 + * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt). 1.530 + * Implementations of IDNA MUST fully implement Nameprep and Punycode; 1.531 + * neither Nameprep nor Punycode are optional. 1.532 + * The input and output of ToASCII and ToUnicode operations are Unicode 1.533 + * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations 1.534 + * multiple times to an input string will yield the same result as applying the operation 1.535 + * once. 
1.536 + * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 1.537 + * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string). 1.538 + * 1.539 + * @param src Input UChar array containing label in Unicode. 1.540 + * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 1.541 + * @param dest Output UChar array with ASCII (ACE encoded) label. 1.542 + * @param destCapacity Size of dest. 1.543 + * @param options A bit set of options: 1.544 + * 1.545 + * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points 1.546 + * and do not use STD3 ASCII rules 1.547 + * If unassigned code points are found the operation fails with 1.548 + * U_IDNA_UNASSIGNED_ERROR error code. 1.549 + * 1.550 + * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 1.551 + * If this option is set, unassigned code points in the input 1.552 + * are treated as normal Unicode code points. 1.553 + * 1.554 + * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 1.555 + * If this option is set and the input does not satisfy STD3 rules, 1.556 + * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR 1.557 + * 1.558 + * @param parseError Pointer to UParseError struct to receive information on position 1.559 + * of error if an error is encountered. Can be NULL. 1.560 + * @param status ICU in/out error code parameter. 1.561 + * U_INVALID_CHAR_FOUND if src contains 1.562 + * unmatched single surrogates. 1.563 + * U_INDEX_OUTOFBOUNDS_ERROR if src contains 1.564 + * too many code points. 1.565 + * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 1.566 + * @return The length of the result string, if successful - or in case of a buffer overflow, 1.567 + * in which case it will be greater than destCapacity.
1.568 + * @stable ICU 2.6 1.569 + */ 1.570 +U_STABLE int32_t U_EXPORT2 1.571 +uidna_toASCII(const UChar* src, int32_t srcLength, 1.572 + UChar* dest, int32_t destCapacity, 1.573 + int32_t options, 1.574 + UParseError* parseError, 1.575 + UErrorCode* status); 1.576 + 1.577 + 1.578 +/** 1.579 + * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC. 1.580 + * This operation is done on <b>single labels</b> before sending it to something that expects 1.581 + * Unicode names. A label is an individual part of a domain name. Labels are usually 1.582 + * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com". 1.583 + * 1.584 + * @param src Input UChar array containing ASCII (ACE encoded) label. 1.585 + * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 1.586 + * @param dest Output Converted UChar array containing Unicode equivalent of label. 1.587 + * @param destCapacity Size of dest. 1.588 + * @param options A bit set of options: 1.589 + * 1.590 + * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points 1.591 + * and do not use STD3 ASCII rules 1.592 + * If unassigned code points are found the operation fails with 1.593 + * U_IDNA_UNASSIGNED_ERROR error code. 1.594 + * 1.595 + * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 1.596 + * If this option is set, unassigned code points in the input 1.597 + * are treated as normal Unicode code points.
<b> Note: </b> This option is 1.598 + * required on toUnicode operation because the RFC mandates 1.599 + * verification of decoded ACE input by applying toASCII and comparing 1.600 + * its output with source 1.601 + * 1.602 + * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 1.603 + * If this option is set and the input does not satisfy STD3 rules, 1.604 + * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR 1.605 + * 1.606 + * @param parseError Pointer to UParseError struct to receive information on position 1.607 + * of error if an error is encountered. Can be NULL. 1.608 + * @param status ICU in/out error code parameter. 1.609 + * U_INVALID_CHAR_FOUND if src contains 1.610 + * unmatched single surrogates. 1.611 + * U_INDEX_OUTOFBOUNDS_ERROR if src contains 1.612 + * too many code points. 1.613 + * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 1.614 + * @return The length of the result string, if successful - or in case of a buffer overflow, 1.615 + * in which case it will be greater than destCapacity. 1.616 + * @stable ICU 2.6 1.617 + */ 1.618 +U_STABLE int32_t U_EXPORT2 1.619 +uidna_toUnicode(const UChar* src, int32_t srcLength, 1.620 + UChar* dest, int32_t destCapacity, 1.621 + int32_t options, 1.622 + UParseError* parseError, 1.623 + UErrorCode* status); 1.624 + 1.625 + 1.626 +/** 1.627 + * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. 1.628 + * This operation is done on complete domain names, e.g: "www.example.com". 1.629 + * It is important to note that this operation can fail. If it fails, then the input 1.630 + * domain name cannot be used as an Internationalized Domain Name and the application 1.631 + * should have methods defined to deal with the failure. 
1.632 + * 1.633 + * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 1.634 + * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 1.635 + * and then convert. This function does not offer that level of granularity. The options once 1.636 + * set will apply to all labels in the domain name 1.637 + * 1.638 + * @param src Input UChar array containing IDN in Unicode. 1.639 + * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 1.640 + * @param dest Output UChar array with ASCII (ACE encoded) IDN. 1.641 + * @param destCapacity Size of dest. 1.642 + * @param options A bit set of options: 1.643 + * 1.644 + * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points 1.645 + * and do not use STD3 ASCII rules 1.646 + * If unassigned code points are found the operation fails with 1.647 + * U_IDNA_UNASSIGNED_ERROR error code. 1.648 + * 1.649 + * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 1.650 + * If this option is set, unassigned code points in the input 1.651 + * are treated as normal Unicode code points. 1.652 + * 1.653 + * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 1.654 + * If this option is set and the input does not satisfy STD3 rules, 1.655 + * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR 1.656 + * 1.657 + * @param parseError Pointer to UParseError struct to receive information on position 1.658 + * of error if an error is encountered. Can be NULL. 1.659 + * @param status ICU in/out error code parameter. 1.660 + * U_INVALID_CHAR_FOUND if src contains 1.661 + * unmatched single surrogates. 1.662 + * U_INDEX_OUTOFBOUNDS_ERROR if src contains 1.663 + * too many code points.
1.664 + * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 1.665 + * @return The length of the result string, if successful - or in case of a buffer overflow, 1.666 + * in which case it will be greater than destCapacity. 1.667 + * @stable ICU 2.6 1.668 + */ 1.669 +U_STABLE int32_t U_EXPORT2 1.670 +uidna_IDNToASCII( const UChar* src, int32_t srcLength, 1.671 + UChar* dest, int32_t destCapacity, 1.672 + int32_t options, 1.673 + UParseError* parseError, 1.674 + UErrorCode* status); 1.675 + 1.676 +/** 1.677 + * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. 1.678 + * This operation is done on complete domain names, e.g: "www.example.com". 1.679 + * 1.680 + * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 1.681 + * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 1.682 + * and then convert. This function does not offer that level of granularity. The options once 1.683 + * set will apply to all labels in the domain name 1.684 + * 1.685 + * @param src Input UChar array containing IDN in ASCII (ACE encoded) form. 1.686 + * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 1.687 + * @param dest Output UChar array containing Unicode equivalent of source IDN. 1.688 + * @param destCapacity Size of dest. 1.689 + * @param options A bit set of options: 1.690 + * 1.691 + * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points 1.692 + * and do not use STD3 ASCII rules 1.693 + * If unassigned code points are found the operation fails with 1.694 + * U_IDNA_UNASSIGNED_ERROR error code. 1.695 + * 1.696 + * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 1.697 + * If this option is set, unassigned code points in the input 1.698 + * are treated as normal Unicode code points.
1.699 + * 1.700 + * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 1.701 + * If this option is set and the input does not satisfy STD3 rules, 1.702 + * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR 1.703 + * 1.704 + * @param parseError Pointer to UParseError struct to receive information on position 1.705 + * of error if an error is encountered. Can be NULL. 1.706 + * @param status ICU in/out error code parameter. 1.707 + * U_INVALID_CHAR_FOUND if src contains 1.708 + * unmatched single surrogates. 1.709 + * U_INDEX_OUTOFBOUNDS_ERROR if src contains 1.710 + * too many code points. 1.711 + * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 1.712 + * @return The length of the result string, if successful - or in case of a buffer overflow, 1.713 + * in which case it will be greater than destCapacity. 1.714 + * @stable ICU 2.6 1.715 + */ 1.716 +U_STABLE int32_t U_EXPORT2 1.717 +uidna_IDNToUnicode( const UChar* src, int32_t srcLength, 1.718 + UChar* dest, int32_t destCapacity, 1.719 + int32_t options, 1.720 + UParseError* parseError, 1.721 + UErrorCode* status); 1.722 + 1.723 +/** 1.724 + * IDNA2003: Compare two IDN strings for equivalence. 1.725 + * This function splits the domain names into labels and compares them. 1.726 + * According to IDN RFC, whenever two labels are compared, they are 1.727 + * considered equal if and only if their ASCII forms (obtained by 1.728 + * applying toASCII) match using a case-insensitive ASCII comparison. 1.729 + * Two domain names are considered a match if and only if all labels 1.730 + * match regardless of whether label separators match. 1.731 + * 1.732 + * @param s1 First source string. 1.733 + * @param length1 Length of first source string, or -1 if NUL-terminated. 1.734 + * 1.735 + * @param s2 Second source string. 1.736 + * @param length2 Length of second source string, or -1 if NUL-terminated.
1.737 + * @param options A bit set of options: 1.738 + * 1.739 + * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points 1.740 + * and do not use STD3 ASCII rules 1.741 + * If unassigned code points are found the operation fails with 1.742 + * U_IDNA_UNASSIGNED_ERROR error code. 1.743 + * 1.744 + * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 1.745 + * If this option is set, unassigned code points in the input 1.746 + * are treated as normal Unicode code points. 1.747 + * 1.748 + * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 1.749 + * If this option is set and the input does not satisfy STD3 rules, 1.750 + * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR 1.751 + * 1.752 + * @param status ICU error code in/out parameter. 1.753 + * Must fulfill U_SUCCESS before the function call. 1.754 + * @return <0 or 0 or >0 as usual for string comparisons 1.755 + * @stable ICU 2.6 1.756 + */ 1.757 +U_STABLE int32_t U_EXPORT2 1.758 +uidna_compare( const UChar *s1, int32_t length1, 1.759 + const UChar *s2, int32_t length2, 1.760 + int32_t options, 1.761 + UErrorCode* status); 1.762 + 1.763 +#endif /* #if !UCONFIG_NO_IDNA */ 1.764 + 1.765 +#endif