intl/icu/source/common/unicode/uidna.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2003-2013, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: uidna.h
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2003feb1
michael@0 14 * created by: Ram Viswanadha
michael@0 15 */
michael@0 16
michael@0 17 #ifndef __UIDNA_H__
michael@0 18 #define __UIDNA_H__
michael@0 19
michael@0 20 #include "unicode/utypes.h"
michael@0 21
michael@0 22 #if !UCONFIG_NO_IDNA
michael@0 23
michael@0 24 #include "unicode/localpointer.h"
michael@0 25 #include "unicode/parseerr.h"
michael@0 26
michael@0 27 /**
michael@0 28 * \file
michael@0 29 * \brief C API: Internationalizing Domain Names in Applications (IDNA)
michael@0 30 *
michael@0 31 * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
michael@0 32 *
michael@0 33 * The C API functions which do take a UIDNA * service object pointer
michael@0 34 * implement UTS #46 and IDNA2008.
michael@0 35 * The C API functions which do not take a service object pointer
michael@0 36 * implement IDNA2003.
michael@0 37 */
michael@0 38
michael@0 39 /*
michael@0 40 * IDNA option bit set values. Each nonzero option is a distinct bit, so options can be combined.
michael@0 41 */
michael@0 42 enum {
michael@0 43 /**
michael@0 44 * Default options value: None of the other options are set.
michael@0 45 * For use in static worker and factory methods.
michael@0 46 * @stable ICU 2.6
michael@0 47 */
michael@0 48 UIDNA_DEFAULT=0,
michael@0 49 /**
michael@0 50 * Option to allow unassigned code points in domain names and labels.
michael@0 51 * For use in static worker and factory methods.
michael@0 52 * <p>This option is ignored by the UTS46 implementation.
michael@0 53 * (UTS #46 disallows unassigned code points.)
michael@0 54 * @stable ICU 2.6
michael@0 55 */
michael@0 56 UIDNA_ALLOW_UNASSIGNED=1,
michael@0 57 /**
michael@0 58 * Option to check whether the input conforms to the STD3 ASCII rules,
michael@0 59 * for example the restriction of labels to LDH characters
michael@0 60 * (ASCII Letters, Digits and Hyphen-Minus).
michael@0 61 * For use in static worker and factory methods.
michael@0 62 * @stable ICU 2.6
michael@0 63 */
michael@0 64 UIDNA_USE_STD3_RULES=2,
michael@0 65 /**
michael@0 66 * IDNA option to check whether the input conforms to the BiDi rules.
michael@0 67 * For use in static worker and factory methods.
michael@0 68 * <p>This option is ignored by the IDNA2003 implementation.
michael@0 69 * (IDNA2003 always performs a BiDi check.)
michael@0 70 * @stable ICU 4.6
michael@0 71 */
michael@0 72 UIDNA_CHECK_BIDI=4,
michael@0 73 /**
michael@0 74 * IDNA option to check whether the input conforms to the CONTEXTJ rules.
michael@0 75 * For use in static worker and factory methods.
michael@0 76 * <p>This option is ignored by the IDNA2003 implementation.
michael@0 77 * (The CONTEXTJ check is new in IDNA2008.)
michael@0 78 * @stable ICU 4.6
michael@0 79 */
michael@0 80 UIDNA_CHECK_CONTEXTJ=8,
michael@0 81 /**
michael@0 82 * IDNA option for nontransitional processing in ToASCII().
michael@0 83 * For use in static worker and factory methods.
michael@0 84 * <p>By default, ToASCII() uses transitional processing.
michael@0 85 * <p>This option is ignored by the IDNA2003 implementation.
michael@0 86 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
michael@0 87 * @stable ICU 4.6
michael@0 88 */
michael@0 89 UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
michael@0 90 /**
michael@0 91 * IDNA option for nontransitional processing in ToUnicode().
michael@0 92 * For use in static worker and factory methods.
michael@0 93 * <p>By default, ToUnicode() uses transitional processing.
michael@0 94 * <p>This option is ignored by the IDNA2003 implementation.
michael@0 95 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
michael@0 96 * @stable ICU 4.6
michael@0 97 */
michael@0 98 UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
michael@0 99 /**
michael@0 100 * IDNA option to check whether the input conforms to the CONTEXTO rules.
michael@0 101 * For use in static worker and factory methods.
michael@0 102 * <p>This option is ignored by the IDNA2003 implementation.
michael@0 103 * (The CONTEXTO check is new in IDNA2008.)
michael@0 104 * <p>This is for use by registries for IDNA2008 conformance.
michael@0 105 * UTS #46 does not require the CONTEXTO check.
michael@0 106 * @stable ICU 49
michael@0 107 */
michael@0 108 UIDNA_CHECK_CONTEXTO=0x40
michael@0 109 };
michael@0 110
michael@0 111 /**
michael@0 112 * Opaque C service object type for the new IDNA API; open with uidna_openUTS46(), release with uidna_close().
michael@0 113 * @stable ICU 4.6
michael@0 114 */
michael@0 115 struct UIDNA;
michael@0 116 typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
michael@0 117
michael@0 118 /**
michael@0 119 * Returns a UIDNA instance which implements UTS #46.
michael@0 120 * Returns an unmodifiable instance, owned by the caller.
michael@0 121 * Cache it for multiple operations, and uidna_close() it when done.
michael@0 122 * The instance is thread-safe, that is, it can be used concurrently.
michael@0 123 *
michael@0 124 * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
michael@0 125 *
michael@0 126 * @param options Bit set to modify the processing and error checking.
michael@0 127 * See the UIDNA_... option bit set values (e.g. UIDNA_CHECK_BIDI) in uidna.h.
michael@0 128 * @param pErrorCode Standard ICU error code. Its input value must
michael@0 129 * pass the U_SUCCESS() test, or else the function returns
michael@0 130 * immediately. Check for U_FAILURE() on output or use with
michael@0 131 * function chaining. (See User Guide for details.)
michael@0 132 * @return the UTS #46 UIDNA instance, if successful; the caller owns it and closes it with uidna_close()
michael@0 133 * @stable ICU 4.6
michael@0 134 */
michael@0 135 U_STABLE UIDNA * U_EXPORT2
michael@0 136 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
michael@0 137
michael@0 138 /**
michael@0 139 * Closes a UIDNA instance that was obtained from uidna_openUTS46().
michael@0 140 * @param idna UIDNA instance to be closed
michael@0 141 * @stable ICU 4.6
michael@0 142 */
michael@0 143 U_STABLE void U_EXPORT2
michael@0 144 uidna_close(UIDNA *idna);
michael@0 145
michael@0 146 #if U_SHOW_CPLUSPLUS_API
michael@0 147
michael@0 148 U_NAMESPACE_BEGIN
michael@0 149
michael@0 150 /**
michael@0 151 * \class LocalUIDNAPointer
michael@0 152 * "Smart pointer" class, closes a UIDNA via uidna_close().
michael@0 153 * For most methods see the LocalPointerBase base class. Only declared when U_SHOW_CPLUSPLUS_API is enabled.
michael@0 154 *
michael@0 155 * @see LocalPointerBase
michael@0 156 * @see LocalPointer
michael@0 157 * @stable ICU 4.6
michael@0 158 */
michael@0 159 U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
michael@0 160
michael@0 161 U_NAMESPACE_END
michael@0 162
michael@0 163 #endif
michael@0 164
michael@0 165 /**
michael@0 166 * Output container for IDNA processing errors.
michael@0 167 * Initialize with UIDNA_INFO_INITIALIZER:
michael@0 168 * \code
michael@0 169 * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
michael@0 170 * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
michael@0 171 * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
michael@0 172 * \endcode
michael@0 173 * @stable ICU 4.6
michael@0 174 */
michael@0 175 typedef struct UIDNAInfo {
michael@0 176 /** sizeof(UIDNAInfo), as set by UIDNA_INFO_INITIALIZER. @stable ICU 4.6 */
michael@0 177 int16_t size;
michael@0 178 /**
michael@0 179 * Set to TRUE if transitional and nontransitional processing produce different results.
michael@0 180 * For details see C++ IDNAInfo::isTransitionalDifferent().
michael@0 181 * @stable ICU 4.6
michael@0 182 */
michael@0 183 UBool isTransitionalDifferent;
michael@0 184 UBool reservedB3; /**< Reserved field, do not use. @internal */
michael@0 185 /**
michael@0 186 * Bit set indicating IDNA processing errors. 0 if no errors.
michael@0 187 * See UIDNA_ERROR_... constants.
michael@0 188 * @stable ICU 4.6
michael@0 189 */
michael@0 190 uint32_t errors;
michael@0 191 int32_t reservedI2; /**< Reserved field, do not use. @internal */
michael@0 192 int32_t reservedI3; /**< Reserved field, do not use. @internal */
michael@0 193 } UIDNAInfo;
michael@0 194
michael@0 195 /**
michael@0 196 * Static initializer for a UIDNAInfo struct: sets size to sizeof(UIDNAInfo), both UBool fields to FALSE, and errors and the reserved integers to 0.
michael@0 197 * @stable ICU 4.6
michael@0 198 */
michael@0 199 #define UIDNA_INFO_INITIALIZER { \
michael@0 200 (int16_t)sizeof(UIDNAInfo), \
michael@0 201 FALSE, FALSE, \
michael@0 202 0, 0, 0 }
michael@0 203
michael@0 204 /**
michael@0 205 * Converts a single domain name label into its ASCII form for DNS lookup.
michael@0 206 * If any processing step fails, then pInfo->errors will be non-zero and
michael@0 207 * the result might not be an ASCII string.
michael@0 208 * The label might be modified according to the types of errors.
michael@0 209 * Labels with severe errors will be left in (or turned into) their Unicode form.
michael@0 210 *
michael@0 211 * The UErrorCode indicates an error only in exceptional cases,
michael@0 212 * such as a U_MEMORY_ALLOCATION_ERROR.
michael@0 213 *
michael@0 214 * @param idna UIDNA instance
michael@0 215 * @param label Input domain name label
michael@0 216 * @param length Label length, or -1 if NUL-terminated
michael@0 217 * @param dest Destination string buffer
michael@0 218 * @param capacity Destination buffer capacity
michael@0 219 * @param pInfo Output container of IDNA processing details (a UIDNAInfo; errors are UIDNA_ERROR_... bits).
michael@0 220 * @param pErrorCode Standard ICU error code. Its input value must
michael@0 221 * pass the U_SUCCESS() test, or else the function returns
michael@0 222 * immediately. Check for U_FAILURE() on output or use with
michael@0 223 * function chaining. (See User Guide for details.)
michael@0 224 * @return destination string length
michael@0 225 * @stable ICU 4.6
michael@0 226 */
michael@0 227 U_STABLE int32_t U_EXPORT2
michael@0 228 uidna_labelToASCII(const UIDNA *idna,
michael@0 229 const UChar *label, int32_t length,
michael@0 230 UChar *dest, int32_t capacity,
michael@0 231 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
michael@0 232
michael@0 233 /**
michael@0 234 * Converts a single domain name label into its Unicode form for human-readable display.
michael@0 235 * If any processing step fails, then pInfo->errors will be non-zero.
michael@0 236 * The label might be modified according to the types of errors.
michael@0 237 *
michael@0 238 * The UErrorCode indicates an error only in exceptional cases,
michael@0 239 * such as a U_MEMORY_ALLOCATION_ERROR.
michael@0 240 *
michael@0 241 * @param idna UIDNA instance
michael@0 242 * @param label Input domain name label
michael@0 243 * @param length Label length, or -1 if NUL-terminated
michael@0 244 * @param dest Destination string buffer
michael@0 245 * @param capacity Destination buffer capacity
michael@0 246 * @param pInfo Output container of IDNA processing details (a UIDNAInfo; errors are UIDNA_ERROR_... bits).
michael@0 247 * @param pErrorCode Standard ICU error code. Its input value must
michael@0 248 * pass the U_SUCCESS() test, or else the function returns
michael@0 249 * immediately. Check for U_FAILURE() on output or use with
michael@0 250 * function chaining. (See User Guide for details.)
michael@0 251 * @return destination string length
michael@0 252 * @stable ICU 4.6
michael@0 253 */
michael@0 254 U_STABLE int32_t U_EXPORT2
michael@0 255 uidna_labelToUnicode(const UIDNA *idna,
michael@0 256 const UChar *label, int32_t length,
michael@0 257 UChar *dest, int32_t capacity,
michael@0 258 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
michael@0 259
michael@0 260 /**
michael@0 261 * Converts a whole domain name into its ASCII form for DNS lookup.
michael@0 262 * If any processing step fails, then pInfo->errors will be non-zero and
michael@0 263 * the result might not be an ASCII string.
michael@0 264 * The domain name might be modified according to the types of errors.
michael@0 265 * Labels with severe errors will be left in (or turned into) their Unicode form.
michael@0 266 *
michael@0 267 * The UErrorCode indicates an error only in exceptional cases,
michael@0 268 * such as a U_MEMORY_ALLOCATION_ERROR.
michael@0 269 *
michael@0 270 * @param idna UIDNA instance
michael@0 271 * @param name Input domain name
michael@0 272 * @param length Domain name length, or -1 if NUL-terminated
michael@0 273 * @param dest Destination string buffer
michael@0 274 * @param capacity Destination buffer capacity
michael@0 275 * @param pInfo Output container of IDNA processing details (a UIDNAInfo; errors are UIDNA_ERROR_... bits).
michael@0 276 * @param pErrorCode Standard ICU error code. Its input value must
michael@0 277 * pass the U_SUCCESS() test, or else the function returns
michael@0 278 * immediately. Check for U_FAILURE() on output or use with
michael@0 279 * function chaining. (See User Guide for details.)
michael@0 280 * @return destination string length
michael@0 281 * @stable ICU 4.6
michael@0 282 */
michael@0 283 U_STABLE int32_t U_EXPORT2
michael@0 284 uidna_nameToASCII(const UIDNA *idna,
michael@0 285 const UChar *name, int32_t length,
michael@0 286 UChar *dest, int32_t capacity,
michael@0 287 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
michael@0 288
michael@0 289 /**
michael@0 290 * Converts a whole domain name into its Unicode form for human-readable display.
michael@0 291 * If any processing step fails, then pInfo->errors will be non-zero.
michael@0 292 * The domain name might be modified according to the types of errors.
michael@0 293 *
michael@0 294 * The UErrorCode indicates an error only in exceptional cases,
michael@0 295 * such as a U_MEMORY_ALLOCATION_ERROR.
michael@0 296 *
michael@0 297 * @param idna UIDNA instance
michael@0 298 * @param name Input domain name
michael@0 299 * @param length Domain name length, or -1 if NUL-terminated
michael@0 300 * @param dest Destination string buffer
michael@0 301 * @param capacity Destination buffer capacity
michael@0 302 * @param pInfo Output container of IDNA processing details (a UIDNAInfo; errors are UIDNA_ERROR_... bits).
michael@0 303 * @param pErrorCode Standard ICU error code. Its input value must
michael@0 304 * pass the U_SUCCESS() test, or else the function returns
michael@0 305 * immediately. Check for U_FAILURE() on output or use with
michael@0 306 * function chaining. (See User Guide for details.)
michael@0 307 * @return destination string length
michael@0 308 * @stable ICU 4.6
michael@0 309 */
michael@0 310 U_STABLE int32_t U_EXPORT2
michael@0 311 uidna_nameToUnicode(const UIDNA *idna,
michael@0 312 const UChar *name, int32_t length,
michael@0 313 UChar *dest, int32_t capacity,
michael@0 314 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
michael@0 315
michael@0 316 /* UTF-8 versions of the processing methods --------------------------------- */
michael@0 317
michael@0 318 /**
michael@0 319 * Converts a single domain name label into its ASCII form for DNS lookup.
michael@0 320 * UTF-8 version of uidna_labelToASCII(), same behavior.
michael@0 321 *
michael@0 322 * @param idna UIDNA instance
michael@0 323 * @param label Input domain name label (UTF-8)
michael@0 324 * @param length Label length, or -1 if NUL-terminated
michael@0 325 * @param dest Destination string buffer (UTF-8)
michael@0 326 * @param capacity Destination buffer capacity
michael@0 327 * @param pInfo Output container of IDNA processing details (a UIDNAInfo; errors are UIDNA_ERROR_... bits).
michael@0 328 * @param pErrorCode Standard ICU error code. Its input value must
michael@0 329 * pass the U_SUCCESS() test, or else the function returns
michael@0 330 * immediately. Check for U_FAILURE() on output or use with
michael@0 331 * function chaining. (See User Guide for details.)
michael@0 332 * @return destination string length
michael@0 333 * @stable ICU 4.6
michael@0 334 */
michael@0 335 U_STABLE int32_t U_EXPORT2
michael@0 336 uidna_labelToASCII_UTF8(const UIDNA *idna,
michael@0 337 const char *label, int32_t length,
michael@0 338 char *dest, int32_t capacity,
michael@0 339 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
michael@0 340
michael@0 341 /**
michael@0 342 * Converts a single domain name label into its Unicode form for human-readable display.
michael@0 343 * UTF-8 version of uidna_labelToUnicode(), same behavior.
michael@0 344 *
michael@0 345 * @param idna UIDNA instance
michael@0 346 * @param label Input domain name label (UTF-8)
michael@0 347 * @param length Label length, or -1 if NUL-terminated
michael@0 348 * @param dest Destination string buffer (UTF-8)
michael@0 349 * @param capacity Destination buffer capacity
michael@0 350 * @param pInfo Output container of IDNA processing details (a UIDNAInfo; errors are UIDNA_ERROR_... bits).
michael@0 351 * @param pErrorCode Standard ICU error code. Its input value must
michael@0 352 * pass the U_SUCCESS() test, or else the function returns
michael@0 353 * immediately. Check for U_FAILURE() on output or use with
michael@0 354 * function chaining. (See User Guide for details.)
michael@0 355 * @return destination string length
michael@0 356 * @stable ICU 4.6
michael@0 357 */
michael@0 358 U_STABLE int32_t U_EXPORT2
michael@0 359 uidna_labelToUnicodeUTF8(const UIDNA *idna,
michael@0 360 const char *label, int32_t length,
michael@0 361 char *dest, int32_t capacity,
michael@0 362 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
michael@0 363
michael@0 364 /**
michael@0 365 * Converts a whole domain name into its ASCII form for DNS lookup.
michael@0 366 * UTF-8 version of uidna_nameToASCII(), same behavior.
michael@0 367 *
michael@0 368 * @param idna UIDNA instance
michael@0 369 * @param name Input domain name (UTF-8)
michael@0 370 * @param length Domain name length, or -1 if NUL-terminated
michael@0 371 * @param dest Destination string buffer (UTF-8)
michael@0 372 * @param capacity Destination buffer capacity
michael@0 373 * @param pInfo Output container of IDNA processing details (a UIDNAInfo; errors are UIDNA_ERROR_... bits).
michael@0 374 * @param pErrorCode Standard ICU error code. Its input value must
michael@0 375 * pass the U_SUCCESS() test, or else the function returns
michael@0 376 * immediately. Check for U_FAILURE() on output or use with
michael@0 377 * function chaining. (See User Guide for details.)
michael@0 378 * @return destination string length
michael@0 379 * @stable ICU 4.6
michael@0 380 */
michael@0 381 U_STABLE int32_t U_EXPORT2
michael@0 382 uidna_nameToASCII_UTF8(const UIDNA *idna,
michael@0 383 const char *name, int32_t length,
michael@0 384 char *dest, int32_t capacity,
michael@0 385 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
michael@0 386
michael@0 387 /**
michael@0 388 * Converts a whole domain name into its Unicode form for human-readable display.
michael@0 389 * UTF-8 version of uidna_nameToUnicode(), same behavior.
michael@0 390 *
michael@0 391 * @param idna UIDNA instance
michael@0 392 * @param name Input domain name (UTF-8)
michael@0 393 * @param length Domain name length, or -1 if NUL-terminated
michael@0 394 * @param dest Destination string buffer (UTF-8)
michael@0 395 * @param capacity Destination buffer capacity
michael@0 396 * @param pInfo Output container of IDNA processing details (a UIDNAInfo; errors are UIDNA_ERROR_... bits).
michael@0 397 * @param pErrorCode Standard ICU error code. Its input value must
michael@0 398 * pass the U_SUCCESS() test, or else the function returns
michael@0 399 * immediately. Check for U_FAILURE() on output or use with
michael@0 400 * function chaining. (See User Guide for details.)
michael@0 401 * @return destination string length
michael@0 402 * @stable ICU 4.6
michael@0 403 */
michael@0 404 U_STABLE int32_t U_EXPORT2
michael@0 405 uidna_nameToUnicodeUTF8(const UIDNA *idna,
michael@0 406 const char *name, int32_t length,
michael@0 407 char *dest, int32_t capacity,
michael@0 408 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
michael@0 409
michael@0 410 /*
michael@0 411 * IDNA error bit set values.
michael@0 412 * When a domain name or label fails a processing step or does not meet the
michael@0 413 * validity criteria, then one or more of these error bits are set in UIDNAInfo.errors.
michael@0 414 */
michael@0 415 enum {
michael@0 416 /**
michael@0 417 * A non-final domain name label (or the whole domain name) is empty.
michael@0 418 * @stable ICU 4.6
michael@0 419 */
michael@0 420 UIDNA_ERROR_EMPTY_LABEL=1,
michael@0 421 /**
michael@0 422 * A domain name label is longer than 63 bytes.
michael@0 423 * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
michael@0 424 * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
michael@0 425 * @stable ICU 4.6
michael@0 426 */
michael@0 427 UIDNA_ERROR_LABEL_TOO_LONG=2,
michael@0 428 /**
michael@0 429 * A domain name is longer than 255 bytes in its storage form.
michael@0 430 * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
michael@0 431 * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
michael@0 432 * @stable ICU 4.6
michael@0 433 */
michael@0 434 UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
michael@0 435 /**
michael@0 436 * A label starts with a hyphen-minus ('-').
michael@0 437 * @stable ICU 4.6
michael@0 438 */
michael@0 439 UIDNA_ERROR_LEADING_HYPHEN=8,
michael@0 440 /**
michael@0 441 * A label ends with a hyphen-minus ('-').
michael@0 442 * @stable ICU 4.6
michael@0 443 */
michael@0 444 UIDNA_ERROR_TRAILING_HYPHEN=0x10,
michael@0 445 /**
michael@0 446 * A label contains hyphen-minus ('-') in the third and fourth positions.
michael@0 447 * @stable ICU 4.6
michael@0 448 */
michael@0 449 UIDNA_ERROR_HYPHEN_3_4=0x20,
michael@0 450 /**
michael@0 451 * A label starts with a combining mark.
michael@0 452 * @stable ICU 4.6
michael@0 453 */
michael@0 454 UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
michael@0 455 /**
michael@0 456 * A label or domain name contains disallowed characters.
michael@0 457 * @stable ICU 4.6
michael@0 458 */
michael@0 459 UIDNA_ERROR_DISALLOWED=0x80,
michael@0 460 /**
michael@0 461 * A label starts with "xn--" but does not contain valid Punycode.
michael@0 462 * That is, an xn-- label failed Punycode decoding.
michael@0 463 * @stable ICU 4.6
michael@0 464 */
michael@0 465 UIDNA_ERROR_PUNYCODE=0x100,
michael@0 466 /**
michael@0 467 * A label contains a dot (full stop).
michael@0 468 * This can occur in an input string for a single-label function.
michael@0 469 * @stable ICU 4.6
michael@0 470 */
michael@0 471 UIDNA_ERROR_LABEL_HAS_DOT=0x200,
michael@0 472 /**
michael@0 473 * An ACE label does not contain a valid label string.
michael@0 474 * The label was successfully ACE (Punycode) decoded but the resulting
michael@0 475 * string had severe validation errors. For example,
michael@0 476 * it might contain characters that are not allowed in ACE labels,
michael@0 477 * or it might not be normalized.
michael@0 478 * @stable ICU 4.6
michael@0 479 */
michael@0 480 UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
michael@0 481 /**
michael@0 482 * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
michael@0 483 * @stable ICU 4.6
michael@0 484 */
michael@0 485 UIDNA_ERROR_BIDI=0x800,
michael@0 486 /**
michael@0 487 * A label does not meet the IDNA CONTEXTJ requirements.
michael@0 488 * @stable ICU 4.6
michael@0 489 */
michael@0 490 UIDNA_ERROR_CONTEXTJ=0x1000,
michael@0 491 /**
michael@0 492 * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
michael@0 493 * Some punctuation characters "Would otherwise have been DISALLOWED"
michael@0 494 * but are allowed in certain contexts. (RFC 5892)
michael@0 495 * @stable ICU 49
michael@0 496 */
michael@0 497 UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
michael@0 498 /**
michael@0 499 * A label does not meet the IDNA CONTEXTO requirements for digits.
michael@0 500 * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
michael@0 501 * @stable ICU 49
michael@0 502 */
michael@0 503 UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
michael@0 504 };
michael@0 505
michael@0 506 /* IDNA2003 API ------------------------------------------------------------- */
michael@0 507
michael@0 508 /**
michael@0 509 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
michael@0 510 * This operation is done on <b>single labels</b> before sending them to something that expects
michael@0 511 * ASCII names. A label is an individual part of a domain name. Labels are usually
michael@0 512 * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
michael@0 513 *
michael@0 514 * IDNA2003 API Overview:
michael@0 515 *
michael@0 516 * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
michael@0 517 * (http://www.ietf.org/rfc/rfc3490.txt).
michael@0 518 * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
michael@0 519 * containing non-ASCII code points are processed by the
michael@0 520 * ToASCII operation before passing it to resolver libraries. Domain names
michael@0 521 * that are obtained from resolver libraries are processed by the
michael@0 522 * ToUnicode operation before displaying the domain name to the user.
michael@0 523 * IDNA requires that implementations process input strings with Nameprep
michael@0 524 * (http://www.ietf.org/rfc/rfc3491.txt),
michael@0 525 * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
michael@0 526 * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
michael@0 527 * Implementations of IDNA MUST fully implement Nameprep and Punycode;
michael@0 528 * neither Nameprep nor Punycode are optional.
michael@0 529 * The input and output of ToASCII and ToUnicode operations are Unicode
michael@0 530 * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
michael@0 531 * multiple times to an input string will yield the same result as applying the operation
michael@0 532 * once.
michael@0 533 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
michael@0 534 * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
michael@0 535 *
michael@0 536 * @param src Input UChar array containing label in Unicode.
michael@0 537 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
michael@0 538 * @param dest Output UChar array with ASCII (ACE encoded) label.
michael@0 539 * @param destCapacity Size of dest.
michael@0 540 * @param options A bit set of options:
michael@0 541 *
michael@0 542 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
michael@0 543 * and do not use STD3 ASCII rules.
michael@0 544 * If unassigned code points are found the operation fails with
michael@0 545 * U_IDNA_UNASSIGNED_ERROR error code.
michael@0 546 *
michael@0 547 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
michael@0 548 * If this option is set, the unassigned code points in the input
michael@0 549 * are treated as normal Unicode code points.
michael@0 550 *
michael@0 551 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
michael@0 552 * If this option is set and the input does not satisfy STD3 rules,
michael@0 553 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
michael@0 554 *
michael@0 555 * @param parseError Pointer to UParseError struct to receive information on position
michael@0 556 * of error if an error is encountered. Can be NULL.
michael@0 557 * @param status ICU in/out error code parameter.
michael@0 558 * U_INVALID_CHAR_FOUND if src contains
michael@0 559 * unmatched single surrogates.
michael@0 560 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
michael@0 561 * too many code points.
michael@0 562 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
michael@0 563 * @return The length of the result string, if successful or in case of a buffer overflow;
michael@0 564 * in the latter case it will be greater than destCapacity.
michael@0 565 * @stable ICU 2.6
michael@0 566 */
michael@0 567 U_STABLE int32_t U_EXPORT2
michael@0 568 uidna_toASCII(const UChar* src, int32_t srcLength,
michael@0 569 UChar* dest, int32_t destCapacity,
michael@0 570 int32_t options,
michael@0 571 UParseError* parseError,
michael@0 572 UErrorCode* status);
michael@0 573
michael@0 574
michael@0 575 /**
michael@0 576 * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
michael@0 577 * This operation is done on <b>single labels</b> before sending them to something that expects
michael@0 578 * Unicode names. A label is an individual part of a domain name. Labels are usually
michael@0 579 * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
michael@0 580 *
michael@0 581 * @param src Input UChar array containing ASCII (ACE encoded) label.
michael@0 582 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
michael@0 583 * @param dest Output Converted UChar array containing Unicode equivalent of label.
michael@0 584 * @param destCapacity Size of dest.
michael@0 585 * @param options A bit set of options:
michael@0 586 *
michael@0 587 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
michael@0 588 * and do not use STD3 ASCII rules.
michael@0 589 * If unassigned code points are found the operation fails with
michael@0 590 * U_IDNA_UNASSIGNED_ERROR error code.
michael@0 591 *
michael@0 592 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
michael@0 593 * If this option is set, the unassigned code points in the input
michael@0 594 * are treated as normal Unicode code points. <b> Note: </b> This option is
michael@0 595 * required on toUnicode operation because the RFC mandates
michael@0 596 * verification of decoded ACE input by applying toASCII and comparing
michael@0 597 * its output with source
michael@0 598 *
michael@0 599 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
michael@0 600 * If this option is set and the input does not satisfy STD3 rules,
michael@0 601 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
michael@0 602 *
michael@0 603 * @param parseError Pointer to UParseError struct to receive information on position
michael@0 604 * of error if an error is encountered. Can be NULL.
michael@0 605 * @param status ICU in/out error code parameter.
michael@0 606 * U_INVALID_CHAR_FOUND if src contains
michael@0 607 * unmatched single surrogates.
michael@0 608 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
michael@0 609 * too many code points.
michael@0 610 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
michael@0 611 * @return The length of the result string, if successful or in case of a buffer overflow;
michael@0 612 * in the latter case it will be greater than destCapacity.
michael@0 613 * @stable ICU 2.6
michael@0 614 */
michael@0 615 U_STABLE int32_t U_EXPORT2
michael@0 616 uidna_toUnicode(const UChar* src, int32_t srcLength,
michael@0 617 UChar* dest, int32_t destCapacity,
michael@0 618 int32_t options,
michael@0 619 UParseError* parseError,
michael@0 620 UErrorCode* status);
michael@0 621
michael@0 622
michael@0 623 /**
michael@0 624 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
michael@0 625 * This operation is done on complete domain names, e.g. "www.example.com".
michael@0 626 * It is important to note that this operation can fail. If it fails, then the input
michael@0 627 * domain name cannot be used as an Internationalized Domain Name and the application
michael@0 628 * should have methods defined to deal with the failure.
michael@0 629 *
michael@0 630 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
michael@0 631 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
michael@0 632 * and then convert. This function does not offer that level of granularity. The options once
michael@0 633 * set will apply to all labels in the domain name.
michael@0 634 *
michael@0 635 * @param src Input UChar array containing IDN in Unicode.
michael@0 636 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
michael@0 637 * @param dest Output UChar array with ASCII (ACE encoded) IDN.
michael@0 638 * @param destCapacity Size of dest.
michael@0 639 * @param options A bit set of options:
michael@0 640 *
michael@0 641 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
michael@0 642 * and do not use STD3 ASCII rules.
michael@0 643 * If unassigned code points are found the operation fails with
michael@0 644 * U_UNASSIGNED_CODE_POINT_FOUND error code.
michael@0 645 *
michael@0 646 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
michael@0 647 * If this option is set, the unassigned code points in the input
michael@0 648 * are treated as normal Unicode code points.
michael@0 649 *
michael@0 650 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
michael@0 651 * If this option is set and the input does not satisfy STD3 rules,
michael@0 652 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
michael@0 653 *
michael@0 654 * @param parseError Pointer to UParseError struct to receive information on position
michael@0 655 * of error if an error is encountered. Can be NULL.
michael@0 656 * @param status ICU in/out error code parameter.
michael@0 657 * U_INVALID_CHAR_FOUND if src contains
michael@0 658 * unmatched single surrogates.
michael@0 659 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
michael@0 660 * too many code points.
michael@0 661 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
michael@0 662 * @return The length of the result string, if successful or in case of a buffer overflow;
michael@0 663 * in the latter case it will be greater than destCapacity.
michael@0 664 * @stable ICU 2.6
michael@0 665 */
michael@0 666 U_STABLE int32_t U_EXPORT2
michael@0 667 uidna_IDNToASCII( const UChar* src, int32_t srcLength,
michael@0 668 UChar* dest, int32_t destCapacity,
michael@0 669 int32_t options,
michael@0 670 UParseError* parseError,
michael@0 671 UErrorCode* status);
michael@0 672
michael@0 673 /**
michael@0 674 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
michael@0 675 * This operation is done on complete domain names, e.g. "www.example.com".
michael@0 676 *
michael@0 677 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
michael@0 678 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
michael@0 679 * and then convert. This function does not offer that level of granularity. The options once
michael@0 680 * set will apply to all labels in the domain name.
michael@0 681 *
michael@0 682 * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
michael@0 683 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
michael@0 684 * @param dest Output UChar array containing Unicode equivalent of source IDN.
michael@0 685 * @param destCapacity Size of dest.
michael@0 686 * @param options A bit set of options:
michael@0 687 *
michael@0 688 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
michael@0 689 * and do not use STD3 ASCII rules.
michael@0 690 * If unassigned code points are found the operation fails with
michael@0 691 * U_UNASSIGNED_CODE_POINT_FOUND error code.
michael@0 692 *
michael@0 693 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
michael@0 694 * If this option is set, the unassigned code points in the input
michael@0 695 * are treated as normal Unicode code points.
michael@0 696 *
michael@0 697 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
michael@0 698 * If this option is set and the input does not satisfy STD3 rules,
michael@0 699 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
michael@0 700 *
michael@0 701 * @param parseError Pointer to UParseError struct to receive information on position
michael@0 702 * of error if an error is encountered. Can be NULL.
michael@0 703 * @param status ICU in/out error code parameter.
michael@0 704 * U_INVALID_CHAR_FOUND if src contains
michael@0 705 * unmatched single surrogates.
michael@0 706 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
michael@0 707 * too many code points.
michael@0 708 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
michael@0 709 * @return The length of the result string, if successful or in case of a buffer overflow;
michael@0 710 * in the latter case it will be greater than destCapacity.
michael@0 711 * @stable ICU 2.6
michael@0 712 */
michael@0 713 U_STABLE int32_t U_EXPORT2
michael@0 714 uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
michael@0 715 UChar* dest, int32_t destCapacity,
michael@0 716 int32_t options,
michael@0 717 UParseError* parseError,
michael@0 718 UErrorCode* status);
michael@0 719
michael@0 720 /**
michael@0 721 * IDNA2003: Compare two IDN strings for equivalence.
michael@0 722 * This function splits the domain names into labels and compares them.
michael@0 723 * According to IDN RFC, whenever two labels are compared, they are
michael@0 724 * considered equal if and only if their ASCII forms (obtained by
michael@0 725 * applying toASCII) match using a case-insensitive ASCII comparison.
michael@0 726 * Two domain names are considered a match if and only if all labels
michael@0 727 * match regardless of whether label separators match.
michael@0 728 *
michael@0 729 * @param s1 First source string.
michael@0 730 * @param length1 Length of first source string, or -1 if NUL-terminated.
michael@0 731 *
michael@0 732 * @param s2 Second source string.
michael@0 733 * @param length2 Length of second source string, or -1 if NUL-terminated.
michael@0 734 * @param options A bit set of options:
michael@0 735 *
michael@0 736 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
michael@0 737 * and do not use STD3 ASCII rules.
michael@0 738 * If unassigned code points are found the operation fails with
michael@0 739 * U_UNASSIGNED_CODE_POINT_FOUND error code.
michael@0 740 *
michael@0 741 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
michael@0 742 * If this option is set, the unassigned code points in the input
michael@0 743 * are treated as normal Unicode code points.
michael@0 744 *
michael@0 745 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
michael@0 746 * If this option is set and the input does not satisfy STD3 rules,
michael@0 747 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
michael@0 748 *
michael@0 749 * @param status ICU error code in/out parameter.
michael@0 750 * Must fulfill U_SUCCESS before the function call.
michael@0 751 * @return <0 or 0 or >0 as usual for string comparisons.
michael@0 752 * @stable ICU 2.6
michael@0 753 */
michael@0 754 U_STABLE int32_t U_EXPORT2
michael@0 755 uidna_compare( const UChar *s1, int32_t length1,
michael@0 756 const UChar *s2, int32_t length2,
michael@0 757 int32_t options,
michael@0 758 UErrorCode* status);
michael@0 759
michael@0 760 #endif /* #if !UCONFIG_NO_IDNA */
michael@0 761
michael@0 762 #endif

mercurial