intl/icu/source/common/unicode/uidna.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2  *******************************************************************************
     3  *
     4  *   Copyright (C) 2003-2013, International Business Machines
     5  *   Corporation and others.  All Rights Reserved.
     6  *
     7  *******************************************************************************
     8  *   file name:  uidna.h
     9  *   encoding:   US-ASCII
    10  *   tab size:   8 (not used)
    11  *   indentation:4
    12  *
    13  *   created on: 2003feb1
    14  *   created by: Ram Viswanadha
    15  */
    17 #ifndef __UIDNA_H__
    18 #define __UIDNA_H__
    20 #include "unicode/utypes.h"
    22 #if !UCONFIG_NO_IDNA
    24 #include "unicode/localpointer.h"
    25 #include "unicode/parseerr.h"
    27 /**
    28  * \file
    29  * \brief C API: Internationalizing Domain Names in Applications (IDNA)
    30  *
    31  * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
    32  *
    33  * The C API functions which do take a UIDNA * service object pointer
    34  * implement UTS #46 and IDNA2008.
    35  * The C API functions which do not take a service object pointer
    36  * implement IDNA2003.
    37  */
    39 /*
    40  * IDNA option bit set values. Each non-default value is a distinct bit; combine them with bitwise OR.
    41  */
    42 enum {
    43     /**
    44      * Default options value: None of the other options are set.
    45      * For use in static worker and factory methods.
    46      * @stable ICU 2.6
    47      */
    48     UIDNA_DEFAULT=0,
    49     /**
    50      * Option to allow unassigned code points in domain names and labels.
    51      * For use in static worker and factory methods.
    52      * <p>This option is ignored by the UTS46 implementation.
    53      * (UTS #46 disallows unassigned code points.)
    54      * @stable ICU 2.6
    55      */
    56     UIDNA_ALLOW_UNASSIGNED=1,
    57     /**
    58      * Option to check whether the input conforms to the STD3 ASCII rules,
    59      * for example the restriction of labels to LDH characters
    60      * (ASCII Letters, Digits and Hyphen-Minus).
    61      * For use in static worker and factory methods.
    62      * @stable ICU 2.6
    63      */
    64     UIDNA_USE_STD3_RULES=2,
    65     /**
    66      * IDNA option to check whether the input conforms to the BiDi rules.
    67      * For use in static worker and factory methods.
    68      * <p>This option is ignored by the IDNA2003 implementation.
    69      * (IDNA2003 always performs a BiDi check.)
    70      * @stable ICU 4.6
    71      */
    72     UIDNA_CHECK_BIDI=4,
    73     /**
    74      * IDNA option to check whether the input conforms to the CONTEXTJ rules.
    75      * For use in static worker and factory methods.
    76      * <p>This option is ignored by the IDNA2003 implementation.
    77      * (The CONTEXTJ check is new in IDNA2008.)
    78      * @stable ICU 4.6
    79      */
    80     UIDNA_CHECK_CONTEXTJ=8,
    81     /**
    82      * IDNA option for nontransitional processing in ToASCII().
    83      * For use in static worker and factory methods.
    84      * <p>By default, ToASCII() uses transitional processing.
    85      * <p>This option is ignored by the IDNA2003 implementation.
    86      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
    87      * @stable ICU 4.6
    88      */
    89     UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
    90     /**
    91      * IDNA option for nontransitional processing in ToUnicode().
    92      * For use in static worker and factory methods.
    93      * <p>By default, ToUnicode() uses transitional processing.
    94      * <p>This option is ignored by the IDNA2003 implementation.
    95      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
    96      * @stable ICU 4.6
    97      */
    98     UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
    99     /**
   100      * IDNA option to check whether the input conforms to the CONTEXTO rules.
   101      * For use in static worker and factory methods.
   102      * <p>This option is ignored by the IDNA2003 implementation.
   103      * (The CONTEXTO check is new in IDNA2008.)
   104      * <p>This is for use by registries for IDNA2008 conformance.
   105      * UTS #46 does not require the CONTEXTO check.
   106      * @stable ICU 49
   107      */
   108     UIDNA_CHECK_CONTEXTO=0x40
   109 };
   111 /**
   112  * Opaque C service object type for the new IDNA API.
   113  * @stable ICU 4.6
   114  */
   115 struct UIDNA;
   116 typedef struct UIDNA UIDNA;  /**< C typedef for struct UIDNA. @stable ICU 4.6 */
   118 /**
   119  * Returns a UIDNA instance which implements UTS #46.
   120  * Returns an unmodifiable instance, owned by the caller.
   121  * Cache it for multiple operations, and uidna_close() it when done.
   122  * The instance is thread-safe, that is, it can be used concurrently.
   123  *
   124  * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
   125  *
   126  * @param options Bit set to modify the processing and error checking.
   127  *                See option bit set values in uidna.h.
   128  * @param pErrorCode Standard ICU error code. Its input value must
   129  *                  pass the U_SUCCESS() test, or else the function returns
   130  *                  immediately. Check for U_FAILURE() on output or use with
   131  *                  function chaining. (See User Guide for details.)
   132  * @return the UTS #46 UIDNA instance, if successful
   133  * @stable ICU 4.6
   134  */
   135 U_STABLE UIDNA * U_EXPORT2
   136 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
   138 /**
   139  * Closes a UIDNA instance.
   140  * @param idna UIDNA instance to be closed
   141  * @stable ICU 4.6
   142  */
   143 U_STABLE void U_EXPORT2
   144 uidna_close(UIDNA *idna);
   146 #if U_SHOW_CPLUSPLUS_API
   148 U_NAMESPACE_BEGIN
   150 /**
   151  * \class LocalUIDNAPointer
   152  * "Smart pointer" class, closes a UIDNA via uidna_close().
   153  * For most methods see the LocalPointerBase base class.
   154  *
   155  * @see LocalPointerBase
   156  * @see LocalPointer
   157  * @stable ICU 4.6
   158  */
   159 U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
   161 U_NAMESPACE_END
   163 #endif
   165 /**
   166  * Output container for IDNA processing errors.
   167  * Initialize with UIDNA_INFO_INITIALIZER:
   168  * \code
   169  * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
   170  * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
   171  * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
   172  * \endcode
   173  * @stable ICU 4.6
   174  */
   175 typedef struct UIDNAInfo {
   176     /** sizeof(UIDNAInfo), as set by UIDNA_INFO_INITIALIZER. @stable ICU 4.6 */
   177     int16_t size;
   178     /**
   179      * Set to TRUE if transitional and nontransitional processing produce different results.
   180      * For details see C++ IDNAInfo::isTransitionalDifferent().
   181      * @stable ICU 4.6
   182      */
   183     UBool isTransitionalDifferent;
   184     UBool reservedB3;  /**< Reserved field, do not use. @internal */
   185     /**
   186      * Bit set indicating IDNA processing errors. 0 if no errors.
   187      * See UIDNA_ERROR_... constants.
   188      * @stable ICU 4.6
   189      */
   190     uint32_t errors;
   191     int32_t reservedI2;  /**< Reserved field, do not use. @internal */
   192     int32_t reservedI3;  /**< Reserved field, do not use. @internal */
   193 } UIDNAInfo;
   195 /**
   196  * Static initializer for a UIDNAInfo struct: sets size to sizeof(UIDNAInfo) and every other field to FALSE or 0.
   197  * @stable ICU 4.6
   198  */
   199 #define UIDNA_INFO_INITIALIZER { \
   200     (int16_t)sizeof(UIDNAInfo), /* size */ \
   201     FALSE, FALSE,               /* isTransitionalDifferent, reservedB3 */ \
   202     0, 0, 0 }                   /* errors, reservedI2, reservedI3 */
   204 /**
   205  * Converts a single domain name label into its ASCII form for DNS lookup.
   206  * If any processing step fails, then pInfo->errors will be non-zero and
   207  * the result might not be an ASCII string.
   208  * The label might be modified according to the types of errors.
   209  * Labels with severe errors will be left in (or turned into) their Unicode form.
   210  *
   211  * The UErrorCode indicates an error only in exceptional cases,
   212  * such as a U_MEMORY_ALLOCATION_ERROR.
   213  *
   214  * @param idna UIDNA instance
   215  * @param label Input domain name label
   216  * @param length Label length, or -1 if NUL-terminated
   217  * @param dest Destination string buffer
   218  * @param capacity Destination buffer capacity
   219  * @param pInfo Output container of IDNA processing details.
   220  * @param pErrorCode Standard ICU error code. Its input value must
   221  *                  pass the U_SUCCESS() test, or else the function returns
   222  *                  immediately. Check for U_FAILURE() on output or use with
   223  *                  function chaining. (See User Guide for details.)
   224  * @return destination string length
   225  * @stable ICU 4.6
   226  */
   227 U_STABLE int32_t U_EXPORT2
   228 uidna_labelToASCII(const UIDNA *idna,
   229                    const UChar *label, int32_t length,
   230                    UChar *dest, int32_t capacity,
   231                    UIDNAInfo *pInfo, UErrorCode *pErrorCode);
   233 /**
   234  * Converts a single domain name label into its Unicode form for human-readable display.
   235  * If any processing step fails, then pInfo->errors will be non-zero.
   236  * The label might be modified according to the types of errors.
   237  *
   238  * The UErrorCode indicates an error only in exceptional cases,
   239  * such as a U_MEMORY_ALLOCATION_ERROR.
   240  *
   241  * @param idna UIDNA instance
   242  * @param label Input domain name label
   243  * @param length Label length, or -1 if NUL-terminated
   244  * @param dest Destination string buffer
   245  * @param capacity Destination buffer capacity
   246  * @param pInfo Output container of IDNA processing details.
   247  * @param pErrorCode Standard ICU error code. Its input value must
   248  *                  pass the U_SUCCESS() test, or else the function returns
   249  *                  immediately. Check for U_FAILURE() on output or use with
   250  *                  function chaining. (See User Guide for details.)
   251  * @return destination string length
   252  * @stable ICU 4.6
   253  */
   254 U_STABLE int32_t U_EXPORT2
   255 uidna_labelToUnicode(const UIDNA *idna,
   256                      const UChar *label, int32_t length,
   257                      UChar *dest, int32_t capacity,
   258                      UIDNAInfo *pInfo, UErrorCode *pErrorCode);
   260 /**
   261  * Converts a whole domain name into its ASCII form for DNS lookup.
   262  * If any processing step fails, then pInfo->errors will be non-zero and
   263  * the result might not be an ASCII string.
   264  * The domain name might be modified according to the types of errors.
   265  * Labels with severe errors will be left in (or turned into) their Unicode form.
   266  *
   267  * The UErrorCode indicates an error only in exceptional cases,
   268  * such as a U_MEMORY_ALLOCATION_ERROR.
   269  *
   270  * @param idna UIDNA instance
   271  * @param name Input domain name
   272  * @param length Domain name length, or -1 if NUL-terminated
   273  * @param dest Destination string buffer
   274  * @param capacity Destination buffer capacity
   275  * @param pInfo Output container of IDNA processing details.
   276  * @param pErrorCode Standard ICU error code. Its input value must
   277  *                  pass the U_SUCCESS() test, or else the function returns
   278  *                  immediately. Check for U_FAILURE() on output or use with
   279  *                  function chaining. (See User Guide for details.)
   280  * @return destination string length
   281  * @stable ICU 4.6
   282  */
   283 U_STABLE int32_t U_EXPORT2
   284 uidna_nameToASCII(const UIDNA *idna,
   285                   const UChar *name, int32_t length,
   286                   UChar *dest, int32_t capacity,
   287                   UIDNAInfo *pInfo, UErrorCode *pErrorCode);
   289 /**
   290  * Converts a whole domain name into its Unicode form for human-readable display.
   291  * If any processing step fails, then pInfo->errors will be non-zero.
   292  * The domain name might be modified according to the types of errors.
   293  *
   294  * The UErrorCode indicates an error only in exceptional cases,
   295  * such as a U_MEMORY_ALLOCATION_ERROR.
   296  *
   297  * @param idna UIDNA instance
   298  * @param name Input domain name
   299  * @param length Domain name length, or -1 if NUL-terminated
   300  * @param dest Destination string buffer
   301  * @param capacity Destination buffer capacity
   302  * @param pInfo Output container of IDNA processing details.
   303  * @param pErrorCode Standard ICU error code. Its input value must
   304  *                  pass the U_SUCCESS() test, or else the function returns
   305  *                  immediately. Check for U_FAILURE() on output or use with
   306  *                  function chaining. (See User Guide for details.)
   307  * @return destination string length
   308  * @stable ICU 4.6
   309  */
   310 U_STABLE int32_t U_EXPORT2
   311 uidna_nameToUnicode(const UIDNA *idna,
   312                     const UChar *name, int32_t length,
   313                     UChar *dest, int32_t capacity,
   314                     UIDNAInfo *pInfo, UErrorCode *pErrorCode);
   316 /* UTF-8 versions of the processing methods --------------------------------- */
   318 /**
   319  * Converts a single domain name label into its ASCII form for DNS lookup.
   320  * UTF-8 version of uidna_labelToASCII(), same behavior.
   321  *
   322  * @param idna UIDNA instance
   323  * @param label Input domain name label
   324  * @param length Label length, or -1 if NUL-terminated
   325  * @param dest Destination string buffer
   326  * @param capacity Destination buffer capacity
   327  * @param pInfo Output container of IDNA processing details.
   328  * @param pErrorCode Standard ICU error code. Its input value must
   329  *                  pass the U_SUCCESS() test, or else the function returns
   330  *                  immediately. Check for U_FAILURE() on output or use with
   331  *                  function chaining. (See User Guide for details.)
   332  * @return destination string length
   333  * @stable ICU 4.6
   334  */
   335 U_STABLE int32_t U_EXPORT2
   336 uidna_labelToASCII_UTF8(const UIDNA *idna,
   337                         const char *label, int32_t length,
   338                         char *dest, int32_t capacity,
   339                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
   341 /**
   342  * Converts a single domain name label into its Unicode form for human-readable display.
   343  * UTF-8 version of uidna_labelToUnicode(), same behavior.
   344  *
   345  * @param idna UIDNA instance
   346  * @param label Input domain name label
   347  * @param length Label length, or -1 if NUL-terminated
   348  * @param dest Destination string buffer
   349  * @param capacity Destination buffer capacity
   350  * @param pInfo Output container of IDNA processing details.
   351  * @param pErrorCode Standard ICU error code. Its input value must
   352  *                  pass the U_SUCCESS() test, or else the function returns
   353  *                  immediately. Check for U_FAILURE() on output or use with
   354  *                  function chaining. (See User Guide for details.)
   355  * @return destination string length
   356  * @stable ICU 4.6
   357  */
   358 U_STABLE int32_t U_EXPORT2
   359 uidna_labelToUnicodeUTF8(const UIDNA *idna,
   360                          const char *label, int32_t length,
   361                          char *dest, int32_t capacity,
   362                          UIDNAInfo *pInfo, UErrorCode *pErrorCode);
   364 /**
   365  * Converts a whole domain name into its ASCII form for DNS lookup.
   366  * UTF-8 version of uidna_nameToASCII(), same behavior.
   367  *
   368  * @param idna UIDNA instance
   369  * @param name Input domain name
   370  * @param length Domain name length, or -1 if NUL-terminated
   371  * @param dest Destination string buffer
   372  * @param capacity Destination buffer capacity
   373  * @param pInfo Output container of IDNA processing details.
   374  * @param pErrorCode Standard ICU error code. Its input value must
   375  *                  pass the U_SUCCESS() test, or else the function returns
   376  *                  immediately. Check for U_FAILURE() on output or use with
   377  *                  function chaining. (See User Guide for details.)
   378  * @return destination string length
   379  * @stable ICU 4.6
   380  */
   381 U_STABLE int32_t U_EXPORT2
   382 uidna_nameToASCII_UTF8(const UIDNA *idna,
   383                        const char *name, int32_t length,
   384                        char *dest, int32_t capacity,
   385                        UIDNAInfo *pInfo, UErrorCode *pErrorCode);
   387 /**
   388  * Converts a whole domain name into its Unicode form for human-readable display.
   389  * UTF-8 version of uidna_nameToUnicode(), same behavior.
   390  *
   391  * @param idna UIDNA instance
   392  * @param name Input domain name
   393  * @param length Domain name length, or -1 if NUL-terminated
   394  * @param dest Destination string buffer
   395  * @param capacity Destination buffer capacity
   396  * @param pInfo Output container of IDNA processing details.
   397  * @param pErrorCode Standard ICU error code. Its input value must
   398  *                  pass the U_SUCCESS() test, or else the function returns
   399  *                  immediately. Check for U_FAILURE() on output or use with
   400  *                  function chaining. (See User Guide for details.)
   401  * @return destination string length
   402  * @stable ICU 4.6
   403  */
   404 U_STABLE int32_t U_EXPORT2
   405 uidna_nameToUnicodeUTF8(const UIDNA *idna,
   406                         const char *name, int32_t length,
   407                         char *dest, int32_t capacity,
   408                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
   410 /*
   411  * IDNA error bit set values.
   412  * When a domain name or label fails a processing step or does not meet the
   413  * validity criteria, then one or more of these error bits are set in UIDNAInfo.errors.
   414  */
   415 enum {
   416     /**
   417      * A non-final domain name label (or the whole domain name) is empty.
   418      * @stable ICU 4.6
   419      */
   420     UIDNA_ERROR_EMPTY_LABEL=1,
   421     /**
   422      * A domain name label is longer than 63 bytes.
   423      * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
   424      * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
   425      * @stable ICU 4.6
   426      */
   427     UIDNA_ERROR_LABEL_TOO_LONG=2,
   428     /**
   429      * A domain name is longer than 255 bytes in its storage form.
   430      * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
   431      * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
   432      * @stable ICU 4.6
   433      */
   434     UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
   435     /**
   436      * A label starts with a hyphen-minus ('-').
   437      * @stable ICU 4.6
   438      */
   439     UIDNA_ERROR_LEADING_HYPHEN=8,
   440     /**
   441      * A label ends with a hyphen-minus ('-').
   442      * @stable ICU 4.6
   443      */
   444     UIDNA_ERROR_TRAILING_HYPHEN=0x10,
   445     /**
   446      * A label contains hyphen-minus ('-') in the third and fourth positions.
   447      * @stable ICU 4.6
   448      */
   449     UIDNA_ERROR_HYPHEN_3_4=0x20,
   450     /**
   451      * A label starts with a combining mark.
   452      * @stable ICU 4.6
   453      */
   454     UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
   455     /**
   456      * A label or domain name contains disallowed characters.
   457      * @stable ICU 4.6
   458      */
   459     UIDNA_ERROR_DISALLOWED=0x80,
   460     /**
   461      * A label starts with "xn--" but does not contain valid Punycode.
   462      * That is, an xn-- label failed Punycode decoding.
   463      * @stable ICU 4.6
   464      */
   465     UIDNA_ERROR_PUNYCODE=0x100,
   466     /**
   467      * A label contains a dot (full stop).
   468      * This can occur in an input string for a single-label function.
   469      * @stable ICU 4.6
   470      */
   471     UIDNA_ERROR_LABEL_HAS_DOT=0x200,
   472     /**
   473      * An ACE label does not contain a valid label string.
   474      * The label was successfully ACE (Punycode) decoded but the resulting
   475      * string had severe validation errors. For example,
   476      * it might contain characters that are not allowed in ACE labels,
   477      * or it might not be normalized.
   478      * @stable ICU 4.6
   479      */
   480     UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
   481     /**
   482      * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
   483      * @stable ICU 4.6
   484      */
   485     UIDNA_ERROR_BIDI=0x800,
   486     /**
   487      * A label does not meet the IDNA CONTEXTJ requirements.
   488      * @stable ICU 4.6
   489      */
   490     UIDNA_ERROR_CONTEXTJ=0x1000,
   491     /**
   492      * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
   493      * Some punctuation characters "Would otherwise have been DISALLOWED"
   494      * but are allowed in certain contexts. (RFC 5892)
   495      * @stable ICU 49
   496      */
   497     UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
   498     /**
   499      * A label does not meet the IDNA CONTEXTO requirements for digits.
   500      * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
   501      * @stable ICU 49
   502      */
   503     UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
   504 };
   506 /* IDNA2003 API ------------------------------------------------------------- */
   508 /**
   509  * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
   510  * This operation is done on <b>single labels</b> before sending it to something that expects
   511  * ASCII names. A label is an individual part of a domain name. Labels are usually
   512  * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
   513  *
   514  * IDNA2003 API Overview:
   515  *
   516  * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
   517  * (http://www.ietf.org/rfc/rfc3490.txt).
   518  * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
   519  * containing non-ASCII code points are processed by the
   520  * ToASCII operation before passing it to resolver libraries. Domain names
   521  * that are obtained from resolver libraries are processed by the
   522  * ToUnicode operation before displaying the domain name to the user.
   523  * IDNA requires that implementations process input strings with Nameprep
   524  * (http://www.ietf.org/rfc/rfc3491.txt),
   525  * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
   526  * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
   527  * Implementations of IDNA MUST fully implement Nameprep and Punycode;
   528  * neither Nameprep nor Punycode are optional.
   529  * The input and output of ToASCII and ToUnicode operations are Unicode
   530  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
   531  * multiple times to an input string will yield the same result as applying the operation
   532  * once.
   533  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 
   534  * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
   535  *
   536  * @param src               Input UChar array containing label in Unicode.
   537  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
   538  * @param dest              Output UChar array with ASCII (ACE encoded) label.
   539  * @param destCapacity      Size of dest.
   540  * @param options           A bit set of options:
   541  *
   542  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
   543  *                              and do not use STD3 ASCII rules
   544  *                              If unassigned code points are found the operation fails with
   545  *                              the U_IDNA_UNASSIGNED_ERROR error code.
   546  *
   547  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
   548  *                              If this option is set, unassigned code points in the input
   549  *                              are treated as normal Unicode code points.
   550  *
   551  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
   552  *                              If this option is set and the input does not satisfy STD3 rules,  
   553  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
   554  *
   555  * @param parseError        Pointer to UParseError struct to receive information on position 
   556  *                          of error if an error is encountered. Can be NULL.
   557  * @param status            ICU in/out error code parameter.
   558  *                          U_INVALID_CHAR_FOUND if src contains
   559  *                          unmatched single surrogates.
   560  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
   561  *                          too many code points.
   562  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
   563  * @return The length of the result string, if successful; in case of a buffer overflow,
   564  *         the returned length will be greater than destCapacity.
   565  * @stable ICU 2.6
   566  */
   567 U_STABLE int32_t U_EXPORT2
   568 uidna_toASCII(const UChar* src, int32_t srcLength, 
   569               UChar* dest, int32_t destCapacity,
   570               int32_t options,
   571               UParseError* parseError,
   572               UErrorCode* status);
   575 /**
   576  * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
   577  * This operation is done on <b>single labels</b> before sending it to something that expects
   578  * Unicode names. A label is an individual part of a domain name. Labels are usually
   579  * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
   580  *
   581  * @param src               Input UChar array containing ASCII (ACE encoded) label.
   582  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
   583  * @param dest              Output UChar array containing the Unicode equivalent of the label.
   584  * @param destCapacity      Size of dest.
   585  * @param options           A bit set of options:
   586  *
   587  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
   588  *                              and do not use STD3 ASCII rules
   589  *                              If unassigned code points are found the operation fails with
   590  *                              the U_IDNA_UNASSIGNED_ERROR error code.
   591  *
   592  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
   593  *                              If this option is set, unassigned code points in the input
   594  *                              are treated as normal Unicode code points. <b> Note: </b> This option is
   595  *                              required on toUnicode operation because the RFC mandates 
   596  *                              verification of decoded ACE input by applying toASCII and comparing
   597  *                              its output with source
   598  *
   599  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
   600  *                              If this option is set and the input does not satisfy STD3 rules,  
   601  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
   602  *
   603  * @param parseError        Pointer to UParseError struct to receive information on position 
   604  *                          of error if an error is encountered. Can be NULL.
   605  * @param status            ICU in/out error code parameter.
   606  *                          U_INVALID_CHAR_FOUND if src contains
   607  *                          unmatched single surrogates.
   608  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
   609  *                          too many code points.
   610  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
   611  * @return The length of the result string, if successful; in case of a buffer overflow,
   612  *         the returned length will be greater than destCapacity.
   613  * @stable ICU 2.6
   614  */
   615 U_STABLE int32_t U_EXPORT2
   616 uidna_toUnicode(const UChar* src, int32_t srcLength,
   617                 UChar* dest, int32_t destCapacity,
   618                 int32_t options,
   619                 UParseError* parseError,
   620                 UErrorCode* status);
   623 /**
   624  * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
   625  * This operation is done on complete domain names, e.g: "www.example.com". 
   626  * It is important to note that this operation can fail. If it fails, then the input 
   627  * domain name cannot be used as an Internationalized Domain Name and the application
   628  * should have methods defined to deal with the failure.
   629  *
   630  * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
   631  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 
   632  * and then convert. This function does not offer that level of granularity. The options once  
   633  * set will apply to all labels in the domain name
   634  *
   635  * @param src               Input UChar array containing IDN in Unicode.
   636  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
   637  * @param dest              Output UChar array with ASCII (ACE encoded) IDN.
   638  * @param destCapacity      Size of dest.
   639  * @param options           A bit set of options:
   640  *
   641  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
   642  *                              and do not use STD3 ASCII rules
   643  *                              If unassigned code points are found the operation fails with
   644  *                              the U_IDNA_UNASSIGNED_ERROR error code.
   645  *
   646  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
   647  *                              If this option is set, unassigned code points in the input
   648  *                              are treated as normal Unicode code points.
   649  *
   650  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
   651  *                              If this option is set and the input does not satisfy STD3 rules,  
   652  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
   653  *
   654  * @param parseError        Pointer to UParseError struct to receive information on position 
   655  *                          of error if an error is encountered. Can be NULL.
   656  * @param status            ICU in/out error code parameter.
   657  *                          U_INVALID_CHAR_FOUND if src contains
   658  *                          unmatched single surrogates.
   659  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
   660  *                          too many code points.
   661  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
   662  * @return The length of the result string, if successful - or in case of a buffer overflow,
   663  *         in which case it will be greater than destCapacity.
   664  * @stable ICU 2.6
   665  */
   666 U_STABLE int32_t U_EXPORT2
   667 uidna_IDNToASCII(  const UChar* src, int32_t srcLength,
   668                    UChar* dest, int32_t destCapacity,
   669                    int32_t options,
   670                    UParseError* parseError,
   671                    UErrorCode* status);
   673 /**
   674  * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
   675  * This operation is done on complete domain names, e.g., "www.example.com".
   676  *
   677  * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
   678  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
   679  * and then convert. This function does not offer that level of granularity. The options, once
   680  * set, apply to all labels in the domain name.
   681  *
   682  * @param src               Input UChar array containing IDN in ASCII (ACE encoded) form.
   683  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
   684  * @param dest              Output UChar array containing Unicode equivalent of source IDN.
   685  * @param destCapacity      Size of dest.
   686  * @param options           A bit set of options:
   687  *
   688  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
   689  *                              and do not use STD3 ASCII rules.
   690  *                              If unassigned code points are found, the operation fails with
   691  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
   692  *
   693  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
   694  *                              If this option is set, unassigned code points in the input
   695  *                              are treated as normal Unicode code points.
   696  *
   697  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
   698  *                              If this option is set and the input does not satisfy STD3 rules,
   699  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
   700  *
   701  * @param parseError        Pointer to UParseError struct to receive information on position
   702  *                          of error if an error is encountered. Can be NULL.
   703  * @param status            ICU in/out error code parameter.
   704  *                          U_INVALID_CHAR_FOUND if src contains
   705  *                          unmatched single surrogates.
   706  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
   707  *                          too many code points.
   708  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
   709  * @return The length of the result string, if successful or in case of a buffer overflow;
   710  *         in the latter case it will be greater than destCapacity.
   711  * @stable ICU 2.6
   712  */
   713 U_STABLE int32_t U_EXPORT2
   714 uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
   715                      UChar* dest, int32_t destCapacity,
   716                      int32_t options,
   717                      UParseError* parseError,
   718                      UErrorCode* status);
   720 /**
   721  * IDNA2003: Compare two IDN strings for equivalence.
   722  * This function splits the domain names into labels and compares them.
   723  * According to the IDN RFC, whenever two labels are compared, they are
   724  * considered equal if and only if their ASCII forms (obtained by
   725  * applying toASCII) match using a case-insensitive ASCII comparison.
   726  * Two domain names are considered a match if and only if all labels
   727  * match regardless of whether label separators match.
   728  *
   729  * @param s1                First source string.
   730  * @param length1           Length of first source string, or -1 if NUL-terminated.
   731  *
   732  * @param s2                Second source string.
   733  * @param length2           Length of second source string, or -1 if NUL-terminated.
   734  * @param options           A bit set of options:
   735  *
   736  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
   737  *                              and do not use STD3 ASCII rules.
   738  *                              If unassigned code points are found, the operation fails with
   739  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
   740  *
   741  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
   742  *                              If this option is set, unassigned code points in the input
   743  *                              are treated as normal Unicode code points.
   744  *
   745  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
   746  *                              If this option is set and the input does not satisfy STD3 rules,
   747  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
   748  *
   749  * @param status            ICU error code in/out parameter.
   750  *                          Must fulfill U_SUCCESS before the function call.
   751  * @return <0 or 0 or >0 as usual for string comparisons
   752  * @stable ICU 2.6
   753  */
   754 U_STABLE int32_t U_EXPORT2
   755 uidna_compare(  const UChar *s1, int32_t length1,
   756                 const UChar *s2, int32_t length2,
   757                 int32_t options,
   758                 UErrorCode* status);
   760 #endif /* #if !UCONFIG_NO_IDNA */
   762 #endif

mercurial