intl/icu/source/common/unicode/idna.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unicode/idna.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,323 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*   Copyright (C) 2010-2012, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +*******************************************************************************
     1.9 +*   file name:  idna.h
    1.10 +*   encoding:   US-ASCII
    1.11 +*   tab size:   8 (not used)
    1.12 +*   indentation:4
    1.13 +*
    1.14 +*   created on: 2010mar05
    1.15 +*   created by: Markus W. Scherer
    1.16 +*/
    1.17 +
    1.18 +#ifndef __IDNA_H__
    1.19 +#define __IDNA_H__
    1.20 +
    1.21 +/**
    1.22 + * \file
    1.23 + * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
    1.24 + */
    1.25 +
    1.26 +#include "unicode/utypes.h"
    1.27 +
    1.28 +#if !UCONFIG_NO_IDNA
    1.29 +
    1.30 +#include "unicode/bytestream.h"
    1.31 +#include "unicode/stringpiece.h"
    1.32 +#include "unicode/uidna.h"
    1.33 +#include "unicode/unistr.h"
    1.34 +
    1.35 +U_NAMESPACE_BEGIN
    1.36 +
    1.37 +class IDNAInfo;
    1.38 +
    1.39 +/**
    1.40 + * Abstract base class for IDNA processing.
    1.41 + * See http://www.unicode.org/reports/tr46/
    1.42 + * and http://www.ietf.org/rfc/rfc3490.txt
    1.43 + *
    1.44 + * The IDNA class is not intended for public subclassing.
    1.45 + *
    1.46 + * This C++ API currently only implements UTS #46.
    1.47 + * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
    1.48 + * and IDNA2003 (functions that do not use a service object).
    1.49 + * @stable ICU 4.6
    1.50 + */
    1.51 +class U_COMMON_API IDNA : public UObject {
    1.52 +public:
    1.53 +    /**
    1.54 +     * Destructor.
    1.55 +     * @stable ICU 4.6
    1.56 +     */
    1.57 +    ~IDNA();
    1.58 +
    1.59 +    /**
    1.60 +     * Returns an IDNA instance which implements UTS #46.
    1.61 +     * Returns an unmodifiable instance, owned by the caller.
    1.62 +     * Cache it for multiple operations, and delete it when done.
    1.63 +     * The instance is thread-safe, that is, it can be used concurrently.
    1.64 +     *
    1.65 +     * UTS #46 defines Unicode IDNA Compatibility Processing,
    1.66 +     * updated to the latest version of Unicode and compatible with both
    1.67 +     * IDNA2003 and IDNA2008.
    1.68 +     *
    1.69 +     * The worker functions use transitional processing, including deviation mappings,
    1.70 +     * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
    1.71 +     * is used in which case the deviation characters are passed through without change.
    1.72 +     *
    1.73 +     * Disallowed characters are mapped to U+FFFD.
    1.74 +     *
    1.75 +     * For available options see the uidna.h header.
    1.76 +     * Operations with the UTS #46 instance do not support the
    1.77 +     * UIDNA_ALLOW_UNASSIGNED option.
    1.78 +     *
    1.79 +     * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
    1.80 +     * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
    1.81 +     * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
    1.82 +     *
    1.83 +     * @param options Bit set to modify the processing and error checking.
    1.84 +     *                See option bit set values in uidna.h.
    1.85 +     * @param errorCode Standard ICU error code. Its input value must
    1.86 +     *                  pass the U_SUCCESS() test, or else the function returns
    1.87 +     *                  immediately. Check for U_FAILURE() on output or use with
    1.88 +     *                  function chaining. (See User Guide for details.)
    1.89 +     * @return the UTS #46 IDNA instance, if successful
    1.90 +     * @stable ICU 4.6
    1.91 +     */
    1.92 +    static IDNA *
    1.93 +    createUTS46Instance(uint32_t options, UErrorCode &errorCode);
    1.94 +
    1.95 +    /**
    1.96 +     * Converts a single domain name label into its ASCII form for DNS lookup.
    1.97 +     * If any processing step fails, then info.hasErrors() will be TRUE and
    1.98 +     * the result might not be an ASCII string.
    1.99 +     * The label might be modified according to the types of errors.
   1.100 +     * Labels with severe errors will be left in (or turned into) their Unicode form.
   1.101 +     *
   1.102 +     * The UErrorCode indicates an error only in exceptional cases,
   1.103 +     * such as a U_MEMORY_ALLOCATION_ERROR.
   1.104 +     *
   1.105 +     * @param label Input domain name label
   1.106 +     * @param dest Destination string object
   1.107 +     * @param info Output container of IDNA processing details.
   1.108 +     * @param errorCode Standard ICU error code. Its input value must
   1.109 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.110 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.111 +     *                  function chaining. (See User Guide for details.)
   1.112 +     * @return dest
   1.113 +     * @stable ICU 4.6
   1.114 +     */
   1.115 +    virtual UnicodeString &
   1.116 +    labelToASCII(const UnicodeString &label, UnicodeString &dest,
   1.117 +                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
   1.118 +
   1.119 +    /**
   1.120 +     * Converts a single domain name label into its Unicode form for human-readable display.
   1.121 +     * If any processing step fails, then info.hasErrors() will be TRUE.
   1.122 +     * The label might be modified according to the types of errors.
   1.123 +     *
   1.124 +     * The UErrorCode indicates an error only in exceptional cases,
   1.125 +     * such as a U_MEMORY_ALLOCATION_ERROR.
   1.126 +     *
   1.127 +     * @param label Input domain name label
   1.128 +     * @param dest Destination string object
   1.129 +     * @param info Output container of IDNA processing details.
   1.130 +     * @param errorCode Standard ICU error code. Its input value must
   1.131 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.132 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.133 +     *                  function chaining. (See User Guide for details.)
   1.134 +     * @return dest
   1.135 +     * @stable ICU 4.6
   1.136 +     */
   1.137 +    virtual UnicodeString &
   1.138 +    labelToUnicode(const UnicodeString &label, UnicodeString &dest,
   1.139 +                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
   1.140 +
   1.141 +    /**
   1.142 +     * Converts a whole domain name into its ASCII form for DNS lookup.
   1.143 +     * If any processing step fails, then info.hasErrors() will be TRUE and
   1.144 +     * the result might not be an ASCII string.
   1.145 +     * The domain name might be modified according to the types of errors.
   1.146 +     * Labels with severe errors will be left in (or turned into) their Unicode form.
   1.147 +     *
   1.148 +     * The UErrorCode indicates an error only in exceptional cases,
   1.149 +     * such as a U_MEMORY_ALLOCATION_ERROR.
   1.150 +     *
   1.151 +     * @param name Input domain name
   1.152 +     * @param dest Destination string object
   1.153 +     * @param info Output container of IDNA processing details.
   1.154 +     * @param errorCode Standard ICU error code. Its input value must
   1.155 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.156 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.157 +     *                  function chaining. (See User Guide for details.)
   1.158 +     * @return dest
   1.159 +     * @stable ICU 4.6
   1.160 +     */
   1.161 +    virtual UnicodeString &
   1.162 +    nameToASCII(const UnicodeString &name, UnicodeString &dest,
   1.163 +                IDNAInfo &info, UErrorCode &errorCode) const = 0;
   1.164 +
   1.165 +    /**
   1.166 +     * Converts a whole domain name into its Unicode form for human-readable display.
   1.167 +     * If any processing step fails, then info.hasErrors() will be TRUE.
   1.168 +     * The domain name might be modified according to the types of errors.
   1.169 +     *
   1.170 +     * The UErrorCode indicates an error only in exceptional cases,
   1.171 +     * such as a U_MEMORY_ALLOCATION_ERROR.
   1.172 +     *
   1.173 +     * @param name Input domain name
   1.174 +     * @param dest Destination string object
   1.175 +     * @param info Output container of IDNA processing details.
   1.176 +     * @param errorCode Standard ICU error code. Its input value must
   1.177 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.178 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.179 +     *                  function chaining. (See User Guide for details.)
   1.180 +     * @return dest
   1.181 +     * @stable ICU 4.6
   1.182 +     */
   1.183 +    virtual UnicodeString &
   1.184 +    nameToUnicode(const UnicodeString &name, UnicodeString &dest,
   1.185 +                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
   1.186 +
   1.187 +    // UTF-8 versions of the processing methods ---------------------------- ***
   1.188 +
   1.189 +    /**
   1.190 +     * Converts a single domain name label into its ASCII form for DNS lookup.
   1.191 +     * UTF-8 version of labelToASCII(), same behavior.
   1.192 +     *
   1.193 +     * @param label Input domain name label
   1.194 +     * @param dest Destination byte sink; Flush()ed if successful
   1.195 +     * @param info Output container of IDNA processing details.
   1.196 +     * @param errorCode Standard ICU error code. Its input value must
   1.197 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.198 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.199 +     *                  function chaining. (See User Guide for details.)
   1.200 +     * No value is returned; the result is written to the dest byte sink.
   1.201 +     * @stable ICU 4.6
   1.202 +     */
   1.203 +    virtual void
   1.204 +    labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
   1.205 +                      IDNAInfo &info, UErrorCode &errorCode) const;
   1.206 +
   1.207 +    /**
   1.208 +     * Converts a single domain name label into its Unicode form for human-readable display.
   1.209 +     * UTF-8 version of labelToUnicode(), same behavior.
   1.210 +     *
   1.211 +     * @param label Input domain name label
   1.212 +     * @param dest Destination byte sink; Flush()ed if successful
   1.213 +     * @param info Output container of IDNA processing details.
   1.214 +     * @param errorCode Standard ICU error code. Its input value must
   1.215 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.216 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.217 +     *                  function chaining. (See User Guide for details.)
   1.218 +     * No value is returned; the result is written to the dest byte sink.
   1.219 +     * @stable ICU 4.6
   1.220 +     */
   1.221 +    virtual void
   1.222 +    labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
   1.223 +                       IDNAInfo &info, UErrorCode &errorCode) const;
   1.224 +
   1.225 +    /**
   1.226 +     * Converts a whole domain name into its ASCII form for DNS lookup.
   1.227 +     * UTF-8 version of nameToASCII(), same behavior.
   1.228 +     *
   1.229 +     * @param name Input domain name
   1.230 +     * @param dest Destination byte sink; Flush()ed if successful
   1.231 +     * @param info Output container of IDNA processing details.
   1.232 +     * @param errorCode Standard ICU error code. Its input value must
   1.233 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.234 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.235 +     *                  function chaining. (See User Guide for details.)
   1.236 +     * No value is returned; the result is written to the dest byte sink.
   1.237 +     * @stable ICU 4.6
   1.238 +     */
   1.239 +    virtual void
   1.240 +    nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
   1.241 +                     IDNAInfo &info, UErrorCode &errorCode) const;
   1.242 +
   1.243 +    /**
   1.244 +     * Converts a whole domain name into its Unicode form for human-readable display.
   1.245 +     * UTF-8 version of nameToUnicode(), same behavior.
   1.246 +     *
   1.247 +     * @param name Input domain name
   1.248 +     * @param dest Destination byte sink; Flush()ed if successful
   1.249 +     * @param info Output container of IDNA processing details.
   1.250 +     * @param errorCode Standard ICU error code. Its input value must
   1.251 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.252 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.253 +     *                  function chaining. (See User Guide for details.)
   1.254 +     * No value is returned; the result is written to the dest byte sink.
   1.255 +     * @stable ICU 4.6
   1.256 +     */
   1.257 +    virtual void
   1.258 +    nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
   1.259 +                      IDNAInfo &info, UErrorCode &errorCode) const;
   1.260 +};
   1.261 +
   1.262 +class UTS46;
   1.263 +
   1.264 +/**
   1.265 + * Output container for IDNA processing errors.
   1.266 + * The IDNAInfo class is not suitable for subclassing.
   1.267 + * @stable ICU 4.6
   1.268 + */
   1.269 +class U_COMMON_API IDNAInfo : public UMemory {
   1.270 +public:
   1.271 +    /**
   1.272 +     * Constructor for stack allocation; the new object records no errors.
   1.273 +     * @stable ICU 4.6
   1.274 +     */
   1.275 +    IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
   1.276 +    /**
   1.277 +     * Were there IDNA processing errors?
   1.278 +     * @return TRUE if there were processing errors
   1.279 +     * @stable ICU 4.6
   1.280 +     */
   1.281 +    UBool hasErrors() const { return errors!=0; }
   1.282 +    /**
   1.283 +     * Returns a bit set indicating IDNA processing errors.
   1.284 +     * See UIDNA_ERROR_... constants in uidna.h.
   1.285 +     * @return bit set of processing errors
   1.286 +     * @stable ICU 4.6
   1.287 +     */
   1.288 +    uint32_t getErrors() const { return errors; }
   1.289 +    /**
   1.290 +     * Returns TRUE if transitional and nontransitional processing produce different results.
   1.291 +     * This is the case when the input label or domain name contains
   1.292 +     * one or more deviation characters outside a Punycode label (see UTS #46).
   1.293 +     * <ul>
   1.294 +     * <li>With nontransitional processing, such characters are
   1.295 +     * copied to the destination string.
   1.296 +     * <li>With transitional processing, such characters are
   1.297 +     * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
   1.298 +     * </ul>
   1.299 +     * @return TRUE if transitional and nontransitional processing produce different results
   1.300 +     * @stable ICU 4.6
   1.301 +     */
   1.302 +    UBool isTransitionalDifferent() const { return isTransDiff; }
   1.303 +
   1.304 +private:
   1.305 +    friend class UTS46;  // UTS46 may access the private state below, including reset()
   1.306 +
   1.307 +    IDNAInfo(const IDNAInfo &other);  // private and not defined here: no copying
   1.308 +    IDNAInfo &operator=(const IDNAInfo &other);  // private and not defined here: no copying
   1.309 +
   1.310 +    void reset() {  // restores the same values the default constructor sets
   1.311 +        errors=labelErrors=0;
   1.312 +        isTransDiff=FALSE;
   1.313 +        isBiDi=FALSE;
   1.314 +        isOkBiDi=TRUE;
   1.315 +    }
   1.316 +
   1.317 +    uint32_t errors, labelErrors;  // errors backs hasErrors()/getErrors(); labelErrors presumably per-label -- confirm in UTS46
   1.318 +    UBool isTransDiff;  // value returned by isTransitionalDifferent()
   1.319 +    UBool isBiDi;  // no public accessor here; NOTE(review): presumably maintained by UTS46 -- confirm
   1.320 +    UBool isOkBiDi;  // the only flag that starts out TRUE; no public accessor here
   1.321 +};
   1.322 +
   1.323 +U_NAMESPACE_END
   1.324 +
   1.325 +#endif  // UCONFIG_NO_IDNA
   1.326 +#endif  // __IDNA_H__

mercurial