intl/icu/source/common/unicode/unorm2.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unicode/unorm2.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,528 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2009-2013, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  unorm2.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2009dec15
    1.17 +*   created by: Markus W. Scherer
    1.18 +*/
    1.19 +
    1.20 +#ifndef __UNORM2_H__
    1.21 +#define __UNORM2_H__
    1.22 +
    1.23 +/**
    1.24 + * \file
    1.25 + * \brief C API: New API for Unicode Normalization.
    1.26 + *
    1.27 + * Unicode normalization functionality for standard Unicode normalization or
    1.28 + * for using custom mapping tables.
    1.29 + * All instances of UNormalizer2 are unmodifiable/immutable.
    1.30 + * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
    1.31 + * For more details see the Normalizer2 C++ class.
    1.32 + */
    1.33 +
    1.34 +#include "unicode/utypes.h"
    1.35 +#include "unicode/localpointer.h"
    1.36 +#include "unicode/uset.h"
    1.37 +
    1.38 +/**
    1.39 + * Constants for normalization modes (the mode argument of unorm2_getInstance()).
    1.40 + * For details about standard Unicode normalization forms
    1.41 + * and about the algorithms which are also used with custom mapping tables
    1.42 + * see http://www.unicode.org/reports/tr15/
    1.43 + * @stable ICU 4.4
    1.44 + */
    1.45 +typedef enum {
    1.46 +    /**
    1.47 +     * Decomposition followed by composition.
    1.48 +     * Same as standard NFC when using an "nfc" instance.
    1.49 +     * Same as standard NFKC when using an "nfkc" instance.
    1.50 +     * For details about standard Unicode normalization forms
    1.51 +     * see http://www.unicode.org/reports/tr15/
    1.52 +     * @stable ICU 4.4
    1.53 +     */
    1.54 +    UNORM2_COMPOSE,
    1.55 +    /**
    1.56 +     * Map, and reorder canonically.
    1.57 +     * Same as standard NFD when using an "nfc" instance.
    1.58 +     * Same as standard NFKD when using an "nfkc" instance.
    1.59 +     * For details about standard Unicode normalization forms
    1.60 +     * see http://www.unicode.org/reports/tr15/
    1.61 +     * @stable ICU 4.4
    1.62 +     */
    1.63 +    UNORM2_DECOMPOSE,
    1.64 +    /**
    1.65 +     * "Fast C or D" form.
    1.66 +     * If a string is in this form, then further decomposition <i>without reordering</i>
    1.67 +     * would yield the same form as DECOMPOSE.
    1.68 +     * Text in "Fast C or D" form can be processed efficiently with data tables
    1.69 +     * that are "canonically closed", that is, that provide equivalent data for
    1.70 +     * equivalent text, without having to be fully normalized.
    1.71 +     * Not a standard Unicode normalization form.
    1.72 +     * Not a unique form: Different FCD strings can be canonically equivalent.
    1.73 +     * For details see http://www.unicode.org/notes/tn5/#FCD
    1.74 +     * @stable ICU 4.4
    1.75 +     */
    1.76 +    UNORM2_FCD,
    1.77 +    /**
    1.78 +     * Compose only contiguously.
    1.79 +     * Also known as "FCC" or "Fast C Contiguous".
    1.80 +     * The result will often but not always be in NFC.
    1.81 +     * The result will conform to FCD which is useful for processing.
    1.82 +     * Not a standard Unicode normalization form.
    1.83 +     * For details see http://www.unicode.org/notes/tn5/#FCC
    1.84 +     * @stable ICU 4.4
    1.85 +     */
    1.86 +    UNORM2_COMPOSE_CONTIGUOUS
    1.87 +} UNormalization2Mode;
    1.88 +
    1.89 +/**
    1.90 + * Result values for normalization quick check functions such as unorm2_quickCheck().
    1.91 + * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
    1.92 + * @stable ICU 2.0
    1.93 + */
    1.94 +typedef enum UNormalizationCheckResult {
    1.95 +  /**
    1.96 +   * The input string is not in the normalization form.
    1.97 +   * @stable ICU 2.0
    1.98 +   */
    1.99 +  UNORM_NO,
   1.100 +  /**
   1.101 +   * The input string is in the normalization form.
   1.102 +   * @stable ICU 2.0
   1.103 +   */
   1.104 +  UNORM_YES,
   1.105 +  /**
   1.106 +   * The input string may or may not be in the normalization form.
   1.107 +   * This value is only returned for composition forms like NFC and FCC,
   1.108 +   * when a backward-combining character is found for which the surrounding text
   1.109 +   * would have to be analyzed further.
   1.110 +   * @stable ICU 2.0
   1.111 +   */
   1.112 +  UNORM_MAYBE
   1.113 +} UNormalizationCheckResult;
   1.114 +
   1.115 +/**
   1.116 + * Opaque C service object type for the new normalization API; all instances are immutable.
   1.117 + * @stable ICU 4.4
   1.118 + */
   1.119 +struct UNormalizer2;
   1.120 +typedef struct UNormalizer2 UNormalizer2;  /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
   1.121 +
   1.122 +#if !UCONFIG_NO_NORMALIZATION
   1.123 +
   1.124 +/**
   1.125 + * Returns a UNormalizer2 instance for Unicode NFC normalization.
   1.126 + * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
   1.127 + * Returns an unmodifiable singleton instance. Do not delete it or pass it to unorm2_close().
   1.128 + * @param pErrorCode Standard ICU error code. Its input value must
   1.129 + *                  pass the U_SUCCESS() test, or else the function returns
   1.130 + *                  immediately. Check for U_FAILURE() on output or use with
   1.131 + *                  function chaining. (See User Guide for details.)
   1.132 + * @return the requested UNormalizer2, if successful
   1.133 + * @stable ICU 49
   1.134 + */
   1.135 +U_STABLE const UNormalizer2 * U_EXPORT2
   1.136 +unorm2_getNFCInstance(UErrorCode *pErrorCode);
   1.137 +
   1.138 +/**
   1.139 + * Returns a UNormalizer2 instance for Unicode NFD normalization.
   1.140 + * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
   1.141 + * Returns an unmodifiable singleton instance. Do not delete it or pass it to unorm2_close().
   1.142 + * @param pErrorCode Standard ICU error code. Its input value must
   1.143 + *                  pass the U_SUCCESS() test, or else the function returns
   1.144 + *                  immediately. Check for U_FAILURE() on output or use with
   1.145 + *                  function chaining. (See User Guide for details.)
   1.146 + * @return the requested UNormalizer2, if successful
   1.147 + * @stable ICU 49
   1.148 + */
   1.149 +U_STABLE const UNormalizer2 * U_EXPORT2
   1.150 +unorm2_getNFDInstance(UErrorCode *pErrorCode);
   1.151 +
   1.152 +/**
   1.153 + * Returns a UNormalizer2 instance for Unicode NFKC normalization.
   1.154 + * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
   1.155 + * Returns an unmodifiable singleton instance. Do not delete it or pass it to unorm2_close().
   1.156 + * @param pErrorCode Standard ICU error code. Its input value must
   1.157 + *                  pass the U_SUCCESS() test, or else the function returns
   1.158 + *                  immediately. Check for U_FAILURE() on output or use with
   1.159 + *                  function chaining. (See User Guide for details.)
   1.160 + * @return the requested UNormalizer2, if successful
   1.161 + * @stable ICU 49
   1.162 + */
   1.163 +U_STABLE const UNormalizer2 * U_EXPORT2
   1.164 +unorm2_getNFKCInstance(UErrorCode *pErrorCode);
   1.165 +
   1.166 +/**
   1.167 + * Returns a UNormalizer2 instance for Unicode NFKD normalization.
   1.168 + * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
   1.169 + * Returns an unmodifiable singleton instance. Do not delete it or pass it to unorm2_close().
   1.170 + * @param pErrorCode Standard ICU error code. Its input value must
   1.171 + *                  pass the U_SUCCESS() test, or else the function returns
   1.172 + *                  immediately. Check for U_FAILURE() on output or use with
   1.173 + *                  function chaining. (See User Guide for details.)
   1.174 + * @return the requested UNormalizer2, if successful
   1.175 + * @stable ICU 49
   1.176 + */
   1.177 +U_STABLE const UNormalizer2 * U_EXPORT2
   1.178 +unorm2_getNFKDInstance(UErrorCode *pErrorCode);
   1.179 +
   1.180 +/**
   1.181 + * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
   1.182 + * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
   1.183 + * Returns an unmodifiable singleton instance. Do not delete it or pass it to unorm2_close().
   1.184 + * @param pErrorCode Standard ICU error code. Its input value must
   1.185 + *                  pass the U_SUCCESS() test, or else the function returns
   1.186 + *                  immediately. Check for U_FAILURE() on output or use with
   1.187 + *                  function chaining. (See User Guide for details.)
   1.188 + * @return the requested UNormalizer2, if successful
   1.189 + * @stable ICU 49
   1.190 + */
   1.191 +U_STABLE const UNormalizer2 * U_EXPORT2
   1.192 +unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
   1.193 +
   1.194 +/**
   1.195 + * Returns a UNormalizer2 instance which uses the specified data file
   1.196 + * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
   1.197 + * and which composes or decomposes text according to the specified mode.
   1.198 + * Returns an unmodifiable singleton instance. Do not delete it or pass it to unorm2_close().
   1.199 + *
   1.200 + * Use packageName=NULL for data files that are part of ICU's own data.
   1.201 + * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
   1.202 + * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
   1.203 + * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
   1.204 + *
   1.205 + * @param packageName NULL for ICU built-in data, otherwise application data package name
   1.206 + * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
   1.207 + * @param mode normalization mode (compose or decompose etc.)
   1.208 + * @param pErrorCode Standard ICU error code. Its input value must
   1.209 + *                  pass the U_SUCCESS() test, or else the function returns
   1.210 + *                  immediately. Check for U_FAILURE() on output or use with
   1.211 + *                  function chaining. (See User Guide for details.)
   1.212 + * @return the requested UNormalizer2, if successful
   1.213 + * @stable ICU 4.4
   1.214 + */
   1.215 +U_STABLE const UNormalizer2 * U_EXPORT2
   1.216 +unorm2_getInstance(const char *packageName,
   1.217 +                   const char *name,
   1.218 +                   UNormalization2Mode mode,
   1.219 +                   UErrorCode *pErrorCode);
   1.220 +
   1.221 +/**
   1.222 + * Constructs a filtered normalizer wrapping any UNormalizer2 instance
   1.223 + * and a filter set.
   1.224 + * Both are aliased and must not be modified or deleted while this object
   1.225 + * is used.
   1.226 + * The filter set should be frozen; otherwise the performance will suffer greatly.
   1.227 + * @param norm2 wrapped UNormalizer2 instance
   1.228 + * @param filterSet USet which determines the characters to be normalized
   1.229 + * @param pErrorCode Standard ICU error code. Its input value must
   1.230 + *                   pass the U_SUCCESS() test, or else the function returns
   1.231 + *                   immediately. Check for U_FAILURE() on output or use with
   1.232 + *                   function chaining. (See User Guide for details.)
   1.233 + * @return the new filtered UNormalizer2, if successful; the caller must close it with unorm2_close()
   1.234 + * @stable ICU 4.4
   1.235 + */
   1.236 +U_STABLE UNormalizer2 * U_EXPORT2
   1.237 +unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
   1.238 +
   1.239 +/**
   1.240 + * Closes a UNormalizer2 instance from unorm2_openFiltered().
   1.241 + * Do not close singleton instances from unorm2_getInstance() or the unorm2_getNFCInstance() family!
   1.242 + * @param norm2 UNormalizer2 instance to be closed
   1.243 + * @stable ICU 4.4
   1.244 + */
   1.245 +U_STABLE void U_EXPORT2
   1.246 +unorm2_close(UNormalizer2 *norm2);
   1.247 +
   1.248 +#if U_SHOW_CPLUSPLUS_API
   1.249 +
   1.250 +U_NAMESPACE_BEGIN
   1.251 +
   1.252 +/**
   1.253 + * \class LocalUNormalizer2Pointer
   1.254 + * "Smart pointer" class, closes a UNormalizer2 via unorm2_close().
   1.255 + * For most methods see the LocalPointerBase base class.
   1.256 + * Use only for instances from unorm2_openFiltered(); singletons from unorm2_getInstance() must not be closed.
   1.257 + * @see LocalPointerBase
   1.258 + * @see LocalPointer
   1.259 + * @stable ICU 4.4
   1.260 + */
   1.261 +U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
   1.262 +
   1.263 +U_NAMESPACE_END
   1.264 +
   1.265 +#endif
   1.266 +
   1.267 +/**
   1.268 + * Writes the normalized form of the source string to the destination string
   1.269 + * (replacing its contents) and returns the length of the destination string.
   1.270 + * The source and destination strings must be different buffers.
   1.271 + * @param norm2 UNormalizer2 instance
   1.272 + * @param src source string
   1.273 + * @param length length of the source string, or -1 if NUL-terminated
   1.274 + * @param dest destination string; its contents are replaced with normalized src
   1.275 + * @param capacity number of UChars that can be written to dest
   1.276 + * @param pErrorCode Standard ICU error code. Its input value must
   1.277 + *                   pass the U_SUCCESS() test, or else the function returns
   1.278 + *                   immediately. Check for U_FAILURE() on output or use with
   1.279 + *                   function chaining. (See User Guide for details.)
   1.280 + * @return the length of the destination string
   1.281 + * @stable ICU 4.4
   1.282 + */
   1.283 +U_STABLE int32_t U_EXPORT2
   1.284 +unorm2_normalize(const UNormalizer2 *norm2,
   1.285 +                 const UChar *src, int32_t length,
   1.286 +                 UChar *dest, int32_t capacity,
   1.287 +                 UErrorCode *pErrorCode);
   1.288 +/**
   1.289 + * Appends the normalized form of the second string to the first string
   1.290 + * (merging them at the boundary) and returns the length of the first string.
   1.291 + * The result is normalized if the first string was normalized.
   1.292 + * The first and second strings must be different buffers.
   1.293 + * @param norm2 UNormalizer2 instance
   1.294 + * @param first string, should be normalized
   1.295 + * @param firstLength length of the first string, or -1 if NUL-terminated
   1.296 + * @param firstCapacity number of UChars that can be written to first
   1.297 + * @param second string, will be normalized
   1.298 + * @param secondLength length of the second string, or -1 if NUL-terminated
   1.299 + * @param pErrorCode Standard ICU error code. Its input value must
   1.300 + *                   pass the U_SUCCESS() test, or else the function returns
   1.301 + *                   immediately. Check for U_FAILURE() on output or use with
   1.302 + *                   function chaining. (See User Guide for details.)
   1.303 + * @return the resulting length of the first string
   1.304 + * @stable ICU 4.4
   1.305 + */
   1.306 +U_STABLE int32_t U_EXPORT2
   1.307 +unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
   1.308 +                                UChar *first, int32_t firstLength, int32_t firstCapacity,
   1.309 +                                const UChar *second, int32_t secondLength,
   1.310 +                                UErrorCode *pErrorCode);
   1.311 +/**
   1.312 + * Appends the second string to the first string
   1.313 + * (merging them at the boundary) and returns the length of the first string.
   1.314 + * The result is normalized if both the strings were normalized.
   1.315 + * The first and second strings must be different buffers.
   1.316 + * @param norm2 UNormalizer2 instance
   1.317 + * @param first string, should be normalized
   1.318 + * @param firstLength length of the first string, or -1 if NUL-terminated
   1.319 + * @param firstCapacity number of UChars that can be written to first
   1.320 + * @param second string, should be normalized
   1.321 + * @param secondLength length of the second string, or -1 if NUL-terminated
   1.322 + * @param pErrorCode Standard ICU error code. Its input value must
   1.323 + *                   pass the U_SUCCESS() test, or else the function returns
   1.324 + *                   immediately. Check for U_FAILURE() on output or use with
   1.325 + *                   function chaining. (See User Guide for details.)
   1.326 + * @return the resulting length of the first string
   1.327 + * @stable ICU 4.4
   1.328 + */
   1.329 +U_STABLE int32_t U_EXPORT2
   1.330 +unorm2_append(const UNormalizer2 *norm2,
   1.331 +              UChar *first, int32_t firstLength, int32_t firstCapacity,
   1.332 +              const UChar *second, int32_t secondLength,
   1.333 +              UErrorCode *pErrorCode);
   1.334 +
   1.335 +/**
   1.336 + * Gets the decomposition mapping of c.
   1.337 + * Roughly equivalent to normalizing the string form of c
   1.338 + * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster. Unlike normalization,
   1.339 + * this function returns a negative value and does not write a string
   1.340 + * if c does not have a decomposition mapping in this instance's data.
   1.341 + * This function is independent of the mode of the UNormalizer2.
   1.342 + * @param norm2 UNormalizer2 instance
   1.343 + * @param c code point
   1.344 + * @param decomposition String buffer which will be set to c's
   1.345 + *                      decomposition mapping, if there is one.
   1.346 + * @param capacity number of UChars that can be written to decomposition
   1.347 + * @param pErrorCode Standard ICU error code. Its input value must
   1.348 + *                   pass the U_SUCCESS() test, or else the function returns
   1.349 + *                   immediately. Check for U_FAILURE() on output or use with
   1.350 + *                   function chaining. (See User Guide for details.)
   1.351 + * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
   1.352 + * @stable ICU 4.6
   1.353 + */
   1.354 +U_STABLE int32_t U_EXPORT2
   1.355 +unorm2_getDecomposition(const UNormalizer2 *norm2,
   1.356 +                        UChar32 c, UChar *decomposition, int32_t capacity,
   1.357 +                        UErrorCode *pErrorCode);
   1.358 +
   1.359 +/**
   1.360 + * Gets the raw decomposition mapping of c.
   1.361 + *
   1.362 + * This is similar to the unorm2_getDecomposition() function but returns the
   1.363 + * raw decomposition mapping as specified in UnicodeData.txt or
   1.364 + * (for custom data) in the mapping files processed by the gennorm2 tool.
   1.365 + * By contrast, unorm2_getDecomposition() returns the processed,
   1.366 + * recursively-decomposed version of this mapping.
   1.367 + *
   1.368 + * When used on a standard NFKC UNormalizer2 instance,
   1.369 + * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
   1.370 + *
   1.371 + * When used on a standard NFC UNormalizer2 instance,
   1.372 + * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
   1.373 + * in this case, the result contains either one or two code points (=1..4 UChars).
   1.374 + *
   1.375 + * This function is independent of the mode of the UNormalizer2.
   1.376 + * @param norm2 UNormalizer2 instance
   1.377 + * @param c code point
   1.378 + * @param decomposition String buffer which will be set to c's
   1.379 + *                      raw decomposition mapping, if there is one.
   1.380 + * @param capacity number of UChars that can be written to decomposition
   1.381 + * @param pErrorCode Standard ICU error code. Its input value must
   1.382 + *                   pass the U_SUCCESS() test, or else the function returns
   1.383 + *                   immediately. Check for U_FAILURE() on output or use with
   1.384 + *                   function chaining. (See User Guide for details.)
   1.385 + * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
   1.386 + * @stable ICU 49
   1.387 + */
   1.388 +U_STABLE int32_t U_EXPORT2
   1.389 +unorm2_getRawDecomposition(const UNormalizer2 *norm2,
   1.390 +                           UChar32 c, UChar *decomposition, int32_t capacity,
   1.391 +                           UErrorCode *pErrorCode);
   1.392 +
   1.393 +/**
   1.394 + * Performs pairwise composition of a and b and returns the composite if there is one.
   1.395 + *
   1.396 + * Returns a composite code point c only if c has a two-way mapping to a+b.
   1.397 + * In standard Unicode normalization, this means that
   1.398 + * c has a canonical decomposition to a+b
   1.399 + * and c does not have the Full_Composition_Exclusion property.
   1.400 + *
   1.401 + * This function is independent of the mode of the UNormalizer2.
   1.402 + * @param norm2 UNormalizer2 instance
   1.403 + * @param a A (normalization starter) code point.
   1.404 + * @param b Another code point.
   1.405 + * @return The non-negative composite code point if there is one; otherwise a negative value.
   1.406 + * @stable ICU 49
   1.407 + */
   1.408 +U_STABLE UChar32 U_EXPORT2
   1.409 +unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
   1.410 +
   1.411 +/**
   1.412 + * Gets the combining class of c.
   1.413 + * The default implementation returns 0
   1.414 + * but all standard implementations return the Unicode Canonical_Combining_Class value.
   1.415 + * @param norm2 UNormalizer2 instance
   1.416 + * @param c code point
   1.417 + * @return c's combining class (the Canonical_Combining_Class value for the standard instances)
   1.418 + * @stable ICU 49
   1.419 + */
   1.420 +U_STABLE uint8_t U_EXPORT2
   1.421 +unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
   1.422 +
   1.423 +/**
   1.424 + * Tests if the string is normalized.
   1.425 + * Internally, in cases where unorm2_quickCheck() would return UNORM_MAYBE
   1.426 + * (which is only possible for the two COMPOSE modes) this function
   1.427 + * resolves to "yes" or "no" to provide a definitive result,
   1.428 + * at the cost of doing more work in those cases.
   1.429 + * @param norm2 UNormalizer2 instance
   1.430 + * @param s input string
   1.431 + * @param length length of the string, or -1 if NUL-terminated
   1.432 + * @param pErrorCode Standard ICU error code. Its input value must
   1.433 + *                   pass the U_SUCCESS() test, or else the function returns
   1.434 + *                   immediately. Check for U_FAILURE() on output or use with
   1.435 + *                   function chaining. (See User Guide for details.)
   1.436 + * @return TRUE if s is normalized
   1.437 + * @stable ICU 4.4
   1.438 + */
   1.439 +U_STABLE UBool U_EXPORT2
   1.440 +unorm2_isNormalized(const UNormalizer2 *norm2,
   1.441 +                    const UChar *s, int32_t length,
   1.442 +                    UErrorCode *pErrorCode);
   1.443 +
   1.444 +/**
   1.445 + * Tests if the string is normalized.
   1.446 + * For the two COMPOSE modes, the result could be UNORM_MAYBE in cases that
   1.447 + * would take a little more work to resolve definitively.
   1.448 + * Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend() for a faster
   1.449 + * combination of quick check + normalization, to avoid
   1.450 + * re-checking the "yes" prefix.
   1.451 + * @param norm2 UNormalizer2 instance
   1.452 + * @param s input string
   1.453 + * @param length length of the string, or -1 if NUL-terminated
   1.454 + * @param pErrorCode Standard ICU error code. Its input value must
   1.455 + *                   pass the U_SUCCESS() test, or else the function returns
   1.456 + *                   immediately. Check for U_FAILURE() on output or use with
   1.457 + *                   function chaining. (See User Guide for details.)
   1.458 + * @return UNORM_YES, UNORM_NO or UNORM_MAYBE (see UNormalizationCheckResult)
   1.459 + * @stable ICU 4.4
   1.460 + */
   1.461 +U_STABLE UNormalizationCheckResult U_EXPORT2
   1.462 +unorm2_quickCheck(const UNormalizer2 *norm2,
   1.463 +                  const UChar *s, int32_t length,
   1.464 +                  UErrorCode *pErrorCode);
   1.465 +
   1.466 +/**
   1.467 + * Returns the end of the normalized substring of the input string.
   1.468 + * In other words, with <code>end=unorm2_spanQuickCheckYes(norm2, s, length, pErrorCode);</code>
   1.469 + * the substring consisting of the first <code>end</code> UChars of s
   1.470 + * will pass the quick check with a "yes" result.
   1.471 + *
   1.472 + * The returned end index is usually one or more characters before the
   1.473 + * "no" or "maybe" character: The end index is at a normalization boundary.
   1.474 + * (See the Normalizer2 C++ class documentation for more about normalization boundaries.)
   1.475 + *
   1.476 + * When the goal is a normalized string and most input strings are expected
   1.477 + * to be normalized already, then call this function,
   1.478 + * and if it returns a prefix shorter than the input string,
   1.479 + * copy that prefix and use unorm2_normalizeSecondAndAppend() for the remainder.
   1.480 + * @param norm2 UNormalizer2 instance
   1.481 + * @param s input string
   1.482 + * @param length length of the string, or -1 if NUL-terminated
   1.483 + * @param pErrorCode Standard ICU error code. Its input value must
   1.484 + *                   pass the U_SUCCESS() test, or else the function returns
   1.485 + *                   immediately. Check for U_FAILURE() on output or use with
   1.486 + *                   function chaining. (See User Guide for details.)
   1.487 + * @return "yes" span end index
   1.488 + * @stable ICU 4.4
   1.489 + */
   1.490 +U_STABLE int32_t U_EXPORT2
   1.491 +unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
   1.492 +                         const UChar *s, int32_t length,
   1.493 +                         UErrorCode *pErrorCode);
   1.494 +
   1.495 +/**
   1.496 + * Tests if the character always has a normalization boundary before it,
   1.497 + * regardless of context.
   1.498 + * For details see the Normalizer2 C++ base class documentation.
   1.499 + * @param norm2 UNormalizer2 instance
   1.500 + * @param c character to test
   1.501 + * @return TRUE if c always has a normalization boundary before it
   1.502 + * @stable ICU 4.4
   1.503 + */
   1.504 +U_STABLE UBool U_EXPORT2
   1.505 +unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
   1.506 +
   1.507 +/**
   1.508 + * Tests if the character always has a normalization boundary after it,
   1.509 + * regardless of context.
   1.510 + * For details see the Normalizer2 C++ base class documentation.
   1.511 + * @param norm2 UNormalizer2 instance
   1.512 + * @param c character to test
   1.513 + * @return TRUE if c always has a normalization boundary after it
   1.514 + * @stable ICU 4.4
   1.515 + */
   1.516 +U_STABLE UBool U_EXPORT2
   1.517 +unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
   1.518 +
   1.519 +/**
   1.520 + * Tests if the character is normalization-inert.
   1.521 + * For details see the Normalizer2 C++ base class documentation.
   1.522 + * @param norm2 UNormalizer2 instance
   1.523 + * @param c character to test
   1.524 + * @return TRUE if c is normalization-inert
   1.525 + * @stable ICU 4.4
   1.526 + */
   1.527 +U_STABLE UBool U_EXPORT2
   1.528 +unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
   1.529 +
   1.530 +#endif  /* !UCONFIG_NO_NORMALIZATION */
   1.531 +#endif  /* __UNORM2_H__ */

mercurial