1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/normalizer2.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,658 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2009-2013, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: normalizer2.h 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2009nov22 1.17 +* created by: Markus W. Scherer 1.18 +*/ 1.19 + 1.20 +#ifndef __NORMALIZER2_H__ 1.21 +#define __NORMALIZER2_H__ 1.22 + 1.23 +/** 1.24 + * \file 1.25 + * \brief C++ API: New API for Unicode Normalization. 1.26 + */ 1.27 + 1.28 +#include "unicode/utypes.h" 1.29 + 1.30 +#if !UCONFIG_NO_NORMALIZATION 1.31 + 1.32 +#include "unicode/uniset.h" 1.33 +#include "unicode/unistr.h" 1.34 +#include "unicode/unorm2.h" 1.35 + 1.36 +U_NAMESPACE_BEGIN 1.37 + 1.38 +/** 1.39 + * Unicode normalization functionality for standard Unicode normalization or 1.40 + * for using custom mapping tables. 1.41 + * All instances of this class are unmodifiable/immutable. 1.42 + * Instances returned by getInstance() are singletons that must not be deleted by the caller. 1.43 + * The Normalizer2 class is not intended for public subclassing. 1.44 + * 1.45 + * The primary functions are to produce a normalized string and to detect whether 1.46 + * a string is already normalized. 1.47 + * The most commonly used normalization forms are those defined in 1.48 + * http://www.unicode.org/unicode/reports/tr15/ 1.49 + * However, this API supports additional normalization forms for specialized purposes. 1.50 + * For example, NFKC_Casefold is provided via getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode) 1.51 + * and can be used in implementations of UTS #46. 
1.52 + * 1.53 + * Not only are the standard compose and decompose modes supplied, 1.54 + * but additional modes are provided as documented in the Mode enum. 1.55 + * 1.56 + * Some of the functions in this class identify normalization boundaries. 1.57 + * At a normalization boundary, the portions of the string 1.58 + * before it and starting from it do not interact and can be handled independently. 1.59 + * 1.60 + * The spanQuickCheckYes() stops at a normalization boundary. 1.61 + * When the goal is a normalized string, then the text before the boundary 1.62 + * can be copied, and the remainder can be processed with normalizeSecondAndAppend(). 1.63 + * 1.64 + * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether 1.65 + * a character is guaranteed to be at a normalization boundary, 1.66 + * regardless of context. 1.67 + * This is used for moving from one normalization boundary to the next 1.68 + * or preceding boundary, and for performing iterative normalization. 1.69 + * 1.70 + * Iterative normalization is useful when only a small portion of a 1.71 + * longer string needs to be processed. 1.72 + * For example, in ICU, iterative normalization is used by the NormalizationTransliterator 1.73 + * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart() 1.74 + * (to process only the substring for which sort key bytes are computed). 1.75 + * 1.76 + * The set of normalization boundaries returned by these functions may not be 1.77 + * complete: There may be more boundaries that could be returned. 1.78 + * Different functions may return different boundaries. 1.79 + * @stable ICU 4.4 1.80 + */ 1.81 +class U_COMMON_API Normalizer2 : public UObject { 1.82 +public: 1.83 + /** 1.84 + * Destructor. 1.85 + * @stable ICU 4.4 1.86 + */ 1.87 + ~Normalizer2(); 1.88 + 1.89 + /** 1.90 + * Returns a Normalizer2 instance for Unicode NFC normalization. 1.91 + * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode). 
1.92 + * Returns an unmodifiable singleton instance. Do not delete it. 1.93 + * @param errorCode Standard ICU error code. Its input value must 1.94 + * pass the U_SUCCESS() test, or else the function returns 1.95 + * immediately. Check for U_FAILURE() on output or use with 1.96 + * function chaining. (See User Guide for details.) 1.97 + * @return the requested Normalizer2, if successful 1.98 + * @stable ICU 49 1.99 + */ 1.100 + static const Normalizer2 * 1.101 + getNFCInstance(UErrorCode &errorCode); 1.102 + 1.103 + /** 1.104 + * Returns a Normalizer2 instance for Unicode NFD normalization. 1.105 + * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode). 1.106 + * Returns an unmodifiable singleton instance. Do not delete it. 1.107 + * @param errorCode Standard ICU error code. Its input value must 1.108 + * pass the U_SUCCESS() test, or else the function returns 1.109 + * immediately. Check for U_FAILURE() on output or use with 1.110 + * function chaining. (See User Guide for details.) 1.111 + * @return the requested Normalizer2, if successful 1.112 + * @stable ICU 49 1.113 + */ 1.114 + static const Normalizer2 * 1.115 + getNFDInstance(UErrorCode &errorCode); 1.116 + 1.117 + /** 1.118 + * Returns a Normalizer2 instance for Unicode NFKC normalization. 1.119 + * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode). 1.120 + * Returns an unmodifiable singleton instance. Do not delete it. 1.121 + * @param errorCode Standard ICU error code. Its input value must 1.122 + * pass the U_SUCCESS() test, or else the function returns 1.123 + * immediately. Check for U_FAILURE() on output or use with 1.124 + * function chaining. (See User Guide for details.) 1.125 + * @return the requested Normalizer2, if successful 1.126 + * @stable ICU 49 1.127 + */ 1.128 + static const Normalizer2 * 1.129 + getNFKCInstance(UErrorCode &errorCode); 1.130 + 1.131 + /** 1.132 + * Returns a Normalizer2 instance for Unicode NFKD normalization. 
1.133 + * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode). 1.134 + * Returns an unmodifiable singleton instance. Do not delete it. 1.135 + * @param errorCode Standard ICU error code. Its input value must 1.136 + * pass the U_SUCCESS() test, or else the function returns 1.137 + * immediately. Check for U_FAILURE() on output or use with 1.138 + * function chaining. (See User Guide for details.) 1.139 + * @return the requested Normalizer2, if successful 1.140 + * @stable ICU 49 1.141 + */ 1.142 + static const Normalizer2 * 1.143 + getNFKDInstance(UErrorCode &errorCode); 1.144 + 1.145 + /** 1.146 + * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization. 1.147 + * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode). 1.148 + * Returns an unmodifiable singleton instance. Do not delete it. 1.149 + * @param errorCode Standard ICU error code. Its input value must 1.150 + * pass the U_SUCCESS() test, or else the function returns 1.151 + * immediately. Check for U_FAILURE() on output or use with 1.152 + * function chaining. (See User Guide for details.) 1.153 + * @return the requested Normalizer2, if successful 1.154 + * @stable ICU 49 1.155 + */ 1.156 + static const Normalizer2 * 1.157 + getNFKCCasefoldInstance(UErrorCode &errorCode); 1.158 + 1.159 + /** 1.160 + * Returns a Normalizer2 instance which uses the specified data file 1.161 + * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 1.162 + * and which composes or decomposes text according to the specified mode. 1.163 + * Returns an unmodifiable singleton instance. Do not delete it. 1.164 + * 1.165 + * Use packageName=NULL for data files that are part of ICU's own data. 1.166 + * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 1.167 + * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 1.168 + * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 
1.169 + * 1.170 + * @param packageName NULL for ICU built-in data, otherwise application data package name 1.171 + * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file 1.172 + * @param mode normalization mode (compose or decompose etc.) 1.173 + * @param errorCode Standard ICU error code. Its input value must 1.174 + * pass the U_SUCCESS() test, or else the function returns 1.175 + * immediately. Check for U_FAILURE() on output or use with 1.176 + * function chaining. (See User Guide for details.) 1.177 + * @return the requested Normalizer2, if successful 1.178 + * @stable ICU 4.4 1.179 + */ 1.180 + static const Normalizer2 * 1.181 + getInstance(const char *packageName, 1.182 + const char *name, 1.183 + UNormalization2Mode mode, 1.184 + UErrorCode &errorCode); 1.185 + 1.186 + /** 1.187 + * Returns the normalized form of the source string as a new string; convenience wrapper that calls normalize(src, dest, errorCode) on a local result string. 1.188 + * @param src source string 1.189 + * @param errorCode Standard ICU error code. Its input value must 1.190 + * pass the U_SUCCESS() test, or else the function returns 1.191 + * immediately. Check for U_FAILURE() on output or use with 1.192 + * function chaining. (See User Guide for details.) 1.193 + * @return normalized src 1.194 + * @stable ICU 4.4 1.195 + */ 1.196 + UnicodeString 1.197 + normalize(const UnicodeString &src, UErrorCode &errorCode) const { 1.198 + UnicodeString result; 1.199 + normalize(src, result, errorCode); 1.200 + return result; 1.201 + } 1.202 + /** 1.203 + * Writes the normalized form of the source string to the destination string 1.204 + * (replacing its contents) and returns the destination string. 1.205 + * The source and destination strings must be different objects. 1.206 + * @param src source string 1.207 + * @param dest destination string; its contents are replaced with normalized src 1.208 + * @param errorCode Standard ICU error code. 
Its input value must 1.209 + * pass the U_SUCCESS() test, or else the function returns 1.210 + * immediately. 
Check for U_FAILURE() on output or use with 1.211 + * function chaining. (See User Guide for details.) 1.212 + * @return dest 1.213 + * @stable ICU 4.4 1.214 + */ 1.215 + virtual UnicodeString & 1.216 + normalize(const UnicodeString &src, 1.217 + UnicodeString &dest, 1.218 + UErrorCode &errorCode) const = 0; 1.219 + /** 1.220 + * Appends the normalized form of the second string to the first string 1.221 + * (merging them at the boundary) and returns the first string. 1.222 + * The result is normalized if the first string was normalized. 1.223 + * The first and second strings must be different objects. 1.224 + * @param first string, should be normalized 1.225 + * @param second string, will be normalized 1.226 + * @param errorCode Standard ICU error code. Its input value must 1.227 + * pass the U_SUCCESS() test, or else the function returns 1.228 + * immediately. Check for U_FAILURE() on output or use with 1.229 + * function chaining. (See User Guide for details.) 1.230 + * @return first 1.231 + * @stable ICU 4.4 1.232 + */ 1.233 + virtual UnicodeString & 1.234 + normalizeSecondAndAppend(UnicodeString &first, 1.235 + const UnicodeString &second, 1.236 + UErrorCode &errorCode) const = 0; 1.237 + /** 1.238 + * Appends the second string to the first string 1.239 + * (merging them at the boundary) and returns the first string. 1.240 + * The result is normalized if both the strings were normalized. 1.241 + * The first and second strings must be different objects. 1.242 + * @param first string, should be normalized 1.243 + * @param second string, should be normalized 1.244 + * @param errorCode Standard ICU error code. Its input value must 1.245 + * pass the U_SUCCESS() test, or else the function returns 1.246 + * immediately. Check for U_FAILURE() on output or use with 1.247 + * function chaining. (See User Guide for details.) 
1.248 + * @return first 1.249 + * @stable ICU 4.4 1.250 + */ 1.251 + virtual UnicodeString & 1.252 + append(UnicodeString &first, 1.253 + const UnicodeString &second, 1.254 + UErrorCode &errorCode) const = 0; 1.255 + 1.256 + /** 1.257 + * Gets the decomposition mapping of c. 1.258 + * Roughly equivalent to normalizing the String form of c 1.259 + * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function 1.260 + * returns FALSE and does not write a string 1.261 + * if c does not have a decomposition mapping in this instance's data. 1.262 + * This function is independent of the mode of the Normalizer2. 1.263 + * @param c code point 1.264 + * @param decomposition String object which will be set to c's 1.265 + * decomposition mapping, if there is one. 1.266 + * @return TRUE if c has a decomposition, otherwise FALSE 1.267 + * @stable ICU 4.6 1.268 + */ 1.269 + virtual UBool 1.270 + getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; 1.271 + 1.272 + /** 1.273 + * Gets the raw decomposition mapping of c. 1.274 + * 1.275 + * This is similar to the getDecomposition() method but returns the 1.276 + * raw decomposition mapping as specified in UnicodeData.txt or 1.277 + * (for custom data) in the mapping files processed by the gennorm2 tool. 1.278 + * By contrast, getDecomposition() returns the processed, 1.279 + * recursively-decomposed version of this mapping. 1.280 + * 1.281 + * When used on a standard NFKC Normalizer2 instance, 1.282 + * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. 1.283 + * 1.284 + * When used on a standard NFC Normalizer2 instance, 1.285 + * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); 1.286 + * in this case, the result contains either one or two code points (=1..4 UChars). 1.287 + * 1.288 + * This function is independent of the mode of the Normalizer2. 1.289 + * The default implementation returns FALSE. 
1.290 + * @param c code point 1.291 + * @param decomposition String object which will be set to c's 1.292 + * raw decomposition mapping, if there is one. 1.293 + * @return TRUE if c has a decomposition, otherwise FALSE 1.294 + * @stable ICU 49 1.295 + */ 1.296 + virtual UBool 1.297 + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; 1.298 + 1.299 + /** 1.300 + * Performs pairwise composition of a & b and returns the composite if there is one. 1.301 + * 1.302 + * Returns a composite code point c only if c has a two-way mapping to a+b. 1.303 + * In standard Unicode normalization, this means that 1.304 + * c has a canonical decomposition to a+b 1.305 + * and c does not have the Full_Composition_Exclusion property. 1.306 + * 1.307 + * This function is independent of the mode of the Normalizer2. 1.308 + * The default implementation returns a negative value. 1.309 + * @param a A (normalization starter) code point. 1.310 + * @param b Another code point. 1.311 + * @return The non-negative composite code point if there is one; otherwise a negative value. 1.312 + * @stable ICU 49 1.313 + */ 1.314 + virtual UChar32 1.315 + composePair(UChar32 a, UChar32 b) const; 1.316 + 1.317 + /** 1.318 + * Gets the combining class of c. 1.319 + * The default implementation returns 0 1.320 + * but all standard implementations return the Unicode Canonical_Combining_Class value. 1.321 + * @param c code point 1.322 + * @return c's combining class 1.323 + * @stable ICU 49 1.324 + */ 1.325 + virtual uint8_t 1.326 + getCombiningClass(UChar32 c) const; 1.327 + 1.328 + /** 1.329 + * Tests if the string is normalized. 1.330 + * Internally, in cases where the quickCheck() method would return "maybe" 1.331 + * (which is only possible for the two COMPOSE modes) this method 1.332 + * resolves to "yes" or "no" to provide a definitive result, 1.333 + * at the cost of doing more work in those cases. 1.334 + * @param s input string 1.335 + * @param errorCode Standard ICU error code. 
Its input value must 1.336 + * pass the U_SUCCESS() test, or else the function returns 1.337 + * immediately. Check for U_FAILURE() on output or use with 1.338 + * function chaining. (See User Guide for details.) 1.339 + * @return TRUE if s is normalized 1.340 + * @stable ICU 4.4 1.341 + */ 1.342 + virtual UBool 1.343 + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; 1.344 + 1.345 + /** 1.346 + * Tests if the string is normalized. 1.347 + * For the two COMPOSE modes, the result could be "maybe" in cases that 1.348 + * would take a little more work to resolve definitively. 1.349 + * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster 1.350 + * combination of quick check + normalization, to avoid 1.351 + * re-checking the "yes" prefix. 1.352 + * @param s input string 1.353 + * @param errorCode Standard ICU error code. Its input value must 1.354 + * pass the U_SUCCESS() test, or else the function returns 1.355 + * immediately. Check for U_FAILURE() on output or use with 1.356 + * function chaining. (See User Guide for details.) 1.357 + * @return UNormalizationCheckResult 1.358 + * @stable ICU 4.4 1.359 + */ 1.360 + virtual UNormalizationCheckResult 1.361 + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; 1.362 + 1.363 + /** 1.364 + * Returns the end of the normalized substring of the input string. 1.365 + * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> 1.366 + * the substring <code>UnicodeString(s, 0, end)</code> 1.367 + * will pass the quick check with a "yes" result. 1.368 + * 1.369 + * The returned end index is usually one or more characters before the 1.370 + * "no" or "maybe" character: The end index is at a normalization boundary. 1.371 + * (See the class documentation for more about normalization boundaries.) 
1.372 + * 1.373 + * When the goal is a normalized string and most input strings are expected 1.374 + * to be normalized already, then call this method, 1.375 + * and if it returns a prefix shorter than the input string, 1.376 + * copy that prefix and use normalizeSecondAndAppend() for the remainder. 1.377 + * @param s input string 1.378 + * @param errorCode Standard ICU error code. Its input value must 1.379 + * pass the U_SUCCESS() test, or else the function returns 1.380 + * immediately. Check for U_FAILURE() on output or use with 1.381 + * function chaining. (See User Guide for details.) 1.382 + * @return "yes" span end index 1.383 + * @stable ICU 4.4 1.384 + */ 1.385 + virtual int32_t 1.386 + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; 1.387 + 1.388 + /** 1.389 + * Tests if the character always has a normalization boundary before it, 1.390 + * regardless of context. 1.391 + * If true, then the character does not normalization-interact with 1.392 + * preceding characters. 1.393 + * In other words, a string containing this character can be normalized 1.394 + * by processing portions before this character and starting from this 1.395 + * character independently. 1.396 + * This is used for iterative normalization. See the class documentation for details. 1.397 + * @param c character to test 1.398 + * @return TRUE if c has a normalization boundary before it 1.399 + * @stable ICU 4.4 1.400 + */ 1.401 + virtual UBool hasBoundaryBefore(UChar32 c) const = 0; 1.402 + 1.403 + /** 1.404 + * Tests if the character always has a normalization boundary after it, 1.405 + * regardless of context. 1.406 + * If true, then the character does not normalization-interact with 1.407 + * following characters. 1.408 + * In other words, a string containing this character can be normalized 1.409 + * by processing portions up to this character and after this 1.410 + * character independently. 1.411 + * This is used for iterative normalization. 
See the class documentation for details. 1.412 + * Note that this operation may be significantly slower than hasBoundaryBefore(). 1.413 + * @param c character to test 1.414 + * @return TRUE if c has a normalization boundary after it 1.415 + * @stable ICU 4.4 1.416 + */ 1.417 + virtual UBool hasBoundaryAfter(UChar32 c) const = 0; 1.418 + 1.419 + /** 1.420 + * Tests if the character is normalization-inert. 1.421 + * If true, then the character does not change, nor normalization-interact with 1.422 + * preceding or following characters. 1.423 + * In other words, a string containing this character can be normalized 1.424 + * by processing portions before this character and after this 1.425 + * character independently. 1.426 + * This is used for iterative normalization. See the class documentation for details. 1.427 + * Note that this operation may be significantly slower than hasBoundaryBefore(). 1.428 + * @param c character to test 1.429 + * @return TRUE if c is normalization-inert 1.430 + * @stable ICU 4.4 1.431 + */ 1.432 + virtual UBool isInert(UChar32 c) const = 0; 1.433 +}; 1.434 + 1.435 +/** 1.436 + * Normalization filtered by a UnicodeSet. 1.437 + * Normalizes portions of the text contained in the filter set and leaves 1.438 + * portions not contained in the filter set unchanged. 1.439 + * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE). 1.440 + * Not-in-the-filter text is treated as "is normalized" and "quick check yes". 1.441 + * This class implements all of (and only) the Normalizer2 API. 1.442 + * An instance of this class is unmodifiable/immutable but is constructed and 1.443 + * must be destructed by the owner. 1.444 + * @stable ICU 4.4 1.445 + */ 1.446 +class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { 1.447 +public: 1.448 + /** 1.449 + * Constructs a filtered normalizer wrapping any Normalizer2 instance 1.450 + * and a filter set. 
1.451 + * Both are aliased and must not be modified or deleted while this object 1.452 + * is used. 1.453 + * The filter set should be frozen; otherwise the performance will suffer greatly. 1.454 + * @param n2 wrapped Normalizer2 instance 1.455 + * @param filterSet UnicodeSet which determines the characters to be normalized 1.456 + * @stable ICU 4.4 1.457 + */ 1.458 + FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : 1.459 + norm2(n2), set(filterSet) {} 1.460 + 1.461 + /** 1.462 + * Destructor. 1.463 + * @stable ICU 4.4 1.464 + */ 1.465 + ~FilteredNormalizer2(); 1.466 + 1.467 + /** 1.468 + * Writes the normalized form of the source string to the destination string 1.469 + * (replacing its contents) and returns the destination string. 1.470 + * The source and destination strings must be different objects. 1.471 + * @param src source string 1.472 + * @param dest destination string; its contents are replaced with normalized src 1.473 + * @param errorCode Standard ICU error code. Its input value must 1.474 + * pass the U_SUCCESS() test, or else the function returns 1.475 + * immediately. Check for U_FAILURE() on output or use with 1.476 + * function chaining. (See User Guide for details.) 1.477 + * @return dest 1.478 + * @stable ICU 4.4 1.479 + */ 1.480 + virtual UnicodeString & 1.481 + normalize(const UnicodeString &src, 1.482 + UnicodeString &dest, 1.483 + UErrorCode &errorCode) const; 1.484 + /** 1.485 + * Appends the normalized form of the second string to the first string 1.486 + * (merging them at the boundary) and returns the first string. 1.487 + * The result is normalized if the first string was normalized. 1.488 + * The first and second strings must be different objects. 1.489 + * @param first string, should be normalized 1.490 + * @param second string, will be normalized 1.491 + * @param errorCode Standard ICU error code. Its input value must 1.492 + * pass the U_SUCCESS() test, or else the function returns 1.493 + * immediately. 
Check for U_FAILURE() on output or use with 1.494 + * function chaining. (See User Guide for details.) 1.495 + * @return first 1.496 + * @stable ICU 4.4 1.497 + */ 1.498 + virtual UnicodeString & 1.499 + normalizeSecondAndAppend(UnicodeString &first, 1.500 + const UnicodeString &second, 1.501 + UErrorCode &errorCode) const; 1.502 + /** 1.503 + * Appends the second string to the first string 1.504 + * (merging them at the boundary) and returns the first string. 1.505 + * The result is normalized if both the strings were normalized. 1.506 + * The first and second strings must be different objects. 1.507 + * @param first string, should be normalized 1.508 + * @param second string, should be normalized 1.509 + * @param errorCode Standard ICU error code. Its input value must 1.510 + * pass the U_SUCCESS() test, or else the function returns 1.511 + * immediately. Check for U_FAILURE() on output or use with 1.512 + * function chaining. (See User Guide for details.) 1.513 + * @return first 1.514 + * @stable ICU 4.4 1.515 + */ 1.516 + virtual UnicodeString & 1.517 + append(UnicodeString &first, 1.518 + const UnicodeString &second, 1.519 + UErrorCode &errorCode) const; 1.520 + 1.521 + /** 1.522 + * Gets the decomposition mapping of c. 1.523 + * For details see the base class documentation. 1.524 + * 1.525 + * This function is independent of the mode of the Normalizer2. 1.526 + * @param c code point 1.527 + * @param decomposition String object which will be set to c's 1.528 + * decomposition mapping, if there is one. 1.529 + * @return TRUE if c has a decomposition, otherwise FALSE 1.530 + * @stable ICU 4.6 1.531 + */ 1.532 + virtual UBool 1.533 + getDecomposition(UChar32 c, UnicodeString &decomposition) const; 1.534 + 1.535 + /** 1.536 + * Gets the raw decomposition mapping of c. 1.537 + * For details see the base class documentation. 1.538 + * 1.539 + * This function is independent of the mode of the Normalizer2. 
1.540 + * @param c code point 1.541 + * @param decomposition String object which will be set to c's 1.542 + * raw decomposition mapping, if there is one. 1.543 + * @return TRUE if c has a decomposition, otherwise FALSE 1.544 + * @stable ICU 49 1.545 + */ 1.546 + virtual UBool 1.547 + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; 1.548 + 1.549 + /** 1.550 + * Performs pairwise composition of a & b and returns the composite if there is one. 1.551 + * For details see the base class documentation. 1.552 + * 1.553 + * This function is independent of the mode of the Normalizer2. 1.554 + * @param a A (normalization starter) code point. 1.555 + * @param b Another code point. 1.556 + * @return The non-negative composite code point if there is one; otherwise a negative value. 1.557 + * @stable ICU 49 1.558 + */ 1.559 + virtual UChar32 1.560 + composePair(UChar32 a, UChar32 b) const; 1.561 + 1.562 + /** 1.563 + * Gets the combining class of c. 1.564 + * For details see the Normalizer2 base class documentation. 1.565 + * (This override is only declared in this header; its definition is elsewhere.) 1.566 + * @param c code point 1.567 + * @return c's combining class 1.568 + * @stable ICU 49 1.569 + */ 1.570 + virtual uint8_t 1.571 + getCombiningClass(UChar32 c) const; 1.572 + 1.573 + /** 1.574 + * Tests if the string is normalized. 1.575 + * For details see the Normalizer2 base class documentation. 1.576 + * @param s input string 1.577 + * @param errorCode Standard ICU error code. Its input value must 1.578 + * pass the U_SUCCESS() test, or else the function returns 1.579 + * immediately. Check for U_FAILURE() on output or use with 1.580 + * function chaining. (See User Guide for details.) 1.581 + * @return TRUE if s is normalized 1.582 + * @stable ICU 4.4 1.583 + */ 1.584 + virtual UBool 1.585 + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; 1.586 + /** 1.587 + * Tests if the string is normalized. 
1.588 + * For details see the Normalizer2 base class documentation. 1.589 + * @param s input string 1.590 + * @param errorCode Standard ICU error code. Its input value must 1.591 + * pass the U_SUCCESS() test, or else the function returns 1.592 + * immediately. Check for U_FAILURE() on output or use with 1.593 + * function chaining. (See User Guide for details.) 1.594 + * @return UNormalizationCheckResult 1.595 + * @stable ICU 4.4 1.596 + */ 1.597 + virtual UNormalizationCheckResult 1.598 + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; 1.599 + /** 1.600 + * Returns the end of the normalized substring of the input string. 1.601 + * For details see the Normalizer2 base class documentation. 1.602 + * @param s input string 1.603 + * @param errorCode Standard ICU error code. Its input value must 1.604 + * pass the U_SUCCESS() test, or else the function returns 1.605 + * immediately. Check for U_FAILURE() on output or use with 1.606 + * function chaining. (See User Guide for details.) 1.607 + * @return "yes" span end index 1.608 + * @stable ICU 4.4 1.609 + */ 1.610 + virtual int32_t 1.611 + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; 1.612 + 1.613 + /** 1.614 + * Tests if the character always has a normalization boundary before it, 1.615 + * regardless of context. 1.616 + * For details see the Normalizer2 base class documentation. 1.617 + * @param c character to test 1.618 + * @return TRUE if c has a normalization boundary before it 1.619 + * @stable ICU 4.4 1.620 + */ 1.621 + virtual UBool hasBoundaryBefore(UChar32 c) const; 1.622 + 1.623 + /** 1.624 + * Tests if the character always has a normalization boundary after it, 1.625 + * regardless of context. 1.626 + * For details see the Normalizer2 base class documentation. 
1.627 + * @param c character to test 1.628 + * @return TRUE if c has a normalization boundary after it 1.629 + * @stable ICU 4.4 1.630 + */ 1.631 + virtual UBool hasBoundaryAfter(UChar32 c) const; 1.632 + 1.633 + /** 1.634 + * Tests if the character is normalization-inert. 1.635 + * For details see the Normalizer2 base class documentation. 1.636 + * @param c character to test 1.637 + * @return TRUE if c is normalization-inert 1.638 + * @stable ICU 4.4 1.639 + */ 1.640 + virtual UBool isInert(UChar32 c) const; 1.641 +private: /* Private overload of normalize() that takes an extra USetSpanCondition; presumably the shared worker behind the public normalize() -- confirm against the implementation file. */ 1.642 + UnicodeString & 1.643 + normalize(const UnicodeString &src, 1.644 + UnicodeString &dest, 1.645 + USetSpanCondition spanCondition, 1.646 + UErrorCode &errorCode) const; 1.647 + /* Private overload that takes an extra doNormalize flag; presumably shared by normalizeSecondAndAppend() and append() -- confirm against the implementation file. */ 1.648 + UnicodeString & 1.649 + normalizeSecondAndAppend(UnicodeString &first, 1.650 + const UnicodeString &second, 1.651 + UBool doNormalize, 1.652 + UErrorCode &errorCode) const; 1.653 + /* References bound by the constructor to the wrapped normalizer and the filter set; they are aliased, so both must outlive this object. */ 1.654 + const Normalizer2 &norm2; 1.655 + const UnicodeSet &set; 1.656 +}; 1.657 + 1.658 +U_NAMESPACE_END 1.659 + 1.660 +#endif // !UCONFIG_NO_NORMALIZATION 1.661 +#endif // __NORMALIZER2_H__