intl/icu/source/common/unicode/normalizer2.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2009-2013, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  normalizer2.h
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2009nov22
    14 *   created by: Markus W. Scherer
    15 */
    17 #ifndef __NORMALIZER2_H__
    18 #define __NORMALIZER2_H__
    20 /**
    21  * \file
    22  * \brief C++ API: New API for Unicode Normalization.
    23  */
    25 #include "unicode/utypes.h"
    27 #if !UCONFIG_NO_NORMALIZATION
    29 #include "unicode/uniset.h"
    30 #include "unicode/unistr.h"
    31 #include "unicode/unorm2.h"
    33 U_NAMESPACE_BEGIN
    35 /**
    36  * Unicode normalization functionality for standard Unicode normalization or
    37  * for using custom mapping tables.
    38  * All instances of this class are unmodifiable/immutable.
    39  * Instances returned by getInstance() are singletons that must not be deleted by the caller.
    40  * The Normalizer2 class is not intended for public subclassing.
    41  *
    42  * The primary functions are to produce a normalized string and to detect whether
    43  * a string is already normalized.
    44  * The most commonly used normalization forms are those defined in
    45  * http://www.unicode.org/unicode/reports/tr15/
    46  * However, this API supports additional normalization forms for specialized purposes.
    47  * For example, NFKC_Casefold is provided via getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode)
    48  * and can be used in implementations of UTS #46.
    49  *
    50  * Not only are the standard compose and decompose modes supplied,
    51  * but additional modes are provided as documented in the UNormalization2Mode enum.
    52  *
    53  * Some of the functions in this class identify normalization boundaries.
    54  * At a normalization boundary, the portions of the string
    55  * before it and starting from it do not interact and can be handled independently.
    56  *
    57  * The spanQuickCheckYes() stops at a normalization boundary.
    58  * When the goal is a normalized string, then the text before the boundary
    59  * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
    60  *
    61  * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
    62  * a character is guaranteed to be at a normalization boundary,
    63  * regardless of context.
    64  * This is used for moving from one normalization boundary to the next
    65  * or preceding boundary, and for performing iterative normalization.
    66  *
    67  * Iterative normalization is useful when only a small portion of a
    68  * longer string needs to be processed.
    69  * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
    70  * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
    71  * (to process only the substring for which sort key bytes are computed).
    72  *
    73  * The set of normalization boundaries returned by these functions may not be
    74  * complete: There may be more boundaries that could be returned.
    75  * Different functions may return different boundaries.
    76  * @stable ICU 4.4
    77  */
    78 class U_COMMON_API Normalizer2 : public UObject {
    79 public:
    80     /**
    81      * Destructor.
    82      * @stable ICU 4.4
    83      */
    84     ~Normalizer2();
    86     /**
    87      * Returns a Normalizer2 instance for Unicode NFC normalization.
    88      * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
    89      * Returns an unmodifiable singleton instance. Do not delete it.
    90      * @param errorCode Standard ICU error code. Its input value must
    91      *                  pass the U_SUCCESS() test, or else the function returns
    92      *                  immediately. Check for U_FAILURE() on output or use with
    93      *                  function chaining. (See User Guide for details.)
    94      * @return the requested Normalizer2, if successful
    95      * @stable ICU 49
    96      */
    97     static const Normalizer2 *
    98     getNFCInstance(UErrorCode &errorCode);
   100     /**
   101      * Returns a Normalizer2 instance for Unicode NFD normalization.
   102      * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode); NFD shares the "nfc" data.
   103      * Returns an unmodifiable singleton instance. Do not delete it.
   104      * @param errorCode Standard ICU error code. Its input value must
   105      *                  pass the U_SUCCESS() test, or else the function returns
   106      *                  immediately. Check for U_FAILURE() on output or use with
   107      *                  function chaining. (See User Guide for details.)
   108      * @return the requested Normalizer2, if successful
   109      * @stable ICU 49
   110      */
   111     static const Normalizer2 *
   112     getNFDInstance(UErrorCode &errorCode);
   114     /**
   115      * Returns a Normalizer2 instance for Unicode NFKC normalization.
   116      * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
   117      * Returns an unmodifiable singleton instance. Do not delete it.
   118      * @param errorCode Standard ICU error code. Its input value must
   119      *                  pass the U_SUCCESS() test, or else the function returns
   120      *                  immediately. Check for U_FAILURE() on output or use with
   121      *                  function chaining. (See User Guide for details.)
   122      * @return the requested Normalizer2, if successful
   123      * @stable ICU 49
   124      */
   125     static const Normalizer2 *
   126     getNFKCInstance(UErrorCode &errorCode);
   128     /**
   129      * Returns a Normalizer2 instance for Unicode NFKD normalization.
   130      * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode); NFKD shares the "nfkc" data.
   131      * Returns an unmodifiable singleton instance. Do not delete it.
   132      * @param errorCode Standard ICU error code. Its input value must
   133      *                  pass the U_SUCCESS() test, or else the function returns
   134      *                  immediately. Check for U_FAILURE() on output or use with
   135      *                  function chaining. (See User Guide for details.)
   136      * @return the requested Normalizer2, if successful
   137      * @stable ICU 49
   138      */
   139     static const Normalizer2 *
   140     getNFKDInstance(UErrorCode &errorCode);
   142     /**
   143      * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
   144      * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
   145      * Returns an unmodifiable singleton instance. Do not delete it.
   146      * @param errorCode Standard ICU error code. Its input value must
   147      *                  pass the U_SUCCESS() test, or else the function returns
   148      *                  immediately. Check for U_FAILURE() on output or use with
   149      *                  function chaining. (See User Guide for details.)
   150      * @return the requested Normalizer2, if successful
   151      * @stable ICU 49
   152      */
   153     static const Normalizer2 *
   154     getNFKCCasefoldInstance(UErrorCode &errorCode);
   156     /**
   157      * Returns a Normalizer2 instance which uses the specified data file
   158      * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
   159      * and which composes or decomposes text according to the specified mode.
   160      * Returns an unmodifiable singleton instance. Do not delete it.
   161      *
   162      * Use packageName=NULL for data files that are part of ICU's own data.
   163      * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
   164      * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
   165      * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
   166      *
   167      * @param packageName NULL for ICU built-in data, otherwise application data package name
   168      * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
   169      * @param mode normalization mode (compose or decompose etc.)
   170      * @param errorCode Standard ICU error code. Its input value must
   171      *                  pass the U_SUCCESS() test, or else the function returns
   172      *                  immediately. Check for U_FAILURE() on output or use with
   173      *                  function chaining. (See User Guide for details.)
   174      * @return the requested Normalizer2, if successful
   175      * @stable ICU 4.4
   176      */
   177     static const Normalizer2 *
   178     getInstance(const char *packageName,
   179                 const char *name,
   180                 UNormalization2Mode mode,
   181                 UErrorCode &errorCode);
   183     /**
   184      * Returns the normalized form of the source string.
   185      * @param src source string
   186      * @param errorCode Standard ICU error code. Its input value must
   187      *                  pass the U_SUCCESS() test, or else the function returns
   188      *                  immediately. Check for U_FAILURE() on output or use with
   189      *                  function chaining. (See User Guide for details.)
   190      * @return normalized src
   191      * @stable ICU 4.4
   192      */
   193     UnicodeString
   194     normalize(const UnicodeString &src, UErrorCode &errorCode) const {
   195         UnicodeString result;
   196         normalize(src, result, errorCode);  // delegates to the virtual three-argument overload
   197         return result;
   198     }
   199     /**
   200      * Writes the normalized form of the source string to the destination string
   201      * (replacing its contents) and returns the destination string.
   202      * The source and destination strings must be different objects.
   203      * @param src source string
   204      * @param dest destination string; its contents are replaced with normalized src
   205      * @param errorCode Standard ICU error code. Its input value must
   206      *                  pass the U_SUCCESS() test, or else the function returns
   207      *                  immediately. Check for U_FAILURE() on output or use with
   208      *                  function chaining. (See User Guide for details.)
   209      * @return dest
   210      * @stable ICU 4.4
   211      */
   212     virtual UnicodeString &
   213     normalize(const UnicodeString &src,
   214               UnicodeString &dest,
   215               UErrorCode &errorCode) const = 0;
   216     /**
   217      * Appends the normalized form of the second string to the first string
   218      * (merging them at the boundary) and returns the first string.
   219      * The result is normalized if the first string was normalized.
   220      * The first and second strings must be different objects.
   221      * @param first string, should be normalized
   222      * @param second string, will be normalized
   223      * @param errorCode Standard ICU error code. Its input value must
   224      *                  pass the U_SUCCESS() test, or else the function returns
   225      *                  immediately. Check for U_FAILURE() on output or use with
   226      *                  function chaining. (See User Guide for details.)
   227      * @return first
   228      * @stable ICU 4.4
   229      */
   230     virtual UnicodeString &
   231     normalizeSecondAndAppend(UnicodeString &first,
   232                              const UnicodeString &second,
   233                              UErrorCode &errorCode) const = 0;
   234     /**
   235      * Appends the second string to the first string
   236      * (merging them at the boundary) and returns the first string.
   237      * The result is normalized if both the strings were normalized.
   238      * The first and second strings must be different objects.
   239      * @param first string, should be normalized
   240      * @param second string, should be normalized
   241      * @param errorCode Standard ICU error code. Its input value must
   242      *                  pass the U_SUCCESS() test, or else the function returns
   243      *                  immediately. Check for U_FAILURE() on output or use with
   244      *                  function chaining. (See User Guide for details.)
   245      * @return first
   246      * @stable ICU 4.4
   247      */
   248     virtual UnicodeString &
   249     append(UnicodeString &first,
   250            const UnicodeString &second,
   251            UErrorCode &errorCode) const = 0;
   253     /**
   254      * Gets the decomposition mapping of c.
   255      * Roughly equivalent to normalizing the String form of c
   256      * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
   257      * returns FALSE and does not write a string
   258      * if c does not have a decomposition mapping in this instance's data.
   259      * This function is independent of the mode of the Normalizer2.
   260      * @param c code point
   261      * @param decomposition String object which will be set to c's
   262      *                      decomposition mapping, if there is one.
   263      * @return TRUE if c has a decomposition, otherwise FALSE
   264      * @stable ICU 4.6
   265      */
   266     virtual UBool
   267     getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
   269     /**
   270      * Gets the raw decomposition mapping of c.
   271      *
   272      * This is similar to the getDecomposition() method but returns the
   273      * raw decomposition mapping as specified in UnicodeData.txt or
   274      * (for custom data) in the mapping files processed by the gennorm2 tool.
   275      * By contrast, getDecomposition() returns the processed,
   276      * recursively-decomposed version of this mapping.
   277      *
   278      * When used on a standard NFKC Normalizer2 instance,
   279      * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
   280      *
   281      * When used on a standard NFC Normalizer2 instance,
   282      * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
   283      * in this case, the result contains either one or two code points (=1..4 UChars).
   284      *
   285      * This function is independent of the mode of the Normalizer2.
   286      * The default implementation returns FALSE.
   287      * @param c code point
   288      * @param decomposition String object which will be set to c's
   289      *                      raw decomposition mapping, if there is one.
   290      * @return TRUE if c has a decomposition, otherwise FALSE
   291      * @stable ICU 49
   292      */
   293     virtual UBool
   294     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
   296     /**
   297      * Performs pairwise composition of a & b and returns the composite if there is one.
   298      *
   299      * Returns a composite code point c only if c has a two-way mapping to a+b.
   300      * In standard Unicode normalization, this means that
   301      * c has a canonical decomposition to a+b
   302      * and c does not have the Full_Composition_Exclusion property.
   303      *
   304      * This function is independent of the mode of the Normalizer2.
   305      * The default implementation returns a negative value.
   306      * @param a A (normalization starter) code point.
   307      * @param b Another code point.
   308      * @return The non-negative composite code point if there is one; otherwise a negative value.
   309      * @stable ICU 49
   310      */
   311     virtual UChar32
   312     composePair(UChar32 a, UChar32 b) const;
   314     /**
   315      * Gets the combining class of c.
   316      * The default implementation returns 0
   317      * but all standard implementations return the Unicode Canonical_Combining_Class value.
   318      * @param c code point
   319      * @return c's combining class
   320      * @stable ICU 49
   321      */
   322     virtual uint8_t
   323     getCombiningClass(UChar32 c) const;
   325     /**
   326      * Tests if the string is normalized.
   327      * Internally, in cases where the quickCheck() method would return "maybe"
   328      * (which is only possible for the two COMPOSE modes) this method
   329      * resolves to "yes" or "no" to provide a definitive result,
   330      * at the cost of doing more work in those cases.
   331      * @param s input string
   332      * @param errorCode Standard ICU error code. Its input value must
   333      *                  pass the U_SUCCESS() test, or else the function returns
   334      *                  immediately. Check for U_FAILURE() on output or use with
   335      *                  function chaining. (See User Guide for details.)
   336      * @return TRUE if s is normalized
   337      * @stable ICU 4.4
   338      */
   339     virtual UBool
   340     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
   342     /**
   343      * Tests if the string is normalized.
   344      * For the two COMPOSE modes, the result could be "maybe" in cases that
   345      * would take a little more work to resolve definitively.
   346      * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
   347      * combination of quick check + normalization, to avoid
   348      * re-checking the "yes" prefix.
   349      * @param s input string
   350      * @param errorCode Standard ICU error code. Its input value must
   351      *                  pass the U_SUCCESS() test, or else the function returns
   352      *                  immediately. Check for U_FAILURE() on output or use with
   353      *                  function chaining. (See User Guide for details.)
   354      * @return UNormalizationCheckResult
   355      * @stable ICU 4.4
   356      */
   357     virtual UNormalizationCheckResult
   358     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
   360     /**
   361      * Returns the end of the normalized substring of the input string.
   362      * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
   363      * the substring <code>UnicodeString(s, 0, end)</code>
   364      * will pass the quick check with a "yes" result.
   365      *
   366      * The returned end index is usually one or more characters before the
   367      * "no" or "maybe" character: The end index is at a normalization boundary.
   368      * (See the class documentation for more about normalization boundaries.)
   369      *
   370      * When the goal is a normalized string and most input strings are expected
   371      * to be normalized already, then call this method,
   372      * and if it returns a prefix shorter than the input string,
   373      * copy that prefix and use normalizeSecondAndAppend() for the remainder.
   374      * @param s input string
   375      * @param errorCode Standard ICU error code. Its input value must
   376      *                  pass the U_SUCCESS() test, or else the function returns
   377      *                  immediately. Check for U_FAILURE() on output or use with
   378      *                  function chaining. (See User Guide for details.)
   379      * @return "yes" span end index
   380      * @stable ICU 4.4
   381      */
   382     virtual int32_t
   383     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
   385     /**
   386      * Tests if the character always has a normalization boundary before it,
   387      * regardless of context.
   388      * If true, then the character does not normalization-interact with
   389      * preceding characters.
   390      * In other words, a string containing this character can be normalized
   391      * by processing portions before this character and starting from this
   392      * character independently.
   393      * This is used for iterative normalization. See the class documentation for details.
   394      * @param c character to test
   395      * @return TRUE if c has a normalization boundary before it
   396      * @stable ICU 4.4
   397      */
   398     virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
   400     /**
   401      * Tests if the character always has a normalization boundary after it,
   402      * regardless of context.
   403      * If true, then the character does not normalization-interact with
   404      * following characters.
   405      * In other words, a string containing this character can be normalized
   406      * by processing portions up to this character and after this
   407      * character independently.
   408      * This is used for iterative normalization. See the class documentation for details.
   409      * Note that this operation may be significantly slower than hasBoundaryBefore().
   410      * @param c character to test
   411      * @return TRUE if c has a normalization boundary after it
   412      * @stable ICU 4.4
   413      */
   414     virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
   416     /**
   417      * Tests if the character is normalization-inert.
   418      * If true, then the character does not change, nor normalization-interact with
   419      * preceding or following characters.
   420      * In other words, a string containing this character can be normalized
   421      * by processing portions before this character and after this
   422      * character independently.
   423      * This is used for iterative normalization. See the class documentation for details.
   424      * Note that this operation may be significantly slower than hasBoundaryBefore().
   425      * @param c character to test
   426      * @return TRUE if c is normalization-inert
   427      * @stable ICU 4.4
   428      */
   429     virtual UBool isInert(UChar32 c) const = 0;
   430 };
   432 /**
   433  * Normalization filtered by a UnicodeSet.
   434  * Normalizes portions of the text contained in the filter set and leaves
   435  * portions not contained in the filter set unchanged.
   436  * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
   437  * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
   438  * This class implements all of (and only) the Normalizer2 API.
   439  * An instance of this class is unmodifiable/immutable but is constructed and
   440  * must be destructed by the owner.
   441  * @stable ICU 4.4
   442  */
   443 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
   444 public:
   445     /**
   446      * Constructs a filtered normalizer wrapping any Normalizer2 instance
   447      * and a filter set.
   448      * Both are aliased and must not be modified or deleted while this object
   449      * is used.
   450      * The filter set should be frozen; otherwise the performance will suffer greatly.
   451      * @param n2 wrapped Normalizer2 instance
   452      * @param filterSet UnicodeSet which determines the characters to be normalized
   453      * @stable ICU 4.4
   454      */
   455     FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
   456             norm2(n2), set(filterSet) {}  // binds the reference members only; nothing is copied
   458     /**
   459      * Destructor.
   460      * @stable ICU 4.4
   461      */
   462     ~FilteredNormalizer2();
   464     /**
   465      * Writes the normalized form of the source string to the destination string
   466      * (replacing its contents) and returns the destination string.
   467      * The source and destination strings must be different objects.
   468      * @param src source string
   469      * @param dest destination string; its contents are replaced with normalized src
   470      * @param errorCode Standard ICU error code. Its input value must
   471      *                  pass the U_SUCCESS() test, or else the function returns
   472      *                  immediately. Check for U_FAILURE() on output or use with
   473      *                  function chaining. (See User Guide for details.)
   474      * @return dest
   475      * @stable ICU 4.4
   476      */
   477     virtual UnicodeString &
   478     normalize(const UnicodeString &src,
   479               UnicodeString &dest,
   480               UErrorCode &errorCode) const;
   481     /**
   482      * Appends the normalized form of the second string to the first string
   483      * (merging them at the boundary) and returns the first string.
   484      * The result is normalized if the first string was normalized.
   485      * The first and second strings must be different objects.
   486      * @param first string, should be normalized
   487      * @param second string, will be normalized
   488      * @param errorCode Standard ICU error code. Its input value must
   489      *                  pass the U_SUCCESS() test, or else the function returns
   490      *                  immediately. Check for U_FAILURE() on output or use with
   491      *                  function chaining. (See User Guide for details.)
   492      * @return first
   493      * @stable ICU 4.4
   494      */
   495     virtual UnicodeString &
   496     normalizeSecondAndAppend(UnicodeString &first,
   497                              const UnicodeString &second,
   498                              UErrorCode &errorCode) const;
   499     /**
   500      * Appends the second string to the first string
   501      * (merging them at the boundary) and returns the first string.
   502      * The result is normalized if both the strings were normalized.
   503      * The first and second strings must be different objects.
   504      * @param first string, should be normalized
   505      * @param second string, should be normalized
   506      * @param errorCode Standard ICU error code. Its input value must
   507      *                  pass the U_SUCCESS() test, or else the function returns
   508      *                  immediately. Check for U_FAILURE() on output or use with
   509      *                  function chaining. (See User Guide for details.)
   510      * @return first
   511      * @stable ICU 4.4
   512      */
   513     virtual UnicodeString &
   514     append(UnicodeString &first,
   515            const UnicodeString &second,
   516            UErrorCode &errorCode) const;
   518     /**
   519      * Gets the decomposition mapping of c.
   520      * For details see the base class documentation.
   521      *
   522      * This function is independent of the mode of the Normalizer2.
   523      * @param c code point
   524      * @param decomposition String object which will be set to c's
   525      *                      decomposition mapping, if there is one.
   526      * @return TRUE if c has a decomposition, otherwise FALSE
   527      * @stable ICU 4.6
   528      */
   529     virtual UBool
   530     getDecomposition(UChar32 c, UnicodeString &decomposition) const;
   532     /**
   533      * Gets the raw decomposition mapping of c.
   534      * For details see the base class documentation.
   535      *
   536      * This function is independent of the mode of the Normalizer2.
   537      * @param c code point
   538      * @param decomposition String object which will be set to c's
   539      *                      raw decomposition mapping, if there is one.
   540      * @return TRUE if c has a decomposition, otherwise FALSE
   541      * @stable ICU 49
   542      */
   543     virtual UBool
   544     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
   546     /**
   547      * Performs pairwise composition of a & b and returns the composite if there is one.
   548      * For details see the base class documentation.
   549      *
   550      * This function is independent of the mode of the Normalizer2.
   551      * @param a A (normalization starter) code point.
   552      * @param b Another code point.
   553      * @return The non-negative composite code point if there is one; otherwise a negative value.
   554      * @stable ICU 49
   555      */
   556     virtual UChar32
   557     composePair(UChar32 a, UChar32 b) const;
   559     /**
   560      * Gets the combining class of c.
   561      * The default implementation returns 0
   562      * but all standard implementations return the Unicode Canonical_Combining_Class value.
   563      * @param c code point
   564      * @return c's combining class
   565      * @stable ICU 49
   566      */
   567     virtual uint8_t
   568     getCombiningClass(UChar32 c) const;
   570     /**
   571      * Tests if the string is normalized.
   572      * For details see the Normalizer2 base class documentation.
   573      * @param s input string
   574      * @param errorCode Standard ICU error code. Its input value must
   575      *                  pass the U_SUCCESS() test, or else the function returns
   576      *                  immediately. Check for U_FAILURE() on output or use with
   577      *                  function chaining. (See User Guide for details.)
   578      * @return TRUE if s is normalized
   579      * @stable ICU 4.4
   580      */
   581     virtual UBool
   582     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
   583     /**
   584      * Tests if the string is normalized.
   585      * For details see the Normalizer2 base class documentation.
   586      * @param s input string
   587      * @param errorCode Standard ICU error code. Its input value must
   588      *                  pass the U_SUCCESS() test, or else the function returns
   589      *                  immediately. Check for U_FAILURE() on output or use with
   590      *                  function chaining. (See User Guide for details.)
   591      * @return UNormalizationCheckResult
   592      * @stable ICU 4.4
   593      */
   594     virtual UNormalizationCheckResult
   595     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
   596     /**
   597      * Returns the end of the normalized substring of the input string.
   598      * For details see the Normalizer2 base class documentation.
   599      * @param s input string
   600      * @param errorCode Standard ICU error code. Its input value must
   601      *                  pass the U_SUCCESS() test, or else the function returns
   602      *                  immediately. Check for U_FAILURE() on output or use with
   603      *                  function chaining. (See User Guide for details.)
   604      * @return "yes" span end index
   605      * @stable ICU 4.4
   606      */
   607     virtual int32_t
   608     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
   610     /**
   611      * Tests if the character always has a normalization boundary before it,
   612      * regardless of context.
   613      * For details see the Normalizer2 base class documentation.
   614      * @param c character to test
   615      * @return TRUE if c has a normalization boundary before it
   616      * @stable ICU 4.4
   617      */
   618     virtual UBool hasBoundaryBefore(UChar32 c) const;
   620     /**
   621      * Tests if the character always has a normalization boundary after it,
   622      * regardless of context.
   623      * For details see the Normalizer2 base class documentation.
   624      * @param c character to test
   625      * @return TRUE if c has a normalization boundary after it
   626      * @stable ICU 4.4
   627      */
   628     virtual UBool hasBoundaryAfter(UChar32 c) const;
   630     /**
   631      * Tests if the character is normalization-inert.
   632      * For details see the Normalizer2 base class documentation.
   633      * @param c character to test
   634      * @return TRUE if c is normalization-inert
   635      * @stable ICU 4.4
   636      */
   637     virtual UBool isInert(UChar32 c) const;
   638 private:
   639     UnicodeString &
   640     normalize(const UnicodeString &src,
   641               UnicodeString &dest,
   642               USetSpanCondition spanCondition,  // NOTE(review): presumably the condition for the first UnicodeSet::span() call -- confirm in the .cpp
   643               UErrorCode &errorCode) const;
   645     UnicodeString &
   646     normalizeSecondAndAppend(UnicodeString &first,
   647                              const UnicodeString &second,
   648                              UBool doNormalize,  // NOTE(review): presumably selects normalizeSecondAndAppend() vs. append() semantics -- confirm in the .cpp
   649                              UErrorCode &errorCode) const;
   651     const Normalizer2 &norm2;  // wrapped normalizer; aliased by reference, not owned
   652     const UnicodeSet &set;  // filter set; aliased by reference, not owned
   653 };
   655 U_NAMESPACE_END
   657 #endif  // !UCONFIG_NO_NORMALIZATION
   658 #endif  // __NORMALIZER2_H__

mercurial