intl/icu/source/i18n/unicode/tblcoll.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 ******************************************************************************
     3 * Copyright (C) 1996-2013, International Business Machines Corporation and
     4 * others. All Rights Reserved.
     5 ******************************************************************************
     6 */
     8 /**
     9  * \file 
    10  * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
    11  */
    13 /**
    14 * File tblcoll.h
    15 *
    16 * Created by: Helena Shih
    17 *
    18 * Modification History:
    19 *
    20 *  Date        Name        Description
    21 *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
    22 *                          constructor which reads RuleBasedCollator object from
    23 *                          a binary file.  Added writeToFile method which streams
    24 *                          RuleBasedCollator out to a binary file.  The streamIn
    25 *                          and streamOut methods use istream and ostream objects
    26 *                          in binary mode.
    27 *  2/12/97     aliu        Modified to use TableCollationData sub-object to
    28 *                          hold invariant data.
    29 *  2/13/97     aliu        Moved several methods into this class from Collation.
    30 *                          Added a private RuleBasedCollator(Locale&) constructor,
    31 *                          to be used by Collator::createDefault().  General
    32 *                          clean up.
    33 *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
    34 *                          constructor and getDynamicClassID.
    35 *  3/5/97      aliu        Modified constructFromFile() to add parameter
    36 *                          specifying whether or not binary loading is to be
    37 *                          attempted.  This is required for dynamic rule loading.
    38 * 05/07/97     helena      Added memory allocation error detection.
    39 *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
    40 *                          use MergeCollation::getPattern.
    41 *  6/20/97     helena      Java class name change.
    42 *  8/18/97     helena      Added internal API documentation.
    43 * 09/03/97     helena      Added createCollationKeyValues().
    44 * 02/10/98     damiba      Added compare with "length" parameter
    45 * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
    46 * 04/23/99     stephen     Removed EDecompositionMode, merged with
    47 *                          Normalizer::EMode
    48 * 06/14/99     stephen     Removed kResourceBundleSuffix
    49 * 11/02/99     helena      Collator performance enhancements.  Eliminates the
    50 *                          UnicodeString construction and special case for NO_OP.
    51 * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
    52 *                          internal state management.
    53 * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
    54 *                          to implementation file.
    55 * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
    56 *                          (ucol.h)
    57 */
    59 #ifndef TBLCOLL_H
    60 #define TBLCOLL_H
    62 #include "unicode/utypes.h"
    65 #if !UCONFIG_NO_COLLATION
    67 #include "unicode/coll.h"
    68 #include "unicode/ucol.h"
    69 #include "unicode/sortkey.h"
    70 #include "unicode/normlzr.h"
    72 U_NAMESPACE_BEGIN
    74 /**
    75 * @stable ICU 2.0
    76 */
    77 class StringSearch;
    78 /**
    79 * @stable ICU 2.0
    80 */
    81 class CollationElementIterator;
    83 /**
    84  * The RuleBasedCollator class provides the simple implementation of
    85  * Collator, using data-driven tables. The user can create a customized
    86  * table-based collation.
    87  * <P>
    88  * <em>Important: </em>The ICU collation service has been reimplemented 
    89  * in order to achieve better performance and UCA compliance. 
    90  * For details, see the 
    91  * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
    92  * collation design document</a>.
    93  * <p>
    94  * RuleBasedCollator is a thin C++ wrapper over the C implementation.
    95  * <p>
    96  * For more information about the collation service see 
    97  * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
    98  * <p>
    99  * Collation service provides correct sorting orders for most locales supported in ICU. 
   100  * If specific data for a locale is not available, the order eventually falls back
   101  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 
   102  * <p>
   103  * Sort ordering may be customized by providing your own set of rules. For more on
   104  * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
   105  * Collation customization</a> section of the users guide.
   106  * <p>
   107  * Note, RuleBasedCollator is not to be subclassed.
   108  * @see        Collator
   109  * @version    2.0 11/15/2001
   110  */
   111 class U_I18N_API RuleBasedCollator : public Collator
   112 {
   113 public:
   115   // constructor -------------------------------------------------------------
   117     /**
   118      * RuleBasedCollator constructor. This takes the table rules and builds a
   119      * collation table out of them. Please see RuleBasedCollator class
   120      * description for more details on the collation rule syntax.
   121      * @param rules the collation rules to build the collation table from.
   122      * @param status reporting a success or an error.
   123      * @see Locale
   124      * @stable ICU 2.0
   125      */
   126     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
   128     /**
   129      * RuleBasedCollator constructor. This takes the table rules and builds a
   130      * collation table out of them. Please see RuleBasedCollator class
   131      * description for more details on the collation rule syntax.
   132      * @param rules the collation rules to build the collation table from.
   133      * @param collationStrength default strength for comparison
   134      * @param status reporting a success or an error.
   135      * @see Locale
   136      * @stable ICU 2.0
   137      */
   138     RuleBasedCollator(const UnicodeString& rules,
   139                        ECollationStrength collationStrength,
   140                        UErrorCode& status);
   142     /**
   143      * RuleBasedCollator constructor. This takes the table rules and builds a
   144      * collation table out of them. Please see RuleBasedCollator class
   145      * description for more details on the collation rule syntax.
   146      * @param rules the collation rules to build the collation table from.
   147      * @param decompositionMode the normalisation mode
   148      * @param status reporting a success or an error.
   149      * @see Locale
   150      * @stable ICU 2.0
   151      */
   152     RuleBasedCollator(const UnicodeString& rules,
   153                     UColAttributeValue decompositionMode,
   154                     UErrorCode& status);
   156     /**
   157      * RuleBasedCollator constructor. This takes the table rules and builds a
   158      * collation table out of them. Please see RuleBasedCollator class
   159      * description for more details on the collation rule syntax.
   160      * @param rules the collation rules to build the collation table from.
   161      * @param collationStrength default strength for comparison
   162      * @param decompositionMode the normalisation mode
   163      * @param status reporting a success or an error.
   164      * @see Locale
   165      * @stable ICU 2.0
   166      */
   167     RuleBasedCollator(const UnicodeString& rules,
   168                     ECollationStrength collationStrength,
   169                     UColAttributeValue decompositionMode,
   170                     UErrorCode& status);
   172     /**
   173      * Copy constructor.
   174      * @param other the RuleBasedCollator object to be copied
   175      * @see Locale
   176      * @stable ICU 2.0
   177      */
   178     RuleBasedCollator(const RuleBasedCollator& other);
   181     /** Opens a collator from a collator binary image created using
   182     *  cloneBinary. Binary image used in instantiation of the 
   183     *  collator remains owned by the user and should stay around for 
   184     *  the lifetime of the collator. The API also takes a base collator
   185     *  which usually should be UCA.
   186     *  @param bin binary image owned by the user and required through the
   187     *             lifetime of the collator
   188     *  @param length size of the image. If negative, the API will try to
   189     *                figure out the length of the image
   190     *  @param base fallback collator, usually UCA. Base is required to be
   191     *              present through the lifetime of the collator. Currently 
   192     *              it cannot be NULL.
   193     *  @param status for catching errors
   194     *  @return newly created collator
   195     *  @see cloneBinary
   196     *  @stable ICU 3.4
   197     */
   198     RuleBasedCollator(const uint8_t *bin, int32_t length, 
   199                     const RuleBasedCollator *base, 
   200                     UErrorCode &status);
   201     // destructor --------------------------------------------------------------
   203     /**
   204      * Destructor.
   205      * @stable ICU 2.0
   206      */
   207     virtual ~RuleBasedCollator();
   209     // public methods ----------------------------------------------------------
   211     /**
   212      * Assignment operator.
   213      * @param other the RuleBasedCollator object to be assigned from.
   214      * @stable ICU 2.0
   215      */
   216     RuleBasedCollator& operator=(const RuleBasedCollator& other);
   218     /**
   219      * Returns true if argument is the same as this object.
   220      * @param other Collator object to be compared.
   221      * @return true if the argument is the same as this object.
   222      * @stable ICU 2.0
   223      */
   224     virtual UBool operator==(const Collator& other) const;
   226     /**
   227      * Makes a copy of this object.
   228      * @return a copy of this object, owned by the caller
   229      * @stable ICU 2.0
   230      */
   231     virtual Collator* clone(void) const;
   233     /**
   234      * Creates a collation element iterator for the source string. The caller of
   235      * this method is responsible for the memory management of the return
   236      * pointer.
   237      * @param source the string over which the CollationElementIterator will
   238      *        iterate.
   239      * @return the collation element iterator of the source string using this as
   240      *         the based Collator.
   241      * @stable ICU 2.2
   242      */
   243     virtual CollationElementIterator* createCollationElementIterator(
   244                                            const UnicodeString& source) const;
   246     /**
   247      * Creates a collation element iterator for the source. The caller of this
   248      * method is responsible for the memory management of the returned pointer.
   249      * @param source the CharacterIterator which produces the characters over
   250      *        which the CollationElementIterator will iterate.
   251      * @return the collation element iterator of the source using this as the
   252      *         based Collator.
   253      * @stable ICU 2.2
   254      */
   255     virtual CollationElementIterator* createCollationElementIterator(
   256                                          const CharacterIterator& source) const;
   258     // Make deprecated versions of Collator::compare() visible.
   259     using Collator::compare;
   261     /**
   262     * The comparison function compares the character data stored in two
   263     * different strings. Returns information about whether a string is less 
   264     * than, greater than or equal to another string.
   265     * @param source the source string to be compared with.
   266     * @param target the string that is to be compared with the source string.
   267     * @param status possible error code
   268     * @return Returns an enum value. UCOL_GREATER if source is greater
   269     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
   270     * than target
   271     * @stable ICU 2.6
   272     **/
   273     virtual UCollationResult compare(const UnicodeString& source,
   274                                       const UnicodeString& target,
   275                                       UErrorCode &status) const;
   277     /**
   278     * Does the same thing as compare but limits the comparison to a specified 
   279     * length
   280     * @param source the source string to be compared with.
   281     * @param target the string that is to be compared with the source string.
   282     * @param length the length the comparison is limited to
   283     * @param status possible error code
   284     * @return Returns an enum value. UCOL_GREATER if source (up to the specified 
   285     *         length) is greater than target; UCOL_EQUAL if source (up to specified 
   286     *         length) is equal to target; UCOL_LESS if source (up to the specified 
   287     *         length) is less  than target.
   288     * @stable ICU 2.6
   289     */
   290     virtual UCollationResult compare(const UnicodeString& source,
   291                                       const UnicodeString& target,
   292                                       int32_t length,
   293                                       UErrorCode &status) const;
   295     /**
   296     * The comparison function compares the character data stored in two
   297     * different string arrays. Returns information about whether a string array 
   298     * is less than, greater than or equal to another string array.
   299     * @param source the source string array to be compared with.
   300     * @param sourceLength the length of the source string array.  If this value
   301     *        is equal to -1, the string array is null-terminated.
   302     * @param target the string that is to be compared with the source string.
   303     * @param targetLength the length of the target string array.  If this value
   304     *        is equal to -1, the string array is null-terminated.
   305     * @param status possible error code
   306     * @return Returns an enum value. UCOL_GREATER if source is greater
   307     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
   308     * than target
   309     * @stable ICU 2.6
   310     */
   311     virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
   312                                       const UChar* target, int32_t targetLength,
   313                                       UErrorCode &status) const;
   315     /**
   316      * Compares two strings using the Collator.
   317      * Returns whether the first one compares less than/equal to/greater than
   318      * the second one.
   319      * This version takes UCharIterator input.
   320      * @param sIter the first ("source") string iterator
   321      * @param tIter the second ("target") string iterator
   322      * @param status ICU status
   323      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
   324      * @stable ICU 4.2
   325      */
   326     virtual UCollationResult compare(UCharIterator &sIter,
   327                                      UCharIterator &tIter,
   328                                      UErrorCode &status) const;
   330     /**
   331     * Transforms a specified region of the string into a series of characters
   332     * that can be compared with CollationKey.compare. Use a CollationKey when
   333     * you need to do repeated comparisons on the same string. For a single
   334     * comparison the compare method will be faster.
   335     * @param source the source string.
   336     * @param key the transformed key of the source string.
   337     * @param status the error code status.
   338     * @return the transformed key.
   339     * @see CollationKey
   340     * @stable ICU 2.0
   341     */
   342     virtual CollationKey& getCollationKey(const UnicodeString& source,
   343                                           CollationKey& key,
   344                                           UErrorCode& status) const;
   346     /**
   347     * Transforms a specified region of the string into a series of characters
   348     * that can be compared with CollationKey.compare. Use a CollationKey when
   349     * you need to do repeated comparisons on the same string. For a single
   350     * comparison the compare method will be faster.
   351     * @param source the source string.
   352     * @param sourceLength the length of the source string.
   353     * @param key the transformed key of the source string.
   354     * @param status the error code status.
   355     * @return the transformed key.
   356     * @see CollationKey
   357     * @stable ICU 2.0
   358     */
   359     virtual CollationKey& getCollationKey(const UChar *source,
   360                                           int32_t sourceLength,
   361                                           CollationKey& key,
   362                                           UErrorCode& status) const;
   364     /**
   365      * Generates the hash code for the rule-based collation object.
   366      * @return the hash code.
   367      * @stable ICU 2.0
   368      */
   369     virtual int32_t hashCode(void) const;
   371     /**
   372     * Gets the locale of the Collator
   373     * @param type can be either requested, valid or actual locale. For more
   374     *             information see the definition of ULocDataLocaleType in
   375     *             uloc.h
   376     * @param status the error code status.
   377     * @return locale where the collation data lives. If the collator
   378     *         was instantiated from rules, locale is empty.
   379     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
   380     */
   381     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
   383     /**
   384      * Gets the tailoring rules for this collator.
   385      * @return the collation tailoring from which this collator was created
   386      * @stable ICU 2.0
   387      */
   388     const UnicodeString& getRules(void) const;
   390     /**
   391      * Gets the version information for a Collator.
   392      * @param info the version # information, the result will be filled in
   393      * @stable ICU 2.0
   394      */
   395     virtual void getVersion(UVersionInfo info) const;
   397 #ifndef U_HIDE_DEPRECATED_API 
   398     /**
   399      * Returns the maximum length of any expansion sequences that end with the
   400      * specified comparison order.
   401      *
   402      * This is specific to the kind of collation element values and sequences
   403      * returned by the CollationElementIterator.
   404      * Call CollationElementIterator::getMaxExpansion() instead.
   405      *
   406      * @param order a collation order returned by CollationElementIterator::previous
   407      *              or CollationElementIterator::next.
   408      * @return maximum size of the expansion sequences ending with the collation
   409      *         element, or 1 if the collation element does not occur at the end of
   410      *         any expansion sequence
   411      * @see CollationElementIterator#getMaxExpansion
   412      * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead.
   413      */
   414     int32_t getMaxExpansion(int32_t order) const;
   415 #endif  /* U_HIDE_DEPRECATED_API */
   417     /**
   418      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
   419      * method is to implement a simple version of RTTI, since not all C++
   420      * compilers support genuine RTTI. Polymorphic operator==() and clone()
   421      * methods call this method.
   422      * @return The class ID for this object. All objects of a given class have
   423      *         the same class ID. Objects of other classes have different class
   424      *         IDs.
   425      * @stable ICU 2.0
   426      */
   427     virtual UClassID getDynamicClassID(void) const;
   429     /**
   430      * Returns the class ID for this class. This is useful only for comparing to
   431      * a return value from getDynamicClassID(). For example:
   432      * <pre>
   433      * Base* polymorphic_pointer = createPolymorphicObject();
   434      * if (polymorphic_pointer->getDynamicClassID() ==
   435      *                                          Derived::getStaticClassID()) ...
   436      * </pre>
   437      * @return The class ID for all objects of this class.
   438      * @stable ICU 2.0
   439      */
   440     static UClassID U_EXPORT2 getStaticClassID(void);
   442 #ifndef U_HIDE_DEPRECATED_API 
   443     /**
   444      * Do not use this method: The caller and the ICU library might use different heaps.
   445      * Use cloneBinary() instead which writes to caller-provided memory.
   446      *
   447      * Returns a binary format of this collator.
   448      * @param length Returns the length of the data, in bytes
   449      * @param status the error code status.
   450      * @return memory, owned by the caller, of size 'length' bytes.
   451      * @deprecated ICU 52. Use cloneBinary() instead.
   452      */
   453     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
   454 #endif  /* U_HIDE_DEPRECATED_API */
   456     /** Creates a binary image of a collator. This binary image can be stored and 
   457     *  later used to instantiate a collator using ucol_openBinary.
   458     *  This API supports preflighting.
   459     *  @param buffer a fill-in buffer to receive the binary image
   460     *  @param capacity capacity of the destination buffer
   461     *  @param status for catching errors
   462     *  @return size of the image
   463     *  @see ucol_openBinary
   464     *  @stable ICU 3.4
   465     */
   466     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
   468     /**
   469      * Returns current rules. Delta defines whether full rules are returned or
   470      * just the tailoring.
   471      *
   472      * getRules(void) should normally be used instead.
   473      * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
   474      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
   475      * @param buffer UnicodeString to store the result rules
   476      * @stable ICU 2.2
   477      * @see UCOL_FULL_RULES
   478      */
   479     void getRules(UColRuleOption delta, UnicodeString &buffer);
   481     /**
   482      * Universal attribute setter
   483      * @param attr attribute type
   484      * @param value attribute value
   485      * @param status to indicate whether the operation went on smoothly or there were errors
   486      * @stable ICU 2.2
   487      */
   488     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
   489                               UErrorCode &status);
   491     /**
   492      * Universal attribute getter.
   493      * @param attr attribute type
   494      * @param status to indicate whether the operation went on smoothly or there were errors
   495      * @return attribute value
   496      * @stable ICU 2.2
   497      */
   498     virtual UColAttributeValue getAttribute(UColAttribute attr,
   499                                             UErrorCode &status) const;
   501     /**
   502      * Sets the variable top to a collation element value of a string supplied.
   503      * @param varTop one or more (if contraction) UChars to which the variable top should be set
   504      * @param len length of variable top string. If -1 it is considered to be zero terminated.
   505      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
   506      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
   507      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
   508      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
   509      * @stable ICU 2.0
   510      */
   511     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
   513     /**
   514      * Sets the variable top to a collation element value of a string supplied.
   515      * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
   516      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
   517      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
   518      *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
   519      * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
   520      * @stable ICU 2.0
   521      */
   522     virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
   524     /**
   525      * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
   526      * Lower 16 bits are ignored.
   527      * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
   528      * @param status error code (not changed by function)
   529      * @stable ICU 2.0
   530      */
   531     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
   533     /**
   534      * Gets the variable top value of a Collator.
   535      * Lower 16 bits are undefined and should be ignored.
   536      * @param status error code (not changed by function). If error code is set, the return value is undefined.
   537      * @stable ICU 2.0
   538      */
   539     virtual uint32_t getVariableTop(UErrorCode &status) const;
   541     /**
   542      * Get an UnicodeSet that contains all the characters and sequences tailored in 
   543      * this collator.
   544      * @param status      error code of the operation
   545      * @return a pointer to a UnicodeSet object containing all the 
   546      *         code points and sequences that may sort differently than
   547      *         in the UCA. The object must be disposed of by using delete
   548      * @stable ICU 2.4
   549      */
   550     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
   552     /**
   553      * Get the sort key as an array of bytes from an UnicodeString.
   554      * @param source string to be processed.
   555      * @param result buffer to store result in. If NULL, number of bytes needed
   556      *        will be returned.
   557      * @param resultLength length of the result buffer. If it is not large enough, the
   558      *        buffer will be filled to capacity.
   559      * @return Number of bytes needed for storing the sort key
   560      * @stable ICU 2.0
   561      */
   562     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
   563                                int32_t resultLength) const;
   565     /**
   566      * Get the sort key as an array of bytes from an UChar buffer.
   567      * @param source string to be processed.
   568      * @param sourceLength length of string to be processed. If -1, the string
   569      *        is 0 terminated and length will be decided by the function.
   570      * @param result buffer to store result in. If NULL, number of bytes needed
   571      *        will be returned.
   572      * @param resultLength length of the result buffer. If it is not large enough, the
   573      *        buffer will be filled to capacity.
   574      * @return Number of bytes needed for storing the sort key
   575      * @stable ICU 2.2
   576      */
   577     virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
   578                                uint8_t *result, int32_t resultLength) const;
   580     /**
   581      * Retrieves the reordering codes for this collator.
   582      * @param dest The array to fill with the script ordering.
   583      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
   584      *  will only return the length of the result without writing any of the result string (pre-flighting).
   585      * @param status A reference to an error code value, which must not indicate
   586      * a failure before the function call.
   587      * @return The length of the script ordering array.
   588      * @see ucol_setReorderCodes
   589      * @see Collator#getEquivalentReorderCodes
   590      * @see Collator#setReorderCodes
   591      * @stable ICU 4.8 
   592      */
   593      virtual int32_t getReorderCodes(int32_t *dest,
   594                                      int32_t destCapacity,
   595                                      UErrorCode& status) const;
   597     /**
   598      * Sets the ordering of scripts for this collator.
   599      * @param reorderCodes An array of script codes in the new order. This can be NULL if the 
   600      * length is also set to 0. An empty array will clear any reordering codes on the collator.
   601      * @param reorderCodesLength The length of reorderCodes.
   602      * @param status error code
   603      * @see Collator#getReorderCodes
   604      * @see Collator#getEquivalentReorderCodes
   605      * @stable ICU 4.8 
   606      */
   607      virtual void setReorderCodes(const int32_t* reorderCodes,
   608                                   int32_t reorderCodesLength,
   609                                   UErrorCode& status) ;
   611     /**
   612      * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
   613      * codes will be grouped and must reorder together.
   614      * @param reorderCode The reorder code to determine equivalence for. 
   615      * @param dest The array to fill with the script equivalence reordering codes.
   616      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the 
   617      * function will only return the length of the result without writing any of the result 
   618      * string (pre-flighting).
   619      * @param status A reference to an error code value, which must not indicate 
   620      * a failure before the function call.
   621      * @return The length of the reordering code equivalence array.
   622      * @see ucol_setReorderCodes
   623      * @see Collator#getReorderCodes
   624      * @see Collator#setReorderCodes
   625      * @stable ICU 4.8 
   626      */
   627     static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
   628                                 int32_t* dest,
   629                                 int32_t destCapacity,
   630                                 UErrorCode& status);
   632 private:
   634     // private static constants -----------------------------------------------
   636     enum {
               /* Private constants for collation orders: index markers, per-strength
                  increments, masks and shifts, and start/reset values. */
   637         /* needs a lookup in .commit() */
   638         CHARINDEX = 0x70000000,
   639         /* expand index follows */
   640         EXPANDCHARINDEX = 0x7E000000,
   641         /* contract index follows */
   642         CONTRACTCHARINDEX = 0x7F000000,
   643         /* unmapped character values */
   644         UNMAPPED = 0xFFFFFFFF,
   645         /* primary strength increment */
   646         PRIMARYORDERINCREMENT = 0x00010000,
   647         /* secondary strength increment */
   648         SECONDARYORDERINCREMENT = 0x00000100,
   649         /* tertiary strength increment */
   650         TERTIARYORDERINCREMENT = 0x00000001,
   651         /* mask off anything but primary order */
   652         PRIMARYORDERMASK = 0xffff0000,
   653         /* mask off anything but secondary order */
   654         SECONDARYORDERMASK = 0x0000ff00,
   655         /* mask off anything but tertiary order */
   656         TERTIARYORDERMASK = 0x000000ff,
   657         /* mask off ignorable char order */
   658         IGNORABLEMASK = 0x0000ffff,
   659         /* use only the primary difference (same value as PRIMARYORDERMASK) */
   660         PRIMARYDIFFERENCEONLY = 0xffff0000,
   661         /* use only the primary and secondary difference */
   662         SECONDARYDIFFERENCEONLY = 0xffffff00,
   663         /* primary order shift */
   664         PRIMARYORDERSHIFT = 16,
   665         /* secondary order shift */
   666         SECONDARYORDERSHIFT = 8,
   667         /* starting value for collation elements */
   668         COLELEMENTSTART = 0x02020202,
   669         /* testing mask for primary low element */
   670         PRIMARYLOWZEROMASK = 0x00FF0000,
   671         /* resetting value for secondaries and tertiaries */
   672         RESETSECONDARYTERTIARY = 0x00000202,
   673         /* resetting value for tertiaries */
   674         RESETTERTIARY = 0x00000002,
               /* NOTE(review): undocumented in the original; presumably the value used
                  for primary-ignorable elements -- confirm against the implementation */
   676         PRIMIGNORABLE = 0x0202
   677     };
   679     // private data members ---------------------------------------------------
           /**
           * TRUE when this object is responsible for closing ucollator:
           * setUCollator(UCollator*) calls ucol_close() on the current ucollator
           * only while this flag is set, and then resets the flag to FALSE.
           */
   681     UBool dataIsOwned;
           /**
           * Set to TRUE by setUCollator(UCollator*). checkOwned() is documented to
           * copy the ucollator only when it is neither owned nor a write-through alias.
           */
   683     UBool isWriteThroughAlias;
   685     /**
   686     * C struct for collation. All initialisation for it has to be done through
   687     * setUCollator().
   688     */
   689     UCollator *ucollator;
   691     /**
   692     * The collation rules, held as a UnicodeString.
   693     */
   694     UnicodeString urulestring;
   696     // friend classes --------------------------------------------------------
   698     /**
   699     * Used to iterate over collation elements in a character source.
   700     */
   701     friend class CollationElementIterator;
   703     /**
   704     * Collator needs access ONLY to the private constructor
   705     * RuleBasedCollator(const Locale&, UErrorCode&).
   706     */
   707     friend class Collator;
   709     /**
   710     * Used to search over collation elements in a character source.
   711     */
   712     friend class StringSearch;
   714     // private constructors --------------------------------------------------
   716     /**
   717      * Default constructor. Declared private, so it is reachable only from this
            * class and the friend classes declared above.
   718      */
   719     RuleBasedCollator();
   721     /**
   722      * RuleBasedCollator constructor. This constructor takes a locale. The
   723      * only caller of this constructor should be Collator::createInstance(). If
   724      * createInstance() happens to know that the requested locale's collation is
   725      * implemented as a RuleBasedCollator, it can then call this constructor.
   726      * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
   727      * COLLATION TABLE. It does this by falling back to defaults.
   728      * @param desiredLocale the locale whose collation is requested
   729      * @param status error code status
   730      */
   731     RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
   733     /**
   734      * Common constructor implementation.
   735      *
   736      * @param rules the collation rules to build the collation table from.
   737      * @param collationStrength default strength for comparison
   738      * @param decompositionMode the normalisation mode
   739      * @param status reports success or an error.
   740      */
   741     void
   742     construct(const UnicodeString& rules,
   743               UColAttributeValue collationStrength,
   744               UColAttributeValue decompositionMode,
   745               UErrorCode& status);
   747     // private methods -------------------------------------------------------
   749     /**
   750     * Creates the c struct for ucollator. The inline implementation forwards
           * locale.getName() to the setUCollator(const char*, UErrorCode&) overload.
   751     * @param locale desired locale
   752     * @param status error status
   753     */
   754     void setUCollator(const Locale& locale, UErrorCode& status);
   756     /**
   757     * Creates the c struct for ucollator from a locale name.
   758     * @param locale desired locale name
   759     * @param status error status
   760     */
   761     void setUCollator(const char* locale, UErrorCode& status);
   763     /**
   764     * Sets the c struct for ucollator. This is used internally by StringSearch.
   765     * Hence the responsibility of cleaning up the new ucollator is not taken by
   766     * this RuleBasedCollator. The dataIsOwned flag is set to FALSE and the
           * isWriteThroughAlias flag is set to TRUE. A ucollator that was owned
           * before the call is closed with ucol_close() first.
   767     * @param collator new ucollator data
   768     */
   769     void setUCollator(UCollator *collator);
   771 public:
   772 #ifndef U_HIDE_INTERNAL_API
   773     /**
   774     * Gets the UCollator data struct held by this object. Used only by
           * StringSearch & intltest.
   775     * @return UCollator data struct
   776     * @internal
   777     */
   778     const UCollator * getUCollator();
   779 #endif  /* U_HIDE_INTERNAL_API */
   781 protected:
   782    /**
   783     * Used internally by registration to define the requested, valid and actual locales.
   784     * @param requestedLocale the requested locale
   785     * @param validLocale the valid locale
   786     * @param actualLocale the actual locale
   787     * @internal
   788     */
   789     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
   791 private:
   792     // If not owned and not a write-through alias, copy the ucollator.
   793     void checkOwned(void);
   795     // Utility to init the rule string; used by checkOwned, construct and setUCollator(UCollator*).
   796     void setRuleStringFromCollator();
   798 public:
   799     /** Get the short definition string for a collator. This internal API harvests the collator's
   800      *  locale and the attribute set and produces a string that can be used for opening
   801      *  a collator with the same properties using the ucol_openFromShortString API.
   802      *  This string will be normalized.
   803      *  The structure and the syntax of the string are defined in the "Naming collators"
   804      *  section of the user's guide:
   805      *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
   806      *  This function supports preflighting.
   807      *
   808      *  This is internal, and intended to be used with delegate converters.
   809      *
   810      *  @param locale a locale that will appear as the collator's locale in the resulting
   811      *                short string definition. If NULL, the locale will be harvested
   812      *                from the collator.
   813      *  @param buffer space to hold the resulting string
   814      *  @param capacity capacity of the buffer
   815      *  @param status for returning errors, including all the preflighting errors
   816      *  @return length of the resulting string
   817      *  @see ucol_openFromShortString
   818      *  @see ucol_normalizeShortDefinitionString
   819      *  @see ucol_getShortDefinitionString
   820      *  @internal
   821      */
   822     virtual int32_t internalGetShortDefinitionString(const char *locale,
   823                                                      char *buffer,
   824                                                      int32_t capacity,
   825                                                      UErrorCode &status) const;
   826 };
   828 // inline method implementation ---------------------------------------------
   830 inline void RuleBasedCollator::setUCollator(const Locale &locale,
   831                                                UErrorCode &status)
   832 {
           // Convenience overload: pass the locale's name on to the
           // setUCollator(const char*, UErrorCode&) overload.
   833     setUCollator(locale.getName(), status);
   834 }
   837 inline void RuleBasedCollator::setUCollator(UCollator     *collator)
   838 {
           // Close the current collator first, but only if one is set and this object owns it.
   840     if (ucollator && dataIsOwned) {
   841         ucol_close(ucollator);
   842     }
           // Install the caller's collator and mark it as a non-owned, write-through alias.
   843     ucollator   = collator;
   844     dataIsOwned = FALSE;
   845     isWriteThroughAlias = TRUE;
           // Runs last, after ucollator has been replaced.
   846     setRuleStringFromCollator();
   847 }
   849 #ifndef U_HIDE_INTERNAL_API
   850 inline const UCollator * RuleBasedCollator::getUCollator()
   851 {
           // Returns the stored pointer as-is; no copy is made and the ownership flags are untouched.
   852     return ucollator;
   853 }
   854 #endif  /* U_HIDE_INTERNAL_API */
   856 U_NAMESPACE_END
   858 #endif /* #if !UCONFIG_NO_COLLATION */
   860 #endif

mercurial