michael@0: /* michael@0: ****************************************************************************** michael@0: * Copyright (C) 1996-2013, International Business Machines Corporation and michael@0: * others. All Rights Reserved. michael@0: ****************************************************************************** michael@0: */ michael@0: michael@0: /** michael@0: * \file michael@0: * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator. michael@0: */ michael@0: michael@0: /** michael@0: * File tblcoll.h michael@0: * michael@0: * Created by: Helena Shih michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 2/5/97 aliu Added streamIn and streamOut methods. Added michael@0: * constructor which reads RuleBasedCollator object from michael@0: * a binary file. Added writeToFile method which streams michael@0: * RuleBasedCollator out to a binary file. The streamIn michael@0: * and streamOut methods use istream and ostream objects michael@0: * in binary mode. michael@0: * 2/12/97 aliu Modified to use TableCollationData sub-object to michael@0: * hold invariant data. michael@0: * 2/13/97 aliu Moved several methods into this class from Collation. michael@0: * Added a private RuleBasedCollator(Locale&) constructor, michael@0: * to be used by Collator::createDefault(). General michael@0: * clean up. michael@0: * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy michael@0: * constructor and getDynamicClassID. michael@0: * 3/5/97 aliu Modified constructFromFile() to add parameter michael@0: * specifying whether or not binary loading is to be michael@0: * attempted. This is required for dynamic rule loading. michael@0: * 05/07/97 helena Added memory allocation error detection. michael@0: * 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to michael@0: * use MergeCollation::getPattern. michael@0: * 6/20/97 helena Java class name change. 
michael@0: * 8/18/97 helena Added internal API documentation. michael@0: * 09/03/97 helena Added createCollationKeyValues(). michael@0: * 02/10/98 damiba Added compare with "length" parameter michael@0: * 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java michael@0: * 04/23/99 stephen Removed EDecompositionMode, merged with michael@0: * Normalizer::EMode michael@0: * 06/14/99 stephen Removed kResourceBundleSuffix michael@0: * 11/02/99 helena Collator performance enhancements. Eliminates the michael@0: * UnicodeString construction and special case for NO_OP. michael@0: * 11/23/99 srl More performance enhancements. Updates to NormalizerIterator michael@0: * internal state management. michael@0: * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator michael@0: * to implementation file. michael@0: * 01/29/01 synwee Modified into a C++ wrapper which calls C API michael@0: * (ucol.h) michael@0: */ michael@0: michael@0: #ifndef TBLCOLL_H michael@0: #define TBLCOLL_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: #include "unicode/coll.h" michael@0: #include "unicode/ucol.h" michael@0: #include "unicode/sortkey.h" michael@0: #include "unicode/normlzr.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: /** michael@0: * @stable ICU 2.0 michael@0: */ michael@0: class StringSearch; michael@0: /** michael@0: * @stable ICU 2.0 michael@0: */ michael@0: class CollationElementIterator; michael@0: michael@0: /** michael@0: * The RuleBasedCollator class provides the simple implementation of michael@0: * Collator, using data-driven tables. The user can create a customized michael@0: * table-based collation. michael@0: *

michael@0: * Important: The ICU collation service has been reimplemented michael@0: * in order to achieve better performance and UCA compliance. michael@0: * For details, see the michael@0: * michael@0: * collation design document. michael@0: *

michael@0: * RuleBasedCollator is a thin C++ wrapper over the C implementation. michael@0: *

michael@0: * For more information about the collation service see michael@0: * the users guide. michael@0: *

michael@0: * Collation service provides correct sorting orders for most locales supported in ICU. michael@0: * If specific data for a locale is not available, the order eventually falls back michael@0: * to the UCA sort order. michael@0: *

michael@0: * Sort ordering may be customized by providing your own set of rules. For more on michael@0: * this subject see the michael@0: * Collation customization section of the users guide (http://userguide.icu-project.org/collation/customization). michael@0: *

michael@0: * Note, RuleBasedCollator is not to be subclassed. michael@0: * @see Collator michael@0: * @version 2.0 11/15/2001 michael@0: */ michael@0: class U_I18N_API RuleBasedCollator : public Collator michael@0: { michael@0: public: michael@0: michael@0: // constructor ------------------------------------------------------------- michael@0: michael@0: /** michael@0: * RuleBasedCollator constructor. This takes the table rules and builds a michael@0: * collation table out of them. Please see RuleBasedCollator class michael@0: * description for more details on the collation rule syntax. michael@0: * @param rules the collation rules to build the collation table from. michael@0: * @param status reporting a success or an error. michael@0: * @see Locale michael@0: * @stable ICU 2.0 michael@0: */ michael@0: RuleBasedCollator(const UnicodeString& rules, UErrorCode& status); michael@0: michael@0: /** michael@0: * RuleBasedCollator constructor. This takes the table rules and builds a michael@0: * collation table out of them. Please see RuleBasedCollator class michael@0: * description for more details on the collation rule syntax. michael@0: * @param rules the collation rules to build the collation table from. michael@0: * @param collationStrength default strength for comparison michael@0: * @param status reporting a success or an error. michael@0: * @see Locale michael@0: * @stable ICU 2.0 michael@0: */ michael@0: RuleBasedCollator(const UnicodeString& rules, michael@0: ECollationStrength collationStrength, michael@0: UErrorCode& status); michael@0: michael@0: /** michael@0: * RuleBasedCollator constructor. This takes the table rules and builds a michael@0: * collation table out of them. Please see RuleBasedCollator class michael@0: * description for more details on the collation rule syntax. michael@0: * @param rules the collation rules to build the collation table from. 
michael@0: * @param decompositionMode the normalisation mode michael@0: * @param status reporting a success or an error. michael@0: * @see Locale michael@0: * @stable ICU 2.0 michael@0: */ michael@0: RuleBasedCollator(const UnicodeString& rules, michael@0: UColAttributeValue decompositionMode, michael@0: UErrorCode& status); michael@0: michael@0: /** michael@0: * RuleBasedCollator constructor. This takes the table rules and builds a michael@0: * collation table out of them. Please see RuleBasedCollator class michael@0: * description for more details on the collation rule syntax. michael@0: * @param rules the collation rules to build the collation table from. michael@0: * @param collationStrength default strength for comparison michael@0: * @param decompositionMode the normalisation mode michael@0: * @param status reporting a success or an error. michael@0: * @see Locale michael@0: * @stable ICU 2.0 michael@0: */ michael@0: RuleBasedCollator(const UnicodeString& rules, michael@0: ECollationStrength collationStrength, michael@0: UColAttributeValue decompositionMode, michael@0: UErrorCode& status); michael@0: michael@0: /** michael@0: * Copy constructor. michael@0: * @param other the RuleBasedCollator object to be copied michael@0: * @see Locale michael@0: * @stable ICU 2.0 michael@0: */ michael@0: RuleBasedCollator(const RuleBasedCollator& other); michael@0: michael@0: michael@0: /** Opens a collator from a collator binary image created using michael@0: * cloneBinary. Binary image used in instantiation of the michael@0: * collator remains owned by the user and should stay around for michael@0: * the lifetime of the collator. The API also takes a base collator michael@0: * which usually should be UCA. michael@0: * @param bin binary image owned by the user and required through the michael@0: * lifetime of the collator michael@0: * @param length size of the image. 
If negative, the API will try to michael@0: * figure out the length of the image michael@0: * @param base fallback collator, usually UCA. Base is required to be michael@0: * present through the lifetime of the collator. Currently michael@0: * it cannot be NULL. michael@0: * @param status for catching errors michael@0: * @return newly created collator michael@0: * @see cloneBinary michael@0: * @stable ICU 3.4 michael@0: */ michael@0: RuleBasedCollator(const uint8_t *bin, int32_t length, michael@0: const RuleBasedCollator *base, michael@0: UErrorCode &status); michael@0: // destructor -------------------------------------------------------------- michael@0: michael@0: /** michael@0: * Destructor. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual ~RuleBasedCollator(); michael@0: michael@0: // public methods ---------------------------------------------------------- michael@0: michael@0: /** michael@0: * Assignment operator. michael@0: * @param other the RuleBasedCollator object to be copied. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: RuleBasedCollator& operator=(const RuleBasedCollator& other); michael@0: michael@0: /** michael@0: * Returns true if argument is the same as this object. michael@0: * @param other Collator object to be compared. michael@0: * @return true if the argument is the same as this object. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual UBool operator==(const Collator& other) const; michael@0: michael@0: /** michael@0: * Makes a copy of this object. michael@0: * @return a copy of this object, owned by the caller michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual Collator* clone(void) const; michael@0: michael@0: /** michael@0: * Creates a collation element iterator for the source string. The caller of michael@0: * this method is responsible for the memory management of the return michael@0: * pointer. 
michael@0: * @param source the string over which the CollationElementIterator will michael@0: * iterate. michael@0: * @return the collation element iterator of the source string using this as michael@0: * the based Collator. michael@0: * @stable ICU 2.2 michael@0: */ michael@0: virtual CollationElementIterator* createCollationElementIterator( michael@0: const UnicodeString& source) const; michael@0: michael@0: /** michael@0: * Creates a collation element iterator for the source. The caller of this michael@0: * method is responsible for the memory management of the returned pointer. michael@0: * @param source the CharacterIterator which produces the characters over michael@0: * which the CollationElementIterator will iterate. michael@0: * @return the collation element iterator of the source using this as the michael@0: * based Collator. michael@0: * @stable ICU 2.2 michael@0: */ michael@0: virtual CollationElementIterator* createCollationElementIterator( michael@0: const CharacterIterator& source) const; michael@0: michael@0: // Make deprecated versions of Collator::compare() visible. michael@0: using Collator::compare; michael@0: michael@0: /** michael@0: * The comparison function compares the character data stored in two michael@0: * different strings. Returns information about whether a string is less michael@0: * than, greater than or equal to another string. michael@0: * @param source the source string to be compared with. michael@0: * @param target the string that is to be compared with the source string. michael@0: * @param status possible error code michael@0: * @return Returns an enum value. 
UCOL_GREATER if source is greater michael@0: * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less michael@0: * than target michael@0: * @stable ICU 2.6 michael@0: **/ michael@0: virtual UCollationResult compare(const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: UErrorCode &status) const; michael@0: michael@0: /** michael@0: * Does the same thing as compare but limits the comparison to a specified michael@0: * length michael@0: * @param source the source string to be compared with. michael@0: * @param target the string that is to be compared with the source string. michael@0: * @param length the length the comparison is limited to michael@0: * @param status possible error code michael@0: * @return Returns an enum value. UCOL_GREATER if source (up to the specified michael@0: * length) is greater than target; UCOL_EQUAL if source (up to specified michael@0: * length) is equal to target; UCOL_LESS if source (up to the specified michael@0: * length) is less than target. michael@0: * @stable ICU 2.6 michael@0: */ michael@0: virtual UCollationResult compare(const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: int32_t length, michael@0: UErrorCode &status) const; michael@0: michael@0: /** michael@0: * The comparison function compares the character data stored in two michael@0: * different string arrays. Returns information about whether a string array michael@0: * is less than, greater than or equal to another string array. michael@0: * @param source the source string array to be compared with. michael@0: * @param sourceLength the length of the source string array. If this value michael@0: * is equal to -1, the string array is null-terminated. michael@0: * @param target the string that is to be compared with the source string. michael@0: * @param targetLength the length of the target string array. If this value michael@0: * is equal to -1, the string array is null-terminated. 
michael@0: * @param status possible error code michael@0: * @return Returns an enum value. UCOL_GREATER if source is greater michael@0: * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less michael@0: * than target michael@0: * @stable ICU 2.6 michael@0: */ michael@0: virtual UCollationResult compare(const UChar* source, int32_t sourceLength, michael@0: const UChar* target, int32_t targetLength, michael@0: UErrorCode &status) const; michael@0: michael@0: /** michael@0: * Compares two strings using the Collator. michael@0: * Returns whether the first one compares less than/equal to/greater than michael@0: * the second one. michael@0: * This version takes UCharIterator input. michael@0: * @param sIter the first ("source") string iterator michael@0: * @param tIter the second ("target") string iterator michael@0: * @param status ICU status michael@0: * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER michael@0: * @stable ICU 4.2 michael@0: */ michael@0: virtual UCollationResult compare(UCharIterator &sIter, michael@0: UCharIterator &tIter, michael@0: UErrorCode &status) const; michael@0: michael@0: /** michael@0: * Transforms a specified region of the string into a series of characters michael@0: * that can be compared with CollationKey.compare. Use a CollationKey when michael@0: * you need to do repeated comparisons on the same string. For a single michael@0: * comparison the compare method will be faster. michael@0: * @param source the source string. michael@0: * @param key the transformed key of the source string. michael@0: * @param status the error code status. michael@0: * @return the transformed key. 
michael@0: * @see CollationKey michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual CollationKey& getCollationKey(const UnicodeString& source, michael@0: CollationKey& key, michael@0: UErrorCode& status) const; michael@0: michael@0: /** michael@0: * Transforms a specified region of the string into a series of characters michael@0: * that can be compared with CollationKey.compare. Use a CollationKey when michael@0: * you need to do repeated comparisons on the same string. For a single michael@0: * comparison the compare method will be faster. michael@0: * @param source the source string. michael@0: * @param sourceLength the length of the source string. michael@0: * @param key the transformed key of the source string. michael@0: * @param status the error code status. michael@0: * @return the transformed key. michael@0: * @see CollationKey michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual CollationKey& getCollationKey(const UChar *source, michael@0: int32_t sourceLength, michael@0: CollationKey& key, michael@0: UErrorCode& status) const; michael@0: michael@0: /** michael@0: * Generates the hash code for the rule-based collation object. michael@0: * @return the hash code. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual int32_t hashCode(void) const; michael@0: michael@0: /** michael@0: * Gets the locale of the Collator michael@0: * @param type can be either requested, valid or actual locale. For more michael@0: * information see the definition of ULocDataLocaleType in michael@0: * uloc.h michael@0: * @param status the error code status. michael@0: * @return locale where the collation data lives. If the collator michael@0: * was instantiated from rules, locale is empty. michael@0: * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback michael@0: */ michael@0: virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; michael@0: michael@0: /** michael@0: * Gets the tailoring rules for this collator. 
michael@0: * @return the collation tailoring from which this collator was created michael@0: * @stable ICU 2.0 michael@0: */ michael@0: const UnicodeString& getRules(void) const; michael@0: michael@0: /** michael@0: * Gets the version information for a Collator. michael@0: * @param info the version # information, the result will be filled in michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual void getVersion(UVersionInfo info) const; michael@0: michael@0: #ifndef U_HIDE_DEPRECATED_API michael@0: /** michael@0: * Returns the maximum length of any expansion sequences that end with the michael@0: * specified comparison order. michael@0: * michael@0: * This is specific to the kind of collation element values and sequences michael@0: * returned by the CollationElementIterator. michael@0: * Call CollationElementIterator::getMaxExpansion() instead. michael@0: * michael@0: * @param order a collation order returned by CollationElementIterator::previous michael@0: * or CollationElementIterator::next. michael@0: * @return maximum size of the expansion sequences ending with the collation michael@0: * element, or 1 if the collation element does not occur at the end of michael@0: * any expansion sequence michael@0: * @see CollationElementIterator#getMaxExpansion michael@0: * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead. michael@0: */ michael@0: int32_t getMaxExpansion(int32_t order) const; michael@0: #endif /* U_HIDE_DEPRECATED_API */ michael@0: michael@0: /** michael@0: * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This michael@0: * method is to implement a simple version of RTTI, since not all C++ michael@0: * compilers support genuine RTTI. Polymorphic operator==() and clone() michael@0: * methods call this method. michael@0: * @return The class ID for this object. All objects of a given class have michael@0: * the same class ID. Objects of other classes have different class michael@0: * IDs. 
michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual UClassID getDynamicClassID(void) const; michael@0: michael@0: /** michael@0: * Returns the class ID for this class. This is useful only for comparing to michael@0: * a return value from getDynamicClassID(). For example: michael@0: *

michael@0:      * Base* polymorphic_pointer = createPolymorphicObject();
michael@0:      * if (polymorphic_pointer->getDynamicClassID() ==
michael@0:      *                                          Derived::getStaticClassID()) ...
michael@0:      * 
michael@0: * @return The class ID for all objects of this class. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static UClassID U_EXPORT2 getStaticClassID(void); michael@0: michael@0: #ifndef U_HIDE_DEPRECATED_API michael@0: /** michael@0: * Do not use this method: The caller and the ICU library might use different heaps. michael@0: * Use cloneBinary() instead which writes to caller-provided memory. michael@0: * michael@0: * Returns a binary format of this collator. michael@0: * @param length Returns the length of the data, in bytes michael@0: * @param status the error code status. michael@0: * @return memory, owned by the caller, of size 'length' bytes. michael@0: * @deprecated ICU 52. Use cloneBinary() instead. michael@0: */ michael@0: uint8_t *cloneRuleData(int32_t &length, UErrorCode &status); michael@0: #endif /* U_HIDE_DEPRECATED_API */ michael@0: michael@0: /** Creates a binary image of a collator. This binary image can be stored and michael@0: * later used to instantiate a collator using ucol_openBinary. michael@0: * This API supports preflighting. michael@0: * @param buffer a fill-in buffer to receive the binary image michael@0: * @param capacity capacity of the destination buffer michael@0: * @param status for catching errors michael@0: * @return size of the image michael@0: * @see ucol_openBinary michael@0: * @stable ICU 3.4 michael@0: */ michael@0: int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status); michael@0: michael@0: /** michael@0: * Returns current rules. Delta defines whether full rules are returned or michael@0: * just the tailoring. michael@0: * michael@0: * getRules(void) should normally be used instead. michael@0: * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales michael@0: * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 
michael@0: * @param buffer UnicodeString to store the result rules michael@0: * @stable ICU 2.2 michael@0: * @see UCOL_FULL_RULES michael@0: */ michael@0: void getRules(UColRuleOption delta, UnicodeString &buffer); michael@0: michael@0: /** michael@0: * Universal attribute setter michael@0: * @param attr attribute type michael@0: * @param value attribute value michael@0: * @param status to indicate whether the operation went on smoothly or there were errors michael@0: * @stable ICU 2.2 michael@0: */ michael@0: virtual void setAttribute(UColAttribute attr, UColAttributeValue value, michael@0: UErrorCode &status); michael@0: michael@0: /** michael@0: * Universal attribute getter. michael@0: * @param attr attribute type michael@0: * @param status to indicate whether the operation went on smoothly or there were errors michael@0: * @return attribute value michael@0: * @stable ICU 2.2 michael@0: */ michael@0: virtual UColAttributeValue getAttribute(UColAttribute attr, michael@0: UErrorCode &status) const; michael@0: michael@0: /** michael@0: * Sets the variable top to a collation element value of a string supplied. michael@0: * @param varTop one or more (if contraction) UChars to which the variable top should be set michael@0: * @param len length of variable top string. If -1 it is considered to be zero terminated. michael@0: * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
michael@0: * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
michael@0: * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes michael@0: * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status); michael@0: michael@0: /** michael@0: * Sets the variable top to a collation element value of a string supplied. michael@0: * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set michael@0: * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
michael@0: * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
michael@0: * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes michael@0: * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status); michael@0: michael@0: /** michael@0: * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits. michael@0: * Lower 16 bits are ignored. michael@0: * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop michael@0: * @param status error code (not changed by function) michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual void setVariableTop(uint32_t varTop, UErrorCode &status); michael@0: michael@0: /** michael@0: * Gets the variable top value of a Collator. michael@0: * Lower 16 bits are undefined and should be ignored. michael@0: * @param status error code (not changed by function). If error code is set, the return value is undefined. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual uint32_t getVariableTop(UErrorCode &status) const; michael@0: michael@0: /** michael@0: * Get a UnicodeSet that contains all the characters and sequences tailored in michael@0: * this collator. michael@0: * @param status error code of the operation michael@0: * @return a pointer to a UnicodeSet object containing all the michael@0: * code points and sequences that may sort differently than michael@0: * in the UCA. The object must be disposed of by using delete michael@0: * @stable ICU 2.4 michael@0: */ michael@0: virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; michael@0: michael@0: /** michael@0: * Get the sort key as an array of bytes from an UnicodeString. michael@0: * @param source string to be processed. michael@0: * @param result buffer to store result in. If NULL, number of bytes needed michael@0: * will be returned. 
michael@0: * @param resultLength length of the result buffer. If it is not large enough, the michael@0: * buffer will be filled to capacity. michael@0: * @return Number of bytes needed for storing the sort key michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result, michael@0: int32_t resultLength) const; michael@0: michael@0: /** michael@0: * Get the sort key as an array of bytes from an UChar buffer. michael@0: * @param source string to be processed. michael@0: * @param sourceLength length of string to be processed. If -1, the string michael@0: * is 0 terminated and length will be decided by the function. michael@0: * @param result buffer to store result in. If NULL, number of bytes needed michael@0: * will be returned. michael@0: * @param resultLength length of the result buffer. If it is not large enough, the michael@0: * buffer will be filled to capacity. michael@0: * @return Number of bytes needed for storing the sort key michael@0: * @stable ICU 2.2 michael@0: */ michael@0: virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, michael@0: uint8_t *result, int32_t resultLength) const; michael@0: michael@0: /** michael@0: * Retrieves the reordering codes for this collator. michael@0: * @param dest The array to fill with the script ordering. michael@0: * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function michael@0: * will only return the length of the result without writing any of the result string (pre-flighting). michael@0: * @param status A reference to an error code value, which must not indicate michael@0: * a failure before the function call. michael@0: * @return The length of the script ordering array. 
michael@0: * @see ucol_setReorderCodes michael@0: * @see Collator#getEquivalentReorderCodes michael@0: * @see Collator#setReorderCodes michael@0: * @stable ICU 4.8 michael@0: */ michael@0: virtual int32_t getReorderCodes(int32_t *dest, michael@0: int32_t destCapacity, michael@0: UErrorCode& status) const; michael@0: michael@0: /** michael@0: * Sets the ordering of scripts for this collator. michael@0: * @param reorderCodes An array of script codes in the new order. This can be NULL if the michael@0: * length is also set to 0. An empty array will clear any reordering codes on the collator. michael@0: * @param reorderCodesLength The length of reorderCodes. michael@0: * @param status error code michael@0: * @see Collator#getReorderCodes michael@0: * @see Collator#getEquivalentReorderCodes michael@0: * @stable ICU 4.8 michael@0: */ michael@0: virtual void setReorderCodes(const int32_t* reorderCodes, michael@0: int32_t reorderCodesLength, michael@0: UErrorCode& status) ; michael@0: michael@0: /** michael@0: * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder michael@0: * codes will be grouped and must reorder together. michael@0: * @param reorderCode The reorder code to determine equivalence for. michael@0: * @param dest The array to fill with the script equivalene reordering codes. michael@0: * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the michael@0: * function will only return the length of the result without writing any of the result michael@0: * string (pre-flighting). michael@0: * @param status A reference to an error code value, which must not indicate michael@0: * a failure before the function call. michael@0: * @return The length of the of the reordering code equivalence array. 
michael@0: * @see ucol_setReorderCodes michael@0: * @see Collator#getReorderCodes michael@0: * @see Collator#setReorderCodes michael@0: * @stable ICU 4.8 michael@0: */ michael@0: static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode, michael@0: int32_t* dest, michael@0: int32_t destCapacity, michael@0: UErrorCode& status); michael@0: michael@0: private: michael@0: michael@0: // private static constants ----------------------------------------------- michael@0: michael@0: enum { michael@0: /* need look up in .commit() */ michael@0: CHARINDEX = 0x70000000, michael@0: /* Expand index follows */ michael@0: EXPANDCHARINDEX = 0x7E000000, michael@0: /* contract indexes follows */ michael@0: CONTRACTCHARINDEX = 0x7F000000, michael@0: /* unmapped character values */ michael@0: UNMAPPED = 0xFFFFFFFF, michael@0: /* primary strength increment */ michael@0: PRIMARYORDERINCREMENT = 0x00010000, michael@0: /* secondary strength increment */ michael@0: SECONDARYORDERINCREMENT = 0x00000100, michael@0: /* tertiary strength increment */ michael@0: TERTIARYORDERINCREMENT = 0x00000001, michael@0: /* mask off anything but primary order */ michael@0: PRIMARYORDERMASK = 0xffff0000, michael@0: /* mask off anything but secondary order */ michael@0: SECONDARYORDERMASK = 0x0000ff00, michael@0: /* mask off anything but tertiary order */ michael@0: TERTIARYORDERMASK = 0x000000ff, michael@0: /* mask off ignorable char order */ michael@0: IGNORABLEMASK = 0x0000ffff, michael@0: /* use only the primary difference */ michael@0: PRIMARYDIFFERENCEONLY = 0xffff0000, michael@0: /* use only the primary and secondary difference */ michael@0: SECONDARYDIFFERENCEONLY = 0xffffff00, michael@0: /* primary order shift */ michael@0: PRIMARYORDERSHIFT = 16, michael@0: /* secondary order shift */ michael@0: SECONDARYORDERSHIFT = 8, michael@0: /* starting value for collation elements */ michael@0: COLELEMENTSTART = 0x02020202, michael@0: /* testing mask for primary low element */ michael@0: 
PRIMARYLOWZEROMASK = 0x00FF0000, michael@0: /* reseting value for secondaries and tertiaries */ michael@0: RESETSECONDARYTERTIARY = 0x00000202, michael@0: /* reseting value for tertiaries */ michael@0: RESETTERTIARY = 0x00000002, michael@0: michael@0: PRIMIGNORABLE = 0x0202 michael@0: }; michael@0: michael@0: // private data members --------------------------------------------------- michael@0: michael@0: UBool dataIsOwned; michael@0: michael@0: UBool isWriteThroughAlias; michael@0: michael@0: /** michael@0: * c struct for collation. All initialisation for it has to be done through michael@0: * setUCollator(). michael@0: */ michael@0: UCollator *ucollator; michael@0: michael@0: /** michael@0: * Rule UnicodeString michael@0: */ michael@0: UnicodeString urulestring; michael@0: michael@0: // friend classes -------------------------------------------------------- michael@0: michael@0: /** michael@0: * Used to iterate over collation elements in a character source. michael@0: */ michael@0: friend class CollationElementIterator; michael@0: michael@0: /** michael@0: * Collator ONLY needs access to RuleBasedCollator(const Locale&, michael@0: * UErrorCode&) michael@0: */ michael@0: friend class Collator; michael@0: michael@0: /** michael@0: * Searching over collation elements in a character source michael@0: */ michael@0: friend class StringSearch; michael@0: michael@0: // private constructors -------------------------------------------------- michael@0: michael@0: /** michael@0: * Default constructor michael@0: */ michael@0: RuleBasedCollator(); michael@0: michael@0: /** michael@0: * RuleBasedCollator constructor. This constructor takes a locale. The michael@0: * only caller of this class should be Collator::createInstance(). If michael@0: * createInstance() happens to know that the requested locale's collation is michael@0: * implemented as a RuleBasedCollator, it can then call this constructor. 
michael@0: * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID michael@0: * COLLATION TABLE. It does this by falling back to defaults. michael@0: * @param desiredLocale locale used michael@0: * @param status error code status michael@0: */ michael@0: RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status); michael@0: michael@0: /** michael@0: * common constructor implementation michael@0: * michael@0: * @param rules the collation rules to build the collation table from. michael@0: * @param collationStrength default strength for comparison michael@0: * @param decompositionMode the normalisation mode michael@0: * @param status reporting a success or an error. michael@0: */ michael@0: void michael@0: construct(const UnicodeString& rules, michael@0: UColAttributeValue collationStrength, michael@0: UColAttributeValue decompositionMode, michael@0: UErrorCode& status); michael@0: michael@0: // private methods ------------------------------------------------------- michael@0: michael@0: /** michael@0: * Creates the c struct for ucollator michael@0: * @param locale desired locale michael@0: * @param status error status michael@0: */ michael@0: void setUCollator(const Locale& locale, UErrorCode& status); michael@0: michael@0: /** michael@0: * Creates the c struct for ucollator michael@0: * @param locale desired locale name michael@0: * @param status error status michael@0: */ michael@0: void setUCollator(const char* locale, UErrorCode& status); michael@0: michael@0: /** michael@0: * Creates the c struct for ucollator. This used internally by StringSearch. michael@0: * Hence the responsibility of cleaning up the ucollator is not done by michael@0: * this RuleBasedCollator. The isDataOwned flag is set to FALSE. michael@0: * @param collator new ucollator data michael@0: */ michael@0: void setUCollator(UCollator *collator); michael@0: michael@0: public: michael@0: #ifndef U_HIDE_INTERNAL_API michael@0: /** michael@0: * Get UCollator data struct. 
Used only by StringSearch & intltest. michael@0: * @return UCollator data struct michael@0: * @internal michael@0: */ michael@0: const UCollator * getUCollator(); michael@0: #endif /* U_HIDE_INTERNAL_API */ michael@0: michael@0: protected: michael@0: /** michael@0: * Used internally by registraton to define the requested and valid locales. michael@0: * @param requestedLocale the requsted locale michael@0: * @param validLocale the valid locale michael@0: * @param actualLocale the actual locale michael@0: * @internal michael@0: */ michael@0: virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); michael@0: michael@0: private: michael@0: // if not owned and not a write through alias, copy the ucollator michael@0: void checkOwned(void); michael@0: michael@0: // utility to init rule string used by checkOwned and construct michael@0: void setRuleStringFromCollator(); michael@0: michael@0: public: michael@0: /** Get the short definition string for a collator. This internal API harvests the collator's michael@0: * locale and the attribute set and produces a string that can be used for opening michael@0: * a collator with the same properties using the ucol_openFromShortString API. michael@0: * This string will be normalized. michael@0: * The structure and the syntax of the string is defined in the "Naming collators" michael@0: * section of the users guide: michael@0: * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators michael@0: * This function supports preflighting. michael@0: * michael@0: * This is internal, and intended to be used with delegate converters. michael@0: * michael@0: * @param locale a locale that will appear as a collators locale in the resulting michael@0: * short string definition. If NULL, the locale will be harvested michael@0: * from the collator. 
michael@0: * @param buffer space to hold the resulting string michael@0: * @param capacity capacity of the buffer michael@0: * @param status for returning errors. All the preflighting errors are featured michael@0: * @return length of the resulting string michael@0: * @see ucol_openFromShortString michael@0: * @see ucol_normalizeShortDefinitionString michael@0: * @see ucol_getShortDefinitionString michael@0: * @internal michael@0: */ michael@0: virtual int32_t internalGetShortDefinitionString(const char *locale, michael@0: char *buffer, michael@0: int32_t capacity, michael@0: UErrorCode &status) const; michael@0: }; michael@0: michael@0: // inline method implementation --------------------------------------------- michael@0: michael@0: inline void RuleBasedCollator::setUCollator(const Locale &locale, michael@0: UErrorCode &status) michael@0: { michael@0: setUCollator(locale.getName(), status); michael@0: } michael@0: michael@0: michael@0: inline void RuleBasedCollator::setUCollator(UCollator *collator) michael@0: { michael@0: michael@0: if (ucollator && dataIsOwned) { michael@0: ucol_close(ucollator); michael@0: } michael@0: ucollator = collator; michael@0: dataIsOwned = FALSE; michael@0: isWriteThroughAlias = TRUE; michael@0: setRuleStringFromCollator(); michael@0: } michael@0: michael@0: #ifndef U_HIDE_INTERNAL_API michael@0: inline const UCollator * RuleBasedCollator::getUCollator() michael@0: { michael@0: return ucollator; michael@0: } michael@0: #endif /* U_HIDE_INTERNAL_API */ michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */ michael@0: michael@0: #endif