1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/unicode/tblcoll.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,860 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* Copyright (C) 1996-2013, International Business Machines Corporation and 1.7 +* others. All Rights Reserved. 1.8 +****************************************************************************** 1.9 +*/ 1.10 + 1.11 +/** 1.12 + * \file 1.13 + * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator. 1.14 + */ 1.15 + 1.16 +/** 1.17 +* File tblcoll.h 1.18 +* 1.19 +* Created by: Helena Shih 1.20 +* 1.21 +* Modification History: 1.22 +* 1.23 +* Date Name Description 1.24 +* 2/5/97 aliu Added streamIn and streamOut methods. Added 1.25 +* constructor which reads RuleBasedCollator object from 1.26 +* a binary file. Added writeToFile method which streams 1.27 +* RuleBasedCollator out to a binary file. The streamIn 1.28 +* and streamOut methods use istream and ostream objects 1.29 +* in binary mode. 1.30 +* 2/12/97 aliu Modified to use TableCollationData sub-object to 1.31 +* hold invariant data. 1.32 +* 2/13/97 aliu Moved several methods into this class from Collation. 1.33 +* Added a private RuleBasedCollator(Locale&) constructor, 1.34 +* to be used by Collator::createDefault(). General 1.35 +* clean up. 1.36 +* 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy 1.37 +* constructor and getDynamicClassID. 1.38 +* 3/5/97 aliu Modified constructFromFile() to add parameter 1.39 +* specifying whether or not binary loading is to be 1.40 +* attempted. This is required for dynamic rule loading. 1.41 +* 05/07/97 helena Added memory allocation error detection. 1.42 +* 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to 1.43 +* use MergeCollation::getPattern. 1.44 +* 6/20/97 helena Java class name change. 1.45 +* 8/18/97 helena Added internal API documentation. 
1.46 +* 09/03/97 helena Added createCollationKeyValues(). 1.47 +* 02/10/98 damiba Added compare with "length" parameter 1.48 +* 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java 1.49 +* 04/23/99 stephen Removed EDecompositionMode, merged with 1.50 +* Normalizer::EMode 1.51 +* 06/14/99 stephen Removed kResourceBundleSuffix 1.52 +* 11/02/99 helena Collator performance enhancements. Eliminates the 1.53 +* UnicodeString construction and special case for NO_OP. 1.54 +* 11/23/99 srl More performance enhancements. Updates to NormalizerIterator 1.55 +* internal state management. 1.56 +* 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator 1.57 +* to implementation file. 1.58 +* 01/29/01 synwee Modified into a C++ wrapper which calls C API 1.59 +* (ucol.h) 1.60 +*/ 1.61 + 1.62 +#ifndef TBLCOLL_H 1.63 +#define TBLCOLL_H 1.64 + 1.65 +#include "unicode/utypes.h" 1.66 + 1.67 + 1.68 +#if !UCONFIG_NO_COLLATION 1.69 + 1.70 +#include "unicode/coll.h" 1.71 +#include "unicode/ucol.h" 1.72 +#include "unicode/sortkey.h" 1.73 +#include "unicode/normlzr.h" 1.74 + 1.75 +U_NAMESPACE_BEGIN 1.76 + 1.77 +/** 1.78 +* @stable ICU 2.0 1.79 +*/ 1.80 +class StringSearch; 1.81 +/** 1.82 +* @stable ICU 2.0 1.83 +*/ 1.84 +class CollationElementIterator; 1.85 + 1.86 +/** 1.87 + * The RuleBasedCollator class provides the simple implementation of 1.88 + * Collator, using data-driven tables. The user can create a customized 1.89 + * table-based collation. 1.90 + * <P> 1.91 + * <em>Important: </em>The ICU collation service has been reimplemented 1.92 + * in order to achieve better performance and UCA compliance. 1.93 + * For details, see the 1.94 + * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> 1.95 + * collation design document</a>. 1.96 + * <p> 1.97 + * RuleBasedCollator is a thin C++ wrapper over the C implementation. 
1.98 + * <p> 1.99 + * For more information about the collation service see 1.100 + * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>. 1.101 + * <p> 1.102 + * Collation service provides correct sorting orders for most locales supported in ICU. 1.103 + * If specific data for a locale is not available, the orders eventually falls back 1.104 + * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 1.105 + * <p> 1.106 + * Sort ordering may be customized by providing your own set of rules. For more on 1.107 + * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html"> 1.108 + * Collation customization</a> section of the users guide. 1.109 + * <p> 1.110 + * Note, RuleBasedCollator is not to be subclassed. 1.111 + * @see Collator 1.112 + * @version 2.0 11/15/2001 1.113 + */ 1.114 +class U_I18N_API RuleBasedCollator : public Collator 1.115 +{ 1.116 +public: 1.117 + 1.118 + // constructor ------------------------------------------------------------- 1.119 + 1.120 + /** 1.121 + * RuleBasedCollator constructor. This takes the table rules and builds a 1.122 + * collation table out of them. Please see RuleBasedCollator class 1.123 + * description for more details on the collation rule syntax. 1.124 + * @param rules the collation rules to build the collation table from. 1.125 + * @param status reporting a success or an error. 1.126 + * @see Locale 1.127 + * @stable ICU 2.0 1.128 + */ 1.129 + RuleBasedCollator(const UnicodeString& rules, UErrorCode& status); 1.130 + 1.131 + /** 1.132 + * RuleBasedCollator constructor. This takes the table rules and builds a 1.133 + * collation table out of them. Please see RuleBasedCollator class 1.134 + * description for more details on the collation rule syntax. 1.135 + * @param rules the collation rules to build the collation table from. 
1.136 + * @param collationStrength default strength for comparison 1.137 + * @param status reporting a success or an error. 1.138 + * @see Locale 1.139 + * @stable ICU 2.0 1.140 + */ 1.141 + RuleBasedCollator(const UnicodeString& rules, 1.142 + ECollationStrength collationStrength, 1.143 + UErrorCode& status); 1.144 + 1.145 + /** 1.146 + * RuleBasedCollator constructor. This takes the table rules and builds a 1.147 + * collation table out of them. Please see RuleBasedCollator class 1.148 + * description for more details on the collation rule syntax. 1.149 + * @param rules the collation rules to build the collation table from. 1.150 + * @param decompositionMode the normalisation mode 1.151 + * @param status reporting a success or an error. 1.152 + * @see Locale 1.153 + * @stable ICU 2.0 1.154 + */ 1.155 + RuleBasedCollator(const UnicodeString& rules, 1.156 + UColAttributeValue decompositionMode, 1.157 + UErrorCode& status); 1.158 + 1.159 + /** 1.160 + * RuleBasedCollator constructor. This takes the table rules and builds a 1.161 + * collation table out of them. Please see RuleBasedCollator class 1.162 + * description for more details on the collation rule syntax. 1.163 + * @param rules the collation rules to build the collation table from. 1.164 + * @param collationStrength default strength for comparison 1.165 + * @param decompositionMode the normalisation mode 1.166 + * @param status reporting a success or an error. 1.167 + * @see Locale 1.168 + * @stable ICU 2.0 1.169 + */ 1.170 + RuleBasedCollator(const UnicodeString& rules, 1.171 + ECollationStrength collationStrength, 1.172 + UColAttributeValue decompositionMode, 1.173 + UErrorCode& status); 1.174 + 1.175 + /** 1.176 + * Copy constructor. 
1.177 + * @param other the RuleBasedCollator object to be copied 1.178 + * @see Locale 1.179 + * @stable ICU 2.0 1.180 + */ 1.181 + RuleBasedCollator(const RuleBasedCollator& other); 1.182 + 1.183 + 1.184 + /** Opens a collator from a collator binary image created using 1.185 + * cloneBinary. Binary image used in instantiation of the 1.186 + * collator remains owned by the user and should stay around for 1.187 + * the lifetime of the collator. The API also takes a base collator 1.188 + * which usually should be UCA. 1.189 + * @param bin binary image owned by the user and required through the 1.190 + * lifetime of the collator 1.191 + * @param length size of the image. If negative, the API will try to 1.192 + * figure out the length of the image 1.193 + * @param base fallback collator, usually UCA. Base is required to be 1.194 + * present through the lifetime of the collator. Currently 1.195 + * it cannot be NULL. 1.196 + * @param status for catching errors 1.197 + * @return newly created collator 1.198 + * @see cloneBinary 1.199 + * @stable ICU 3.4 1.200 + */ 1.201 + RuleBasedCollator(const uint8_t *bin, int32_t length, 1.202 + const RuleBasedCollator *base, 1.203 + UErrorCode &status); 1.204 + // destructor -------------------------------------------------------------- 1.205 + 1.206 + /** 1.207 + * Destructor. 1.208 + * @stable ICU 2.0 1.209 + */ 1.210 + virtual ~RuleBasedCollator(); 1.211 + 1.212 + // public methods ---------------------------------------------------------- 1.213 + 1.214 + /** 1.215 + * Assignment operator. 1.216 + * @param other the RuleBasedCollator object to assign from. 1.217 + * @stable ICU 2.0 1.218 + */ 1.219 + RuleBasedCollator& operator=(const RuleBasedCollator& other); 1.220 + 1.221 + /** 1.222 + * Returns true if argument is the same as this object. 1.223 + * @param other Collator object to be compared. 1.224 + * @return true if the argument is the same as this object.
1.225 + * @stable ICU 2.0 1.226 + */ 1.227 + virtual UBool operator==(const Collator& other) const; 1.228 + 1.229 + /** 1.230 + * Makes a copy of this object. 1.231 + * @return a copy of this object, owned by the caller 1.232 + * @stable ICU 2.0 1.233 + */ 1.234 + virtual Collator* clone(void) const; 1.235 + 1.236 + /** 1.237 + * Creates a collation element iterator for the source string. The caller of 1.238 + * this method is responsible for the memory management of the returned 1.239 + * pointer. 1.240 + * @param source the string over which the CollationElementIterator will 1.241 + * iterate. 1.242 + * @return the collation element iterator of the source string using this as 1.243 + * the base Collator. 1.244 + * @stable ICU 2.2 1.245 + */ 1.246 + virtual CollationElementIterator* createCollationElementIterator( 1.247 + const UnicodeString& source) const; 1.248 + 1.249 + /** 1.250 + * Creates a collation element iterator for the source. The caller of this 1.251 + * method is responsible for the memory management of the returned pointer. 1.252 + * @param source the CharacterIterator which produces the characters over 1.253 + * which the CollationElementIterator will iterate. 1.254 + * @return the collation element iterator of the source using this as the 1.255 + * base Collator. 1.256 + * @stable ICU 2.2 1.257 + */ 1.258 + virtual CollationElementIterator* createCollationElementIterator( 1.259 + const CharacterIterator& source) const; 1.260 + 1.261 + // Make deprecated versions of Collator::compare() visible. 1.262 + using Collator::compare; 1.263 + 1.264 + /** 1.265 + * The comparison function compares the character data stored in two 1.266 + * different strings. Returns information about whether a string is less 1.267 + * than, greater than or equal to another string. 1.268 + * @param source the source string to be compared with. 1.269 + * @param target the string that is to be compared with the source string.
1.270 + * @param status possible error code 1.271 + * @return Returns an enum value. UCOL_GREATER if source is greater 1.272 + * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less 1.273 + * than target 1.274 + * @stable ICU 2.6 1.275 + **/ 1.276 + virtual UCollationResult compare(const UnicodeString& source, 1.277 + const UnicodeString& target, 1.278 + UErrorCode &status) const; 1.279 + 1.280 + /** 1.281 + * Does the same thing as compare but limits the comparison to a specified 1.282 + * length 1.283 + * @param source the source string to be compared with. 1.284 + * @param target the string that is to be compared with the source string. 1.285 + * @param length the length the comparison is limited to 1.286 + * @param status possible error code 1.287 + * @return Returns an enum value. UCOL_GREATER if source (up to the specified 1.288 + * length) is greater than target; UCOL_EQUAL if source (up to specified 1.289 + * length) is equal to target; UCOL_LESS if source (up to the specified 1.290 + * length) is less than target. 1.291 + * @stable ICU 2.6 1.292 + */ 1.293 + virtual UCollationResult compare(const UnicodeString& source, 1.294 + const UnicodeString& target, 1.295 + int32_t length, 1.296 + UErrorCode &status) const; 1.297 + 1.298 + /** 1.299 + * The comparison function compares the character data stored in two 1.300 + * different string arrays. Returns information about whether a string array 1.301 + * is less than, greater than or equal to another string array. 1.302 + * @param source the source string array to be compared with. 1.303 + * @param sourceLength the length of the source string array. If this value 1.304 + * is equal to -1, the string array is null-terminated. 1.305 + * @param target the string that is to be compared with the source string. 1.306 + * @param targetLength the length of the target string array. If this value 1.307 + * is equal to -1, the string array is null-terminated. 
1.308 + * @param status possible error code 1.309 + * @return Returns an enum value. UCOL_GREATER if source is greater 1.310 + * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less 1.311 + * than target 1.312 + * @stable ICU 2.6 1.313 + */ 1.314 + virtual UCollationResult compare(const UChar* source, int32_t sourceLength, 1.315 + const UChar* target, int32_t targetLength, 1.316 + UErrorCode &status) const; 1.317 + 1.318 + /** 1.319 + * Compares two strings using the Collator. 1.320 + * Returns whether the first one compares less than/equal to/greater than 1.321 + * the second one. 1.322 + * This version takes UCharIterator input. 1.323 + * @param sIter the first ("source") string iterator 1.324 + * @param tIter the second ("target") string iterator 1.325 + * @param status ICU status 1.326 + * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER 1.327 + * @stable ICU 4.2 1.328 + */ 1.329 + virtual UCollationResult compare(UCharIterator &sIter, 1.330 + UCharIterator &tIter, 1.331 + UErrorCode &status) const; 1.332 + 1.333 + /** 1.334 + * Transforms a specified region of the string into a series of characters 1.335 + * that can be compared with CollationKey.compare. Use a CollationKey when 1.336 + * you need to do repeated comparisions on the same string. For a single 1.337 + * comparison the compare method will be faster. 1.338 + * @param source the source string. 1.339 + * @param key the transformed key of the source string. 1.340 + * @param status the error code status. 1.341 + * @return the transformed key. 1.342 + * @see CollationKey 1.343 + * @stable ICU 2.0 1.344 + */ 1.345 + virtual CollationKey& getCollationKey(const UnicodeString& source, 1.346 + CollationKey& key, 1.347 + UErrorCode& status) const; 1.348 + 1.349 + /** 1.350 + * Transforms a specified region of the string into a series of characters 1.351 + * that can be compared with CollationKey.compare. 
Use a CollationKey when 1.352 + * you need to do repeated comparisions on the same string. For a single 1.353 + * comparison the compare method will be faster. 1.354 + * @param source the source string. 1.355 + * @param sourceLength the length of the source string. 1.356 + * @param key the transformed key of the source string. 1.357 + * @param status the error code status. 1.358 + * @return the transformed key. 1.359 + * @see CollationKey 1.360 + * @stable ICU 2.0 1.361 + */ 1.362 + virtual CollationKey& getCollationKey(const UChar *source, 1.363 + int32_t sourceLength, 1.364 + CollationKey& key, 1.365 + UErrorCode& status) const; 1.366 + 1.367 + /** 1.368 + * Generates the hash code for the rule-based collation object. 1.369 + * @return the hash code. 1.370 + * @stable ICU 2.0 1.371 + */ 1.372 + virtual int32_t hashCode(void) const; 1.373 + 1.374 + /** 1.375 + * Gets the locale of the Collator 1.376 + * @param type can be either requested, valid or actual locale. For more 1.377 + * information see the definition of ULocDataLocaleType in 1.378 + * uloc.h 1.379 + * @param status the error code status. 1.380 + * @return locale where the collation data lives. If the collator 1.381 + * was instantiated from rules, locale is empty. 1.382 + * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback 1.383 + */ 1.384 + virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; 1.385 + 1.386 + /** 1.387 + * Gets the tailoring rules for this collator. 1.388 + * @return the collation tailoring from which this collator was created 1.389 + * @stable ICU 2.0 1.390 + */ 1.391 + const UnicodeString& getRules(void) const; 1.392 + 1.393 + /** 1.394 + * Gets the version information for a Collator. 
1.395 + * @param info the version # information, the result will be filled in 1.396 + * @stable ICU 2.0 1.397 + */ 1.398 + virtual void getVersion(UVersionInfo info) const; 1.399 + 1.400 +#ifndef U_HIDE_DEPRECATED_API 1.401 + /** 1.402 + * Returns the maximum length of any expansion sequences that end with the 1.403 + * specified comparison order. 1.404 + * 1.405 + * This is specific to the kind of collation element values and sequences 1.406 + * returned by the CollationElementIterator. 1.407 + * Call CollationElementIterator::getMaxExpansion() instead. 1.408 + * 1.409 + * @param order a collation order returned by CollationElementIterator::previous 1.410 + * or CollationElementIterator::next. 1.411 + * @return maximum size of the expansion sequences ending with the collation 1.412 + * element, or 1 if the collation element does not occur at the end of 1.413 + * any expansion sequence 1.414 + * @see CollationElementIterator#getMaxExpansion 1.415 + * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead. 1.416 + */ 1.417 + int32_t getMaxExpansion(int32_t order) const; 1.418 +#endif /* U_HIDE_DEPRECATED_API */ 1.419 + 1.420 + /** 1.421 + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This 1.422 + * method is to implement a simple version of RTTI, since not all C++ 1.423 + * compilers support genuine RTTI. Polymorphic operator==() and clone() 1.424 + * methods call this method. 1.425 + * @return The class ID for this object. All objects of a given class have 1.426 + * the same class ID. Objects of other classes have different class 1.427 + * IDs. 1.428 + * @stable ICU 2.0 1.429 + */ 1.430 + virtual UClassID getDynamicClassID(void) const; 1.431 + 1.432 + /** 1.433 + * Returns the class ID for this class. This is useful only for comparing to 1.434 + * a return value from getDynamicClassID(). 
For example: 1.435 + * <pre> 1.436 + * Base* polymorphic_pointer = createPolymorphicObject(); 1.437 + * if (polymorphic_pointer->getDynamicClassID() == 1.438 + * Derived::getStaticClassID()) ... 1.439 + * </pre> 1.440 + * @return The class ID for all objects of this class. 1.441 + * @stable ICU 2.0 1.442 + */ 1.443 + static UClassID U_EXPORT2 getStaticClassID(void); 1.444 + 1.445 +#ifndef U_HIDE_DEPRECATED_API 1.446 + /** 1.447 + * Do not use this method: The caller and the ICU library might use different heaps. 1.448 + * Use cloneBinary() instead which writes to caller-provided memory. 1.449 + * 1.450 + * Returns a binary format of this collator. 1.451 + * @param length Returns the length of the data, in bytes 1.452 + * @param status the error code status. 1.453 + * @return memory, owned by the caller, of size 'length' bytes. 1.454 + * @deprecated ICU 52. Use cloneBinary() instead. 1.455 + */ 1.456 + uint8_t *cloneRuleData(int32_t &length, UErrorCode &status); 1.457 +#endif /* U_HIDE_DEPRECATED_API */ 1.458 + 1.459 + /** Creates a binary image of a collator. This binary image can be stored and 1.460 + * later used to instantiate a collator using ucol_openBinary. 1.461 + * This API supports preflighting. 1.462 + * @param buffer a fill-in buffer to receive the binary image 1.463 + * @param capacity capacity of the destination buffer 1.464 + * @param status for catching errors 1.465 + * @return size of the image 1.466 + * @see ucol_openBinary 1.467 + * @stable ICU 3.4 1.468 + */ 1.469 + int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status); 1.470 + 1.471 + /** 1.472 + * Returns current rules. Delta defines whether full rules are returned or 1.473 + * just the tailoring. 1.474 + * 1.475 + * getRules(void) should normally be used instead. 1.476 + * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales 1.477 + * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 
1.478 + * @param buffer UnicodeString to store the result rules 1.479 + * @stable ICU 2.2 1.480 + * @see UCOL_FULL_RULES 1.481 + */ 1.482 + void getRules(UColRuleOption delta, UnicodeString &buffer); 1.483 + 1.484 + /** 1.485 + * Universal attribute setter 1.486 + * @param attr attribute type 1.487 + * @param value attribute value 1.488 + * @param status to indicate whether the operation went on smoothly or there were errors 1.489 + * @stable ICU 2.2 1.490 + */ 1.491 + virtual void setAttribute(UColAttribute attr, UColAttributeValue value, 1.492 + UErrorCode &status); 1.493 + 1.494 + /** 1.495 + * Universal attribute getter. 1.496 + * @param attr attribute type 1.497 + * @param status to indicate whether the operation went on smoothly or there were errors 1.498 + * @return attribute value 1.499 + * @stable ICU 2.2 1.500 + */ 1.501 + virtual UColAttributeValue getAttribute(UColAttribute attr, 1.502 + UErrorCode &status) const; 1.503 + 1.504 + /** 1.505 + * Sets the variable top to a collation element value of a string supplied. 1.506 + * @param varTop one or more (if contraction) UChars to which the variable top should be set 1.507 + * @param len length of variable top string. If -1 it is considered to be zero terminated. 1.508 + * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> 1.509 + * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br> 1.510 + * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes 1.511 + * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined 1.512 + * @stable ICU 2.0 1.513 + */ 1.514 + virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status); 1.515 + 1.516 + /** 1.517 + * Sets the variable top to a collation element value of a string supplied. 
1.518 + * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set 1.519 + * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> 1.520 + * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> 1.521 + * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes 1.522 + * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined 1.523 + * @stable ICU 2.0 1.524 + */ 1.525 + virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status); 1.526 + 1.527 + /** 1.528 + * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits. 1.529 + * Lower 16 bits are ignored. 1.530 + * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop 1.531 + * @param status error code (not changed by function) 1.532 + * @stable ICU 2.0 1.533 + */ 1.534 + virtual void setVariableTop(uint32_t varTop, UErrorCode &status); 1.535 + 1.536 + /** 1.537 + * Gets the variable top value of a Collator. 1.538 + * Lower 16 bits are undefined and should be ignored. 1.539 + * @param status error code (not changed by function). If error code is set, the return value is undefined. 1.540 + * @stable ICU 2.0 1.541 + */ 1.542 + virtual uint32_t getVariableTop(UErrorCode &status) const; 1.543 + 1.544 + /** 1.545 + * Get a UnicodeSet that contains all the characters and sequences tailored in 1.546 + * this collator. 1.547 + * @param status error code of the operation 1.548 + * @return a pointer to a UnicodeSet object containing all the 1.549 + * code points and sequences that may sort differently than 1.550 + * in the UCA.
The object must be disposed of by using delete 1.551 + * @stable ICU 2.4 1.552 + */ 1.553 + virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; 1.554 + 1.555 + /** 1.556 + * Get the sort key as an array of bytes from a UnicodeString. 1.557 + * @param source string to be processed. 1.558 + * @param result buffer to store result in. If NULL, number of bytes needed 1.559 + * will be returned. 1.560 + * @param resultLength length of the result buffer. If it is not large enough, the 1.561 + * buffer will be filled to capacity. 1.562 + * @return Number of bytes needed for storing the sort key 1.563 + * @stable ICU 2.0 1.564 + */ 1.565 + virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result, 1.566 + int32_t resultLength) const; 1.567 + 1.568 + /** 1.569 + * Get the sort key as an array of bytes from a UChar buffer. 1.570 + * @param source string to be processed. 1.571 + * @param sourceLength length of string to be processed. If -1, the string 1.572 + * is 0 terminated and length will be decided by the function. 1.573 + * @param result buffer to store result in. If NULL, number of bytes needed 1.574 + * will be returned. 1.575 + * @param resultLength length of the result buffer. If it is not large enough, the 1.576 + * buffer will be filled to capacity. 1.577 + * @return Number of bytes needed for storing the sort key 1.578 + * @stable ICU 2.2 1.579 + */ 1.580 + virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, 1.581 + uint8_t *result, int32_t resultLength) const; 1.582 + 1.583 + /** 1.584 + * Retrieves the reordering codes for this collator. 1.585 + * @param dest The array to fill with the script ordering. 1.586 + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 1.587 + * will only return the length of the result without writing any of the result string (pre-flighting). 1.588 + * @param status A reference to an error code value, which must not indicate 1.589 + * a failure before the function call.
1.590 + * @return The length of the script ordering array. 1.591 + * @see ucol_setReorderCodes 1.592 + * @see Collator#getEquivalentReorderCodes 1.593 + * @see Collator#setReorderCodes 1.594 + * @stable ICU 4.8 1.595 + */ 1.596 + virtual int32_t getReorderCodes(int32_t *dest, 1.597 + int32_t destCapacity, 1.598 + UErrorCode& status) const; 1.599 + 1.600 + /** 1.601 + * Sets the ordering of scripts for this collator. 1.602 + * @param reorderCodes An array of script codes in the new order. This can be NULL if the 1.603 + * length is also set to 0. An empty array will clear any reordering codes on the collator. 1.604 + * @param reorderCodesLength The length of reorderCodes. 1.605 + * @param status error code 1.606 + * @see Collator#getReorderCodes 1.607 + * @see Collator#getEquivalentReorderCodes 1.608 + * @stable ICU 4.8 1.609 + */ 1.610 + virtual void setReorderCodes(const int32_t* reorderCodes, 1.611 + int32_t reorderCodesLength, 1.612 + UErrorCode& status) ; 1.613 + 1.614 + /** 1.615 + * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder 1.616 + * codes will be grouped and must reorder together. 1.617 + * @param reorderCode The reorder code to determine equivalence for. 1.618 + * @param dest The array to fill with the script equivalence reordering codes. 1.619 + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the 1.620 + * function will only return the length of the result without writing any of the result 1.621 + * string (pre-flighting). 1.622 + * @param status A reference to an error code value, which must not indicate 1.623 + * a failure before the function call. 1.624 + * @return The length of the reordering code equivalence array.
     * @see ucol_setReorderCodes
     * @see Collator#getReorderCodes
     * @see Collator#setReorderCodes
     * @stable ICU 4.8
     */
    static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
                                int32_t* dest,
                                int32_t destCapacity,
                                UErrorCode& status);

private:

    // private static constants -----------------------------------------------

    enum {
        /* need look up in .commit() */
        CHARINDEX = 0x70000000,
        /* expand index follows */
        EXPANDCHARINDEX = 0x7E000000,
        /* contract index follows */
        CONTRACTCHARINDEX = 0x7F000000,
        /* unmapped character values */
        UNMAPPED = 0xFFFFFFFF,
        /* primary strength increment */
        PRIMARYORDERINCREMENT = 0x00010000,
        /* secondary strength increment */
        SECONDARYORDERINCREMENT = 0x00000100,
        /* tertiary strength increment */
        TERTIARYORDERINCREMENT = 0x00000001,
        /* mask off anything but primary order */
        PRIMARYORDERMASK = 0xffff0000,
        /* mask off anything but secondary order */
        SECONDARYORDERMASK = 0x0000ff00,
        /* mask off anything but tertiary order */
        TERTIARYORDERMASK = 0x000000ff,
        /* mask off ignorable char order */
        IGNORABLEMASK = 0x0000ffff,
        /* use only the primary difference */
        PRIMARYDIFFERENCEONLY = 0xffff0000,
        /* use only the primary and secondary difference */
        SECONDARYDIFFERENCEONLY = 0xffffff00,
        /* primary order shift */
        PRIMARYORDERSHIFT = 16,
        /* secondary order shift */
        SECONDARYORDERSHIFT = 8,
        /* starting value for collation elements */
        COLELEMENTSTART = 0x02020202,
        /* testing mask for primary low element */
        PRIMARYLOWZEROMASK = 0x00FF0000,
        /* resetting value for secondaries and tertiaries */
        RESETSECONDARYTERTIARY = 0x00000202,
        /* resetting value for tertiaries */
        RESETTERTIARY = 0x00000002,

        /* NOTE(review): undocumented in the original; presumably the value
           marking a primary-ignorable element -- confirm against the users. */
        PRIMIGNORABLE = 0x0202
    };

    // private data members ---------------------------------------------------

    /**
    * TRUE when this object is responsible for closing ucollator.
    * setUCollator(UCollator*) closes the current collator only while this
    * flag is set, and then resets the flag to FALSE.
    */
    UBool dataIsOwned;

    /**
    * Set to TRUE by setUCollator(UCollator*) when an externally supplied
    * UCollator is installed. checkOwned() copies the ucollator only when it
    * is neither owned nor a write-through alias.
    */
    UBool isWriteThroughAlias;

    /**
    * c struct for collation. All initialisation for it has to be done through
    * setUCollator().
    */
    UCollator *ucollator;

    /**
    * Rule UnicodeString
    */
    UnicodeString urulestring;

    // friend classes --------------------------------------------------------

    /**
    * Used to iterate over collation elements in a character source.
    */
    friend class CollationElementIterator;

    /**
    * Collator ONLY needs access to RuleBasedCollator(const Locale&,
    *                                                    UErrorCode&)
    */
    friend class Collator;

    /**
    * Searching over collation elements in a character source
    */
    friend class StringSearch;

    // private constructors --------------------------------------------------

    /**
     * Default constructor
     */
    RuleBasedCollator();

    /**
     * RuleBasedCollator constructor. This constructor takes a locale. The
     * only caller of this constructor should be Collator::createInstance(). If
     * createInstance() happens to know that the requested locale's collation is
     * implemented as a RuleBasedCollator, it can then call this constructor.
     * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
     * COLLATION TABLE. It does this by falling back to defaults.
     * @param desiredLocale locale used
     * @param status error code status
     */
    RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);

    /**
     * common constructor implementation
     *
     * @param rules the collation rules to build the collation table from.
     * @param collationStrength default strength for comparison
     * @param decompositionMode the normalisation mode
     * @param status reporting a success or an error.
     */
    void
    construct(const UnicodeString& rules,
              UColAttributeValue collationStrength,
              UColAttributeValue decompositionMode,
              UErrorCode& status);

    // private methods -------------------------------------------------------

    /**
    * Creates the c struct for ucollator. The inline implementation forwards
    * locale.getName() to the setUCollator(const char*, UErrorCode&) overload.
    * @param locale desired locale
    * @param status error status
    */
    void setUCollator(const Locale& locale, UErrorCode& status);

    /**
    * Creates the c struct for ucollator
    * @param locale desired locale name
    * @param status error status
    */
    void setUCollator(const char* locale, UErrorCode& status);

    /**
    * Installs an existing c struct as the ucollator. This is used internally
    * by StringSearch. Hence the responsibility of cleaning up the ucollator
    * is not taken by this RuleBasedCollator: the dataIsOwned flag is set to
    * FALSE and isWriteThroughAlias is set to TRUE.
    * @param collator new ucollator data
    */
    void setUCollator(UCollator *collator);

public:
#ifndef U_HIDE_INTERNAL_API
    /**
    * Get UCollator data struct. Used only by StringSearch & intltest.
    * @return UCollator data struct
    * @internal
    */
    const UCollator * getUCollator();
#endif  /* U_HIDE_INTERNAL_API */

protected:
   /**
    * Used internally by registration to define the requested and valid locales.
    * @param requestedLocale the requested locale
    * @param validLocale the valid locale
    * @param actualLocale the actual locale
    * @internal
    */
    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);

private:
    // if not owned and not a write through alias, copy the ucollator
    void checkOwned(void);

    // utility to init rule string used by checkOwned and construct
    void setRuleStringFromCollator();

public:
    /** Get the short definition string for a collator. This internal API harvests the collator's
     *  locale and the attribute set and produces a string that can be used for opening
     *  a collator with the same properties using the ucol_openFromShortString API.
     *  This string will be normalized.
     *  The structure and the syntax of the string is defined in the "Naming collators"
     *  section of the users guide:
     *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
     *  This function supports preflighting.
     *
     *  This is internal, and intended to be used with delegate converters.
     *
     *  @param locale a locale that will appear as a collator's locale in the resulting
     *                short string definition. If NULL, the locale will be harvested
     *                from the collator.
     *  @param buffer space to hold the resulting string
     *  @param capacity capacity of the buffer
     *  @param status for returning errors.
All the preflighting errors are featured 1.819 + * @return length of the resulting string 1.820 + * @see ucol_openFromShortString 1.821 + * @see ucol_normalizeShortDefinitionString 1.822 + * @see ucol_getShortDefinitionString 1.823 + * @internal 1.824 + */ 1.825 + virtual int32_t internalGetShortDefinitionString(const char *locale, 1.826 + char *buffer, 1.827 + int32_t capacity, 1.828 + UErrorCode &status) const; 1.829 +}; 1.830 + 1.831 +// inline method implementation --------------------------------------------- 1.832 + 1.833 +inline void RuleBasedCollator::setUCollator(const Locale &locale, 1.834 + UErrorCode &status) 1.835 +{ 1.836 + setUCollator(locale.getName(), status); 1.837 +} 1.838 + 1.839 + 1.840 +inline void RuleBasedCollator::setUCollator(UCollator *collator) 1.841 +{ 1.842 + 1.843 + if (ucollator && dataIsOwned) { 1.844 + ucol_close(ucollator); 1.845 + } 1.846 + ucollator = collator; 1.847 + dataIsOwned = FALSE; 1.848 + isWriteThroughAlias = TRUE; 1.849 + setRuleStringFromCollator(); 1.850 +} 1.851 + 1.852 +#ifndef U_HIDE_INTERNAL_API 1.853 +inline const UCollator * RuleBasedCollator::getUCollator() 1.854 +{ 1.855 + return ucollator; 1.856 +} 1.857 +#endif /* U_HIDE_INTERNAL_API */ 1.858 + 1.859 +U_NAMESPACE_END 1.860 + 1.861 +#endif /* #if !UCONFIG_NO_COLLATION */ 1.862 + 1.863 +#endif