intl/icu/source/i18n/unicode/coll.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 * Copyright (C) 1996-2012, International Business Machines *
michael@0 4 * Corporation and others. All Rights Reserved. *
michael@0 5 ******************************************************************************
michael@0 6 */
michael@0 7
michael@0 8 /**
michael@0 9 * \file
michael@0 10 * \brief C++ API: Collation Service.
michael@0 11 */
michael@0 12
michael@0 13 /**
michael@0 14 * File coll.h
michael@0 15 *
michael@0 16 * Created by: Helena Shih
michael@0 17 *
michael@0 18 * Modification History:
michael@0 19 *
michael@0 20 * Date Name Description
michael@0 21 * 02/5/97 aliu Modified createDefault to load collation data from
michael@0 22 * binary files when possible. Added related methods
michael@0 23 * createCollationFromFile, chopLocale, createPathName.
michael@0 24 * 02/11/97 aliu Added members addToCache, findInCache, and fgCache.
michael@0 25 * 02/12/97 aliu Modified to create objects from RuleBasedCollator cache.
michael@0 26 * Moved cache out of Collation class.
michael@0 27 * 02/13/97 aliu Moved several methods out of this class and into
michael@0 28 * RuleBasedCollator, with modifications. Modified
michael@0 29 * createDefault() to call new RuleBasedCollator(Locale&)
michael@0 30 * constructor. General clean up and documentation.
michael@0 31 * 02/20/97 helena Added clone, operator==, operator!=, operator=, copy
michael@0 32 * constructor and getDynamicClassID.
michael@0 33 * 03/25/97 helena Updated with platform independent data types.
michael@0 34 * 05/06/97 helena Added memory allocation error detection.
michael@0 35 * 06/20/97 helena Java class name change.
michael@0 36 * 09/03/97 helena Added createCollationKeyValues().
michael@0 37 * 02/10/98 damiba Added compare() with length as parameter.
michael@0 38 * 04/23/99 stephen Removed EDecompositionMode, merged with
michael@0 39 * Normalizer::EMode.
michael@0 40 * 11/02/99 helena Collator performance enhancements. Eliminates the
michael@0 41 * UnicodeString construction and special case for NO_OP.
michael@0 42 * 11/23/99 srl More performance enhancements. Inlining of
michael@0 43 * critical accessors.
michael@0 44 * 05/15/00 helena Added version information API.
michael@0 45 * 01/29/01 synwee Modified into a C++ wrapper which calls C apis
michael@0 46 * (ucoll.h).
michael@0 47 */
michael@0 48
michael@0 49 #ifndef COLL_H
michael@0 50 #define COLL_H
michael@0 51
michael@0 52 #include "unicode/utypes.h"
michael@0 53
michael@0 54 #if !UCONFIG_NO_COLLATION
michael@0 55
michael@0 56 #include "unicode/uobject.h"
michael@0 57 #include "unicode/ucol.h"
michael@0 58 #include "unicode/normlzr.h"
michael@0 59 #include "unicode/locid.h"
michael@0 60 #include "unicode/uniset.h"
michael@0 61 #include "unicode/umisc.h"
michael@0 62 #include "unicode/uiter.h"
michael@0 63 #include "unicode/stringpiece.h"
michael@0 64
michael@0 65 U_NAMESPACE_BEGIN
michael@0 66
michael@0 67 class StringEnumeration;
michael@0 68
michael@0 69 #if !UCONFIG_NO_SERVICE
michael@0 70 /**
michael@0 71 * @stable ICU 2.6
michael@0 72 */
michael@0 73 class CollatorFactory;
michael@0 74 #endif
michael@0 75
michael@0 76 /**
michael@0 77 * @stable ICU 2.0
michael@0 78 */
michael@0 79 class CollationKey;
michael@0 80
michael@0 81 /**
michael@0 82 * The <code>Collator</code> class performs locale-sensitive string
michael@0 83 * comparison.<br>
michael@0 84 * You use this class to build searching and sorting routines for natural
michael@0 85 * language text.<br>
michael@0 86 * <em>Important: </em>The ICU collation service has been reimplemented
michael@0 87 * in order to achieve better performance and UCA compliance.
michael@0 88 * For details, see the
michael@0 89 * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
michael@0 90 * collation design document</a>.
michael@0 91 * <p>
michael@0 92 * <code>Collator</code> is an abstract base class. Subclasses implement
michael@0 93 * specific collation strategies. One subclass,
michael@0 94 * <code>RuleBasedCollator</code>, is currently provided and is applicable
michael@0 95 * to a wide set of languages. Other subclasses may be created to handle more
michael@0 96 * specialized needs.
michael@0 97 * <p>
michael@0 98 * Like other locale-sensitive classes, you can use the static factory method,
michael@0 99 * <code>createInstance</code>, to obtain the appropriate
michael@0 100 * <code>Collator</code> object for a given locale. You will only need to
michael@0 101 * look at the subclasses of <code>Collator</code> if you need to
michael@0 102 * understand the details of a particular collation strategy or if you need to
michael@0 103 * modify that strategy.
michael@0 104 * <p>
michael@0 105 * The following example shows how to compare two strings using the
michael@0 106 * <code>Collator</code> for the default locale.
michael@0 107 * \htmlonly<blockquote>\endhtmlonly
michael@0 108 * <pre>
michael@0 109 * \code
michael@0 110 * // Compare two strings in the default locale
michael@0 111 * UErrorCode success = U_ZERO_ERROR;
michael@0 112 * Collator* myCollator = Collator::createInstance(success);
michael@0 113 * if (myCollator->compare("abc", "ABC") < 0)
michael@0 114 * cout << "abc is less than ABC" << endl;
michael@0 115 * else
michael@0 116 * cout << "abc is greater than or equal to ABC" << endl;
michael@0 117 * \endcode
michael@0 118 * </pre>
michael@0 119 * \htmlonly</blockquote>\endhtmlonly
michael@0 120 * <p>
michael@0 121 * You can set a <code>Collator</code>'s <em>strength</em> property to
michael@0 122 * determine the level of difference considered significant in comparisons.
michael@0 123 * Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>,
michael@0 124 * <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>.
michael@0 125 * The exact assignment of strengths to language features is locale dependent.
michael@0 126 * For example, in Czech, "e" and "f" are considered primary differences,
michael@0 127 * while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary
michael@0 128 * differences and "e" and "e" are identical. The following shows how both case
michael@0 129 * and accents could be ignored for US English.
michael@0 130 * \htmlonly<blockquote>\endhtmlonly
michael@0 131 * <pre>
michael@0 132 * \code
michael@0 133 * //Get the Collator for US English and set its strength to PRIMARY
michael@0 134 * UErrorCode success = U_ZERO_ERROR;
michael@0 135 * Collator* usCollator = Collator::createInstance(Locale::US, success);
michael@0 136 * usCollator->setStrength(Collator::PRIMARY);
michael@0 137 * if (usCollator->compare("abc", "ABC") == 0)
michael@0 138 * cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl;
michael@0 139 * \endcode
michael@0 140 * </pre>
michael@0 141 * \htmlonly</blockquote>\endhtmlonly
michael@0 142 * <p>
michael@0 143 * For comparing strings exactly once, the <code>compare</code> method
michael@0 144 * provides the best performance. When sorting a list of strings however, it
michael@0 145 * is generally necessary to compare each string multiple times. In this case,
michael@0 146 * sort keys provide better performance. The <code>getSortKey</code> methods
michael@0 147 * convert a string to a series of bytes that can be compared bitwise against
michael@0 148 * other sort keys using <code>strcmp()</code>. Sort keys are written as
michael@0 149 * zero-terminated byte strings. They consist of several substrings, one for
michael@0 150 * each collation strength level, that are delimited by 0x01 bytes.
michael@0 151 * If the string code points are appended for UCOL_IDENTICAL, then they are
michael@0 152 * processed for correct code point order comparison and may contain 0x01
michael@0 153 * bytes but not zero bytes.
michael@0 154 * </p>
michael@0 155 * <p>
michael@0 156 * An older set of APIs returns a <code>CollationKey</code> object that wraps
michael@0 157 * the sort key bytes instead of returning the bytes themselves.
michael@0 158 * Its use is deprecated, but it is still available for compatibility with
michael@0 159 * Java.
michael@0 160 * </p>
michael@0 161 * <p>
michael@0 162 * <strong>Note:</strong> <code>Collator</code>s with different Locale,
michael@0 163 * and CollationStrength settings will return different sort
michael@0 164 * orders for the same set of strings. Locales have specific collation rules,
michael@0 165 * and the way in which secondary and tertiary differences are taken into
michael@0 166 * account, for example, will result in a different sorting order for same
michael@0 167 * strings.
michael@0 168 * </p>
michael@0 169 * @see RuleBasedCollator
michael@0 170 * @see CollationKey
michael@0 171 * @see CollationElementIterator
michael@0 172 * @see Locale
michael@0 173 * @see Normalizer
michael@0 174 * @version 2.0 11/15/01
michael@0 175 */
michael@0 176
michael@0 177 class U_I18N_API Collator : public UObject {
michael@0 178 public:
michael@0 179
michael@0 180 // Collator public enums -----------------------------------------------
michael@0 181
michael@0 182 /**
michael@0 183 * Base letter represents a primary difference. Set comparison level to
michael@0 184 * PRIMARY to ignore secondary and tertiary differences.<br>
michael@0 185 * Use this to set the strength of a Collator object.<br>
michael@0 186 * Example of primary difference, "abc" &lt; "abd"
michael@0 187 *
michael@0 188 * Diacritical differences on the same base letter represent a secondary
michael@0 189 * difference. Set comparison level to SECONDARY to ignore tertiary
michael@0 190 * differences. Use this to set the strength of a Collator object.<br>
michael@0 191 * Example of secondary difference, "&auml;" &gt;&gt; "a".
michael@0 192 *
michael@0 193 * Uppercase and lowercase versions of the same character represent a
michael@0 194 * tertiary difference. Set comparison level to TERTIARY to include all
michael@0 195 * comparison differences. Use this to set the strength of a Collator
michael@0 196 * object.<br>
michael@0 197 * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
michael@0 198 *
michael@0 199 * Two characters are considered "identical" when they have the same Unicode
michael@0 200 * spellings.<br>
michael@0 201 * For example, "&auml;" == "&auml;".
michael@0 202 *
michael@0 203 * UCollationStrength is also used to determine the strength of sort keys
michael@0 204 * generated from Collator objects.
michael@0 205 * @stable ICU 2.0
michael@0 206 */
michael@0 207 enum ECollationStrength
michael@0 208 {
michael@0 209 PRIMARY = UCOL_PRIMARY, // 0
michael@0 210 SECONDARY = UCOL_SECONDARY, // 1
michael@0 211 TERTIARY = UCOL_TERTIARY, // 2
michael@0 212 QUATERNARY = UCOL_QUATERNARY, // 3
michael@0 213 IDENTICAL = UCOL_IDENTICAL // 15
michael@0 214 };
michael@0 215
michael@0 216 /**
michael@0 217 * LESS is returned if source string is compared to be less than target
michael@0 218 * string in the compare() method.
michael@0 219 * EQUAL is returned if source string is compared to be equal to target
michael@0 220 * string in the compare() method.
michael@0 221 * GREATER is returned if source string is compared to be greater than
michael@0 222 * target string in the compare() method.
michael@0 223 * @see Collator#compare
michael@0 224 * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h
michael@0 225 */
michael@0 226 enum EComparisonResult
michael@0 227 {
michael@0 228 LESS = UCOL_LESS, // -1
michael@0 229 EQUAL = UCOL_EQUAL, // 0
michael@0 230 GREATER = UCOL_GREATER // 1
michael@0 231 };
michael@0 232
michael@0 233 // Collator public destructor -----------------------------------------
michael@0 234
michael@0 235 /**
michael@0 236 * Destructor
michael@0 237 * @stable ICU 2.0
michael@0 238 */
michael@0 239 virtual ~Collator();
michael@0 240
michael@0 241 // Collator public methods --------------------------------------------
michael@0 242
michael@0 243 /**
michael@0 244 * Returns TRUE if "other" is the same as "this".
michael@0 245 *
michael@0 246 * The base class implementation returns TRUE if "other" has the same type/class as "this":
michael@0 247 * <code>typeid(*this) == typeid(other)</code>.
michael@0 248 *
michael@0 249 * Subclass implementations should do something like the following:
michael@0 250 * <pre>
michael@0 251 * if (this == &other) { return TRUE; }
michael@0 252 * if (!Collator::operator==(other)) { return FALSE; } // not the same class
michael@0 253 *
michael@0 254 * const MyCollator &o = (const MyCollator&)other;
michael@0 255 * (compare this vs. o's subclass fields)
michael@0 256 * </pre>
michael@0 257 * @param other Collator object to be compared
michael@0 258 * @return TRUE if other is the same as this.
michael@0 259 * @stable ICU 2.0
michael@0 260 */
michael@0 261 virtual UBool operator==(const Collator& other) const;
michael@0 262
michael@0 263 /**
michael@0 264 * Returns TRUE if "other" is not the same as "this".
michael@0 265 * Calls ! operator==(const Collator&) const which works for all subclasses.
michael@0 266 * @param other Collator object to be compared
michael@0 267 * @return TRUE if other is not the same as this.
michael@0 268 * @stable ICU 2.0
michael@0 269 */
michael@0 270 virtual UBool operator!=(const Collator& other) const;
michael@0 271
michael@0 272 /**
michael@0 273 * Makes a copy of this object.
michael@0 274 * @return a copy of this object, owned by the caller
michael@0 275 * @stable ICU 2.0
michael@0 276 */
michael@0 277 virtual Collator* clone(void) const = 0;
michael@0 278
michael@0 279 /**
michael@0 280 * Creates the Collator object for the current default locale.
michael@0 281 * The default locale is determined by Locale::getDefault.
michael@0 282 * The UErrorCode& err parameter is used to return status information to the user.
michael@0 283 * To check whether the construction succeeded or not, you should check the
michael@0 284 * value of U_SUCCESS(err). If you wish more detailed information, you can
michael@0 285 * check for informational error results which still indicate success.
michael@0 286 * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
michael@0 287 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
michael@0 288 * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
michael@0 289 * used; neither the requested locale nor any of its fall back locales
michael@0 290 * could be found.
michael@0 291 * The caller owns the returned object and is responsible for deleting it.
michael@0 292 *
michael@0 293 * @param err the error code status.
michael@0 294 * @return the collation object of the default locale (for example, en_US).
michael@0 295 * @see Locale#getDefault
michael@0 296 * @stable ICU 2.0
michael@0 297 */
michael@0 298 static Collator* U_EXPORT2 createInstance(UErrorCode& err);
michael@0 299
michael@0 300 /**
michael@0 301 * Gets the table-based collation object for the desired locale. The
michael@0 302 * resource of the desired locale will be loaded by ResourceLoader.
michael@0 303 * Locale::ENGLISH is the base collation table and all other languages are
michael@0 304 * built on top of it with additional language-specific modifications.
michael@0 305 * The UErrorCode& err parameter is used to return status information to the user.
michael@0 306 * To check whether the construction succeeded or not, you should check
michael@0 307 * the value of U_SUCCESS(err). If you wish more detailed information, you
michael@0 308 * can check for informational error results which still indicate success.
michael@0 309 * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
michael@0 310 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
michael@0 311 * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
michael@0 312 * used; neither the requested locale nor any of its fall back locales
michael@0 313 * could be found.
michael@0 314 * The caller owns the returned object and is responsible for deleting it.
michael@0 315 * @param loc The locale ID for which to open a collator.
michael@0 316 * @param err the error code status.
michael@0 317 * @return the created table-based collation object based on the desired
michael@0 318 * locale.
michael@0 319 * @see Locale
michael@0 320 * @see ResourceLoader
michael@0 321 * @stable ICU 2.2
michael@0 322 */
michael@0 323 static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
michael@0 324
michael@0 325 #ifdef U_USE_COLLATION_OBSOLETE_2_6
michael@0 326 /**
michael@0 327 * Create a Collator with a specific version.
michael@0 328 * This is the same as createInstance(loc, err) except that getVersion() of
michael@0 329 * the returned object is guaranteed to be the same as the version
michael@0 330 * parameter.
michael@0 331 * This is designed to be used to open the same collator for a given
michael@0 332 * locale even when ICU is updated.
michael@0 333 * The same locale and version guarantees the same sort keys and
michael@0 334 * comparison results.
michael@0 335 * <p>
michael@0 336 * Note: this API will be removed in a future release. Use
michael@0 337 * <tt>createInstance(const Locale&, UErrorCode&)</tt> instead.</p>
michael@0 338 *
michael@0 339 * @param loc The locale ID for which to open a collator.
michael@0 340 * @param version The requested collator version.
michael@0 341 * @param err A reference to a UErrorCode,
michael@0 342 * must not indicate a failure before calling this function.
michael@0 343 * @return A pointer to a Collator, or 0 if an error occurred
michael@0 344 * or a collator with the requested version is not available.
michael@0 345 *
michael@0 346 * @see getVersion
michael@0 347 * @obsolete ICU 2.6
michael@0 348 */
michael@0 349 static Collator *createInstance(const Locale &loc, UVersionInfo version, UErrorCode &err);
michael@0 350 #endif
michael@0 351
michael@0 352 /**
michael@0 353 * The comparison function compares the character data stored in two
michael@0 354 * different strings. Returns information about whether a string is less
michael@0 355 * than, greater than or equal to another string.
michael@0 356 * @param source the source string to be compared with.
michael@0 357 * @param target the string that is to be compared with the source string.
michael@0 358 * @return Returns an EComparisonResult value. GREATER if source is greater
michael@0 359 * than target; EQUAL if source is equal to target; LESS if source is less
michael@0 360 * than target
michael@0 361 * @deprecated ICU 2.6 use the overload with UErrorCode &
michael@0 362 */
michael@0 363 virtual EComparisonResult compare(const UnicodeString& source,
michael@0 364 const UnicodeString& target) const;
michael@0 365
michael@0 366 /**
michael@0 367 * The comparison function compares the character data stored in two
michael@0 368 * different strings. Returns information about whether a string is less
michael@0 369 * than, greater than or equal to another string.
michael@0 370 * @param source the source string to be compared with.
michael@0 371 * @param target the string that is to be compared with the source string.
michael@0 372 * @param status possible error code
michael@0 373 * @return Returns an enum value. UCOL_GREATER if source is greater
michael@0 374 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
michael@0 375 * than target
michael@0 376 * @stable ICU 2.6
michael@0 377 */
michael@0 378 virtual UCollationResult compare(const UnicodeString& source,
michael@0 379 const UnicodeString& target,
michael@0 380 UErrorCode &status) const = 0;
michael@0 381
michael@0 382 /**
michael@0 383 * Does the same thing as compare but limits the comparison to a specified
michael@0 384 * length
michael@0 385 * @param source the source string to be compared with.
michael@0 386 * @param target the string that is to be compared with the source string.
michael@0 387 * @param length the length the comparison is limited to
michael@0 388 * @return Returns an EComparisonResult value. GREATER if source (up to the specified
michael@0 389 * length) is greater than target; EQUAL if source (up to specified
michael@0 390 * length) is equal to target; LESS if source (up to the specified
michael@0 391 * length) is less than target.
michael@0 392 * @deprecated ICU 2.6 use the overload with UErrorCode &
michael@0 393 */
michael@0 394 virtual EComparisonResult compare(const UnicodeString& source,
michael@0 395 const UnicodeString& target,
michael@0 396 int32_t length) const;
michael@0 397
michael@0 398 /**
michael@0 399 * Does the same thing as compare but limits the comparison to a specified
michael@0 400 * length
michael@0 401 * @param source the source string to be compared with.
michael@0 402 * @param target the string that is to be compared with the source string.
michael@0 403 * @param length the length the comparison is limited to
michael@0 404 * @param status possible error code
michael@0 405 * @return Returns an enum value. UCOL_GREATER if source (up to the specified
michael@0 406 * length) is greater than target; UCOL_EQUAL if source (up to specified
michael@0 407 * length) is equal to target; UCOL_LESS if source (up to the specified
michael@0 408 * length) is less than target.
michael@0 409 * @stable ICU 2.6
michael@0 410 */
michael@0 411 virtual UCollationResult compare(const UnicodeString& source,
michael@0 412 const UnicodeString& target,
michael@0 413 int32_t length,
michael@0 414 UErrorCode &status) const = 0;
michael@0 415
michael@0 416 /**
michael@0 417 * The comparison function compares the character data stored in two
michael@0 418 * different string arrays. Returns information about whether a string array
michael@0 419 * is less than, greater than or equal to another string array.
michael@0 420 * <p>Example of use:
michael@0 421 * <pre>
michael@0 422 * . UChar ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC"
michael@0 423 * . UChar abc[] = {0x61, 0x62, 0x63, 0}; // = "abc"
michael@0 424 * . UErrorCode status = U_ZERO_ERROR;
michael@0 425 * . Collator *myCollation =
michael@0 426 * . Collator::createInstance(Locale::US, status);
michael@0 427 * . if (U_FAILURE(status)) return;
michael@0 428 * . myCollation->setStrength(Collator::PRIMARY);
michael@0 429 * . // result would be Collator::EQUAL ("abc" == "ABC")
michael@0 430 * . // (no primary difference between "abc" and "ABC")
michael@0 431 * . Collator::EComparisonResult result =
michael@0 432 * . myCollation->compare(abc, 3, ABC, 3);
michael@0 433 * . myCollation->setStrength(Collator::TERTIARY);
michael@0 434 * . // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
michael@0 435 * . // (with tertiary difference between "abc" and "ABC")
michael@0 436 * . result = myCollation->compare(abc, 3, ABC, 3);
michael@0 437 * </pre>
michael@0 438 * @param source the source string array to be compared with.
michael@0 439 * @param sourceLength the length of the source string array. If this value
michael@0 440 * is equal to -1, the string array is null-terminated.
michael@0 441 * @param target the string array that is to be compared with the source string array.
michael@0 442 * @param targetLength the length of the target string array. If this value
michael@0 443 * is equal to -1, the string array is null-terminated.
michael@0 444 * @return Returns an EComparisonResult value. GREATER if source is greater than target;
michael@0 445 * EQUAL if source is equal to target; LESS if source is less than
michael@0 446 * target
michael@0 447 * @deprecated ICU 2.6 use the overload with UErrorCode &
michael@0 448 */
michael@0 449 virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
michael@0 450 const UChar* target, int32_t targetLength)
michael@0 451 const;
michael@0 452
michael@0 453 /**
michael@0 454 * The comparison function compares the character data stored in two
michael@0 455 * different string arrays. Returns information about whether a string array
michael@0 456 * is less than, greater than or equal to another string array.
michael@0 457 * @param source the source string array to be compared with.
michael@0 458 * @param sourceLength the length of the source string array. If this value
michael@0 459 * is equal to -1, the string array is null-terminated.
michael@0 460 * @param target the string array that is to be compared with the source string array.
michael@0 461 * @param targetLength the length of the target string array. If this value
michael@0 462 * is equal to -1, the string array is null-terminated.
michael@0 463 * @param status possible error code
michael@0 464 * @return Returns an enum value. UCOL_GREATER if source is greater
michael@0 465 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
michael@0 466 * than target
michael@0 467 * @stable ICU 2.6
michael@0 468 */
michael@0 469 virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
michael@0 470 const UChar* target, int32_t targetLength,
michael@0 471 UErrorCode &status) const = 0;
michael@0 472
michael@0 473 /**
michael@0 474 * Compares two strings using the Collator.
michael@0 475 * Returns whether the first one compares less than/equal to/greater than
michael@0 476 * the second one.
michael@0 477 * This version takes UCharIterator input.
michael@0 478 * @param sIter the first ("source") string iterator
michael@0 479 * @param tIter the second ("target") string iterator
michael@0 480 * @param status ICU status
michael@0 481 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
michael@0 482 * @stable ICU 4.2
michael@0 483 */
michael@0 484 virtual UCollationResult compare(UCharIterator &sIter,
michael@0 485 UCharIterator &tIter,
michael@0 486 UErrorCode &status) const;
michael@0 487
michael@0 488 /**
michael@0 489 * Compares two UTF-8 strings using the Collator.
michael@0 490 * Returns whether the first one compares less than/equal to/greater than
michael@0 491 * the second one.
michael@0 492 * This version takes UTF-8 input.
michael@0 493 * Note that a StringPiece can be implicitly constructed
michael@0 494 * from a std::string or a NUL-terminated const char * string.
michael@0 495 * @param source the first UTF-8 string
michael@0 496 * @param target the second UTF-8 string
michael@0 497 * @param status ICU status
michael@0 498 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
michael@0 499 * @stable ICU 4.2
michael@0 500 */
michael@0 501 virtual UCollationResult compareUTF8(const StringPiece &source,
michael@0 502 const StringPiece &target,
michael@0 503 UErrorCode &status) const;
michael@0 504
michael@0 505 /**
michael@0 506 * Transforms the string into a series of characters that can be compared
michael@0 507 * with CollationKey::compareTo. It is not possible to restore the original
michael@0 508 * string from the chars in the sort key. The generated sort key handles
michael@0 509 * only a limited number of ignorable characters.
michael@0 510 * <p>Use CollationKey::equals or CollationKey::compare to compare the
michael@0 511 * generated sort keys.
michael@0 512 * If the source string is null, a null collation key will be returned.
michael@0 513 * @param source the source string to be transformed into a sort key.
michael@0 514 * @param key the collation key to be filled in
michael@0 515 * @param status the error code status.
michael@0 516 * @return the collation key of the string based on the collation rules.
michael@0 517 * @see CollationKey#compare
michael@0 518 * @stable ICU 2.0
michael@0 519 */
michael@0 520 virtual CollationKey& getCollationKey(const UnicodeString& source,
michael@0 521 CollationKey& key,
michael@0 522 UErrorCode& status) const = 0;
michael@0 523
michael@0 524 /**
michael@0 525 * Transforms the string into a series of characters that can be compared
michael@0 526 * with CollationKey::compareTo. It is not possible to restore the original
michael@0 527 * string from the chars in the sort key. The generated sort key handles
michael@0 528 * only a limited number of ignorable characters.
michael@0 529 * <p>Use CollationKey::equals or CollationKey::compare to compare the
michael@0 530 * generated sort keys.
michael@0 531 * <p>If the source string is null, a null collation key will be returned.
michael@0 532 * @param source the source string to be transformed into a sort key.
michael@0 533 * @param sourceLength length of the source string
michael@0 534 * @param key the collation key to be filled in
michael@0 535 * @param status the error code status.
michael@0 536 * @return the collation key of the string based on the collation rules.
michael@0 537 * @see CollationKey#compare
michael@0 538 * @stable ICU 2.0
michael@0 539 */
michael@0 540 virtual CollationKey& getCollationKey(const UChar*source,
michael@0 541 int32_t sourceLength,
michael@0 542 CollationKey& key,
michael@0 543 UErrorCode& status) const = 0;
michael@0 544 /**
michael@0 545 * Generates the hash code for the collation object
michael@0 546 * @stable ICU 2.0
michael@0 547 */
michael@0 548 virtual int32_t hashCode(void) const = 0;
michael@0 549
michael@0 550 /**
michael@0 551 * Gets the locale of the Collator
michael@0 552 *
michael@0 553 * @param type can be either requested, valid or actual locale. For more
michael@0 554 * information see the definition of ULocDataLocaleType in
michael@0 555 * uloc.h
michael@0 556 * @param status the error code status.
michael@0 557 * @return locale where the collation data lives. If the collator
michael@0 558 * was instantiated from rules, locale is empty.
michael@0 559 * @deprecated ICU 2.8 This API is under consideration for revision
michael@0 560 * in ICU 3.0.
michael@0 561 */
michael@0 562 virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
michael@0 563
michael@0 564 /**
michael@0 565 * Convenience method for comparing two strings based on the collation rules.
michael@0 566 * @param source the source string to be compared with.
michael@0 567 * @param target the target string to be compared with.
michael@0 568 * @return true if the first string is greater than the second one,
michael@0 569 * according to the collation rules. false, otherwise.
michael@0 570 * @see Collator#compare
michael@0 571 * @stable ICU 2.0
michael@0 572 */
michael@0 573 UBool greater(const UnicodeString& source, const UnicodeString& target)
michael@0 574 const;
michael@0 575
michael@0 576 /**
michael@0 577 * Convenience method for comparing two strings based on the collation rules.
michael@0 578 * @param source the source string to be compared with.
michael@0 579 * @param target the target string to be compared with.
michael@0 580 * @return true if the first string is greater than or equal to the second
michael@0 581 * one, according to the collation rules. false, otherwise.
michael@0 582 * @see Collator#compare
michael@0 583 * @stable ICU 2.0
michael@0 584 */
michael@0 585 UBool greaterOrEqual(const UnicodeString& source,
michael@0 586 const UnicodeString& target) const;
michael@0 587
michael@0 588 /**
michael@0 589 * Convenience method for comparing two strings based on the collation rules.
michael@0 590 * @param source the source string to be compared with.
michael@0 591 * @param target the target string to be compared with.
michael@0 592 * @return true if the strings are equal according to the collation rules.
michael@0 593 * false, otherwise.
michael@0 594 * @see Collator#compare
michael@0 595 * @stable ICU 2.0
michael@0 596 */
michael@0 597 UBool equals(const UnicodeString& source, const UnicodeString& target) const;
michael@0 598
michael@0 599 /**
michael@0 600 * Determines the minimum strength that will be used in comparison or
michael@0 601 * transformation.
michael@0 602 * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
michael@0 603 * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
michael@0 604 * are ignored.
michael@0 605 * @return the current comparison level.
michael@0 606 * @see Collator#setStrength
michael@0 607 * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
michael@0 608 */
michael@0 609 virtual ECollationStrength getStrength(void) const;
michael@0 610
michael@0 611 /**
michael@0 612 * Sets the minimum strength to be used in comparison or transformation.
michael@0 613 * <p>Example of use:
michael@0 614 * <pre>
michael@0 615 * \code
michael@0 616 * UErrorCode status = U_ZERO_ERROR;
michael@0 617 * Collator*myCollation = Collator::createInstance(Locale::US, status);
michael@0 618 * if (U_FAILURE(status)) return;
michael@0 619 * myCollation->setStrength(Collator::PRIMARY);
michael@0 620 * // result will be "abc" == "ABC"
michael@0 621 * // tertiary differences will be ignored
michael@0 622 * Collator::ComparisonResult result = myCollation->compare("abc", "ABC");
michael@0 623 * \endcode
michael@0 624 * </pre>
michael@0 625 * @see Collator#getStrength
michael@0 626 * @param newStrength the new comparison level.
michael@0 627 * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
michael@0 628 */
michael@0 629 virtual void setStrength(ECollationStrength newStrength);
michael@0 630
michael@0 631 /**
michael@0 632 * Retrieves the reordering codes for this collator.
michael@0 633 * @param dest The array to fill with the script ordering.
michael@0 634 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
michael@0 635 * will only return the length of the result without writing any of the result string (pre-flighting).
michael@0 636 * @param status A reference to an error code value, which must not indicate
michael@0 637 * a failure before the function call.
michael@0 638 * @return The length of the script ordering array.
michael@0 639 * @see ucol_setReorderCodes
michael@0 640 * @see Collator#getEquivalentReorderCodes
michael@0 641 * @see Collator#setReorderCodes
michael@0 642 * @see UScriptCode
michael@0 643 * @see UColReorderCode
michael@0 644 * @stable ICU 4.8
michael@0 645 */
michael@0 646 virtual int32_t getReorderCodes(int32_t *dest,
michael@0 647 int32_t destCapacity,
michael@0 648 UErrorCode& status) const;
michael@0 649
michael@0 650 /**
michael@0 651 * Sets the ordering of scripts for this collator.
michael@0 652 *
michael@0 653 * <p>The reordering codes are a combination of script codes and reorder codes.
michael@0 654 * @param reorderCodes An array of script codes in the new order. This can be NULL if the
michael@0 655 * length is also set to 0. An empty array will clear any reordering codes on the collator.
michael@0 656 * @param reorderCodesLength The length of reorderCodes.
michael@0 657 * @param status error code
michael@0 658 * @see Collator#getReorderCodes
michael@0 659 * @see Collator#getEquivalentReorderCodes
michael@0 660 * @see UScriptCode
michael@0 661 * @see UColReorderCode
michael@0 662 * @stable ICU 4.8
michael@0 663 */
michael@0 664 virtual void setReorderCodes(const int32_t* reorderCodes,
michael@0 665 int32_t reorderCodesLength,
michael@0 666 UErrorCode& status) ;
michael@0 667
michael@0 668 /**
michael@0 669 * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
michael@0 670 * codes will be grouped and must reorder together.
michael@0 671 * @param reorderCode The reorder code to determine equivalence for.
michael@0 672 * @param dest The array to fill with the script equivalence reordering codes.
michael@0 673 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
michael@0 674 * function will only return the length of the result without writing any of the result
michael@0 675 * string (pre-flighting).
michael@0 676 * @param status A reference to an error code value, which must not indicate
michael@0 677 * a failure before the function call.
michael@0 678 * @return The length of the reordering code equivalence array.
michael@0 679 * @see ucol_setReorderCodes
michael@0 680 * @see Collator#getReorderCodes
michael@0 681 * @see Collator#setReorderCodes
michael@0 682 * @see UScriptCode
michael@0 683 * @see UColReorderCode
michael@0 684 * @stable ICU 4.8
michael@0 685 */
michael@0 686 static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
michael@0 687 int32_t* dest,
michael@0 688 int32_t destCapacity,
michael@0 689 UErrorCode& status);
michael@0 690
michael@0 691 /**
michael@0 692 * Get name of the object for the desired Locale, in the desired language
michael@0 693 * @param objectLocale must be from getAvailableLocales
michael@0 694 * @param displayLocale specifies the desired locale for output
michael@0 695 * @param name the fill-in parameter of the return value
michael@0 696 * @return display-able name of the object for the object locale in the
michael@0 697 * desired language
michael@0 698 * @stable ICU 2.0
michael@0 699 */
michael@0 700 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
michael@0 701 const Locale& displayLocale,
michael@0 702 UnicodeString& name);
michael@0 703
michael@0 704 /**
michael@0 705 * Get name of the object for the desired Locale, in the language of the
michael@0 706 * default locale.
michael@0 707 * @param objectLocale must be from getAvailableLocales
michael@0 708 * @param name the fill-in parameter of the return value
michael@0 709 * @return name of the object for the desired locale in the default language
michael@0 710 * @stable ICU 2.0
michael@0 711 */
michael@0 712 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
michael@0 713 UnicodeString& name);
michael@0 714
michael@0 715 /**
michael@0 716 * Get the set of Locales for which Collations are installed.
michael@0 717 *
michael@0 718 * <p>Note this does not include locales supported by registered collators.
michael@0 719 * If collators might have been registered, use the overload of getAvailableLocales
michael@0 720 * that returns a StringEnumeration.</p>
michael@0 721 *
michael@0 722 * @param count the output parameter of number of elements in the locale list
michael@0 723 * @return the list of available locales for which collations are installed
michael@0 724 * @stable ICU 2.0
michael@0 725 */
michael@0 726 static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
michael@0 727
michael@0 728 /**
michael@0 729 * Return a StringEnumeration over the locales available at the time of the call,
michael@0 730 * including registered locales. If a severe error occurs (such as out of memory
michael@0 731 * condition) this will return null. If there is no locale data, an empty enumeration
michael@0 732 * will be returned.
michael@0 733 * @return a StringEnumeration over the locales available at the time of the call
michael@0 734 * @stable ICU 2.6
michael@0 735 */
michael@0 736 static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
michael@0 737
michael@0 738 /**
michael@0 739 * Create a string enumerator of all possible keywords that are relevant to
michael@0 740 * collation. At this point, the only recognized keyword for this
michael@0 741 * service is "collation".
michael@0 742 * @param status input-output error code
michael@0 743 * @return a string enumeration over locale strings. The caller is
michael@0 744 * responsible for deleting the result.
michael@0 745 * @stable ICU 3.0
michael@0 746 */
michael@0 747 static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
michael@0 748
michael@0 749 /**
michael@0 750 * Given a keyword, create a string enumeration of all values
michael@0 751 * for that keyword that are currently in use.
michael@0 752 * @param keyword a particular keyword as enumerated by
michael@0 753 * ucol_getKeywords. If any other keyword is passed in, status is set
michael@0 754 * to U_ILLEGAL_ARGUMENT_ERROR.
michael@0 755 * @param status input-output error code
michael@0 756 * @return a string enumeration over collation keyword values, or NULL
michael@0 757 * upon error. The caller is responsible for deleting the result.
michael@0 758 * @stable ICU 3.0
michael@0 759 */
michael@0 760 static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
michael@0 761
michael@0 762 /**
michael@0 763 * Given a key and a locale, returns an array of string values in a preferred
michael@0 764 * order that would make a difference. These are all and only those values where
michael@0 765 * the open (creation) of the service with the locale formed from the input locale
michael@0 766 * plus input keyword and that value has different behavior than creation with the
michael@0 767 * input locale alone.
michael@0 768 * @param keyword one of the keys supported by this service. For now, only
michael@0 769 * "collation" is supported.
michael@0 770 * @param locale the locale
michael@0 771 * @param commonlyUsed if set to true it will return only commonly used values
michael@0 772 * with the given locale in preferred order. Otherwise,
michael@0 773 * it will return all the available values for the locale.
michael@0 774 * @param status ICU status
michael@0 775 * @return a string enumeration over keyword values for the given key and the locale.
michael@0 776 * @stable ICU 4.2
michael@0 777 */
michael@0 778 static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale,
michael@0 779 UBool commonlyUsed, UErrorCode& status);
michael@0 780
michael@0 781 /**
michael@0 782 * Return the functionally equivalent locale for the given
michael@0 783 * requested locale, with respect to given keyword, for the
michael@0 784 * collation service. If two locales return the same result, then
michael@0 785 * collators instantiated for these locales will behave
michael@0 786 * equivalently. The converse is not always true; two collators
michael@0 787 * may in fact be equivalent, but return different results, due to
michael@0 788 * internal details. The return result has no other meaning than
michael@0 789 * that stated above, and implies nothing as to the relationship
michael@0 790 * between the two locales. This is intended for use by
michael@0 791 * applications who wish to cache collators, or otherwise reuse
michael@0 792 * collators when possible. The functional equivalent may change
michael@0 793 * over time. For more information, please see the <a
michael@0 794 * href="http://icu-project.org/userguide/locale.html#services">
michael@0 795 * Locales and Services</a> section of the ICU User Guide.
michael@0 796 * @param keyword a particular keyword as enumerated by
michael@0 797 * ucol_getKeywords.
michael@0 798 * @param locale the requested locale
michael@0 799 * @param isAvailable reference to a fillin parameter that
michael@0 800 * indicates whether the requested locale was 'available' to the
michael@0 801 * collation service. A locale is defined as 'available' if it
michael@0 802 * physically exists within the collation locale data.
michael@0 803 * @param status reference to input-output error code
michael@0 804 * @return the functionally equivalent collation locale, or the root
michael@0 805 * locale upon error.
michael@0 806 * @stable ICU 3.0
michael@0 807 */
michael@0 808 static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
michael@0 809 UBool& isAvailable, UErrorCode& status);
michael@0 810
michael@0 811 #if !UCONFIG_NO_SERVICE
michael@0 812 /**
michael@0 813 * Register a new Collator. The collator will be adopted.
michael@0 814 * @param toAdopt the Collator instance to be adopted
michael@0 815 * @param locale the locale with which the collator will be associated
michael@0 816 * @param status the in/out status code, no special meanings are assigned
michael@0 817 * @return a registry key that can be used to unregister this collator
michael@0 818 * @stable ICU 2.6
michael@0 819 */
michael@0 820 static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
michael@0 821
michael@0 822 /**
michael@0 823 * Register a new CollatorFactory. The factory will be adopted.
michael@0 824 * @param toAdopt the CollatorFactory instance to be adopted
michael@0 825 * @param status the in/out status code, no special meanings are assigned
michael@0 826 * @return a registry key that can be used to unregister this collator
michael@0 827 * @stable ICU 2.6
michael@0 828 */
michael@0 829 static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
michael@0 830
michael@0 831 /**
michael@0 832 * Unregister a previously-registered Collator or CollatorFactory
michael@0 833 * using the key returned from the register call. Key becomes
michael@0 834 * invalid after a successful call and should not be used again.
michael@0 835 * The object corresponding to the key will be deleted.
michael@0 836 * @param key the registry key returned by a previous call to registerInstance
michael@0 837 * @param status the in/out status code, no special meanings are assigned
michael@0 838 * @return TRUE if the collator for the key was successfully unregistered
michael@0 839 * @stable ICU 2.6
michael@0 840 */
michael@0 841 static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
michael@0 842 #endif /* UCONFIG_NO_SERVICE */
michael@0 843
michael@0 844 /**
michael@0 845 * Gets the version information for a Collator.
michael@0 846 * @param info the version # information, the result will be filled in
michael@0 847 * @stable ICU 2.0
michael@0 848 */
michael@0 849 virtual void getVersion(UVersionInfo info) const = 0;
michael@0 850
michael@0 851 /**
michael@0 852 * Returns a unique class ID POLYMORPHICALLY. Pure virtual method.
michael@0 853 * This method is to implement a simple version of RTTI, since not all C++
michael@0 854 * compilers support genuine RTTI. Polymorphic operator==() and clone()
michael@0 855 * methods call this method.
michael@0 856 * @return The class ID for this object. All objects of a given class have
michael@0 857 * the same class ID. Objects of other classes have different class
michael@0 858 * IDs.
michael@0 859 * @stable ICU 2.0
michael@0 860 */
michael@0 861 virtual UClassID getDynamicClassID(void) const = 0;
michael@0 862
michael@0 863 /**
michael@0 864 * Universal attribute setter
michael@0 865 * @param attr attribute type
michael@0 866 * @param value attribute value
michael@0 867 * @param status to indicate whether the operation went on smoothly or
michael@0 868 * there were errors
michael@0 869 * @stable ICU 2.2
michael@0 870 */
michael@0 871 virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
michael@0 872 UErrorCode &status) = 0;
michael@0 873
michael@0 874 /**
michael@0 875 * Universal attribute getter
michael@0 876 * @param attr attribute type
michael@0 877 * @param status to indicate whether the operation went on smoothly or
michael@0 878 * there were errors
michael@0 879 * @return attribute value
michael@0 880 * @stable ICU 2.2
michael@0 881 */
michael@0 882 virtual UColAttributeValue getAttribute(UColAttribute attr,
michael@0 883 UErrorCode &status) const = 0;
michael@0 884
michael@0 885 /**
michael@0 886 * Sets the variable top to a collation element value of a string supplied.
michael@0 887 * @param varTop one or more (if contraction) UChars to which the variable top should be set
michael@0 888 * @param len length of variable top string. If -1 it is considered to be zero terminated.
michael@0 889 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
michael@0 890 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
michael@0 891 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
michael@0 892 * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
michael@0 893 * @stable ICU 2.0
michael@0 894 */
michael@0 895 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0;
michael@0 896
michael@0 897 /**
michael@0 898 * Sets the variable top to a collation element value of a string supplied.
michael@0 899 * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
michael@0 900 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
michael@0 901 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
michael@0 902 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
michael@0 903 * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
michael@0 904 * @stable ICU 2.0
michael@0 905 */
michael@0 906 virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) = 0;
michael@0 907
michael@0 908 /**
michael@0 909 * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
michael@0 910 * Lower 16 bits are ignored.
michael@0 911 * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
michael@0 912 * @param status error code (not changed by function)
michael@0 913 * @stable ICU 2.0
michael@0 914 */
michael@0 915 virtual void setVariableTop(uint32_t varTop, UErrorCode &status) = 0;
michael@0 916
michael@0 917 /**
michael@0 918 * Gets the variable top value of a Collator.
michael@0 919 * Lower 16 bits are undefined and should be ignored.
michael@0 920 * @param status error code (not changed by function). If error code is set, the return value is undefined.
michael@0 921 * @stable ICU 2.0
michael@0 922 */
michael@0 923 virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
michael@0 924
michael@0 925 /**
michael@0 926 * Get an UnicodeSet that contains all the characters and sequences
michael@0 927 * tailored in this collator.
michael@0 928 * @param status error code of the operation
michael@0 929 * @return a pointer to a UnicodeSet object containing all the
michael@0 930 * code points and sequences that may sort differently than
michael@0 931 * in the UCA. The object must be disposed of by using delete
michael@0 932 * @stable ICU 2.4
michael@0 933 */
michael@0 934 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
michael@0 935
michael@0 936 /**
michael@0 937 * Same as clone().
michael@0 938 * The base class implementation simply calls clone().
michael@0 939 * @return a copy of this object, owned by the caller
michael@0 940 * @see clone()
michael@0 941 * @deprecated ICU 50 no need to have two methods for cloning
michael@0 942 */
michael@0 943 virtual Collator* safeClone(void) const;
michael@0 944
michael@0 945 /**
michael@0 946 * Get the sort key as an array of bytes from an UnicodeString.
michael@0 947 * Sort key byte arrays are zero-terminated and can be compared using
michael@0 948 * strcmp().
michael@0 949 * @param source string to be processed.
michael@0 950 * @param result buffer to store result in. If NULL, number of bytes needed
michael@0 951 * will be returned.
michael@0 952 * @param resultLength length of the result buffer. If it is not enough, the
michael@0 953 * buffer will be filled to capacity.
michael@0 954 * @return Number of bytes needed for storing the sort key
michael@0 955 * @stable ICU 2.2
michael@0 956 */
michael@0 957 virtual int32_t getSortKey(const UnicodeString& source,
michael@0 958 uint8_t* result,
michael@0 959 int32_t resultLength) const = 0;
michael@0 960
michael@0 961 /**
michael@0 962 * Get the sort key as an array of bytes from an UChar buffer.
michael@0 963 * Sort key byte arrays are zero-terminated and can be compared using
michael@0 964 * strcmp().
michael@0 965 * @param source string to be processed.
michael@0 966 * @param sourceLength length of string to be processed.
michael@0 967 * If -1, the string is 0 terminated and length will be decided by the
michael@0 968 * function.
michael@0 969 * @param result buffer to store result in. If NULL, number of bytes needed
michael@0 970 * will be returned.
michael@0 971 * @param resultLength length of the result buffer. If it is not enough, the
michael@0 972 * buffer will be filled to capacity.
michael@0 973 * @return Number of bytes needed for storing the sort key
michael@0 974 * @stable ICU 2.2
michael@0 975 */
michael@0 976 virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
michael@0 977 uint8_t*result, int32_t resultLength) const = 0;
michael@0 978
michael@0 979 /**
michael@0 980 * Produce a bound for a given sortkey and a number of levels.
michael@0 981 * Return value is always the number of bytes needed, regardless of
michael@0 982 * whether the result buffer was big enough or even valid.<br>
michael@0 983 * Resulting bounds can be used to produce a range of strings that are
michael@0 984 * between upper and lower bounds. For example, if bounds are produced
michael@0 985 * for a sortkey of string "smith", strings between upper and lower
michael@0 986 * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
michael@0 987 * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
michael@0 988 * is produced, strings matched would be as above. However, if bound
michael@0 989 * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
michael@0 990 * also match "Smithsonian" and similar.<br>
michael@0 991 * For more on usage, see example in cintltst/capitst.c in procedure
michael@0 992 * TestBounds.
michael@0 993 * Sort keys may be compared using <TT>strcmp</TT>.
michael@0 994 * @param source The source sortkey.
michael@0 995 * @param sourceLength The length of source, or -1 if null-terminated.
michael@0 996 * (If an unmodified sortkey is passed, it is always null
michael@0 997 * terminated).
michael@0 998 * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
michael@0 999 * produces a lower inclusive bound, UCOL_BOUND_UPPER, that
michael@0 1000 * produces upper bound that matches strings of the same length
michael@0 1001 * or UCOL_BOUND_UPPER_LONG that matches strings that have the
michael@0 1002 * same starting substring as the source string.
michael@0 1003 * @param noOfLevels Number of levels required in the resulting bound (for most
michael@0 1004 * uses, the recommended value is 1). See users guide for
michael@0 1005 * explanation on number of levels a sortkey can have.
michael@0 1006 * @param result A pointer to a buffer to receive the resulting sortkey.
michael@0 1007 * @param resultLength The maximum size of result.
michael@0 1008 * @param status Used for returning error code if something went wrong. If the
michael@0 1009 * number of levels requested is higher than the number of levels
michael@0 1010 * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
michael@0 1011 * issued.
michael@0 1012 * @return The size needed to fully store the bound.
michael@0 1013 * @see ucol_keyHashCode
michael@0 1014 * @stable ICU 2.1
michael@0 1015 */
michael@0 1016 static int32_t U_EXPORT2 getBound(const uint8_t *source,
michael@0 1017 int32_t sourceLength,
michael@0 1018 UColBoundMode boundType,
michael@0 1019 uint32_t noOfLevels,
michael@0 1020 uint8_t *result,
michael@0 1021 int32_t resultLength,
michael@0 1022 UErrorCode &status);
michael@0 1023
michael@0 1024
michael@0 1025 protected:
michael@0 1026
michael@0 1027 // Collator protected constructors -------------------------------------
michael@0 1028
michael@0 1029 /**
michael@0 1030 * Default constructor.
michael@0 1031 * Constructor is different from the old default Collator constructor.
michael@0 1032 * The task for determining the default collation strength and normalization
michael@0 1033 * mode is left to the child class.
michael@0 1034 * @stable ICU 2.0
michael@0 1035 */
michael@0 1036 Collator();
michael@0 1037
michael@0 1038 #ifndef U_HIDE_DEPRECATED_API
michael@0 1039 /**
michael@0 1040 * Constructor.
michael@0 1041 * Empty constructor, does not handle the arguments.
michael@0 1042 * This constructor is done for backward compatibility with 1.7 and 1.8.
michael@0 1043 * The task for handling the argument collation strength and normalization
michael@0 1044 * mode is left to the child class.
michael@0 1045 * @param collationStrength collation strength
michael@0 1046 * @param decompositionMode
michael@0 1047 * @deprecated ICU 2.4. Subclasses should use the default constructor
michael@0 1048 * instead and handle the strength and normalization mode themselves.
michael@0 1049 */
michael@0 1050 Collator(UCollationStrength collationStrength,
michael@0 1051 UNormalizationMode decompositionMode);
michael@0 1052 #endif /* U_HIDE_DEPRECATED_API */
michael@0 1053
michael@0 1054 /**
michael@0 1055 * Copy constructor.
michael@0 1056 * @param other Collator object to be copied from
michael@0 1057 * @stable ICU 2.0
michael@0 1058 */
michael@0 1059 Collator(const Collator& other);
michael@0 1060
michael@0 1061 // Collator protected methods -----------------------------------------
michael@0 1062
michael@0 1063
michael@0 1064 /**
michael@0 1065 * Used internally by registration to define the requested and valid locales.
michael@0 1066 * @param requestedLocale the requested locale
michael@0 1067 * @param validLocale the valid locale
michael@0 1068 * @param actualLocale the actual locale
michael@0 1069 * @internal
michael@0 1070 */
michael@0 1071 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
michael@0 1072
michael@0 1073 public:
michael@0 1074 #if !UCONFIG_NO_SERVICE
michael@0 1075 #ifndef U_HIDE_INTERNAL_API
michael@0 1076 /**
michael@0 1077 * used only by ucol_open, not for public use
michael@0 1078 * @internal
michael@0 1079 */
michael@0 1080 static UCollator* createUCollator(const char* loc, UErrorCode* status);
michael@0 1081 #endif /* U_HIDE_INTERNAL_API */
michael@0 1082 #endif
michael@0 1083
michael@0 1084 /** Get the short definition string for a collator. This internal API harvests the collator's
michael@0 1085 * locale and the attribute set and produces a string that can be used for opening
michael@0 1086 * a collator with the same properties using the ucol_openFromShortString API.
michael@0 1087 * This string will be normalized.
michael@0 1088 * The structure and the syntax of the string is defined in the "Naming collators"
michael@0 1089 * section of the users guide:
michael@0 1090 * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
michael@0 1091 * This function supports preflighting.
michael@0 1092 *
michael@0 1093 * This is internal, and intended to be used with delegate converters.
michael@0 1094 *
michael@0 1095 * @param locale a locale that will appear as a collators locale in the resulting
michael@0 1096 * short string definition. If NULL, the locale will be harvested
michael@0 1097 * from the collator.
michael@0 1098 * @param buffer space to hold the resulting string
michael@0 1099 * @param capacity capacity of the buffer
michael@0 1100 * @param status for returning errors. All the preflighting errors are featured
michael@0 1101 * @return length of the resulting string
michael@0 1102 * @see ucol_openFromShortString
michael@0 1103 * @see ucol_normalizeShortDefinitionString
michael@0 1104 * @see ucol_getShortDefinitionString
michael@0 1105 * @internal
michael@0 1106 */
michael@0 1107 virtual int32_t internalGetShortDefinitionString(const char *locale,
michael@0 1108 char *buffer,
michael@0 1109 int32_t capacity,
michael@0 1110 UErrorCode &status) const;
michael@0 1111 private:
michael@0 1112 /**
michael@0 1113 * Assignment operator. Private for now.
michael@0 1114 * @internal
michael@0 1115 */
michael@0 1116 Collator& operator=(const Collator& other);
michael@0 1117
michael@0 1118 friend class CFactory;
michael@0 1119 friend class SimpleCFactory;
michael@0 1120 friend class ICUCollatorFactory;
michael@0 1121 friend class ICUCollatorService;
michael@0 1122 static Collator* makeInstance(const Locale& desiredLocale,
michael@0 1123 UErrorCode& status);
michael@0 1124
michael@0 1125 // Collator private data members ---------------------------------------
michael@0 1126
michael@0 1127 /*
michael@0 1128 synwee : removed as attributes to be handled by child class
michael@0 1129 UCollationStrength strength;
michael@0 1130 Normalizer::EMode decmp;
michael@0 1131 */
michael@0 1132 /* This is useless information */
michael@0 1133 /* static const UVersionInfo fVersion;*/
michael@0 1134 };
michael@0 1135
michael@0 1136 #if !UCONFIG_NO_SERVICE
michael@0 1137 /**
michael@0 1138 * A factory, used with registerFactory, that creates multiple collators and provides
michael@0 1139 * display names for them. A factory supports some number of locales-- these are the
michael@0 1140 * locales for which it can create collators. The factory can be visible, in which
michael@0 1141 * case the supported locales will be enumerated by getAvailableLocales, or invisible,
michael@0 1142 * in which they are not. Invisible locales are still supported, they are just not
michael@0 1143 * listed by getAvailableLocales.
michael@0 1144 * <p>
michael@0 1145 * If standard locale display names are sufficient, Collator instances can
michael@0 1146 * be registered using registerInstance instead.</p>
michael@0 1147 * <p>
michael@0 1148 * Note: if the collators are to be used from C APIs, they must be instances
michael@0 1149 * of RuleBasedCollator.</p>
michael@0 1150 *
michael@0 1151 * @stable ICU 2.6
michael@0 1152 */
michael@0 1153 class U_I18N_API CollatorFactory : public UObject {
michael@0 1154 public:
michael@0 1155
michael@0 1156 /**
michael@0 1157 * Destructor
michael@0 1158 * @stable ICU 3.0
michael@0 1159 */
michael@0 1160 virtual ~CollatorFactory();
michael@0 1161
michael@0 1162 /**
michael@0 1163 * Return true if this factory is visible. Default is true.
michael@0 1164 * If not visible, the locales supported by this factory will not
michael@0 1165 * be listed by getAvailableLocales.
michael@0 1166 * @return true if the factory is visible.
michael@0 1167 * @stable ICU 2.6
michael@0 1168 */
michael@0 1169 virtual UBool visible(void) const;
michael@0 1170
michael@0 1171 /**
michael@0 1172 * Return a collator for the provided locale. If the locale
michael@0 1173 * is not supported, return NULL.
michael@0 1174 * @param loc the locale identifying the collator to be created.
michael@0 1175 * @return a new collator if the locale is supported, otherwise NULL.
michael@0 1176 * @stable ICU 2.6
michael@0 1177 */
michael@0 1178 virtual Collator* createCollator(const Locale& loc) = 0;
michael@0 1179
michael@0 1180 /**
michael@0 1181 * Return the name of the collator for the objectLocale, localized for the displayLocale.
michael@0 1182 * If objectLocale is not supported, or the factory is not visible, set the result string
michael@0 1183 * to bogus.
michael@0 1184 * @param objectLocale the locale identifying the collator
michael@0 1185 * @param displayLocale the locale for which the display name of the collator should be localized
michael@0 1186 * @param result an output parameter for the display name, set to bogus if not supported.
michael@0 1187 * @return the display name
michael@0 1188 * @stable ICU 2.6
michael@0 1189 */
michael@0 1190 virtual UnicodeString& getDisplayName(const Locale& objectLocale,
michael@0 1191 const Locale& displayLocale,
michael@0 1192 UnicodeString& result);
michael@0 1193
michael@0 1194 /**
michael@0 1195 * Return an array of all the locale names directly supported by this factory.
michael@0 1196 * The number of names is returned in count. This array is owned by the factory.
michael@0 1197 * Its contents must never change.
michael@0 1198 * @param count output parameter for the number of locales supported by the factory
michael@0 1199 * @param status the in/out error code
michael@0 1200 * @return a pointer to an array of count UnicodeStrings.
michael@0 1201 * @stable ICU 2.6
michael@0 1202 */
michael@0 1203 virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0;
michael@0 1204 };
michael@0 1205 #endif /* UCONFIG_NO_SERVICE */
michael@0 1206
michael@0 1207 // Collator inline methods -----------------------------------------------
michael@0 1208
michael@0 1209 U_NAMESPACE_END
michael@0 1210
michael@0 1211 #endif /* #if !UCONFIG_NO_COLLATION */
michael@0 1212
michael@0 1213 #endif

mercurial