intl/icu/source/i18n/unicode/coll.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/unicode/coll.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1213 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*   Copyright (C) 1996-2012, International Business Machines                 *
     1.7 +*   Corporation and others.  All Rights Reserved.                            *
     1.8 +******************************************************************************
     1.9 +*/
    1.10 +
    1.11 +/**
    1.12 + * \file 
    1.13 + * \brief C++ API: Collation Service.
    1.14 + */
    1.15 + 
    1.16 +/**
    1.17 +* File coll.h
    1.18 +*
    1.19 +* Created by: Helena Shih
    1.20 +*
    1.21 +* Modification History:
    1.22 +*
    1.23 +*  Date        Name        Description
    1.24 +* 02/5/97      aliu        Modified createDefault to load collation data from
    1.25 +*                          binary files when possible.  Added related methods
    1.26 +*                          createCollationFromFile, chopLocale, createPathName.
    1.27 +* 02/11/97     aliu        Added members addToCache, findInCache, and fgCache.
    1.28 +* 02/12/97     aliu        Modified to create objects from RuleBasedCollator cache.
    1.29 +*                          Moved cache out of Collation class.
    1.30 +* 02/13/97     aliu        Moved several methods out of this class and into
    1.31 +*                          RuleBasedCollator, with modifications.  Modified
    1.32 +*                          createDefault() to call new RuleBasedCollator(Locale&)
    1.33 +*                          constructor.  General clean up and documentation.
    1.34 +* 02/20/97     helena      Added clone, operator==, operator!=, operator=, copy
    1.35 +*                          constructor and getDynamicClassID.
    1.36 +* 03/25/97     helena      Updated with platform independent data types.
    1.37 +* 05/06/97     helena      Added memory allocation error detection.
    1.38 +* 06/20/97     helena      Java class name change.
    1.39 +* 09/03/97     helena      Added createCollationKeyValues().
    1.40 +* 02/10/98     damiba      Added compare() with length as parameter.
    1.41 +* 04/23/99     stephen     Removed EDecompositionMode, merged with
    1.42 +*                          Normalizer::EMode.
    1.43 +* 11/02/99     helena      Collator performance enhancements.  Eliminates the
    1.44 +*                          UnicodeString construction and special case for NO_OP.
    1.45 +* 11/23/99     srl         More performance enhancements. Inlining of
    1.46 +*                          critical accessors.
    1.47 +* 05/15/00     helena      Added version information API.
    1.48 +* 01/29/01     synwee      Modified into a C++ wrapper which calls C apis
    1.49 +*                          (ucoll.h).
    1.50 +*/
    1.51 +
    1.52 +#ifndef COLL_H
    1.53 +#define COLL_H
    1.54 +
    1.55 +#include "unicode/utypes.h"
    1.56 +
    1.57 +#if !UCONFIG_NO_COLLATION
    1.58 +
    1.59 +#include "unicode/uobject.h"
    1.60 +#include "unicode/ucol.h"
    1.61 +#include "unicode/normlzr.h"
    1.62 +#include "unicode/locid.h"
    1.63 +#include "unicode/uniset.h"
    1.64 +#include "unicode/umisc.h"
    1.65 +#include "unicode/uiter.h"
    1.66 +#include "unicode/stringpiece.h"
    1.67 +
    1.68 +U_NAMESPACE_BEGIN
    1.69 +
    1.70 +class StringEnumeration;
    1.71 +
    1.72 +#if !UCONFIG_NO_SERVICE
    1.73 +/**
    1.74 + * @stable ICU 2.6
    1.75 + */
    1.76 +class CollatorFactory;
    1.77 +#endif
    1.78 +
    1.79 +/**
    1.80 +* @stable ICU 2.0
    1.81 +*/
    1.82 +class CollationKey;
    1.83 +
    1.84 +/**
    1.85 +* The <code>Collator</code> class performs locale-sensitive string
    1.86 +* comparison.<br>
    1.87 +* You use this class to build searching and sorting routines for natural
    1.88 +* language text.<br>
    1.89 +* <em>Important: </em>The ICU collation service has been reimplemented
    1.90 +* in order to achieve better performance and UCA compliance.
    1.91 +* For details, see the
    1.92 +* <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
    1.93 +* collation design document</a>.
    1.94 +* <p>
    1.95 +* <code>Collator</code> is an abstract base class. Subclasses implement
    1.96 +* specific collation strategies. One subclass,
    1.97 +* <code>RuleBasedCollator</code>, is currently provided and is applicable
    1.98 +* to a wide set of languages. Other subclasses may be created to handle more
    1.99 +* specialized needs.
   1.100 +* <p>
   1.101 +* Like other locale-sensitive classes, you can use the static factory method,
   1.102 +* <code>createInstance</code>, to obtain the appropriate
   1.103 +* <code>Collator</code> object for a given locale. You will only need to
   1.104 +* look at the subclasses of <code>Collator</code> if you need to
   1.105 +* understand the details of a particular collation strategy or if you need to
   1.106 +* modify that strategy.
   1.107 +* <p>
   1.108 +* The following example shows how to compare two strings using the
   1.109 +* <code>Collator</code> for the default locale.
   1.110 +* \htmlonly<blockquote>\endhtmlonly
   1.111 +* <pre>
   1.112 +* \code
   1.113 +* // Compare two strings in the default locale
   1.114 +* UErrorCode success = U_ZERO_ERROR;
   1.115 +* Collator* myCollator = Collator::createInstance(success);
   1.116 +* if (myCollator->compare("abc", "ABC") < 0)
   1.117 +*   cout << "abc is less than ABC" << endl;
   1.118 +* else
   1.119 +*   cout << "abc is greater than or equal to ABC" << endl;
   1.120 +* \endcode
   1.121 +* </pre>
   1.122 +* \htmlonly</blockquote>\endhtmlonly
   1.123 +* <p>
   1.124 +* You can set a <code>Collator</code>'s <em>strength</em> property to
   1.125 +* determine the level of difference considered significant in comparisons.
   1.126 +* Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>,
   1.127 +* <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>.
   1.128 +* The exact assignment of strengths to language features is locale dependent.
   1.129 +* For example, in Czech, "e" and "f" are considered primary differences,
   1.130 +* while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary
   1.131 +* differences and "e" and "e" are identical. The following shows how both case
   1.132 +* and accents could be ignored for US English.
   1.133 +* \htmlonly<blockquote>\endhtmlonly
   1.134 +* <pre>
   1.135 +* \code
   1.136 +* //Get the Collator for US English and set its strength to PRIMARY
   1.137 +* UErrorCode success = U_ZERO_ERROR;
   1.138 +* Collator* usCollator = Collator::createInstance(Locale::US, success);
   1.139 +* usCollator->setStrength(Collator::PRIMARY);
   1.140 +* if (usCollator->compare("abc", "ABC") == 0)
   1.141 +*     cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl;
   1.142 +* \endcode
   1.143 +* </pre>
   1.144 +* \htmlonly</blockquote>\endhtmlonly
   1.145 +* <p>
   1.146 +* For comparing strings exactly once, the <code>compare</code> method
   1.147 +* provides the best performance. When sorting a list of strings however, it
   1.148 +* is generally necessary to compare each string multiple times. In this case,
   1.149 +* sort keys provide better performance. The <code>getSortKey</code> methods
   1.150 +* convert a string to a series of bytes that can be compared bitwise against
   1.151 +* other sort keys using <code>strcmp()</code>. Sort keys are written as
   1.152 +* zero-terminated byte strings. They consist of several substrings, one for
   1.153 +* each collation strength level, that are delimited by 0x01 bytes.
   1.154 +* If the string code points are appended for UCOL_IDENTICAL, then they are
   1.155 +* processed for correct code point order comparison and may contain 0x01
   1.156 +* bytes but not zero bytes.
   1.157 +* </p>
   1.158 +* <p>
   1.159 +* An older set of APIs returns a <code>CollationKey</code> object that wraps
   1.160 +* the sort key bytes instead of returning the bytes themselves.
   1.161 +* Its use is deprecated, but it is still available for compatibility with
   1.162 +* Java.
   1.163 +* </p>
   1.164 +* <p>
   1.165 +* <strong>Note:</strong> <code>Collator</code>s with different Locale,
   1.166 +* and CollationStrength settings will return different sort
   1.167 +* orders for the same set of strings. Locales have specific collation rules,
   1.168 +* and the way in which secondary and tertiary differences are taken into
   1.169 +* account, for example, will result in a different sorting order for the same
   1.170 +* strings.
   1.171 +* </p>
   1.172 +* @see         RuleBasedCollator
   1.173 +* @see         CollationKey
   1.174 +* @see         CollationElementIterator
   1.175 +* @see         Locale
   1.176 +* @see         Normalizer
   1.177 +* @version     2.0 11/15/01
   1.178 +*/
   1.179 +
   1.180 +class U_I18N_API Collator : public UObject {
   1.181 +public:
   1.182 +
   1.183 +    // Collator public enums -----------------------------------------------
   1.184 +
   1.185 +    /**
   1.186 +     * Base letter represents a primary difference. Set comparison level to
   1.187 +     * PRIMARY to ignore secondary and tertiary differences.<br>
   1.188 +     * Use this to set the strength of a Collator object.<br>
   1.189 +     * Example of primary difference, "abc" &lt; "abd"
   1.190 +     *
   1.191 +     * Diacritical differences on the same base letter represent a secondary
   1.192 +     * difference. Set comparison level to SECONDARY to ignore tertiary
   1.193 +     * differences. Use this to set the strength of a Collator object.<br>
   1.194 +     * Example of secondary difference, "&auml;" &gt;&gt; "a".
   1.195 +     *
   1.196 +     * Uppercase and lowercase versions of the same character represent a
   1.197 +     * tertiary difference.  Set comparison level to TERTIARY to include all
   1.198 +     * comparison differences. Use this to set the strength of a Collator
   1.199 +     * object.<br>
   1.200 +     * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
   1.201 +     *
   1.202 +     * Two characters are considered "identical" when they have the same unicode
   1.203 +     * spellings.<br>
   1.204 +     * For example, "&auml;" == "&auml;".
   1.205 +     *
   1.206 +     * UCollationStrength is also used to determine the strength of sort keys
   1.207 +     * generated from Collator objects.
   1.208 +     * @stable ICU 2.0
   1.209 +     */
   1.210 +    enum ECollationStrength
   1.211 +    {
   1.212 +        PRIMARY    = UCOL_PRIMARY,  // 0: base-letter differences only
   1.213 +        SECONDARY  = UCOL_SECONDARY,  // 1: also diacritical differences on the same base letter
   1.214 +        TERTIARY   = UCOL_TERTIARY,  // 2: also uppercase/lowercase differences
   1.215 +        QUATERNARY = UCOL_QUATERNARY,  // 3
   1.216 +        IDENTICAL  = UCOL_IDENTICAL  // 15: equal only when the Unicode spellings are the same
   1.217 +    };
   1.218 +
   1.219 +    /**
   1.220 +     * LESS is returned if source string is compared to be less than target
   1.221 +     * string in the compare() method.
   1.222 +     * EQUAL is returned if source string is compared to be equal to target
   1.223 +     * string in the compare() method.
   1.224 +     * GREATER is returned if source string is compared to be greater than
   1.225 +     * target string in the compare() method.
   1.226 +     * @see Collator#compare
   1.227 +     * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h
   1.228 +     */
   1.229 +    enum EComparisonResult
   1.230 +    {
   1.231 +        LESS = UCOL_LESS,  // -1: source compares less than target
   1.232 +        EQUAL = UCOL_EQUAL,  // 0: source compares equal to target
   1.233 +        GREATER = UCOL_GREATER  // 1: source compares greater than target
   1.234 +    };
   1.235 +
   1.236 +    // Collator public destructor -----------------------------------------
   1.237 +
   1.238 +    /**
   1.239 +     * Destructor
   1.240 +     * @stable ICU 2.0
   1.241 +     */
   1.242 +    virtual ~Collator();
   1.243 +
   1.244 +    // Collator public methods --------------------------------------------
   1.245 +
   1.246 +    /**
   1.247 +     * Returns TRUE if "other" is the same as "this".
   1.248 +     *
   1.249 +     * The base class implementation returns TRUE if "other" has the same type/class as "this":
   1.250 +     * <code>typeid(*this) == typeid(other)</code>.
   1.251 +     *
   1.252 +     * Subclass implementations should do something like the following:
   1.253 +     * <pre>
   1.254 +     *   if (this == &other) { return TRUE; }
   1.255 +     *   if (!Collator::operator==(other)) { return FALSE; }  // not the same class
   1.256 +     *
   1.257 +     *   const MyCollator &o = (const MyCollator&)other;
   1.258 +     *   (compare this vs. o's subclass fields)
   1.259 +     * </pre>
   1.260 +     * @param other Collator object to be compared
   1.261 +     * @return TRUE if other is the same as this.
   1.262 +     * @stable ICU 2.0
   1.263 +     */
   1.264 +    virtual UBool operator==(const Collator& other) const;
   1.265 +
   1.266 +    /**
   1.267 +     * Returns true if "other" is not the same as "this".
   1.268 +     * Calls ! operator==(const Collator&) const which works for all subclasses.
   1.269 +     * @param other Collator object to be compared
   1.270 +     * @return TRUE if other is not the same as this.
   1.271 +     * @stable ICU 2.0
   1.272 +     */
   1.273 +    virtual UBool operator!=(const Collator& other) const;
   1.274 +
   1.275 +    /**
   1.276 +     * Makes a copy of this object.
   1.277 +     * @return a copy of this object, owned by the caller
   1.278 +     * @stable ICU 2.0
   1.279 +     */
   1.280 +    virtual Collator* clone(void) const = 0;
   1.281 +
   1.282 +    /**
   1.283 +     * Creates the Collator object for the current default locale.
   1.284 +     * The default locale is determined by Locale::getDefault.
   1.285 +     * The UErrorCode& err parameter is used to return status information to the user.
   1.286 +     * To check whether the construction succeeded or not, you should check the
   1.287 +     * value of U_SUCCESS(err).  If you wish more detailed information, you can
   1.288 +     * check for informational error results which still indicate success.
   1.289 +     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
   1.290 +     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
   1.291 +     * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
   1.292 +     * used; neither the requested locale nor any of its fall back locales
   1.293 +     * could be found.
   1.294 +     * The caller owns the returned object and is responsible for deleting it.
   1.295 +     *
   1.296 +     * @param err    the error code status.
   1.297 +     * @return       the collation object of the default locale (for example, en_US).
   1.298 +     * @see Locale#getDefault
   1.299 +     * @stable ICU 2.0
   1.300 +     */
   1.301 +    static Collator* U_EXPORT2 createInstance(UErrorCode&  err);
   1.302 +
   1.303 +    /**
   1.304 +     * Gets the table-based collation object for the desired locale. The
   1.305 +     * resource of the desired locale will be loaded by ResourceLoader.
   1.306 +     * Locale::ENGLISH is the base collation table and all other languages are
   1.307 +     * built on top of it with additional language-specific modifications.
   1.308 +     * The UErrorCode& err parameter is used to return status information to the user.
   1.309 +     * To check whether the construction succeeded or not, you should check
   1.310 +     * the value of U_SUCCESS(err).  If you wish more detailed information, you
   1.311 +     * can check for informational error results which still indicate success.
   1.312 +     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used.  For
   1.313 +     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
   1.314 +     * used.  U_USING_DEFAULT_ERROR indicates that the default locale data was
   1.315 +     * used; neither the requested locale nor any of its fall back locales
   1.316 +     * could be found.
   1.317 +     * The caller owns the returned object and is responsible for deleting it.
   1.318 +     * @param loc    The locale ID for which to open a collator.
   1.319 +     * @param err    the error code status.
   1.320 +     * @return       the created table-based collation object based on the desired
   1.321 +     *               locale.
   1.322 +     * @see Locale
   1.323 +     * @see ResourceLoader
   1.324 +     * @stable ICU 2.2
   1.325 +     */
   1.326 +    static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
   1.327 +
   1.328 +#ifdef U_USE_COLLATION_OBSOLETE_2_6
   1.329 +    /**
   1.330 +     * Create a Collator with a specific version.
   1.331 +     * This is the same as createInstance(loc, err) except that getVersion() of
   1.332 +     * the returned object is guaranteed to be the same as the version
   1.333 +     * parameter.
   1.334 +     * This is designed to be used to open the same collator for a given
   1.335 +     * locale even when ICU is updated.
   1.336 +     * The same locale and version guarantees the same sort keys and
   1.337 +     * comparison results.
   1.338 +     * <p>
   1.339 +     * Note: this API will be removed in a future release.  Use
   1.340 +     * <tt>createInstance(const Locale&, UErrorCode&)</tt> instead.</p>
   1.341 +     *
   1.342 +     * @param loc The locale ID for which to open a collator.
   1.343 +     * @param version The requested collator version.
   1.344 +     * @param err A reference to a UErrorCode,
   1.345 +     *            must not indicate a failure before calling this function.
   1.346 +     * @return A pointer to a Collator, or 0 if an error occurred
   1.347 +     *         or a collator with the requested version is not available.
   1.348 +     *
   1.349 +     * @see getVersion
   1.350 +     * @obsolete ICU 2.6
   1.351 +     */
   1.352 +    static Collator *createInstance(const Locale &loc, UVersionInfo version, UErrorCode &err);
   1.353 +#endif
   1.354 +
   1.355 +    /**
   1.356 +     * The comparison function compares the character data stored in two
   1.357 +     * different strings. Returns information about whether a string is less
   1.358 +     * than, greater than or equal to another string.
   1.359 +     * @param source the source string to be compared with.
   1.360 +     * @param target the string that is to be compared with the source string.
   1.361 +     * @return Returns a byte value. GREATER if source is greater
   1.362 +     * than target; EQUAL if source is equal to target; LESS if source is less
   1.363 +     * than target
   1.364 +     * @deprecated ICU 2.6 use the overload with UErrorCode &
   1.365 +     */
   1.366 +    virtual EComparisonResult compare(const UnicodeString& source,
   1.367 +                                      const UnicodeString& target) const;
   1.368 +
   1.369 +    /**
   1.370 +     * The comparison function compares the character data stored in two
   1.371 +     * different strings. Returns information about whether a string is less
   1.372 +     * than, greater than or equal to another string.
   1.373 +     * @param source the source string to be compared with.
   1.374 +     * @param target the string that is to be compared with the source string.
   1.375 +     * @param status possible error code
   1.376 +     * @return Returns an enum value. UCOL_GREATER if source is greater
   1.377 +     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
   1.378 +     * than target
   1.379 +     * @stable ICU 2.6
   1.380 +     */
   1.381 +    virtual UCollationResult compare(const UnicodeString& source,
   1.382 +                                      const UnicodeString& target,
   1.383 +                                      UErrorCode &status) const = 0;
   1.384 +
   1.385 +    /**
   1.386 +     * Does the same thing as compare but limits the comparison to a specified
   1.387 +     * length
   1.388 +     * @param source the source string to be compared with.
   1.389 +     * @param target the string that is to be compared with the source string.
   1.390 +     * @param length the length the comparison is limited to
   1.391 +     * @return Returns a byte value. GREATER if source (up to the specified
   1.392 +     *         length) is greater than target; EQUAL if source (up to specified
   1.393 +     *         length) is equal to target; LESS if source (up to the specified
   1.394 +     *         length) is less  than target.
   1.395 +     * @deprecated ICU 2.6 use the overload with UErrorCode &
   1.396 +     */
   1.397 +    virtual EComparisonResult compare(const UnicodeString& source,
   1.398 +                                      const UnicodeString& target,
   1.399 +                                      int32_t length) const;
   1.400 +
   1.401 +    /**
   1.402 +     * Does the same thing as compare but limits the comparison to a specified
   1.403 +     * length
   1.404 +     * @param source the source string to be compared with.
   1.405 +     * @param target the string that is to be compared with the source string.
   1.406 +     * @param length the length the comparison is limited to
   1.407 +     * @param status possible error code
   1.408 +     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
   1.409 +     *         length) is greater than target; UCOL_EQUAL if source (up to specified
   1.410 +     *         length) is equal to target; UCOL_LESS if source (up to the specified
   1.411 +     *         length) is less  than target.
   1.412 +     * @stable ICU 2.6
   1.413 +     */
   1.414 +    virtual UCollationResult compare(const UnicodeString& source,
   1.415 +                                      const UnicodeString& target,
   1.416 +                                      int32_t length,
   1.417 +                                      UErrorCode &status) const = 0;
   1.418 +
   1.419 +    /**
   1.420 +     * The comparison function compares the character data stored in two
   1.421 +     * different string arrays. Returns information about whether a string array
   1.422 +     * is less than, greater than or equal to another string array.
   1.423 +     * <p>Example of use:
   1.424 +     * <pre>
   1.425 +     * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
   1.426 +     * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
   1.427 +     * .       UErrorCode status = U_ZERO_ERROR;
   1.428 +     * .       Collator *myCollation =
   1.429 +     * .                         Collator::createInstance(Locale::US, status);
   1.430 +     * .       if (U_FAILURE(status)) return;
   1.431 +     * .       myCollation->setStrength(Collator::PRIMARY);
   1.432 +     * .       // result would be Collator::EQUAL ("abc" == "ABC")
   1.433 +     * .       // (no primary difference between "abc" and "ABC")
   1.434 +     * .       Collator::EComparisonResult result =
   1.435 +     * .                             myCollation->compare(abc, 3, ABC, 3);
   1.436 +     * .       myCollation->setStrength(Collator::TERTIARY);
   1.437 +     * .       // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
   1.438 +     * .       // (with tertiary difference between "abc" and "ABC")
   1.439 +     * .       result = myCollation->compare(abc, 3, ABC, 3);
   1.440 +     * </pre>
   1.441 +     * @param source the source string array to be compared with.
   1.442 +     * @param sourceLength the length of the source string array.  If this value
   1.443 +     *        is equal to -1, the string array is null-terminated.
   1.444 +     * @param target the string that is to be compared with the source string.
   1.445 +     * @param targetLength the length of the target string array.  If this value
   1.446 +     *        is equal to -1, the string array is null-terminated.
   1.447 +     * @return Returns a byte value. GREATER if source is greater than target;
   1.448 +     *         EQUAL if source is equal to target; LESS if source is less than
   1.449 +     *         target
   1.450 +     * @deprecated ICU 2.6 use the overload with UErrorCode &
   1.451 +     */
   1.452 +    virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
   1.453 +                                      const UChar* target, int32_t targetLength)
   1.454 +                                      const;
   1.455 +
   1.456 +    /**
   1.457 +     * The comparison function compares the character data stored in two
   1.458 +     * different string arrays. Returns information about whether a string array
   1.459 +     * is less than, greater than or equal to another string array.
   1.460 +     * @param source the source string array to be compared with.
   1.461 +     * @param sourceLength the length of the source string array.  If this value
   1.462 +     *        is equal to -1, the string array is null-terminated.
   1.463 +     * @param target the string that is to be compared with the source string.
   1.464 +     * @param targetLength the length of the target string array.  If this value
   1.465 +     *        is equal to -1, the string array is null-terminated.
   1.466 +     * @param status possible error code
   1.467 +     * @return Returns an enum value. UCOL_GREATER if source is greater
   1.468 +     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
   1.469 +     * than target
   1.470 +     * @stable ICU 2.6
   1.471 +     */
   1.472 +    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
   1.473 +                                      const UChar* target, int32_t targetLength,
   1.474 +                                      UErrorCode &status) const = 0;
   1.475 +
   1.476 +    /**
   1.477 +     * Compares two strings using the Collator.
   1.478 +     * Returns whether the first one compares less than/equal to/greater than
   1.479 +     * the second one.
   1.480 +     * This version takes UCharIterator input.
   1.481 +     * @param sIter the first ("source") string iterator
   1.482 +     * @param tIter the second ("target") string iterator
   1.483 +     * @param status ICU status
   1.484 +     * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
   1.485 +     * @stable ICU 4.2
   1.486 +     */
   1.487 +    virtual UCollationResult compare(UCharIterator &sIter,
   1.488 +                                     UCharIterator &tIter,
   1.489 +                                     UErrorCode &status) const;
   1.490 +
   1.491 +    /**
   1.492 +     * Compares two UTF-8 strings using the Collator.
   1.493 +     * Returns whether the first one compares less than/equal to/greater than
   1.494 +     * the second one.
   1.495 +     * This version takes UTF-8 input.
   1.496 +     * Note that a StringPiece can be implicitly constructed
   1.497 +     * from a std::string or a NUL-terminated const char * string.
   1.498 +     * @param source the first UTF-8 string
   1.499 +     * @param target the second UTF-8 string
   1.500 +     * @param status ICU status
   1.501 +     * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
   1.502 +     * @stable ICU 4.2
   1.503 +     */
   1.504 +    virtual UCollationResult compareUTF8(const StringPiece &source,
   1.505 +                                         const StringPiece &target,
   1.506 +                                         UErrorCode &status) const;
   1.507 +
   1.508 +    /**
   1.509 +     * Transforms the string into a series of characters that can be compared
   1.510 +     * with CollationKey::compareTo. It is not possible to restore the original
   1.511 +     * string from the chars in the sort key.  The generated sort key handles
   1.512 +     * only a limited number of ignorable characters.
   1.513 +     * <p>Use CollationKey::equals or CollationKey::compare to compare the
   1.514 +     * generated sort keys.
   1.515 +     * If the source string is null, a null collation key will be returned.
   1.516 +     * @param source the source string to be transformed into a sort key.
   1.517 +     * @param key the collation key to be filled in
   1.518 +     * @param status the error code status.
   1.519 +     * @return the collation key of the string based on the collation rules.
   1.520 +     * @see CollationKey#compare
   1.521 +     * @stable ICU 2.0
   1.522 +     */
   1.523 +    virtual CollationKey& getCollationKey(const UnicodeString&  source,
   1.524 +                                          CollationKey& key,
   1.525 +                                          UErrorCode& status) const = 0;
   1.526 +
   1.527 +    /**
   1.528 +     * Transforms the string into a series of characters that can be compared
   1.529 +     * with CollationKey::compareTo. It is not possible to restore the original
   1.530 +     * string from the chars in the sort key.  The generated sort key handles
   1.531 +     * only a limited number of ignorable characters.
   1.532 +     * <p>Use CollationKey::equals or CollationKey::compare to compare the
   1.533 +     * generated sort keys.
   1.534 +     * <p>If the source string is null, a null collation key will be returned.
   1.535 +     * @param source the source string to be transformed into a sort key.
   1.536 +     * @param sourceLength length of the source string
   1.537 +     * @param key the collation key to be filled in
   1.538 +     * @param status the error code status.
   1.539 +     * @return the collation key of the string based on the collation rules.
   1.540 +     * @see CollationKey#compare
   1.541 +     * @stable ICU 2.0
   1.542 +     */
   1.543 +    virtual CollationKey& getCollationKey(const UChar*source,
   1.544 +                                          int32_t sourceLength,
   1.545 +                                          CollationKey& key,
   1.546 +                                          UErrorCode& status) const = 0;
   1.547 +    /**
   1.548 +     * Generates the hash code for the collation object
   1.549 +     * @stable ICU 2.0
   1.550 +     */
   1.551 +    virtual int32_t hashCode(void) const = 0;
   1.552 +
   1.553 +    /**
   1.554 +     * Gets the locale of the Collator
   1.555 +     *
   1.556 +     * @param type can be either requested, valid or actual locale. For more
   1.557 +     *             information see the definition of ULocDataLocaleType in
   1.558 +     *             uloc.h
   1.559 +     * @param status the error code status.
   1.560 +     * @return locale where the collation data lives. If the collator
   1.561 +     *         was instantiated from rules, locale is empty.
   1.562 +     * @deprecated ICU 2.8 This API is under consideration for revision
   1.563 +     * in ICU 3.0.
   1.564 +     */
   1.565 +    virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
   1.566 +
   1.567 +    /**
   1.568 +     * Convenience method for comparing two strings based on the collation rules.
   1.569 +     * @param source the source string to be compared with.
   1.570 +     * @param target the target string to be compared with.
   1.571 +     * @return true if the first string is greater than the second one,
   1.572 +     *         according to the collation rules. false, otherwise.
   1.573 +     * @see Collator#compare
   1.574 +     * @stable ICU 2.0
   1.575 +     */
   1.576 +    UBool greater(const UnicodeString& source, const UnicodeString& target)
   1.577 +                  const;
   1.578 +
   1.579 +    /**
   1.580 +     * Convenience method for comparing two strings based on the collation rules.
   1.581 +     * @param source the source string to be compared with.
   1.582 +     * @param target the target string to be compared with.
   1.583 +     * @return true if the first string is greater than or equal to the second
   1.584 +     *         one, according to the collation rules. false, otherwise.
   1.585 +     * @see Collator#compare
   1.586 +     * @stable ICU 2.0
   1.587 +     */
   1.588 +    UBool greaterOrEqual(const UnicodeString& source,
   1.589 +                         const UnicodeString& target) const;
   1.590 +
   1.591 +    /**
   1.592 +     * Convenience method for comparing two strings based on the collation rules.
   1.593 +     * @param source the source string to be compared with.
   1.594 +     * @param target the target string to be compared with.
   1.595 +     * @return true if the strings are equal according to the collation rules.
   1.596 +     *         false, otherwise.
   1.597 +     * @see Collator#compare
   1.598 +     * @stable ICU 2.0
   1.599 +     */
   1.600 +    UBool equals(const UnicodeString& source, const UnicodeString& target) const;
   1.601 +
   1.602 +    /**
   1.603 +     * Determines the minimum strength that will be used in comparison or
   1.604 +     * transformation.
   1.605 +     * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
   1.606 +     * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
   1.607 +     * are ignored.
   1.608 +     * @return the current comparison level.
   1.609 +     * @see Collator#setStrength
   1.610 +     * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
   1.611 +     */
   1.612 +    virtual ECollationStrength getStrength(void) const;
   1.613 +
   1.614 +    /**
   1.615 +     * Sets the minimum strength to be used in comparison or transformation.
   1.616 +     * <p>Example of use:
   1.617 +     * <pre>
   1.618 +     *  \code
   1.619 +     *  UErrorCode status = U_ZERO_ERROR;
   1.620 +     *  Collator*myCollation = Collator::createInstance(Locale::US, status);
   1.621 +     *  if (U_FAILURE(status)) return;
   1.622 +     *  myCollation->setStrength(Collator::PRIMARY);
   1.623 +     *  // result will be "abc" == "ABC"
   1.624 +     *  // tertiary differences will be ignored
   1.625 +     *  Collator::ComparisonResult result = myCollation->compare("abc", "ABC");
   1.626 +     * \endcode
   1.627 +     * </pre>
   1.628 +     * @see Collator#getStrength
   1.629 +     * @param newStrength the new comparison level.
   1.630 +     * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
   1.631 +     */
   1.632 +    virtual void setStrength(ECollationStrength newStrength);
   1.633 +
   1.634 +    /**
   1.635 +     * Retrieves the reordering codes for this collator.
   1.636 +     * @param dest The array to fill with the script ordering.
   1.637 +     * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
   1.638 +     *  will only return the length of the result without writing any of it (pre-flighting).
   1.639 +     * @param status A reference to an error code value, which must not indicate
   1.640 +     * a failure before the function call.
   1.641 +     * @return The length of the script ordering array.
   1.642 +     * @see ucol_setReorderCodes
   1.643 +     * @see Collator#getEquivalentReorderCodes
   1.644 +     * @see Collator#setReorderCodes
   1.645 +     * @see UScriptCode
   1.646 +     * @see UColReorderCode
   1.647 +     * @stable ICU 4.8
   1.648 +     */
   1.649 +     virtual int32_t getReorderCodes(int32_t *dest,
   1.650 +                                     int32_t destCapacity,
   1.651 +                                     UErrorCode& status) const;
   1.652 +
   1.653 +    /**
   1.654 +     * Sets the ordering of scripts for this collator.
   1.655 +     *
   1.656 +     * <p>The reordering codes are a combination of script codes and reorder codes.
   1.657 +     * @param reorderCodes An array of script codes in the new order. This can be NULL if the 
   1.658 +     * length is also set to 0. An empty array will clear any reordering codes on the collator.
   1.659 +     * @param reorderCodesLength The length of reorderCodes.
   1.660 +     * @param status error code
   1.661 +     * @see Collator#getReorderCodes
   1.662 +     * @see Collator#getEquivalentReorderCodes
   1.663 +     * @see UScriptCode
   1.664 +     * @see UColReorderCode
   1.665 +     * @stable ICU 4.8 
   1.666 +     */
   1.667 +     virtual void setReorderCodes(const int32_t* reorderCodes,
   1.668 +                                  int32_t reorderCodesLength,
   1.669 +                                  UErrorCode& status) ;
   1.670 +
   1.671 +    /**
   1.672 +     * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
   1.673 +     * codes will be grouped and must reorder together.
   1.674 +     * @param reorderCode The reorder code to determine equivalence for.
   1.675 +     * @param dest The array to fill with the script equivalence reordering codes.
   1.676 +     * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
   1.677 +     * function will only return the length of the result without writing any of it
   1.678 +     * (pre-flighting).
   1.679 +     * @param status A reference to an error code value, which must not indicate
   1.680 +     * a failure before the function call.
   1.681 +     * @return The length of the reordering code equivalence array.
   1.682 +     * @see ucol_setReorderCodes
   1.683 +     * @see Collator#getReorderCodes
   1.684 +     * @see Collator#setReorderCodes
   1.685 +     * @see UScriptCode
   1.686 +     * @see UColReorderCode
   1.687 +     * @stable ICU 4.8
   1.688 +     */
   1.689 +    static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
   1.690 +                                int32_t* dest,
   1.691 +                                int32_t destCapacity,
   1.692 +                                UErrorCode& status);
   1.693 +
   1.694 +    /**
   1.695 +     * Get name of the object for the desired Locale, in the desired language
   1.696 +     * @param objectLocale must be from getAvailableLocales
   1.697 +     * @param displayLocale specifies the desired locale for output
   1.698 +     * @param name the fill-in parameter of the return value
   1.699 +     * @return display-able name of the object for the object locale in the
   1.700 +     *         desired language
   1.701 +     * @stable ICU 2.0
   1.702 +     */
   1.703 +    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
   1.704 +                                         const Locale& displayLocale,
   1.705 +                                         UnicodeString& name);
   1.706 +
   1.707 +    /**
   1.708 +    * Get name of the object for the desired Locale, in the language of the
   1.709 +    * default locale.
   1.710 +    * @param objectLocale must be from getAvailableLocales
   1.711 +    * @param name the fill-in parameter of the return value
   1.712 +    * @return name of the object for the desired locale in the default language
   1.713 +    * @stable ICU 2.0
   1.714 +    */
   1.715 +    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
   1.716 +                                         UnicodeString& name);
   1.717 +
   1.718 +    /**
   1.719 +     * Get the set of Locales for which Collations are installed.
   1.720 +     *
   1.721 +     * <p>Note this does not include locales supported by registered collators.
   1.722 +     * If collators might have been registered, use the overload of getAvailableLocales
   1.723 +     * that returns a StringEnumeration.</p>
   1.724 +     *
   1.725 +     * @param count the output parameter of number of elements in the locale list
   1.726 +     * @return the list of available locales for which collations are installed
   1.727 +     * @stable ICU 2.0
   1.728 +     */
   1.729 +    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
   1.730 +
   1.731 +    /**
   1.732 +     * Return a StringEnumeration over the locales available at the time of the call,
   1.733 +     * including registered locales.  If a severe error occurs (such as out of memory
   1.734 +     * condition) this will return null. If there is no locale data, an empty enumeration
   1.735 +     * will be returned.
   1.736 +     * @return a StringEnumeration over the locales available at the time of the call
   1.737 +     * @stable ICU 2.6
   1.738 +     */
   1.739 +    static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
   1.740 +
   1.741 +    /**
   1.742 +     * Create a string enumerator of all possible keywords that are relevant to
   1.743 +     * collation. At this point, the only recognized keyword for this
   1.744 +     * service is "collation".
   1.745 +     * @param status input-output error code
   1.746 +     * @return a string enumeration over locale strings. The caller is
   1.747 +     * responsible for closing the result.
   1.748 +     * @stable ICU 3.0
   1.749 +     */
   1.750 +    static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
   1.751 +
   1.752 +    /**
   1.753 +     * Given a keyword, create a string enumeration of all values
   1.754 +     * for that keyword that are currently in use.
   1.755 +     * @param keyword a particular keyword as enumerated by
   1.756 +     * ucol_getKeywords. If any other keyword is passed in, status is set
   1.757 +     * to U_ILLEGAL_ARGUMENT_ERROR.
   1.758 +     * @param status input-output error code
   1.759 +     * @return a string enumeration over collation keyword values, or NULL
   1.760 +     * upon error. The caller is responsible for deleting the result.
   1.761 +     * @stable ICU 3.0
   1.762 +     */
   1.763 +    static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
   1.764 +
   1.765 +    /**
   1.766 +     * Given a key and a locale, returns an array of string values in a preferred
   1.767 +     * order that would make a difference. These are all and only those values where
   1.768 +     * the open (creation) of the service with the locale formed from the input locale
   1.769 +     * plus input keyword and that value has different behavior than creation with the
   1.770 +     * input locale alone.
   1.771 +     * @param keyword        one of the keys supported by this service.  For now, only
   1.772 +     *                      "collation" is supported.
   1.773 +     * @param locale        the locale
   1.774 +     * @param commonlyUsed  if set to true it will return only commonly used values
   1.775 +     *                      with the given locale in preferred order.  Otherwise,
   1.776 +     *                      it will return all the available values for the locale.
   1.777 +     * @param status ICU status
   1.778 +     * @return a string enumeration over keyword values for the given key and the locale.
   1.779 +     * @stable ICU 4.2
   1.780 +     */
   1.781 +    static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale,
   1.782 +                                                                    UBool commonlyUsed, UErrorCode& status);
   1.783 +
   1.784 +    /**
   1.785 +     * Return the functionally equivalent locale for the given
   1.786 +     * requested locale, with respect to given keyword, for the
   1.787 +     * collation service.  If two locales return the same result, then
   1.788 +     * collators instantiated for these locales will behave
   1.789 +     * equivalently.  The converse is not always true; two collators
   1.790 +     * may in fact be equivalent, but return different results, due to
   1.791 +     * internal details.  The return result has no other meaning than
   1.792 +     * that stated above, and implies nothing as to the relationship
   1.793 +     * between the two locales.  This is intended for use by
   1.794 +     * applications who wish to cache collators, or otherwise reuse
   1.795 +     * collators when possible.  The functional equivalent may change
   1.796 +     * over time.  For more information, please see the <a
   1.797 +     * href="http://icu-project.org/userguide/locale.html#services">
   1.798 +     * Locales and Services</a> section of the ICU User Guide.
   1.799 +     * @param keyword a particular keyword as enumerated by
   1.800 +     * ucol_getKeywords.
   1.801 +     * @param locale the requested locale
   1.802 +     * @param isAvailable reference to a fillin parameter that
   1.803 +     * indicates whether the requested locale was 'available' to the
   1.804 +     * collation service. A locale is defined as 'available' if it
   1.805 +     * physically exists within the collation locale data.
   1.806 +     * @param status reference to input-output error code
   1.807 +     * @return the functionally equivalent collation locale, or the root
   1.808 +     * locale upon error.
   1.809 +     * @stable ICU 3.0
   1.810 +     */
   1.811 +    static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
   1.812 +                                          UBool& isAvailable, UErrorCode& status);
   1.813 +
   1.814 +#if !UCONFIG_NO_SERVICE
   1.815 +    /**
   1.816 +     * Register a new Collator.  The collator will be adopted.
   1.817 +     * @param toAdopt the Collator instance to be adopted
   1.818 +     * @param locale the locale with which the collator will be associated
   1.819 +     * @param status the in/out status code, no special meanings are assigned
   1.820 +     * @return a registry key that can be used to unregister this collator
   1.821 +     * @stable ICU 2.6
   1.822 +     */
   1.823 +    static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
   1.824 +
   1.825 +    /**
   1.826 +     * Register a new CollatorFactory.  The factory will be adopted.
   1.827 +     * @param toAdopt the CollatorFactory instance to be adopted
   1.828 +     * @param status the in/out status code, no special meanings are assigned
   1.829 +     * @return a registry key that can be used to unregister this factory
   1.830 +     * @stable ICU 2.6
   1.831 +     */
   1.832 +    static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
   1.833 +
   1.834 +    /**
   1.835 +     * Unregister a previously-registered Collator or CollatorFactory
   1.836 +     * using the key returned from the register call.  Key becomes
   1.837 +     * invalid after a successful call and should not be used again.
   1.838 +     * The object corresponding to the key will be deleted.
   1.839 +     * @param key the registry key returned by a previous call to registerInstance or registerFactory
   1.840 +     * @param status the in/out status code, no special meanings are assigned
   1.841 +     * @return TRUE if the collator for the key was successfully unregistered
   1.842 +     * @stable ICU 2.6
   1.843 +     */
   1.844 +    static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
   1.845 +#endif /* UCONFIG_NO_SERVICE */
   1.846 +
   1.847 +    /**
   1.848 +     * Gets the version information for a Collator.
   1.849 +     * @param info the version # information, the result will be filled in
   1.850 +     * @stable ICU 2.0
   1.851 +     */
   1.852 +    virtual void getVersion(UVersionInfo info) const = 0;
   1.853 +
   1.854 +    /**
   1.855 +     * Returns a unique class ID POLYMORPHICALLY. Pure virtual method.
   1.856 +     * This method is to implement a simple version of RTTI, since not all C++
   1.857 +     * compilers support genuine RTTI. Polymorphic operator==() and clone()
   1.858 +     * methods call this method.
   1.859 +     * @return The class ID for this object. All objects of a given class have
   1.860 +     *         the same class ID.  Objects of other classes have different class
   1.861 +     *         IDs.
   1.862 +     * @stable ICU 2.0
   1.863 +     */
   1.864 +    virtual UClassID getDynamicClassID(void) const = 0;
   1.865 +
   1.866 +    /**
   1.867 +     * Universal attribute setter
   1.868 +     * @param attr attribute type
   1.869 +     * @param value attribute value
   1.870 +     * @param status to indicate whether the operation went on smoothly or
   1.871 +     *        there were errors
   1.872 +     * @stable ICU 2.2
   1.873 +     */
   1.874 +    virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
   1.875 +                              UErrorCode &status) = 0;
   1.876 +
   1.877 +    /**
   1.878 +     * Universal attribute getter
   1.879 +     * @param attr attribute type
   1.880 +     * @param status to indicate whether the operation went on smoothly or
   1.881 +     *        there were errors
   1.882 +     * @return attribute value
   1.883 +     * @stable ICU 2.2
   1.884 +     */
   1.885 +    virtual UColAttributeValue getAttribute(UColAttribute attr,
   1.886 +                                            UErrorCode &status) const = 0;
   1.887 +
   1.888 +    /**
   1.889 +     * Sets the variable top to a collation element value of a string supplied.
   1.890 +     * @param varTop one or more (if contraction) UChars to which the variable top should be set
   1.891 +     * @param len length of variable top string. If -1 it is considered to be zero terminated.
   1.892 +     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
   1.893 +     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
   1.894 +     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
   1.895 +     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
   1.896 +     * @stable ICU 2.0
   1.897 +     */
   1.898 +    virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0;
   1.899 +
   1.900 +    /**
   1.901 +     * Sets the variable top to a collation element value of a string supplied.
   1.902 +     * @param varTop a UnicodeString of size 1 or more (if contraction) of UChars to which the variable top should be set
   1.903 +     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
   1.904 +     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
   1.905 +     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
   1.906 +     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
   1.907 +     * @stable ICU 2.0
   1.908 +     */
   1.909 +    virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) = 0;
   1.910 +
   1.911 +    /**
   1.912 +     * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
   1.913 +     * Lower 16 bits are ignored.
   1.914 +     * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
   1.915 +     * @param status error code (not changed by function)
   1.916 +     * @stable ICU 2.0
   1.917 +     */
   1.918 +    virtual void setVariableTop(uint32_t varTop, UErrorCode &status) = 0;
   1.919 +
   1.920 +    /**
   1.921 +     * Gets the variable top value of a Collator.
   1.922 +     * Lower 16 bits are undefined and should be ignored.
   1.923 +     * @param status error code (not changed by function). If error code is set, the return value is undefined.
   1.924 +     * @stable ICU 2.0
   1.925 +     */
   1.926 +    virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
   1.927 +
   1.928 +    /**
   1.929 +     * Get a UnicodeSet that contains all the characters and sequences
   1.930 +     * tailored in this collator.
   1.931 +     * @param status      error code of the operation
   1.932 +     * @return a pointer to a UnicodeSet object containing all the
   1.933 +     *         code points and sequences that may sort differently than
   1.934 +     *         in the UCA. The object must be disposed of by using delete
   1.935 +     * @stable ICU 2.4
   1.936 +     */
   1.937 +    virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
   1.938 +
   1.939 +    /**
   1.940 +     * Same as clone().
   1.941 +     * The base class implementation simply calls clone().
   1.942 +     * @return a copy of this object, owned by the caller
   1.943 +     * @see clone()
   1.944 +     * @deprecated ICU 50 no need to have two methods for cloning
   1.945 +     */
   1.946 +    virtual Collator* safeClone(void) const;
   1.947 +
   1.948 +    /**
   1.949 +     * Get the sort key as an array of bytes from a UnicodeString.
   1.950 +     * Sort key byte arrays are zero-terminated and can be compared using
   1.951 +     * strcmp().
   1.952 +     * @param source string to be processed.
   1.953 +     * @param result buffer to store result in. If NULL, number of bytes needed
   1.954 +     *        will be returned.
   1.955 +     * @param resultLength length of the result buffer. If it is not large enough, the
   1.956 +     *        buffer will be filled to capacity.
   1.957 +     * @return Number of bytes needed for storing the sort key
   1.958 +     * @stable ICU 2.2
   1.959 +     */
   1.960 +    virtual int32_t getSortKey(const UnicodeString& source,
   1.961 +                              uint8_t* result,
   1.962 +                              int32_t resultLength) const = 0;
   1.963 +
   1.964 +    /**
   1.965 +     * Get the sort key as an array of bytes from a UChar buffer.
   1.966 +     * Sort key byte arrays are zero-terminated and can be compared using
   1.967 +     * strcmp().
   1.968 +     * @param source string to be processed.
   1.969 +     * @param sourceLength length of string to be processed.
   1.970 +     *        If -1, the string is 0 terminated and length will be decided by the
   1.971 +     *        function.
   1.972 +     * @param result buffer to store result in. If NULL, number of bytes needed
   1.973 +     *        will be returned.
   1.974 +     * @param resultLength length of the result buffer. If it is not large enough, the
   1.975 +     *        buffer will be filled to capacity.
   1.976 +     * @return Number of bytes needed for storing the sort key
   1.977 +     * @stable ICU 2.2
   1.978 +     */
   1.979 +    virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
   1.980 +                               uint8_t*result, int32_t resultLength) const = 0;
   1.981 +
   1.982 +    /**
   1.983 +     * Produce a bound for a given sortkey and a number of levels.
   1.984 +     * Return value is always the number of bytes needed, regardless of
   1.985 +     * whether the result buffer was big enough or even valid.<br>
   1.986 +     * Resulting bounds can be used to produce a range of strings that are
   1.987 +     * between upper and lower bounds. For example, if bounds are produced
   1.988 +     * for a sortkey of string "smith", strings between upper and lower
   1.989 +     * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
   1.990 +     * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
   1.991 +     * is produced, strings matched would be as above. However, if bound
   1.992 +     * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
   1.993 +     * also match "Smithsonian" and similar.<br>
   1.994 +     * For more on usage, see example in cintltst/capitst.c in procedure
   1.995 +     * TestBounds.
   1.996 +     * Sort keys may be compared using <TT>strcmp</TT>.
   1.997 +     * @param source The source sortkey.
   1.998 +     * @param sourceLength The length of source, or -1 if null-terminated.
   1.999 +     *                     (If an unmodified sortkey is passed, it is always null
  1.1000 +     *                      terminated).
  1.1001 +     * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
  1.1002 +     *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that
  1.1003 +     *                  produces upper bound that matches strings of the same length
  1.1004 +     *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the
  1.1005 +     *                  same starting substring as the source string.
  1.1006 +     * @param noOfLevels  Number of levels required in the resulting bound (for most
  1.1007 +     *                    uses, the recommended value is 1). See users guide for
  1.1008 +     *                    explanation on number of levels a sortkey can have.
  1.1009 +     * @param result A pointer to a buffer to receive the resulting sortkey.
  1.1010 +     * @param resultLength The maximum size of result.
  1.1011 +     * @param status Used for returning error code if something went wrong. If the
  1.1012 +     *               number of levels requested is higher than the number of levels
  1.1013 +     *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
  1.1014 +     *               issued.
  1.1015 +     * @return The size needed to fully store the bound.
  1.1016 +     * @see ucol_keyHashCode
  1.1017 +     * @stable ICU 2.1
  1.1018 +     */
  1.1019 +    static int32_t U_EXPORT2 getBound(const uint8_t       *source,
  1.1020 +            int32_t             sourceLength,
  1.1021 +            UColBoundMode       boundType,
  1.1022 +            uint32_t            noOfLevels,
  1.1023 +            uint8_t             *result,
  1.1024 +            int32_t             resultLength,
  1.1025 +            UErrorCode          &status);
  1.1026 +
  1.1027 +
  1.1028 +protected:
  1.1029 +
  1.1030 +    // Collator protected constructors -------------------------------------
  1.1031 +
  1.1032 +    /**
  1.1033 +    * Default constructor.
  1.1034 +    * Constructor is different from the old default Collator constructor.
  1.1035 +    * The task for determining the default collation strength and normalization
  1.1036 +    * mode is left to the child class.
  1.1037 +    * @stable ICU 2.0
  1.1038 +    */
  1.1039 +    Collator();
  1.1040 +
  1.1041 +#ifndef U_HIDE_DEPRECATED_API
  1.1042 +    /**
  1.1043 +    * Constructor.
  1.1044 +    * Empty constructor, does not handle the arguments.
  1.1045 +    * This constructor is done for backward compatibility with 1.7 and 1.8.
  1.1046 +    * The task for handling the argument collation strength and normalization
  1.1047 +    * mode is left to the child class.
  1.1048 +    * @param collationStrength collation strength
  1.1049 +    * @param decompositionMode
  1.1050 +    * @deprecated ICU 2.4. Subclasses should use the default constructor
  1.1051 +    * instead and handle the strength and normalization mode themselves.
  1.1052 +    */
  1.1053 +    Collator(UCollationStrength collationStrength,
  1.1054 +             UNormalizationMode decompositionMode);
  1.1055 +#endif  /* U_HIDE_DEPRECATED_API */
  1.1056 +
  1.1057 +    /**
  1.1058 +    * Copy constructor.
  1.1059 +    * @param other Collator object to be copied from
  1.1060 +    * @stable ICU 2.0
  1.1061 +    */
  1.1062 +    Collator(const Collator& other);
  1.1063 +
  1.1064 +    // Collator protected methods -----------------------------------------
  1.1065 +
  1.1066 +
  1.1067 +   /**
  1.1068 +    * Used internally by registration to define the requested, valid and actual locales.
  1.1069 +    * @param requestedLocale the requested locale
  1.1070 +    * @param validLocale the valid locale
  1.1071 +    * @param actualLocale the actual locale
  1.1072 +    * @internal
  1.1073 +    */
  1.1074 +    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
  1.1075 +
  1.1076 +public:
  1.1077 +#if !UCONFIG_NO_SERVICE
  1.1078 +#ifndef U_HIDE_INTERNAL_API
  1.1079 +    /**
  1.1080 +     * used only by ucol_open, not for public use
  1.1081 +     * @internal
  1.1082 +     */
  1.1083 +    static UCollator* createUCollator(const char* loc, UErrorCode* status);
  1.1084 +#endif  /* U_HIDE_INTERNAL_API */
  1.1085 +#endif
  1.1086 +
  1.1087 +    /** Get the short definition string for a collator. This internal API harvests the collator's
  1.1088 +     *  locale and the attribute set and produces a string that can be used for opening 
  1.1089 +     *  a collator with the same properties using the ucol_openFromShortString API.
  1.1090 +     *  This string will be normalized.
  1.1091 +     *  The structure and the syntax of the string is defined in the "Naming collators"
  1.1092 +     *  section of the users guide: 
  1.1093 +     *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
  1.1094 +     *  This function supports preflighting.
  1.1095 +     * 
  1.1096 +     *  This is internal, and intended to be used with delegate converters.
  1.1097 +     *
  1.1098 +     *  @param locale a locale that will appear as a collators locale in the resulting
  1.1099 +     *                short string definition. If NULL, the locale will be harvested 
  1.1100 +     *                from the collator.
  1.1101 +     *  @param buffer space to hold the resulting string
  1.1102 +     *  @param capacity capacity of the buffer
  1.1103 +     *  @param status for returning errors. All the preflighting errors are featured
  1.1104 +     *  @return length of the resulting string
  1.1105 +     *  @see ucol_openFromShortString
  1.1106 +     *  @see ucol_normalizeShortDefinitionString
  1.1107 +     *  @see ucol_getShortDefinitionString
  1.1108 +     *  @internal
  1.1109 +     */
  1.1110 +    virtual int32_t internalGetShortDefinitionString(const char *locale,
  1.1111 +                                                     char *buffer,
  1.1112 +                                                     int32_t capacity,
  1.1113 +                                                     UErrorCode &status) const;
  1.1114 +private:
  1.1115 +    /**
  1.1116 +     * Assignment operator. Private for now.
  1.1117 +     * @internal
  1.1118 +     */
  1.1119 +    Collator& operator=(const Collator& other);
  1.1120 +
  1.1121 +    friend class CFactory;
  1.1122 +    friend class SimpleCFactory;
  1.1123 +    friend class ICUCollatorFactory;
  1.1124 +    friend class ICUCollatorService;
  1.1125 +    static Collator* makeInstance(const Locale& desiredLocale,
  1.1126 +                                  UErrorCode& status);
  1.1127 +
  1.1128 +    // Collator private data members ---------------------------------------
  1.1129 +
  1.1130 +    /*
  1.1131 +    synwee : removed as attributes to be handled by child class
  1.1132 +    UCollationStrength  strength;
  1.1133 +    Normalizer::EMode  decmp;
  1.1134 +    */
  1.1135 +    /* This is useless information */
  1.1136 +/*  static const UVersionInfo fVersion;*/
  1.1137 +};
  1.1138 +
  1.1139 +#if !UCONFIG_NO_SERVICE
  1.1140 +/**
  1.1141 + * A factory, used with registerFactory, that creates multiple collators and provides
  1.1142 + * display names for them.  A factory supports some number of locales-- these are the
  1.1143 + * locales for which it can create collators.  The factory can be visible, in which
  1.1144 + * case the supported locales will be enumerated by getAvailableLocales, or invisible,
  1.1145 + * in which case they are not.  Invisible locales are still supported, they are just not
  1.1146 + * listed by getAvailableLocales.
  1.1147 + * <p>
  1.1148 + * If standard locale display names are sufficient, Collator instances can
  1.1149 + * be registered using registerInstance instead.</p>
  1.1150 + * <p>
  1.1151 + * Note: if the collators are to be used from C APIs, they must be instances
  1.1152 + * of RuleBasedCollator.</p>
  1.1153 + *
  1.1154 + * @stable ICU 2.6
  1.1155 + */
  1.1156 +class U_I18N_API CollatorFactory : public UObject {
  1.1157 +public:
  1.1158 +
  1.1159 +    /**
  1.1160 +     * Destructor.  Declared virtual so that subclasses are destroyed correctly through a CollatorFactory pointer.
  1.1161 +     * @stable ICU 3.0
  1.1162 +     */
  1.1163 +    virtual ~CollatorFactory();
  1.1164 +
  1.1165 +    /**
  1.1166 +     * Return true if this factory is visible.  Default is true; subclasses may override.
  1.1167 +     * If not visible, the locales supported by this factory will not
  1.1168 +     * be listed by getAvailableLocales.
  1.1169 +     * @return true if the factory is visible, false otherwise.
  1.1170 +     * @stable ICU 2.6
  1.1171 +     */
  1.1172 +    virtual UBool visible(void) const;
  1.1173 +
  1.1174 +    /**
  1.1175 +     * Return a collator for the provided locale.  If the locale
  1.1176 +     * is not supported, return NULL.  Pure virtual: every concrete factory must implement this.
  1.1177 +     * @param loc the locale identifying the collator to be created.
  1.1178 +     * @return a new collator if the locale is supported, otherwise NULL.
  1.1179 +     * @stable ICU 2.6
  1.1180 +     */
  1.1181 +    virtual Collator* createCollator(const Locale& loc) = 0;
  1.1182 +
  1.1183 +    /**
  1.1184 +     * Return the name of the collator for the objectLocale, localized for the displayLocale.
  1.1185 +     * If objectLocale is not supported, or the factory is not visible, set the result string
  1.1186 +     * to bogus.  Not pure virtual: subclasses may override it but are not required to.
  1.1187 +     * @param objectLocale the locale identifying the collator
  1.1188 +     * @param displayLocale the locale for which the display name of the collator should be localized
  1.1189 +     * @param result an output parameter for the display name, set to bogus if not supported.
  1.1190 +     * @return the display name (NOTE(review): presumably a reference to result -- confirm against the implementation)
  1.1191 +     * @stable ICU 2.6
  1.1192 +     */
  1.1193 +    virtual  UnicodeString& getDisplayName(const Locale& objectLocale,
  1.1194 +                                           const Locale& displayLocale,
  1.1195 +                                           UnicodeString& result);
  1.1196 +
  1.1197 +    /**
  1.1198 +     * Return an array of all the locale names directly supported by this factory.
  1.1199 +     * The number of names is returned in count.  This array is owned by the factory.
  1.1200 +     * Its contents must never change.  Pure virtual: every concrete factory must implement this.
  1.1201 +     * @param count output parameter for the number of locales supported by the factory
  1.1202 +     * @param status the in/out error code
  1.1203 +     * @return a pointer to an array of count UnicodeStrings.
  1.1204 +     * @stable ICU 2.6
  1.1205 +     */
  1.1206 +    virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0;
  1.1207 +};
  1.1208 +#endif /* UCONFIG_NO_SERVICE */
  1.1209 +
  1.1210 +// Collator inline methods -----------------------------------------------
  1.1211 +
  1.1212 +U_NAMESPACE_END
  1.1213 +
  1.1214 +#endif /* #if !UCONFIG_NO_COLLATION */
  1.1215 +
  1.1216 +#endif

mercurial