intl/icu/source/i18n/unicode/tblcoll.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/unicode/tblcoll.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,860 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +* Copyright (C) 1996-2013, International Business Machines Corporation and
     1.7 +* others. All Rights Reserved.
     1.8 +******************************************************************************
     1.9 +*/
    1.10 +
    1.11 +/**
    1.12 + * \file 
    1.13 + * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
    1.14 + */
    1.15 +
    1.16 +/**
    1.17 +* File tblcoll.h
    1.18 +*
    1.19 +* Created by: Helena Shih
    1.20 +*
    1.21 +* Modification History:
    1.22 +*
    1.23 +*  Date        Name        Description
    1.24 +*  2/5/97      aliu        Added streamIn and streamOut methods.  Added
    1.25 +*                          constructor which reads RuleBasedCollator object from
    1.26 +*                          a binary file.  Added writeToFile method which streams
    1.27 +*                          RuleBasedCollator out to a binary file.  The streamIn
    1.28 +*                          and streamOut methods use istream and ostream objects
    1.29 +*                          in binary mode.
    1.30 +*  2/12/97     aliu        Modified to use TableCollationData sub-object to
    1.31 +*                          hold invariant data.
    1.32 +*  2/13/97     aliu        Moved several methods into this class from Collation.
    1.33 +*                          Added a private RuleBasedCollator(Locale&) constructor,
    1.34 +*                          to be used by Collator::createDefault().  General
    1.35 +*                          clean up.
    1.36 +*  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
    1.37 +*                          constructor and getDynamicClassID.
    1.38 +*  3/5/97      aliu        Modified constructFromFile() to add parameter
    1.39 +*                          specifying whether or not binary loading is to be
    1.40 +*                          attempted.  This is required for dynamic rule loading.
    1.41 +* 05/07/97     helena      Added memory allocation error detection.
    1.42 +*  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
    1.43 +*                          use MergeCollation::getPattern.
    1.44 +*  6/20/97     helena      Java class name change.
    1.45 +*  8/18/97     helena      Added internal API documentation.
    1.46 +* 09/03/97     helena      Added createCollationKeyValues().
    1.47 +* 02/10/98     damiba      Added compare with "length" parameter
    1.48 +* 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
    1.49 +* 04/23/99     stephen     Removed EDecompositionMode, merged with
    1.50 +*                          Normalizer::EMode
    1.51 +* 06/14/99     stephen     Removed kResourceBundleSuffix
    1.52 +* 11/02/99     helena      Collator performance enhancements.  Eliminates the
    1.53 +*                          UnicodeString construction and special case for NO_OP.
    1.54 +* 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
    1.55 +*                          internal state management.
    1.56 +* 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
    1.57 +*                          to implementation file.
    1.58 +* 01/29/01     synwee      Modified into a C++ wrapper which calls C API
    1.59 +*                          (ucol.h)
    1.60 +*/
    1.61 +
    1.62 +#ifndef TBLCOLL_H
    1.63 +#define TBLCOLL_H
    1.64 +
    1.65 +#include "unicode/utypes.h"
    1.66 +
    1.67 + 
    1.68 +#if !UCONFIG_NO_COLLATION
    1.69 +
    1.70 +#include "unicode/coll.h"
    1.71 +#include "unicode/ucol.h"
    1.72 +#include "unicode/sortkey.h"
    1.73 +#include "unicode/normlzr.h"
    1.74 +
    1.75 +U_NAMESPACE_BEGIN
    1.76 +
    1.77 +/**
    1.78 +* @stable ICU 2.0
    1.79 +*/
    1.80 +class StringSearch;
    1.81 +/**
    1.82 +* @stable ICU 2.0
    1.83 +*/
    1.84 +class CollationElementIterator;
    1.85 +
    1.86 +/**
    1.87 + * The RuleBasedCollator class provides the simple implementation of
    1.88 + * Collator, using data-driven tables. The user can create a customized
    1.89 + * table-based collation.
    1.90 + * <P>
    1.91 + * <em>Important: </em>The ICU collation service has been reimplemented 
    1.92 + * in order to achieve better performance and UCA compliance. 
    1.93 + * For details, see the 
    1.94 + * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
    1.95 + * collation design document</a>.
    1.96 + * <p>
    1.97 + * RuleBasedCollator is a thin C++ wrapper over the C implementation.
    1.98 + * <p>
    1.99 + * For more information about the collation service see 
   1.100 + * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
   1.101 + * <p>
   1.102 + * Collation service provides correct sorting orders for most locales supported in ICU. 
   1.103 + * If specific data for a locale is not available, the order eventually falls back
   1.104 + * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 
   1.105 + * <p>
   1.106 + * Sort ordering may be customized by providing your own set of rules. For more on
   1.107 + * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
   1.108 + * Collation customization</a> section of the users guide.
   1.109 + * <p>
   1.110 + * Note, RuleBasedCollator is not to be subclassed.
   1.111 + * @see        Collator
   1.112 + * @version    2.0 11/15/2001
   1.113 + */
   1.114 +class U_I18N_API RuleBasedCollator : public Collator
   1.115 +{
   1.116 +public:
   1.117 +
   1.118 +  // constructor -------------------------------------------------------------
   1.119 +
   1.120 +    /**
   1.121 +     * RuleBasedCollator constructor. This takes the table rules and builds a
   1.122 +     * collation table out of them. Please see RuleBasedCollator class
   1.123 +     * description for more details on the collation rule syntax.
   1.124 +     * @param rules the collation rules to build the collation table from.
   1.125 +     * @param status reporting a success or an error.
   1.126 +     * @see Locale
   1.127 +     * @stable ICU 2.0
   1.128 +     */
   1.129 +    RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
   1.130 +
   1.131 +    /**
   1.132 +     * RuleBasedCollator constructor. This takes the table rules and builds a
   1.133 +     * collation table out of them. Please see RuleBasedCollator class
   1.134 +     * description for more details on the collation rule syntax.
   1.135 +     * @param rules the collation rules to build the collation table from.
   1.136 +     * @param collationStrength default strength for comparison
   1.137 +     * @param status reporting a success or an error.
   1.138 +     * @see Locale
   1.139 +     * @stable ICU 2.0
   1.140 +     */
   1.141 +    RuleBasedCollator(const UnicodeString& rules,
   1.142 +                       ECollationStrength collationStrength,
   1.143 +                       UErrorCode& status);
   1.144 +
   1.145 +    /**
   1.146 +     * RuleBasedCollator constructor. This takes the table rules and builds a
   1.147 +     * collation table out of them. Please see RuleBasedCollator class
   1.148 +     * description for more details on the collation rule syntax.
   1.149 +     * @param rules the collation rules to build the collation table from.
   1.150 +     * @param decompositionMode the normalisation mode
   1.151 +     * @param status reporting a success or an error.
   1.152 +     * @see Locale
   1.153 +     * @stable ICU 2.0
   1.154 +     */
   1.155 +    RuleBasedCollator(const UnicodeString& rules,
   1.156 +                    UColAttributeValue decompositionMode,
   1.157 +                    UErrorCode& status);
   1.158 +
   1.159 +    /**
   1.160 +     * RuleBasedCollator constructor. This takes the table rules and builds a
   1.161 +     * collation table out of them. Please see RuleBasedCollator class
   1.162 +     * description for more details on the collation rule syntax.
   1.163 +     * @param rules the collation rules to build the collation table from.
   1.164 +     * @param collationStrength default strength for comparison
   1.165 +     * @param decompositionMode the normalisation mode
   1.166 +     * @param status reporting a success or an error.
   1.167 +     * @see Locale
   1.168 +     * @stable ICU 2.0
   1.169 +     */
   1.170 +    RuleBasedCollator(const UnicodeString& rules,
   1.171 +                    ECollationStrength collationStrength,
   1.172 +                    UColAttributeValue decompositionMode,
   1.173 +                    UErrorCode& status);
   1.174 +
   1.175 +    /**
   1.176 +     * Copy constructor.
   1.177 +     * @param other the RuleBasedCollator object to be copied
   1.178 +     * @see Locale
   1.179 +     * @stable ICU 2.0
   1.180 +     */
   1.181 +    RuleBasedCollator(const RuleBasedCollator& other);
   1.182 +
   1.183 +
   1.184 +    /** Opens a collator from a collator binary image created using
   1.185 +    *  cloneBinary. Binary image used in instantiation of the 
   1.186 +    *  collator remains owned by the user and should stay around for 
   1.187 +    *  the lifetime of the collator. The API also takes a base collator
   1.188 +    *  which usually should be UCA.
   1.189 +    *  @param bin binary image owned by the user and required through the
   1.190 +    *             lifetime of the collator
   1.191 +    *  @param length size of the image. If negative, the API will try to
   1.192 +    *                figure out the length of the image
   1.193 +    *  @param base fallback collator, usually UCA. Base is required to be
   1.194 +    *              present through the lifetime of the collator. Currently 
   1.195 +    *              it cannot be NULL.
   1.196 +    *  @param status for catching errors
   1.197 +    *  @return newly created collator
   1.198 +    *  @see cloneBinary
   1.199 +    *  @stable ICU 3.4
   1.200 +    */
   1.201 +    RuleBasedCollator(const uint8_t *bin, int32_t length, 
   1.202 +                    const RuleBasedCollator *base, 
   1.203 +                    UErrorCode &status);
   1.204 +    // destructor --------------------------------------------------------------
   1.205 +
   1.206 +    /**
   1.207 +     * Destructor.
   1.208 +     * @stable ICU 2.0
   1.209 +     */
   1.210 +    virtual ~RuleBasedCollator();
   1.211 +
   1.212 +    // public methods ----------------------------------------------------------
   1.213 +
   1.214 +    /**
   1.215 +     * Assignment operator.
   1.216 +     * @param other the RuleBasedCollator object to copy from.
   1.217 +     * @stable ICU 2.0
   1.218 +     */
   1.219 +    RuleBasedCollator& operator=(const RuleBasedCollator& other);
   1.220 +
   1.221 +    /**
   1.222 +     * Returns true if argument is the same as this object.
   1.223 +     * @param other Collator object to be compared.
   1.224 +     * @return true if argument is the same as this object.
   1.225 +     * @stable ICU 2.0
   1.226 +     */
   1.227 +    virtual UBool operator==(const Collator& other) const;
   1.228 +
   1.229 +    /**
   1.230 +     * Makes a copy of this object.
   1.231 +     * @return a copy of this object, owned by the caller
   1.232 +     * @stable ICU 2.0
   1.233 +     */
   1.234 +    virtual Collator* clone(void) const;
   1.235 +
   1.236 +    /**
   1.237 +     * Creates a collation element iterator for the source string. The caller of
   1.238 +     * this method is responsible for the memory management of the return
   1.239 +     * pointer.
   1.240 +     * @param source the string over which the CollationElementIterator will
   1.241 +     *        iterate.
   1.242 +     * @return the collation element iterator of the source string using this as
   1.243 +     *         the base Collator.
   1.244 +     * @stable ICU 2.2
   1.245 +     */
   1.246 +    virtual CollationElementIterator* createCollationElementIterator(
   1.247 +                                           const UnicodeString& source) const;
   1.248 +
   1.249 +    /**
   1.250 +     * Creates a collation element iterator for the source. The caller of this
   1.251 +     * method is responsible for the memory management of the returned pointer.
   1.252 +     * @param source the CharacterIterator which produces the characters over
   1.253 +     *        which the CollationElementIterator will iterate.
   1.254 +     * @return the collation element iterator of the source using this as the
   1.255 +     *         base Collator.
   1.256 +     * @stable ICU 2.2
   1.257 +     */
   1.258 +    virtual CollationElementIterator* createCollationElementIterator(
   1.259 +                                         const CharacterIterator& source) const;
   1.260 +
   1.261 +    // Make deprecated versions of Collator::compare() visible.
   1.262 +    using Collator::compare;
   1.263 +
   1.264 +    /**
   1.265 +    * The comparison function compares the character data stored in two
   1.266 +    * different strings. Returns information about whether a string is less 
   1.267 +    * than, greater than or equal to another string.
   1.268 +    * @param source the source string to be compared with.
   1.269 +    * @param target the string that is to be compared with the source string.
   1.270 +    * @param status possible error code
   1.271 +    * @return Returns an enum value. UCOL_GREATER if source is greater
   1.272 +    * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
   1.273 +    * than target
   1.274 +    * @stable ICU 2.6
   1.275 +    **/
   1.276 +    virtual UCollationResult compare(const UnicodeString& source,
   1.277 +                                      const UnicodeString& target,
   1.278 +                                      UErrorCode &status) const;
   1.279 +
   1.280 +    /**
   1.281 +    * Does the same thing as compare but limits the comparison to a specified 
   1.282 +    * length
   1.283 +    * @param source the source string to be compared with.
   1.284 +    * @param target the string that is to be compared with the source string.
   1.285 +    * @param length the length the comparison is limited to
   1.286 +    * @param status possible error code
   1.287 +    * @return Returns an enum value. UCOL_GREATER if source (up to the specified 
   1.288 +    *         length) is greater than target; UCOL_EQUAL if source (up to specified 
   1.289 +    *         length) is equal to target; UCOL_LESS if source (up to the specified 
   1.290 +    *         length) is less than target.
   1.291 +    * @stable ICU 2.6
   1.292 +    */
   1.293 +    virtual UCollationResult compare(const UnicodeString& source,
   1.294 +                                      const UnicodeString& target,
   1.295 +                                      int32_t length,
   1.296 +                                      UErrorCode &status) const;
   1.297 +
   1.298 +    /**
   1.299 +    * The comparison function compares the character data stored in two
   1.300 +    * different string arrays. Returns information about whether a string array 
   1.301 +    * is less than, greater than or equal to another string array.
   1.302 +    * @param source the source string array to be compared with.
   1.303 +    * @param sourceLength the length of the source string array.  If this value
   1.304 +    *        is equal to -1, the string array is null-terminated.
   1.305 +    * @param target the string that is to be compared with the source string.
   1.306 +    * @param targetLength the length of the target string array.  If this value
   1.307 +    *        is equal to -1, the string array is null-terminated.
   1.308 +    * @param status possible error code
   1.309 +    * @return Returns an enum value. UCOL_GREATER if source is greater
   1.310 +    * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
   1.311 +    * than target
   1.312 +    * @stable ICU 2.6
   1.313 +    */
   1.314 +    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
   1.315 +                                      const UChar* target, int32_t targetLength,
   1.316 +                                      UErrorCode &status) const;
   1.317 +
   1.318 +    /**
   1.319 +     * Compares two strings using the Collator.
   1.320 +     * Returns whether the first one compares less than/equal to/greater than
   1.321 +     * the second one.
   1.322 +     * This version takes UCharIterator input.
   1.323 +     * @param sIter the first ("source") string iterator
   1.324 +     * @param tIter the second ("target") string iterator
   1.325 +     * @param status ICU status
   1.326 +     * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
   1.327 +     * @stable ICU 4.2
   1.328 +     */
   1.329 +    virtual UCollationResult compare(UCharIterator &sIter,
   1.330 +                                     UCharIterator &tIter,
   1.331 +                                     UErrorCode &status) const;
   1.332 +
   1.333 +    /**
   1.334 +    * Transforms a specified region of the string into a series of characters
   1.335 +    * that can be compared with CollationKey.compare. Use a CollationKey when
   1.336 +    * you need to do repeated comparisons on the same string. For a single
   1.337 +    * comparison the compare method will be faster.
   1.338 +    * @param source the source string.
   1.339 +    * @param key the transformed key of the source string.
   1.340 +    * @param status the error code status.
   1.341 +    * @return the transformed key.
   1.342 +    * @see CollationKey
   1.343 +    * @stable ICU 2.0
   1.344 +    */
   1.345 +    virtual CollationKey& getCollationKey(const UnicodeString& source,
   1.346 +                                          CollationKey& key,
   1.347 +                                          UErrorCode& status) const;
   1.348 +
   1.349 +    /**
   1.350 +    * Transforms a specified region of the string into a series of characters
   1.351 +    * that can be compared with CollationKey.compare. Use a CollationKey when
   1.352 +    * you need to do repeated comparisons on the same string. For a single
   1.353 +    * comparison the compare method will be faster.
   1.354 +    * @param source the source string.
   1.355 +    * @param sourceLength the length of the source string.
   1.356 +    * @param key the transformed key of the source string.
   1.357 +    * @param status the error code status.
   1.358 +    * @return the transformed key.
   1.359 +    * @see CollationKey
   1.360 +    * @stable ICU 2.0
   1.361 +    */
   1.362 +    virtual CollationKey& getCollationKey(const UChar *source,
   1.363 +                                          int32_t sourceLength,
   1.364 +                                          CollationKey& key,
   1.365 +                                          UErrorCode& status) const;
   1.366 +
   1.367 +    /**
   1.368 +     * Generates the hash code for the rule-based collation object.
   1.369 +     * @return the hash code.
   1.370 +     * @stable ICU 2.0
   1.371 +     */
   1.372 +    virtual int32_t hashCode(void) const;
   1.373 +
   1.374 +    /**
   1.375 +    * Gets the locale of the Collator
   1.376 +    * @param type can be either requested, valid or actual locale. For more
   1.377 +    *             information see the definition of ULocDataLocaleType in
   1.378 +    *             uloc.h
   1.379 +    * @param status the error code status.
   1.380 +    * @return locale where the collation data lives. If the collator
   1.381 +    *         was instantiated from rules, locale is empty.
   1.382 +    * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
   1.383 +    */
   1.384 +    virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
   1.385 +
   1.386 +    /**
   1.387 +     * Gets the tailoring rules for this collator.
   1.388 +     * @return the collation tailoring from which this collator was created
   1.389 +     * @stable ICU 2.0
   1.390 +     */
   1.391 +    const UnicodeString& getRules(void) const;
   1.392 +
   1.393 +    /**
   1.394 +     * Gets the version information for a Collator.
   1.395 +     * @param info the version # information, the result will be filled in
   1.396 +     * @stable ICU 2.0
   1.397 +     */
   1.398 +    virtual void getVersion(UVersionInfo info) const;
   1.399 +
   1.400 +#ifndef U_HIDE_DEPRECATED_API 
   1.401 +    /**
   1.402 +     * Returns the maximum length of any expansion sequences that end with the
   1.403 +     * specified comparison order.
   1.404 +     *
   1.405 +     * This is specific to the kind of collation element values and sequences
   1.406 +     * returned by the CollationElementIterator.
   1.407 +     * Call CollationElementIterator::getMaxExpansion() instead.
   1.408 +     *
   1.409 +     * @param order a collation order returned by CollationElementIterator::previous
   1.410 +     *              or CollationElementIterator::next.
   1.411 +     * @return maximum size of the expansion sequences ending with the collation
   1.412 +     *         element, or 1 if the collation element does not occur at the end of
   1.413 +     *         any expansion sequence
   1.414 +     * @see CollationElementIterator#getMaxExpansion
   1.415 +     * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead.
   1.416 +     */
   1.417 +    int32_t getMaxExpansion(int32_t order) const;
   1.418 +#endif  /* U_HIDE_DEPRECATED_API */
   1.419 +
   1.420 +    /**
   1.421 +     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
   1.422 +     * method is to implement a simple version of RTTI, since not all C++
   1.423 +     * compilers support genuine RTTI. Polymorphic operator==() and clone()
   1.424 +     * methods call this method.
   1.425 +     * @return The class ID for this object. All objects of a given class have
   1.426 +     *         the same class ID. Objects of other classes have different class
   1.427 +     *         IDs.
   1.428 +     * @stable ICU 2.0
   1.429 +     */
   1.430 +    virtual UClassID getDynamicClassID(void) const;
   1.431 +
   1.432 +    /**
   1.433 +     * Returns the class ID for this class. This is useful only for comparing to
   1.434 +     * a return value from getDynamicClassID(). For example:
   1.435 +     * <pre>
   1.436 +     * Base* polymorphic_pointer = createPolymorphicObject();
   1.437 +     * if (polymorphic_pointer->getDynamicClassID() ==
   1.438 +     *                                          Derived::getStaticClassID()) ...
   1.439 +     * </pre>
   1.440 +     * @return The class ID for all objects of this class.
   1.441 +     * @stable ICU 2.0
   1.442 +     */
   1.443 +    static UClassID U_EXPORT2 getStaticClassID(void);
   1.444 +
   1.445 +#ifndef U_HIDE_DEPRECATED_API 
   1.446 +    /**
   1.447 +     * Do not use this method: The caller and the ICU library might use different heaps.
   1.448 +     * Use cloneBinary() instead which writes to caller-provided memory.
   1.449 +     *
   1.450 +     * Returns a binary format of this collator.
   1.451 +     * @param length Returns the length of the data, in bytes
   1.452 +     * @param status the error code status.
   1.453 +     * @return memory, owned by the caller, of size 'length' bytes.
   1.454 +     * @deprecated ICU 52. Use cloneBinary() instead.
   1.455 +     */
   1.456 +    uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
   1.457 +#endif  /* U_HIDE_DEPRECATED_API */
   1.458 +
   1.459 +    /** Creates a binary image of a collator. This binary image can be stored and 
   1.460 +    *  later used to instantiate a collator using ucol_openBinary.
   1.461 +    *  This API supports preflighting.
   1.462 +    *  @param buffer a fill-in buffer to receive the binary image
   1.463 +    *  @param capacity capacity of the destination buffer
   1.464 +    *  @param status for catching errors
   1.465 +    *  @return size of the image
   1.466 +    *  @see ucol_openBinary
   1.467 +    *  @stable ICU 3.4
   1.468 +    */
   1.469 +    int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
   1.470 +
   1.471 +    /**
   1.472 +     * Returns current rules. Delta defines whether full rules are returned or
   1.473 +     * just the tailoring.
   1.474 +     *
   1.475 +     * getRules(void) should normally be used instead.
   1.476 +     * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
   1.477 +     * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
   1.478 +     * @param buffer UnicodeString to store the result rules
   1.479 +     * @stable ICU 2.2
   1.480 +     * @see UCOL_FULL_RULES
   1.481 +     */
   1.482 +    void getRules(UColRuleOption delta, UnicodeString &buffer);
   1.483 +
   1.484 +    /**
   1.485 +     * Universal attribute setter
   1.486 +     * @param attr attribute type
   1.487 +     * @param value attribute value
   1.488 +     * @param status to indicate whether the operation went on smoothly or there were errors
   1.489 +     * @stable ICU 2.2
   1.490 +     */
   1.491 +    virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
   1.492 +                              UErrorCode &status);
   1.493 +
   1.494 +    /**
   1.495 +     * Universal attribute getter.
   1.496 +     * @param attr attribute type
   1.497 +     * @param status to indicate whether the operation went on smoothly or there were errors
   1.498 +     * @return attribute value
   1.499 +     * @stable ICU 2.2
   1.500 +     */
   1.501 +    virtual UColAttributeValue getAttribute(UColAttribute attr,
   1.502 +                                            UErrorCode &status) const;
   1.503 +
   1.504 +    /**
   1.505 +     * Sets the variable top to a collation element value of a string supplied.
   1.506 +     * @param varTop one or more (if contraction) UChars to which the variable top should be set
   1.507 +     * @param len length of variable top string. If -1 it is considered to be zero terminated.
   1.508 +     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
   1.509 +     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
   1.510 +     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
   1.511 +     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
   1.512 +     * @stable ICU 2.0
   1.513 +     */
   1.514 +    virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
   1.515 +
   1.516 +    /**
   1.517 +     * Sets the variable top to a collation element value of a string supplied.
   1.518 +     * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
   1.519 +     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
   1.520 +     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
   1.521 +     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
   1.522 +     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
   1.523 +     * @stable ICU 2.0
   1.524 +     */
   1.525 +    virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
   1.526 +
   1.527 +    /**
   1.528 +     * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
   1.529 +     * Lower 16 bits are ignored.
   1.530 +     * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
   1.531 +     * @param status error code (not changed by function)
   1.532 +     * @stable ICU 2.0
   1.533 +     */
   1.534 +    virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
   1.535 +
   1.536 +    /**
   1.537 +     * Gets the variable top value of a Collator.
   1.538 +     * Lower 16 bits are undefined and should be ignored.
   1.539 +     * @param status error code (not changed by function). If error code is set, the return value is undefined.
   1.540 +     * @stable ICU 2.0
   1.541 +     */
   1.542 +    virtual uint32_t getVariableTop(UErrorCode &status) const;
   1.543 +
   1.544 +    /**
   1.545 +     * Get an UnicodeSet that contains all the characters and sequences tailored in 
   1.546 +     * this collator.
   1.547 +     * @param status      error code of the operation
   1.548 +     * @return a pointer to a UnicodeSet object containing all the 
   1.549 +     *         code points and sequences that may sort differently than
   1.550 +     *         in the UCA. The object must be disposed of by using delete
   1.551 +     * @stable ICU 2.4
   1.552 +     */
   1.553 +    virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
   1.554 +
   1.555 +    /**
   1.556 +     * Get the sort key as an array of bytes from an UnicodeString.
   1.557 +     * @param source string to be processed.
   1.558 +     * @param result buffer to store result in. If NULL, number of bytes needed
   1.559 +     *        will be returned.
   1.560 +     * @param resultLength length of the result buffer. If it is not enough, the
   1.561 +     *        buffer will be filled to capacity.
   1.562 +     * @return Number of bytes needed for storing the sort key
   1.563 +     * @stable ICU 2.0
   1.564 +     */
   1.565 +    virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
   1.566 +                               int32_t resultLength) const;
   1.567 +
   1.568 +    /**
   1.569 +     * Get the sort key as an array of bytes from an UChar buffer.
   1.570 +     * @param source string to be processed.
   1.571 +     * @param sourceLength length of string to be processed. If -1, the string
   1.572 +     *        is 0 terminated and length will be decided by the function.
   1.573 +     * @param result buffer to store result in. If NULL, number of bytes needed
   1.574 +     *        will be returned.
   1.575 +     * @param resultLength length of the result buffer. If it is not enough, the
   1.576 +     *        buffer will be filled to capacity.
   1.577 +     * @return Number of bytes needed for storing the sort key
   1.578 +     * @stable ICU 2.2
   1.579 +     */
   1.580 +    virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
   1.581 +                               uint8_t *result, int32_t resultLength) const;
   1.582 +
   1.583 +    /**
   1.584 +     * Retrieves the reordering codes for this collator.
   1.585 +     * @param dest The array to fill with the script ordering.
   1.586 +     * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
   1.587 +     *  only returns the length of the result without writing any reorder codes (pre-flighting).
   1.588 +     * @param status A reference to an error code value, which must not indicate
   1.589 +     *  a failure before the function call.
   1.590 +     * @return The length of the script ordering array.
   1.591 +     * @see ucol_setReorderCodes
   1.592 +     * @see Collator#getEquivalentReorderCodes
   1.593 +     * @see Collator#setReorderCodes
   1.594 +     * @stable ICU 4.8
   1.595 +     */
   1.596 +     virtual int32_t getReorderCodes(int32_t *dest,
   1.597 +                                     int32_t destCapacity,
   1.598 +                                     UErrorCode& status) const;
   1.599 +
   1.600 +    /**
   1.601 +     * Sets the ordering of scripts for this collator.
   1.602 +     * @param reorderCodes An array of script codes in the new order. This may be NULL if
   1.603 +     *  reorderCodesLength is 0. An empty array clears any reordering codes on the collator.
   1.604 +     * @param reorderCodesLength The length of reorderCodes.
   1.605 +     * @param status error code
   1.606 +     * @see Collator#getReorderCodes
   1.607 +     * @see Collator#getEquivalentReorderCodes
   1.608 +     * @stable ICU 4.8
   1.609 +     */
   1.610 +     virtual void setReorderCodes(const int32_t* reorderCodes,
   1.611 +                                  int32_t reorderCodesLength,
   1.612 +                                  UErrorCode& status) ;
   1.613 +
   1.614 +    /**
   1.615 +     * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
   1.616 +     * codes are grouped and must be reordered together.
   1.617 +     * @param reorderCode The reorder code to determine equivalence for.
   1.618 +     * @param dest The array to fill with the equivalent reordering codes.
   1.619 +     * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
   1.620 +     * function only returns the length of the result without writing any reorder codes
   1.621 +     * (pre-flighting).
   1.622 +     * @param status A reference to an error code value, which must not indicate
   1.623 +     * a failure before the function call.
   1.624 +     * @return The length of the reordering code equivalence array.
   1.625 +     * @see ucol_setReorderCodes
   1.626 +     * @see Collator#getReorderCodes
   1.627 +     * @see Collator#setReorderCodes
   1.628 +     * @stable ICU 4.8
   1.629 +     */
   1.630 +    static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
   1.631 +                                int32_t* dest,
   1.632 +                                int32_t destCapacity,
   1.633 +                                UErrorCode& status);
   1.634 +
   1.635 +private:
   1.636 +
   1.637 +    // private static constants -----------------------------------------------
   1.638 +
   1.639 +    enum {
   1.640 +        /* needs a lookup in .commit() */
   1.641 +        CHARINDEX = 0x70000000,
   1.642 +        /* expand index follows */
   1.643 +        EXPANDCHARINDEX = 0x7E000000,
   1.644 +        /* contract index follows */
   1.645 +        CONTRACTCHARINDEX = 0x7F000000,
   1.646 +        /* unmapped character values */
   1.647 +        UNMAPPED = 0xFFFFFFFF,
   1.648 +        /* primary strength increment */
   1.649 +        PRIMARYORDERINCREMENT = 0x00010000,
   1.650 +        /* secondary strength increment */
   1.651 +        SECONDARYORDERINCREMENT = 0x00000100,
   1.652 +        /* tertiary strength increment */
   1.653 +        TERTIARYORDERINCREMENT = 0x00000001,
   1.654 +        /* masks off everything but the primary order */
   1.655 +        PRIMARYORDERMASK = 0xffff0000,
   1.656 +        /* masks off everything but the secondary order */
   1.657 +        SECONDARYORDERMASK = 0x0000ff00,
   1.658 +        /* masks off everything but the tertiary order */
   1.659 +        TERTIARYORDERMASK = 0x000000ff,
   1.660 +        /* masks off the ignorable char order */
   1.661 +        IGNORABLEMASK = 0x0000ffff,
   1.662 +        /* use only the primary difference */
   1.663 +        PRIMARYDIFFERENCEONLY = 0xffff0000,
   1.664 +        /* use only the primary and secondary difference */
   1.665 +        SECONDARYDIFFERENCEONLY = 0xffffff00,
   1.666 +        /* primary order shift */
   1.667 +        PRIMARYORDERSHIFT = 16,
   1.668 +        /* secondary order shift */
   1.669 +        SECONDARYORDERSHIFT = 8,
   1.670 +        /* starting value for collation elements */
   1.671 +        COLELEMENTSTART = 0x02020202,
   1.672 +        /* testing mask for primary low element */
   1.673 +        PRIMARYLOWZEROMASK = 0x00FF0000,
   1.674 +        /* resetting value for secondaries and tertiaries */
   1.675 +        RESETSECONDARYTERTIARY = 0x00000202,
   1.676 +        /* resetting value for tertiaries */
   1.677 +        RESETTERTIARY = 0x00000002,
   1.678 +        /* NOTE(review): presumably the value for primary-ignorable elements -- confirm */
   1.679 +        PRIMIGNORABLE = 0x0202
   1.680 +    };
   1.681 +
   1.682 +    // private data members ---------------------------------------------------
   1.683 +    // TRUE if ucollator is owned by this object; setUCollator(UCollator*) closes an owned ucollator.
   1.684 +    UBool dataIsOwned;
   1.685 +    // Set to TRUE by setUCollator(UCollator*); consulted by checkOwned().
   1.686 +    UBool isWriteThroughAlias;
   1.687 +
   1.688 +    /**
   1.689 +    * C struct for collation. All initialisation of it has to be done through
   1.690 +    * setUCollator().
   1.691 +    */
   1.692 +    UCollator *ucollator;
   1.693 +
   1.694 +    /**
   1.695 +    * Rule string, stored as a UnicodeString.
   1.696 +    */
   1.697 +    UnicodeString urulestring;
   1.698 +
   1.699 +    // friend classes --------------------------------------------------------
   1.700 +
   1.701 +    /**
   1.702 +    * Used to iterate over collation elements in a character source.
   1.703 +    */
   1.704 +    friend class CollationElementIterator;
   1.705 +
   1.706 +    /**
   1.707 +    * Collator needs access ONLY to RuleBasedCollator(const Locale&,
   1.708 +    *                                                  UErrorCode&).
   1.709 +    */
   1.710 +    friend class Collator;
   1.711 +
   1.712 +    /**
   1.713 +    * Used to search over collation elements in a character source.
   1.714 +    */
   1.715 +    friend class StringSearch;
   1.716 +
   1.717 +    // private constructors --------------------------------------------------
   1.718 +
   1.719 +    /**
   1.720 +     * Default constructor (private).
   1.721 +     */
   1.722 +    RuleBasedCollator();
   1.723 +
   1.724 +    /**
   1.725 +     * RuleBasedCollator constructor. This constructor takes a locale. The
   1.726 +     * only caller of this constructor should be Collator::createInstance(). If
   1.727 +     * createInstance() happens to know that the requested locale's collation is
   1.728 +     * implemented as a RuleBasedCollator, it can then call this constructor.
   1.729 +     * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
   1.730 +     * COLLATION TABLE. It does this by falling back to defaults.
   1.731 +     * @param desiredLocale the locale to use
   1.732 +     * @param status error code status
   1.733 +     */
   1.734 +    RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
   1.735 +
   1.736 +    /**
   1.737 +     * Common constructor implementation.
   1.738 +     *
   1.739 +     * @param rules the collation rules to build the collation table from.
   1.740 +     * @param collationStrength default strength for comparison.
   1.741 +     * @param decompositionMode the normalisation mode.
   1.742 +     * @param status reports success or an error.
   1.743 +     */
   1.744 +    void
   1.745 +    construct(const UnicodeString& rules,
   1.746 +              UColAttributeValue collationStrength,
   1.747 +              UColAttributeValue decompositionMode,
   1.748 +              UErrorCode& status);
   1.749 +
   1.750 +    // private methods -------------------------------------------------------
   1.751 +
   1.752 +    /**
   1.753 +    * Creates the C struct for ucollator from a Locale object.
   1.754 +    * @param locale desired locale
   1.755 +    * @param status error status
   1.756 +    */
   1.757 +    void setUCollator(const Locale& locale, UErrorCode& status);
   1.758 +
   1.759 +    /**
   1.760 +    * Creates the C struct for ucollator from a locale name.
   1.761 +    * @param locale desired locale name
   1.762 +    * @param status error status
   1.763 +    */
   1.764 +    void setUCollator(const char* locale, UErrorCode& status);
   1.765 +
   1.766 +    /**
   1.767 +    * Installs an externally supplied ucollator. This is used internally by
   1.768 +    * StringSearch. This RuleBasedCollator does not take responsibility for
   1.769 +    * cleaning up the ucollator: the dataIsOwned flag is set to FALSE.
   1.770 +    * @param collator new ucollator data
   1.771 +    */
   1.772 +    void setUCollator(UCollator *collator);
   1.773 +
   1.774 +public:
   1.775 +#ifndef U_HIDE_INTERNAL_API
   1.776 +    /**
   1.777 +    * Gets the UCollator data struct. Used only by StringSearch & intltest.
   1.778 +    * @return the UCollator data struct
   1.779 +    * @internal
   1.780 +    */
   1.781 +    const UCollator * getUCollator();
   1.782 +#endif  /* U_HIDE_INTERNAL_API */
   1.783 +
   1.784 +protected:
   1.785 +   /**
   1.786 +    * Used internally by registration to define the requested, valid and actual locales.
   1.787 +    * @param requestedLocale the requested locale
   1.788 +    * @param validLocale the valid locale
   1.789 +    * @param actualLocale the actual locale
   1.790 +    * @internal
   1.791 +    */
   1.792 +    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
   1.793 +
   1.794 +private:
   1.795 +    // Copies the ucollator if it is neither owned nor a write-through alias.
   1.796 +    void checkOwned(void);
   1.797 +
   1.798 +    // Utility that initializes the rule string; used by checkOwned(), construct() and setUCollator(UCollator*).
   1.799 +    void setRuleStringFromCollator();
   1.800 +
   1.801 +public:
   1.802 +    /** Gets the short definition string for a collator. This internal API harvests the collator's
   1.803 +     *  locale and attribute set and produces a string that can be used for opening
   1.804 +     *  a collator with the same properties using the ucol_openFromShortString API.
   1.805 +     *  This string will be normalized.
   1.806 +     *  The structure and the syntax of the string are defined in the "Naming collators"
   1.807 +     *  section of the user's guide:
   1.808 +     *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
   1.809 +     *  This function supports preflighting.
   1.810 +     *
   1.811 +     *  This is internal, and intended to be used with delegate converters.
   1.812 +     *
   1.813 +     *  @param locale a locale that will appear as the collator's locale in the resulting
   1.814 +     *                short string definition. If NULL, the locale will be harvested
   1.815 +     *                from the collator.
   1.816 +     *  @param buffer space to hold the resulting string
   1.817 +     *  @param capacity capacity of the buffer
   1.818 +     *  @param status for returning errors, including all preflighting errors
   1.819 +     *  @return length of the resulting string
   1.820 +     *  @see ucol_openFromShortString
   1.821 +     *  @see ucol_normalizeShortDefinitionString
   1.822 +     *  @see ucol_getShortDefinitionString
   1.823 +     *  @internal
   1.824 +     */
   1.825 +    virtual int32_t internalGetShortDefinitionString(const char *locale,
   1.826 +                                                     char *buffer,
   1.827 +                                                     int32_t capacity,
   1.828 +                                                     UErrorCode &status) const;
   1.829 +};
   1.830 +
   1.831 +// inline method implementation ---------------------------------------------
   1.832 +
   1.833 +// Forwards locale.getName() and status to the two-argument name-based setUCollator overload.
   1.834 +inline void RuleBasedCollator::setUCollator(const Locale &locale, UErrorCode &status)
   1.835 +{
   1.836 +    this->setUCollator(locale.getName(), status);
   1.837 +}
   1.838 +
   1.839 +
   1.840 +inline void RuleBasedCollator::setUCollator(UCollator *collator)
   1.841 +{
   1.842 +    // Close the previously held collator only when this object owns it.
   1.843 +    if (dataIsOwned && ucollator) {
   1.844 +        ucol_close(ucollator);
   1.845 +    }
   1.846 +    dataIsOwned = FALSE;          // the incoming collator is not owned by this object
   1.847 +    isWriteThroughAlias = TRUE;
   1.848 +    ucollator = collator;
   1.849 +    setRuleStringFromCollator();  // must run after ucollator has been replaced
   1.850 +}
   1.851 +
   1.852 +#ifndef U_HIDE_INTERNAL_API
   1.853 +// Returns the stored ucollator pointer; the pointee is exposed as const.
   1.854 +inline const UCollator * RuleBasedCollator::getUCollator() {
   1.855 +    return this->ucollator;
   1.856 +}
   1.857 +#endif  /* U_HIDE_INTERNAL_API */
   1.858 +
   1.859 +U_NAMESPACE_END
   1.860 +
   1.861 +#endif /* #if !UCONFIG_NO_COLLATION */
   1.862 +
   1.863 +#endif

mercurial