intl/icu/source/i18n/unicode/ucol.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 * Copyright (c) 1996-2013, International Business Machines Corporation and others.
     4 * All Rights Reserved.
     5 *******************************************************************************
     6 */
     8 #ifndef UCOL_H
     9 #define UCOL_H
    11 #include "unicode/utypes.h"
    13 #if !UCONFIG_NO_COLLATION
    15 #include "unicode/unorm.h"
    16 #include "unicode/localpointer.h"
    17 #include "unicode/parseerr.h"
    18 #include "unicode/uloc.h"
    19 #include "unicode/uset.h"
    20 #include "unicode/uscript.h"
    22 /**
    23  * \file
    24  * \brief C API: Collator 
    25  *
    26  * <h2> Collator C API </h2>
    27  *
    28  * The C API for Collator performs locale-sensitive
    29  * string comparison. You use this service to build
    30  * searching and sorting routines for natural language text.
    31  * <em>Important: </em>The ICU collation service has been reimplemented 
    32  * in order to achieve better performance and UCA compliance. 
    33  * For details, see the 
    34  * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
    35  * collation design document</a>.
    36  * <p>
    37  * For more information about the collation service see 
    38  * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
    39  * <p>
    40  * Collation service provides correct sorting orders for most locales supported in ICU. 
    41  * If specific data for a locale is not available, the order eventually falls back
    42  * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 
    43  * <p>
    44  * Sort ordering may be customized by providing your own set of rules. For more on
    45  * this subject see the 
    46  * <a href="http://icu-project.org/userguide/Collate_Customization.html">
    47  * Collation customization</a> section of the users guide.
    48  * <p>
    49  * @see         UCollationResult
    50  * @see         UNormalizationMode
    51  * @see         UCollationStrength
    52  * @see         UCollationElements
    53  */
    55 /** A collator.
    56 *  For use in C programs. The API functions take and return it by pointer.
    57 */
    58 struct UCollator;
    59 /** Structure representing a collator object instance.
    60  * @stable ICU 2.0
    61  */
    62 typedef struct UCollator UCollator;
    65 /**
    66  * Possible values for a comparison result.
    67  * UCOL_LESS is returned by the ucol_strcoll() function if the source string
    68  * compares less than the target string.
    69  * UCOL_EQUAL is returned by the ucol_strcoll() function if the source string
    70  * compares equal to the target string.
    71  * UCOL_GREATER is returned by the ucol_strcoll() function if the source string
    72  * compares greater than the target string.
    73  * The numeric values are -1, 0 and 1 respectively.
    74  * @see ucol_strcoll()
    75  * @stable ICU 2.0
    76  */
    77 typedef enum {
    78   /** source string == target string */
    79   UCOL_EQUAL    = 0,
    80   /** source string > target string */
    81   UCOL_GREATER    = 1,
    82   /** source string < target string */
    83   UCOL_LESS    = -1
    84 } UCollationResult ;
    87 /** Enum containing attribute values for controlling collation behavior.
    88  * Here are all the allowable values. Not every attribute can take every value. The only
    89  * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined  
    90  * value for that locale.
    91  * @stable ICU 2.0
    92  */
    93 typedef enum {
    94   /** accepted by most attributes */
    95   UCOL_DEFAULT = -1,
    97   /** Primary collation strength */
    98   UCOL_PRIMARY = 0,
    99   /** Secondary collation strength */
   100   UCOL_SECONDARY = 1,
   101   /** Tertiary collation strength */
   102   UCOL_TERTIARY = 2,
   103   /** Default collation strength */
   104   UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
   105   UCOL_CE_STRENGTH_LIMIT, /* implicit value 3 (UCOL_DEFAULT_STRENGTH + 1); same number as UCOL_QUATERNARY */
   106   /** Quaternary collation strength */
   107   UCOL_QUATERNARY=3,
   108   /** Identical collation strength */
   109   UCOL_IDENTICAL=15,
   110   UCOL_STRENGTH_LIMIT, /* implicit value 16 (UCOL_IDENTICAL + 1); same number as UCOL_OFF */
   112   /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 
   113       UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
   114       & UCOL_DECOMPOSITION_MODE*/
   115   UCOL_OFF = 16,
   116   /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 
   117       UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
   118       & UCOL_DECOMPOSITION_MODE*/
   119   UCOL_ON = 17,
   121   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
   122   UCOL_SHIFTED = 20,
   123   /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
   124   UCOL_NON_IGNORABLE = 21,
   126   /** Valid for UCOL_CASE_FIRST - 
   127       lower case sorts before upper case */
   128   UCOL_LOWER_FIRST = 24,
   129   /** Valid for UCOL_CASE_FIRST - upper case sorts before lower case */
   130   UCOL_UPPER_FIRST = 25,
   132   UCOL_ATTRIBUTE_VALUE_COUNT /* implicit value 26 (UCOL_UPPER_FIRST + 1) */
   134 } UColAttributeValue;
   136 /**
   137  * Enum containing the codes for reordering segments of the collation table that are not script
   138  * codes. These reordering codes are to be used in conjunction with the script codes.
   139  * @see ucol_getReorderCodes
   140  * @see ucol_setReorderCodes
   141  * @see ucol_getEquivalentReorderCodes
   142  * @see UScriptCode
   143  * @stable ICU 4.8
   144  */
   145  typedef enum {
   146    /**
   147     * A special reordering code that is used to specify the default
   148     * reordering codes for a locale.
   149     * @stable ICU 4.8
   150     */   
   151     UCOL_REORDER_CODE_DEFAULT       = -1,
   152    /**
   153     * A special reordering code that is used to specify no reordering codes.
   154     * @stable ICU 4.8
   155     */   
   156     UCOL_REORDER_CODE_NONE          = USCRIPT_UNKNOWN,
   157    /**
   158     * A special reordering code that is used to specify all other codes used for
   159     * reordering except for the codes listed as UColReorderCode values and those
   160     * listed explicitly in a reordering.
   161     * @stable ICU 4.8
   162     */   
   163     UCOL_REORDER_CODE_OTHERS        = USCRIPT_UNKNOWN, /* same numeric value as UCOL_REORDER_CODE_NONE */
   164    /**
   165     * Characters with the space property.
   166     * This is equivalent to the rule value "space".
   167     * @stable ICU 4.8
   168     */    
   169     UCOL_REORDER_CODE_SPACE         = 0x1000,
   170    /**
   171     * The first entry in the enumeration of reordering groups. This is intended for use in
   172     * range checking and enumeration of the reorder codes.
   173     * @stable ICU 4.8
   174     */    
   175     UCOL_REORDER_CODE_FIRST         = UCOL_REORDER_CODE_SPACE,
   176    /**
   177     * Characters with the punctuation property.
   178     * This is equivalent to the rule value "punct".
   179     * @stable ICU 4.8
   180     */    
   181     UCOL_REORDER_CODE_PUNCTUATION   = 0x1001,
   182    /**
   183     * Characters with the symbol property.
   184     * This is equivalent to the rule value "symbol".
   185     * @stable ICU 4.8
   186     */    
   187     UCOL_REORDER_CODE_SYMBOL        = 0x1002,
   188    /**
   189     * Characters with the currency property.
   190     * This is equivalent to the rule value "currency".
   191     * @stable ICU 4.8
   192     */    
   193     UCOL_REORDER_CODE_CURRENCY      = 0x1003,
   194    /**
   195     * Characters with the digit property.
   196     * This is equivalent to the rule value "digit".
   197     * @stable ICU 4.8
   198     */    
   199     UCOL_REORDER_CODE_DIGIT         = 0x1004,
   200    /**
   201     * The (exclusive) limit of the reorder codes, one past UCOL_REORDER_CODE_DIGIT.
   202     * This is intended for use in range checking and enumeration of the reorder codes.
   203     * @stable ICU 4.8
   204     */    
   205     UCOL_REORDER_CODE_LIMIT         = 0x1005
   206 } UColReorderCode;
   208 /**
   209  * Base letter represents a primary difference.  Set comparison
   210  * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
   211  * Use this to set the strength of a Collator object.
   212  * Example of primary difference, "abc" &lt; "abd".
   213  * 
   214  * Diacritical differences on the same base letter represent a secondary
   215  * difference.  Set comparison level to UCOL_SECONDARY to ignore tertiary
   216  * differences. Use this to set the strength of a Collator object.
   217  * Example of secondary difference, "&auml;" &gt;&gt; "a".
   218  *
   219  * Uppercase and lowercase versions of the same character represent a
   220  * tertiary difference.  Set comparison level to UCOL_TERTIARY to include
   221  * all comparison differences. Use this to set the strength of a Collator
   222  * object.
   223  * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
   224  *
   225  * Two characters are considered "identical" when they have the same
   226  * Unicode spellings (UCOL_IDENTICAL).
   227  * For example, "&auml;" == "&auml;".
   228  *
   229  * UCollationStrength is also used to determine the strength of sort keys 
   230  * generated from UCollator objects.
   231  * These values can now be found in the UColAttributeValue enum.
   232  * @stable ICU 2.0
   233  **/
   234 typedef UColAttributeValue UCollationStrength;
   236 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
   237  * value, as well as the values specific to each one. 
   238  * @stable ICU 2.0
   239  */
   240 typedef enum {
   241      /** Attribute for direction of secondary weights - used in Canadian French.
   242       * Acceptable values are UCOL_ON, which results in secondary weights
   243       * being considered backwards and UCOL_OFF which treats secondary
   244       * weights in the order they appear.
   245       * @stable ICU 2.0
   246       */
   247      UCOL_FRENCH_COLLATION, 
   248      /** Attribute for handling variable elements.
   249       * Acceptable values are UCOL_NON_IGNORABLE (default)
   250       * which treats all the codepoints with non-ignorable 
   251       * primary weights in the same way,
   252       * and UCOL_SHIFTED which causes codepoints with primary 
   253       * weights that are equal or below the variable top value
   254       * to be ignored on primary level and moved to the quaternary 
   255       * level.
   256       * @stable ICU 2.0
   257       */
   258      UCOL_ALTERNATE_HANDLING, 
   259      /** Controls the ordering of upper and lower case letters.
   260       * Acceptable values are UCOL_OFF (default), which orders
   261       * upper and lower case letters in accordance to their tertiary
   262       * weights, UCOL_UPPER_FIRST which forces upper case letters to 
   263       * sort before lower case letters, and UCOL_LOWER_FIRST which does 
   264       * the opposite.
   265       * @stable ICU 2.0
   266       */
   267      UCOL_CASE_FIRST, 
   268      /** Controls whether an extra case level (positioned before the third
   269       * level) is generated or not. Acceptable values are UCOL_OFF (default), 
   270       * when case level is not generated, and UCOL_ON which causes the case
   271       * level to be generated. Contents of the case level are affected by
   272       * the value of UCOL_CASE_FIRST attribute. A simple way to ignore 
   273       * accent differences in a string is to set the strength to UCOL_PRIMARY
   274       * and enable case level.
   275       * @stable ICU 2.0
   276       */
   277      UCOL_CASE_LEVEL,
   278      /** Controls whether the normalization check and necessary normalizations
   279       * are performed. When set to UCOL_OFF (default) no normalization check
   280       * is performed. The correctness of the result is guaranteed only if the 
   281       * input data is in so-called FCD form (see users manual for more info).
   282       * When set to UCOL_ON, an incremental check is performed to see whether
   283       * the input data is in the FCD form. If the data is not in the FCD form,
   284       * incremental NFD normalization is performed.
   285       * @stable ICU 2.0
   286       */
   287      UCOL_NORMALIZATION_MODE, 
   288      /** An alias for UCOL_NORMALIZATION_MODE attribute.
   289       * @stable ICU 2.0
   290       */
   291      UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
   292      /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
   293       * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
   294       * for most locales (except Japanese) is tertiary. Quaternary strength 
   295       * is useful when combined with shifted setting for alternate handling
   296       * attribute and for JIS x 4061 collation, when it is used to distinguish
   297       * between Katakana  and Hiragana (this is achieved by setting the 
   298       * UCOL_HIRAGANA_QUATERNARY_MODE attribute to on). Otherwise, quaternary level
   299       * is affected only by the number of non ignorable code points in
   300       * the string. Identical strength is rarely useful, as it amounts 
   301       * to codepoints of the NFD form of the string.
   302       * @stable ICU 2.0
   303       */
   304      UCOL_STRENGTH,  
   305 #ifndef U_HIDE_DEPRECATED_API
   306      /** When turned on, this attribute positions Hiragana before all  
   307       * non-ignorables on quaternary level. This is a sneaky way to produce JIS
   308       * sort order.
   309       *
   310       * This attribute is an implementation detail of the CLDR Japanese tailoring.
   311       * The implementation might change to use a different mechanism
   312       * to achieve the same Japanese sort order.
   313       * Since ICU 50, this attribute is not settable any more via API functions.
   314       * @deprecated ICU 50 Implementation detail, cannot be set via API, might be removed from implementation.
   315       */
   316      UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
   317 #endif  /* U_HIDE_DEPRECATED_API */
   318      /** When turned on, this attribute generates a collation key
   319       * for the numeric value of substrings of digits.
   320       * This is a way to get '100' to sort AFTER '2'. Note that the longest
   321       * digit substring that can be treated as a single collation element is
   322       * 254 digits (not counting leading zeros). If a digit substring is
   323       * longer than that, the digits beyond the limit will be treated as a
   324       * separate digit substring associated with a separate collation element.
   325       * @stable ICU 2.8
   326       */
   327      UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, /* explicit value: unchanged whether or not U_HIDE_DEPRECATED_API hides the enumerator above */
   328      /**
   329       * The number of UColAttribute constants.
   330       * @stable ICU 2.0
   331       */
   332      UCOL_ATTRIBUTE_COUNT
   333 } UColAttribute;
   335 /** Options for retrieving the rule string.
   336  *  @stable ICU 2.0
   337  */
   338 typedef enum {
   339   /**
   340    * Retrieves the tailoring rules only.
   341    * Same as calling the version of getRules() without UColRuleOption.
   342    * @stable ICU 2.0
   343    */
   344   UCOL_TAILORING_ONLY, /* implicit value 0 */
   345   /**
   346    * Retrieves the "UCA rules" concatenated with the tailoring rules.
   347    * The "UCA rules" are an <i>approximation</i> of the root collator's sort order.
   348    * They are almost never used or useful at runtime and can be removed from the data.
   349    * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
   350    * @stable ICU 2.0
   351    */
   352   UCOL_FULL_RULES /* implicit value 1 */
   353 } UColRuleOption ;
   355 /**
   356  * Open a UCollator for comparing strings.
   357  * The UCollator pointer is used in all the calls to the Collation 
   358  * service. When no longer needed, the collator must be disposed of by calling
   359  * {@link #ucol_close }.
   360  * @param loc The locale containing the required collation rules. 
   361  *            Special values for locales can be passed in - 
   362  *            if NULL is passed for the locale, the default locale
   363  *            collation rules will be used. If the empty string ("") or
   364  *            "root" is passed, UCA rules will be used.
   365  * @param status A pointer to a UErrorCode to receive any errors.
   366  * @return A pointer to a UCollator, or NULL (0) if an error occurred.
   367  * @see ucol_openRules
   368  * @see ucol_safeClone
   369  * @see ucol_close
   370  * @stable ICU 2.0
   371  */
   372 U_STABLE UCollator* U_EXPORT2 
   373 ucol_open(const char *loc, UErrorCode *status);
   375 /**
   376  * Produce a UCollator instance according to the rules supplied.
   377  * The rules are used to change the default ordering, defined in the
   378  * UCA in a process called tailoring. The resulting UCollator pointer
   379  * can be used in the same way as the one obtained by {@link #ucol_open }.
   380  * @param rules A string describing the collation rules. For the syntax
   381  *              of the rules please see users guide.
   382  * @param rulesLength The length of rules, or -1 if null-terminated.
   383  * @param normalizationMode The normalization mode: One of
   384  *             UCOL_OFF     (expect the text to not need normalization),
   385  *             UCOL_ON      (normalize), or
   386  *             UCOL_DEFAULT (set the mode according to the rules)
   387  * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
   388  * UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH - can be also set in the rules.
   389  * @param parseError  A pointer to UParseError to receive information about errors
   390  *                    that occurred during parsing. This argument can currently be set
   391  *                    to NULL, but at the user's own risk. Please provide a real structure.
   392  * @param status A pointer to a UErrorCode to receive any errors.
   393  * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
   394  *         of error - please use status argument to check for errors.
   395  * @see ucol_open
   396  * @see ucol_safeClone
   397  * @see ucol_close
   398  * @stable ICU 2.0
   399  */
   400 U_STABLE UCollator* U_EXPORT2 
   401 ucol_openRules( const UChar        *rules,
   402                 int32_t            rulesLength,
   403                 UColAttributeValue normalizationMode,
   404                 UCollationStrength strength,
   405                 UParseError        *parseError,
   406                 UErrorCode         *status);
   408 /** 
   409  * Open a collator defined by a short form string.
   410  * The structure and the syntax of the string is defined in the "Naming collators"
   411  * section of the users guide: 
   412  * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
   413  * Attributes are overridden by the subsequent attributes. So, for "S2_S3", final
   414  * strength will be 3. 3066bis locale overrides individual locale parts.
   415  * The call to this function is equivalent to a call to ucol_open, followed by a 
   416  * series of calls to ucol_setAttribute and ucol_setVariableTop.
   417  * @param definition A short string containing a locale and a set of attributes. 
   418  *                   Attributes not explicitly mentioned are left at the default
   419  *                   state for a locale.
   420  * @param forceDefaults if FALSE, the settings that are the same as the collator 
   421  *                   default settings will not be applied (for example, setting
   422  *                   French secondary on a French collator would not be executed). 
   423  *                   If TRUE, all the settings will be applied regardless of the 
   424  *                   collator default value. If the definition
   425  *                   strings are to be cached, should be set to FALSE.
   426  * @param parseError if not NULL, structure that will get filled with error's pre
   427  *                   and post context in case of error.
   428  * @param status     Error code. Apart from regular error conditions connected to 
   429  *                   instantiating collators (like out of memory or similar), this
   430  *                   API will return an error if an invalid attribute or attribute/value
   431  *                   combination is specified.
   432  * @return           A pointer to a UCollator or 0 if an error occurred (including an 
   433  *                   invalid attribute).
   434  * @see ucol_open
   435  * @see ucol_setAttribute
   436  * @see ucol_setVariableTop
   437  * @see ucol_getShortDefinitionString
   438  * @see ucol_normalizeShortDefinitionString
   439  * @stable ICU 3.0
   440  *
   441  */
   442 U_STABLE UCollator* U_EXPORT2
   443 ucol_openFromShortString( const char *definition,
   444                           UBool forceDefaults,
   445                           UParseError *parseError,
   446                           UErrorCode *status);
   448 #ifndef U_HIDE_DEPRECATED_API
   449 /**
   450  * Get a set containing the contractions defined by the collator. The set includes
   451  * both the UCA contractions and the contractions defined by the collator. This set
   452  * will contain only strings. If a tailoring explicitly suppresses contractions from 
   453  * the UCA (like Russian), the suppressed contractions will not be in the resulting set.
   454  * @param coll The collator to query.
   455  * @param conts The set to hold the result. It gets emptied before
   456  *              contractions are added. 
   457  * @param status A pointer to a UErrorCode to hold the error code.
   458  * @return The size of the contraction set.
   459  *
   460  * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead
   461  */
   462 U_DEPRECATED int32_t U_EXPORT2
   463 ucol_getContractions( const UCollator *coll,
   464                   USet *conts,
   465                   UErrorCode *status);
   466 #endif  /* U_HIDE_DEPRECATED_API */
   468 /**
   469  * Get the sets of contractions and/or expansions defined by the collator. The expansions
   470  * set includes both the UCA expansions and the expansions defined by the tailoring.
   471  * @param coll The collator to query.
   472  * @param contractions if not NULL, the set to hold the contractions
   473  * @param expansions if not NULL, the set to hold the expansions
   474  * @param addPrefixes add the prefix contextual elements to contractions
   475  * @param status A pointer to a UErrorCode to hold the error code.
   476  *
   477  * @stable ICU 3.4
   478  */
   479 U_STABLE void U_EXPORT2
   480 ucol_getContractionsAndExpansions( const UCollator *coll,
   481                   USet *contractions, USet *expansions,
   482                   UBool addPrefixes, UErrorCode *status);
   484 /** 
   485  * Close a UCollator.
   486  * Once closed, a UCollator should not be used again. Every open collator should
   487  * be closed; otherwise, a memory leak will result.
   488  * @param coll The UCollator to close.
   489  * @see ucol_open
   490  * @see ucol_openRules
   491  * @see ucol_safeClone
   492  * @stable ICU 2.0
   493  */
   494 U_STABLE void U_EXPORT2 
   495 ucol_close(UCollator *coll);
   497 #if U_SHOW_CPLUSPLUS_API
   499 U_NAMESPACE_BEGIN
   501 /**
   502  * \class LocalUCollatorPointer
   503  * "Smart pointer" class, closes a UCollator via ucol_close().
   504  * For most methods see the LocalPointerBase base class.
   505  *
   506  * @see LocalPointerBase
   507  * @see LocalPointer
   508  * @stable ICU 4.4
   509  */
   510 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close);
   512 U_NAMESPACE_END
   514 #endif  /* U_SHOW_CPLUSPLUS_API */
   516 /**
   517  * Compare two strings.
   518  * The strings will be compared using the options already specified on the collator.
   519  * @param coll The UCollator containing the comparison rules.
   520  * @param source The source string.
   521  * @param sourceLength The length of source, or -1 if null-terminated.
   522  * @param target The target string.
   523  * @param targetLength The length of target, or -1 if null-terminated.
   524  * @return The result of comparing the strings; one of UCOL_EQUAL,
   525  * UCOL_GREATER or UCOL_LESS.
   526  * @see ucol_greater
   527  * @see ucol_greaterOrEqual
   528  * @see ucol_equal
   529  * @stable ICU 2.0
   530  */
   531 U_STABLE UCollationResult U_EXPORT2 
   532 ucol_strcoll(    const    UCollator    *coll,
   533         const    UChar        *source,
   534         int32_t            sourceLength,
   535         const    UChar        *target,
   536         int32_t            targetLength);
   538 /** 
   539 * Compare two strings in UTF-8. 
   540 * The strings will be compared using the options already specified. 
   541 * Note: When an input string contains a malformed UTF-8 byte sequence, 
   542 * this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
   543 * @param coll The UCollator containing the comparison rules. 
   544 * @param source The source UTF-8 string. 
   545 * @param sourceLength The length of source, or -1 if null-terminated. 
   546 * @param target The target UTF-8 string. 
   547 * @param targetLength The length of target, or -1 if null-terminated. 
   548 * @param status A pointer to a UErrorCode to receive any errors 
   549 * @return The result of comparing the strings; one of UCOL_EQUAL, 
   550 * UCOL_GREATER, UCOL_LESS 
   551 * @see ucol_greater 
   552 * @see ucol_greaterOrEqual 
   553 * @see ucol_equal 
   554 * @stable ICU 50 
   555 */ 
   556 U_STABLE UCollationResult U_EXPORT2
   557 ucol_strcollUTF8(
   558         const UCollator *coll,
   559         const char      *source,
   560         int32_t         sourceLength,
   561         const char      *target,
   562         int32_t         targetLength,
   563         UErrorCode      *status);
   565 /**
   566  * Determine if the source string is greater than the target string.
   567  * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER.
   568  * @param coll The UCollator containing the comparison rules.
   569  * @param source The source string.
   570  * @param sourceLength The length of source, or -1 if null-terminated.
   571  * @param target The target string.
   572  * @param targetLength The length of target, or -1 if null-terminated.
   573  * @return TRUE if source is greater than target, FALSE otherwise.
   574  * @see ucol_strcoll
   575  * @see ucol_greaterOrEqual
   576  * @see ucol_equal
   577  * @stable ICU 2.0
   578  */
   579 U_STABLE UBool U_EXPORT2 
   580 ucol_greater(const UCollator *coll,
   581              const UChar     *source, int32_t sourceLength,
   582              const UChar     *target, int32_t targetLength);
   584 /**
   585  * Determine if the source string is greater than or equal to the target string.
   586  * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS.
   587  * @param coll The UCollator containing the comparison rules.
   588  * @param source The source string.
   589  * @param sourceLength The length of source, or -1 if null-terminated.
   590  * @param target The target string.
   591  * @param targetLength The length of target, or -1 if null-terminated.
   592  * @return TRUE if source is greater than or equal to target, FALSE otherwise.
   593  * @see ucol_strcoll
   594  * @see ucol_greater
   595  * @see ucol_equal
   596  * @stable ICU 2.0
   597  */
   598 U_STABLE UBool U_EXPORT2 
   599 ucol_greaterOrEqual(const UCollator *coll,
   600                     const UChar     *source, int32_t sourceLength,
   601                     const UChar     *target, int32_t targetLength);
   603 /**
   604  * Compare two strings for equality under the collator's rules.
   605  * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL.
   606  * @param coll The UCollator containing the comparison rules.
   607  * @param source The source string.
   608  * @param sourceLength The length of source, or -1 if null-terminated.
   609  * @param target The target string.
   610  * @param targetLength The length of target, or -1 if null-terminated.
   611  * @return TRUE if source is equal to target, FALSE otherwise.
   612  * @see ucol_strcoll
   613  * @see ucol_greater
   614  * @see ucol_greaterOrEqual
   615  * @stable ICU 2.0
   616  */
   617 U_STABLE UBool U_EXPORT2 
   618 ucol_equal(const UCollator *coll,
   619            const UChar     *source, int32_t sourceLength,
   620            const UChar     *target, int32_t targetLength);
   622 /**
   623  * Compare two strings (for example UTF-8 encoded ones) accessed through UCharIterators.
   624  * The strings will be compared using the options already specified.
   625  * @param coll The UCollator containing the comparison rules.
   626  * @param sIter The source string iterator.
   627  * @param tIter The target string iterator.
   628  * @param status A pointer to a UErrorCode to receive any errors
   629  * @return The result of comparing the strings; one of UCOL_EQUAL,
   630  * UCOL_GREATER, UCOL_LESS
   631  * @see ucol_strcoll
   632  * @stable ICU 2.6
   633  */
   634 U_STABLE UCollationResult U_EXPORT2 
   635 ucol_strcollIter(  const    UCollator    *coll,
   636                   UCharIterator *sIter,
   637                   UCharIterator *tIter,
   638                   UErrorCode *status);
   640 /**
   641  * Get the collation strength used in a UCollator.
   642  * The strength influences how strings are compared.
   643  * @param coll The UCollator to query.
   644  * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
   645  * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL.
   646  * @see ucol_setStrength
   647  * @stable ICU 2.0
   648  */
   649 U_STABLE UCollationStrength U_EXPORT2 
   650 ucol_getStrength(const UCollator *coll);
   652 /**
   653  * Set the collation strength used in a UCollator.
   654  * The strength influences how strings are compared.
   655  * @param coll The UCollator to set.
   656  * @param strength The desired collation strength; one of UCOL_PRIMARY, 
   657  * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL or UCOL_DEFAULT.
   658  * @see ucol_getStrength
   659  * @stable ICU 2.0
   660  */
   661 U_STABLE void U_EXPORT2 
   662 ucol_setStrength(UCollator *coll,
   663                  UCollationStrength strength);
   665 /**
   666  * Retrieves the reordering codes for this collator.
   667  * These reordering codes are a combination of UScript codes and UColReorderCode entries.
   668  * @param coll The UCollator to query.
   669  * @param dest The array to fill with the reordering codes.
   670  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 
   671  * will only return the length of the result without writing any of the codes (pre-flighting).
   672  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 
   673  * failure before the function call.
   674  * @return The number of reordering codes written to the dest array.
   675  * @see ucol_setReorderCodes
   676  * @see ucol_getEquivalentReorderCodes
   677  * @see UScriptCode
   678  * @see UColReorderCode
   679  * @stable ICU 4.8
   680  */
   681 U_STABLE int32_t U_EXPORT2 
   682 ucol_getReorderCodes(const UCollator* coll,
   683                     int32_t* dest,
   684                     int32_t destCapacity,
   685                     UErrorCode *pErrorCode);
   686 /** 
   687  * Sets the reordering codes for this collator.
   688  * Collation reordering allows scripts and some other defined blocks of characters 
   689  * to be moved relative to each other as a block. This reordering is done on top of 
   690  * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed 
   691  * at the start and/or the end of the collation order. These groups are specified using
   692  * UScript codes and UColReorderCode entries.
   693  * <p>By default, reordering codes specified for the start of the order are placed in the 
   694  * order given after a group of "special" non-script blocks. These special groups of characters 
   695  * are space, punctuation, symbol, currency, and digit. These special groups are represented with
   696  * UColReorderCode entries. Script groups can be intermingled with 
   697  * these special non-script blocks if those special blocks are explicitly specified in the reordering.
   698  * <p>The special code OTHERS stands for any script that is not explicitly 
   699  * mentioned in the list of reordering codes given. Anything that is after OTHERS
   700  * will go at the very end of the reordering in the order given.
   701  * <p>The special reorder code DEFAULT will reset the reordering for this collator
   702  * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
   703  * was specified when this collator was created from resource data or from rules. The 
   704  * DEFAULT code <b>must</b> be the sole code supplied when it is used. If it is not,
   705  * a U_ILLEGAL_ARGUMENT_ERROR will be set.
   706  * <p>The special reorder code NONE will remove any reordering for this collator.
   707  * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The 
   708  * NONE code <b>must</b> be the sole code supplied when it is used.
   709  * @param coll The UCollator to set.
   710  * @param reorderCodes An array of script codes in the new order. This can be NULL if the 
   711  * length is also set to 0. An empty array will clear any reordering codes on the collator.
   712  * @param reorderCodesLength The length of reorderCodes.
   713  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
   714  * failure before the function call.
   715  * @see ucol_getReorderCodes
   716  * @see ucol_getEquivalentReorderCodes
   717  * @see UScriptCode
   718  * @see UColReorderCode
   719  * @stable ICU 4.8
   720  */ 
   721 U_STABLE void U_EXPORT2 
   722 ucol_setReorderCodes(UCollator* coll,
   723                     const int32_t* reorderCodes,
   724                     int32_t reorderCodesLength,
   725                     UErrorCode *pErrorCode);
   727 /**
   728  * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
   729  * codes will be grouped and must reorder together.
   730  * @param reorderCode The reorder code to determine equivalence for.
   731  * @param dest The array to fill with the equivalent reorder codes.
   732  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
   733  * will only return the length of the result without writing any reorder codes (pre-flighting).
   734  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate 
   735  * a failure before the function call.
   736  * @return The number of reordering codes written to the dest array.
   737  * @see ucol_setReorderCodes
   738  * @see ucol_getReorderCodes
   739  * @see UScriptCode
   740  * @see UColReorderCode
   741  * @stable ICU 4.8
   742  */
   743 U_STABLE int32_t U_EXPORT2 
   744 ucol_getEquivalentReorderCodes(int32_t reorderCode,
   745                     int32_t* dest,
   746                     int32_t destCapacity,
   747                     UErrorCode *pErrorCode);
   749 /**
   750  * Get the display name for a UCollator.
   751  * The display name is suitable for presentation to a user.
   752  * @param objLoc The locale of the collator in question.
   753  * @param dispLoc The locale for display.
   754  * @param result A pointer to a buffer to receive the display name.
   755  * @param resultLength The maximum size of result.
   756  * @param status A pointer to a UErrorCode to receive any errors.
   757  * @return The total buffer size needed; if greater than resultLength,
   758  * the output was truncated.
   759  * @stable ICU 2.0
   760  */
   761 U_STABLE int32_t U_EXPORT2 
   762 ucol_getDisplayName(    const    char        *objLoc,
   763             const    char        *dispLoc,
   764             UChar             *result,
   765             int32_t         resultLength,
   766             UErrorCode        *status);
   768 /**
   769  * Get a locale for which collation rules are available.
   770  * A UCollator in a locale returned by this function will perform the correct
   771  * collation for the locale.
   772  * @param localeIndex The index of the desired locale.
   773  * @return A locale for which collation rules are available, or NULL (0) if none.
   774  * @see ucol_countAvailable
   775  * @stable ICU 2.0
   776  */
   777 U_STABLE const char* U_EXPORT2 
   778 ucol_getAvailable(int32_t localeIndex);
   780 /**
   781  * Determine how many locales have collation rules available.
   782  * This function is most useful for determining the loop ending condition for
   783  * calls to {@link #ucol_getAvailable }.
   784  * @return The number of locales for which collation rules are available.
   785  * @see ucol_getAvailable
   786  * @stable ICU 2.0
   787  */
   788 U_STABLE int32_t U_EXPORT2 
   789 ucol_countAvailable(void);
   791 #if !UCONFIG_NO_SERVICE
   792 /**
   793  * Create a string enumerator of all locales for which a valid
   794  * collator may be opened. Only declared when UCONFIG_NO_SERVICE is 0.
   795  * @param status input-output error code
   796  * @return a string enumeration over locale strings. The caller is
   797  * responsible for closing the result.
   798  * @stable ICU 3.0
   799  */
   800 U_STABLE UEnumeration* U_EXPORT2
   801 ucol_openAvailableLocales(UErrorCode *status);
   802 #endif
   804 /**
   805  * Create a string enumerator of all possible keywords that are relevant to
   806  * collation. At this point, the only recognized keyword for this
   807  * service is "collation".
   808  * @param status input-output error code
   809  * @return a string enumeration over keyword strings. The caller is
   810  * responsible for closing the result.
   811  * @stable ICU 3.0
   812  */
   813 U_STABLE UEnumeration* U_EXPORT2
   814 ucol_getKeywords(UErrorCode *status);
   816 /**
   817  * Given a keyword, create a string enumeration of all values
   818  * for that keyword that are currently in use.
   819  * @param keyword a particular keyword as enumerated by
   820  * ucol_getKeywords(). If any other keyword is passed in, *status is set
   821  * to U_ILLEGAL_ARGUMENT_ERROR.
   822  * @param status input-output error code; see keyword for the illegal-argument case
   823  * @return a string enumeration over collation keyword values, or NULL
   824  * upon error. The caller is responsible for closing the result.
   825  * @stable ICU 3.0
   826  */
   827 U_STABLE UEnumeration* U_EXPORT2
   828 ucol_getKeywordValues(const char *keyword, UErrorCode *status);
   830 /**
   831  * Given a key and a locale, returns an array of string values in a preferred
   832  * order that would make a difference. These are all and only those values where
   833  * the open (creation) of the service with the locale formed from the input locale
   834  * plus input keyword and that value has different behavior than creation with the
   835  * input locale alone.
   836  * @param key           one of the keys supported by this service.  For now, only
   837  *                      "collation" is supported.
   838  * @param locale        the input locale for which keyword values are requested
   839  * @param commonlyUsed  if set to true it will return only commonly used values
   840  *                      with the given locale in preferred order.  Otherwise,
   841  *                      it will return all the available values for the locale.
   842  * @param status        error status
   843  * @return a string enumeration over keyword values for the given key and the locale.
   844  * @stable ICU 4.2
   845  */
   846 U_STABLE UEnumeration* U_EXPORT2
   847 ucol_getKeywordValuesForLocale(const char* key,
   848                                const char* locale,
   849                                UBool commonlyUsed,
   850                                UErrorCode* status);
   852 /**
   853  * Return the functionally equivalent locale for the given
   854  * requested locale, with respect to given keyword, for the
   855  * collation service.  If two locales return the same result, then
   856  * collators instantiated for these locales will behave
   857  * equivalently.  The converse is not always true; two collators
   858  * may in fact be equivalent, but return different results, due to
   859  * internal details.  The return result has no other meaning than
   860  * that stated above, and implies nothing as to the relationship
   861  * between the two locales.  This is intended for use by
   862  * applications that wish to cache collators, or otherwise reuse
   863  * collators when possible.  The functional equivalent may change
   864  * over time.  For more information, please see the <a
   865  * href="http://icu-project.org/userguide/locale.html#services">
   866  * Locales and Services</a> section of the ICU User Guide.
   867  * @param result fill-in buffer for the functionally equivalent locale
   868  * @param resultCapacity capacity of the fill-in buffer
   869  * @param keyword a particular keyword as enumerated by
   870  * ucol_getKeywords().
   871  * @param locale the requested locale
   872  * @param isAvailable if non-NULL, pointer to a fill-in parameter that
   873  * indicates whether the requested locale was 'available' to the
   874  * collation service. A locale is defined as 'available' if it
   875  * physically exists within the collation locale data.
   876  * @param status pointer to input-output error code
   877  * @return the actual buffer size needed for the locale.  If greater
   878  * than resultCapacity, the returned full name will be truncated and
   879  * an error code will be returned.
   880  * @stable ICU 3.0
   881  */
   882 U_STABLE int32_t U_EXPORT2
   883 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
   884                              const char* keyword, const char* locale,
   885                              UBool* isAvailable, UErrorCode* status);
   887 /**
   888  * Get the collation tailoring rules from a UCollator.
   889  * The rules will follow the rule syntax.
   890  * @param coll The UCollator to query.
   891  * @param length Pointer to an int32_t; presumably receives the length of the returned rules (TODO confirm).
   892  * @return The collation tailoring rules.
   893  * @stable ICU 2.0
   894  */
   895 U_STABLE const UChar* U_EXPORT2 
   896 ucol_getRules(    const    UCollator    *coll, 
   897         int32_t            *length);
   899 /** Get the short definition string for a collator. This API harvests the collator's
   900  *  locale and the attribute set and produces a string that can be used for opening 
   901  *  a collator with the same properties using the ucol_openFromShortString API.
   902  *  This string will be normalized.
   903  *  The structure and the syntax of the string is defined in the "Naming collators"
   904  *  section of the users guide: 
   905  *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
   906  *  This API supports preflighting.
   907  *  @param coll a collator
   908  *  @param locale a locale that will appear as the collator's locale in the resulting
   909  *                short string definition. If NULL, the locale will be harvested 
   910  *                from the collator.
   911  *  @param buffer space to hold the resulting string
   912  *  @param capacity capacity of the buffer
   913  *  @param status for returning errors. All the preflighting errors are reported.
   914  *  @return length of the resulting string
   915  *  @see ucol_openFromShortString
   916  *  @see ucol_normalizeShortDefinitionString
   917  *  @stable ICU 3.0
   918  */
   919 U_STABLE int32_t U_EXPORT2
   920 ucol_getShortDefinitionString(const UCollator *coll,
   921                               const char *locale,
   922                               char *buffer,
   923                               int32_t capacity,
   924                               UErrorCode *status);
   926 /** Verifies and normalizes a short definition string.
   927  *  A normalized short definition string has all the options sorted by the argument name,
   928  *  so that equivalent definition strings are the same. 
   929  *  This API supports preflighting.
   930  *  @param source definition string
   931  *  @param destination space to hold the resulting string
   932  *  @param capacity capacity of the buffer
   933  *  @param parseError if not NULL, structure that will get filled with the error's pre
   934  *                   and post context in case of error.
   935  *  @param status     Error code. This API will return an error if an invalid attribute 
   936  *                    or attribute/value combination is specified. All the preflighting 
   937  *                    errors are also reported.
   938  *  @return length of the resulting normalized string.
   939  *
   940  *  @see ucol_openFromShortString
   941  *  @see ucol_getShortDefinitionString
   942  * 
   943  *  @stable ICU 3.0
   944  */
   946 U_STABLE int32_t U_EXPORT2
   947 ucol_normalizeShortDefinitionString(const char *source,
   948                                     char *destination,
   949                                     int32_t capacity,
   950                                     UParseError *parseError,
   951                                     UErrorCode *status);
   954 /**
   955  * Get a sort key for a string from a UCollator.
   956  * Sort keys may be compared using <TT>strcmp</TT>.
   957  *
   958  * Like ICU functions that write to an output buffer, the buffer contents
   959  * are undefined if the buffer capacity (resultLength parameter) is too small.
   960  * Unlike ICU functions that write a string to an output buffer,
   961  * the terminating zero byte is counted in the sort key length.
   962  * @param coll The UCollator containing the collation rules.
   963  * @param source The string to transform.
   964  * @param sourceLength The length of source, or -1 if null-terminated.
   965  * @param result A pointer to a buffer to receive the sort key.
   966  * @param resultLength The maximum size of result.
   967  * @return The size needed to fully store the sort key.
   968  *      If there was an internal error generating the sort key,
   969  *      a zero value is returned.
   970  * @see ucol_keyHashCode
   971  * @stable ICU 2.0
   972  */
   973 U_STABLE int32_t U_EXPORT2 
   974 ucol_getSortKey(const    UCollator    *coll,
   975         const    UChar        *source,
   976         int32_t        sourceLength,
   977         uint8_t        *result,
   978         int32_t        resultLength);
   981 /** Gets the next count bytes of a sort key. The caller needs
   982  *  to preserve the state array between calls and to provide
   983  *  the same type of UCharIterator set with the same string.
   984  *  The destination buffer provided must be big enough to store
   985  *  the number of requested bytes.
   986  *
   987  *  The generated sort key may or may not be compatible with
   988  *  sort keys generated using ucol_getSortKey().
   989  *  @param coll The UCollator containing the collation rules.
   990  *  @param iter UCharIterator containing the string we need 
   991  *              the sort key to be calculated for.
   992  *  @param state Opaque state of sortkey iteration (array of two uint32_t).
   993  *  @param dest Buffer to hold the resulting sortkey part
   994  *  @param count number of sort key bytes required.
   995  *  @param status error code indicator.
   996  *  @return the actual number of bytes of a sortkey. It can be
   997  *          smaller than count if we have reached the end of 
   998  *          the sort key.
   999  *  @stable ICU 2.6
  1000  */
  1001 U_STABLE int32_t U_EXPORT2 
  1002 ucol_nextSortKeyPart(const UCollator *coll,
  1003                      UCharIterator *iter,
  1004                      uint32_t state[2],
  1005                      uint8_t *dest, int32_t count,
  1006                      UErrorCode *status);
  1008 /** Bound modes accepted by the ucol_getBound() API.
  1009  * See the ucol_getBound() documentation for an explanation.
  1010  * Do not change the values assigned to the
  1011  * members of this enum: underlying code
  1012  * depends on them having these numbers.
  1013  * @stable ICU 2.0
  1014  */
  1015 typedef enum {
  1016   /** lower bound */
  1017   UCOL_BOUND_LOWER = 0,
  1018   /** upper bound that will match strings of exact size */
  1019   UCOL_BOUND_UPPER = 1,
  1020   /** upper bound that will match all the strings that have the same initial substring as the given string */
  1021   UCOL_BOUND_UPPER_LONG = 2,
  1022   UCOL_BOUND_VALUE_COUNT /**< number of bound modes defined above (follows UCOL_BOUND_UPPER_LONG = 2) */
  1023 } UColBoundMode;
  1025 /**
  1026  * Produce a bound for a given sortkey and a number of levels.
  1027  * Return value is always the number of bytes needed, regardless of 
  1028  * whether the result buffer was big enough or even valid.<br>
  1029  * Resulting bounds can be used to produce a range of strings that are
  1030  * between upper and lower bounds. For example, if bounds are produced
  1031  * for a sortkey of string "smith", strings between upper and lower 
  1032  * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
  1033  * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
  1034  * is produced, strings matched would be as above. However, if a bound
  1035  * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
  1036  * also match "Smithsonian" and similar.<br>
  1037  * For more on usage, see the example in cintltst/capitst.c in procedure
  1038  * TestBounds.
  1039  * Sort keys may be compared using <TT>strcmp</TT>.
  1040  * @param source The source sortkey.
  1041  * @param sourceLength The length of source, or -1 if null-terminated. 
  1042  *                     (If an unmodified sortkey is passed, it is always null 
  1043  *                      terminated).
  1044  * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 
  1045  *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, which 
  1046  *                  produces an upper bound that matches strings of the same length, 
  1047  *                  or UCOL_BOUND_UPPER_LONG, which matches strings that have the 
  1048  *                  same starting substring as the source string.
  1049  * @param noOfLevels  Number of levels required in the resulting bound (for most 
  1050  *                    uses, the recommended value is 1). See the users guide for an 
  1051  *                    explanation of the number of levels a sortkey can have.
  1052  * @param result A pointer to a buffer to receive the resulting sortkey.
  1053  * @param resultLength The maximum size of result.
  1054  * @param status Used for returning an error code if something went wrong. If the 
  1055  *               number of levels requested is higher than the number of levels
  1056  *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 
  1057  *               issued.
  1058  * @return The size needed to fully store the bound. 
  1059  * @see ucol_keyHashCode
  1060  * @stable ICU 2.1
  1061  */
  1062 U_STABLE int32_t U_EXPORT2 
  1063 ucol_getBound(const uint8_t       *source,
  1064         int32_t             sourceLength,
  1065         UColBoundMode       boundType,
  1066         uint32_t            noOfLevels,
  1067         uint8_t             *result,
  1068         int32_t             resultLength,
  1069         UErrorCode          *status);
  1071 /**
  1072  * Gets the version information for a Collator. Version is currently
  1073  * an opaque 32-bit number which depends, among other things, on major
  1074  * versions of the collator tailoring and UCA.
  1075  * @param coll The UCollator to query.
  1076  * @param info Output parameter; the version information is filled in here.
  1077  * @stable ICU 2.0
  1078  */
  1079 U_STABLE void U_EXPORT2
  1080 ucol_getVersion(const UCollator* coll, UVersionInfo info);
  1082 /**
  1083  * Gets the UCA version information for a Collator. Version is the
  1084  * UCA version number (e.g. 3.1.1, 4.0).
  1085  * @param coll The UCollator to query.
  1086  * @param info Output parameter; the UCA version information is filled in here.
  1087  * @stable ICU 2.8
  1088  */
  1089 U_STABLE void U_EXPORT2
  1090 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
  1092 /**
  1093  * Merges two sort keys. The levels are merged with their corresponding counterparts
  1094  * (primaries with primaries, secondaries with secondaries etc.). Between the values
  1095  * from the same level a separator is inserted.
  1097  * This is useful, for example, for combining sort keys from first and last names
  1098  * to sort such pairs.
  1099  * It is possible to merge multiple sort keys by consecutively merging
  1100  * another one with the intermediate result.
  1102  * The length of the merge result is the sum of the lengths of the input sort keys.
  1104  * Example (uncompressed):
  1105  * <pre>191B1D 01 050505 01 910505 00
  1106  * 1F2123 01 050505 01 910505 00</pre>
  1107  * will be merged as 
  1108  * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
  1110  * If the destination buffer is not big enough, then its contents are undefined.
  1111  * If any of the source lengths are zero or any of the source pointers are NULL/undefined,
  1112  * the result is of size zero.
  1114  * @param src1 the first sort key
  1115  * @param src1Length the length of the first sort key, including the zero byte at the end;
  1116  *        can be -1 if the function is to find the length
  1117  * @param src2 the second sort key
  1118  * @param src2Length the length of the second sort key, including the zero byte at the end;
  1119  *        can be -1 if the function is to find the length
  1120  * @param dest the buffer where the merged sort key is written,
  1121  *        can be NULL if destCapacity==0
  1122  * @param destCapacity the number of bytes in the dest buffer
  1123  * @return the length of the merged sort key, src1Length+src2Length;
  1124  *         can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
  1125  *         in which cases the contents of dest are undefined
  1126  * @stable ICU 2.0
  1127  */
  1128 U_STABLE int32_t U_EXPORT2 
  1129 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
  1130                    const uint8_t *src2, int32_t src2Length,
  1131                    uint8_t *dest, int32_t destCapacity);
  1133 /**
  1134  * Universal attribute setter.
  1135  * @param coll collator whose attributes are to be changed
  1136  * @param attr attribute type 
  1137  * @param value attribute value
  1138  * @param status to indicate whether the operation went on smoothly or there were errors
  1139  * @see UColAttribute
  1140  * @see UColAttributeValue
  1141  * @see ucol_getAttribute
  1142  * @stable ICU 2.0
  1143  */
  1144 U_STABLE void U_EXPORT2 
  1145 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
  1147 /**
  1148  * Universal attribute getter.
  1149  * @param coll collator whose attributes are to be queried
  1150  * @param attr attribute type
  1151  * @return attribute value
  1152  * @param status to indicate whether the operation went on smoothly or there were errors
  1153  * @see UColAttribute
  1154  * @see UColAttributeValue
  1155  * @see ucol_setAttribute
  1156  * @stable ICU 2.0
  1157  */
  1158 U_STABLE UColAttributeValue  U_EXPORT2 
  1159 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
  1161 /** Variable top
  1162  * is a two byte primary value which causes all the codepoints with primary values that
  1163  * are less than or equal to the variable top to be shifted when alternate handling is set
  1164  * to UCOL_SHIFTED.
  1165  * Sets the variable top to a collation element value of a string supplied. 
  1166  * @param coll collator whose variable top needs to be changed
  1167  * @param varTop one or more (if contraction) UChars to which the variable top should be set
  1168  * @param len length of variable top string. If -1 it is considered to be zero terminated.
  1169  * @param status error code. If error code is set, the return value is undefined. 
  1170  *               Errors set by this function are: <br>
  1171  *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such 
  1172  *    contraction<br>
  1173  *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
  1174  * @return a 32 bit value containing the value of the variable top in upper 16 bits. 
  1175  *         Lower 16 bits are undefined
  1176  * @see ucol_getVariableTop
  1177  * @see ucol_restoreVariableTop
  1178  * @stable ICU 2.0
  1179  */
  1180 U_STABLE uint32_t U_EXPORT2 
  1181 ucol_setVariableTop(UCollator *coll, 
  1182                     const UChar *varTop, int32_t len, 
  1183                     UErrorCode *status);
  1185 /** 
  1186  * Gets the variable top value of a Collator. 
  1187  * Lower 16 bits are undefined and should be ignored.
  1188  * @param coll collator whose variable top needs to be retrieved
  1189  * @param status error code (not changed by function). If error code is set, 
  1190  *               the return value is undefined.
  1191  * @return the variable top value of a Collator.
  1192  * @see ucol_setVariableTop
  1193  * @see ucol_restoreVariableTop
  1194  * @stable ICU 2.0
  1195  */
  1196 U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
  1198 /** 
  1199  * Sets the variable top to a collation element value supplied. Variable top is 
  1200  * set to the upper 16 bits. 
  1201  * Lower 16 bits are ignored.
  1202  * @param coll collator whose variable top needs to be changed
  1203  * @param varTop CE value, as returned by ucol_setVariableTop or ucol_getVariableTop
  1204  * @param status error code (not changed by function)
  1205  * @see ucol_getVariableTop
  1206  * @see ucol_setVariableTop
  1207  * @stable ICU 2.0
  1208  */
  1209 U_STABLE void U_EXPORT2 
  1210 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
  1212 /**
  1213  * Thread safe cloning operation. The result is a clone of a given collator.
  1214  * @param coll collator to be cloned
  1215  * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
  1216  *  User allocated space for the new clone.
  1217  *  If NULL, new memory will be allocated.
  1218  *  If buffer is not large enough, new memory will be allocated.
  1219  *  Clients can use the U_COL_SAFECLONE_BUFFERSIZE.
  1220  * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
  1221  *  Pointer to size of allocated space.
  1222  *  If *pBufferSize == 0, a sufficient size for use in cloning will 
  1223  *  be returned ('pre-flighting').
  1224  *  If *pBufferSize is not enough for a stack-based safe clone, 
  1225  *  new memory will be allocated.
  1226  * @param status to indicate whether the operation went on smoothly or there were errors.
  1227  *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any
  1228  *  allocations were necessary.
  1229  * @return pointer to the new clone
  1230  * @see ucol_open
  1231  * @see ucol_openRules
  1232  * @see ucol_close
  1233  * @stable ICU 2.0
  1234  */
  1235 U_STABLE UCollator* U_EXPORT2 
  1236 ucol_safeClone(const UCollator *coll,
  1237                void            *stackBuffer,
  1238                int32_t         *pBufferSize,
  1239                UErrorCode      *status);
  1241 #ifndef U_HIDE_DEPRECATED_API
  1243 /** Default memory size for the new clone created by ucol_safeClone().
  1244  * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer.
  1245  */
  1246 #define U_COL_SAFECLONE_BUFFERSIZE 1
  1248 #endif /* U_HIDE_DEPRECATED_API */
  1250 /**
  1251  * Returns current rules. Delta defines whether full rules are returned or just the tailoring. 
  1252  * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough 
  1253  * to store rules, will store up to available space.
  1255  * ucol_getRules() should normally be used instead.
  1256  * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
  1257  * @param coll collator to get the rules from
  1258  * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 
  1259  * @param buffer buffer to store the result in. If NULL, you'll get no rules.
  1260  * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in.
  1261  * @return the number of UChars needed to store the rules
  1262  * @stable ICU 2.0
  1263  * @see UCOL_FULL_RULES
  1264  */
  1265 U_STABLE int32_t U_EXPORT2 
  1266 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
  1268 #ifndef U_HIDE_DEPRECATED_API
  1269 /**
  1270  * Gets the locale name of the collator. If the collator
  1271  * is instantiated from the rules, then this function returns
  1272  * NULL.
  1273  * @param coll The UCollator for which the locale is needed
  1274  * @param type You can choose between requested, valid and actual
  1275  *             locale. For description see the definition of
  1276  *             ULocDataLocaleType in uloc.h
  1277  * @param status error code of the operation
  1278  * @return real locale name from which the collation data comes. 
  1279  *         If the collator was instantiated from rules, returns
  1280  *         NULL.
  1281  * @deprecated ICU 2.8 Use ucol_getLocaleByType() instead
  1282  */
  1283 U_DEPRECATED const char * U_EXPORT2
  1284 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
  1285 #endif  /* U_HIDE_DEPRECATED_API */
  1287 /**
  1288  * Gets the locale name of the collator. If the collator
  1289  * is instantiated from the rules, then this function returns
  1290  * NULL.
  1291  * @param coll The UCollator for which the locale is needed
  1292  * @param type You can choose between requested, valid and actual
  1293  *             locale. For description see the definition of
  1294  *             ULocDataLocaleType in uloc.h
  1295  * @param status error code of the operation
  1296  * @return real locale name from which the collation data comes. 
  1297  *         If the collator was instantiated from rules, returns
  1298  *         NULL.
  1299  * @stable ICU 2.8
  1300  */
  1301 U_STABLE const char * U_EXPORT2
  1302 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
  1304 /**
  1305  * Get a Unicode set that contains all the characters and sequences tailored in 
  1306  * this collator. The result must be disposed of by using uset_close.
  1307  * @param coll        The UCollator for which we want to get tailored chars
  1308  * @param status      error code of the operation
  1309  * @return a pointer to a newly created USet. Must be disposed of by using uset_close
  1310  * @see ucol_openRules
  1311  * @see uset_close
  1312  * @stable ICU 2.4
  1313  */
  1314 U_STABLE USet * U_EXPORT2
  1315 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
  1317 #ifndef U_HIDE_INTERNAL_API
  1318 /**
  1319  * Universal attribute getter that returns UCOL_DEFAULT if the value is default.
  1320  * @param coll collator whose attributes are to be queried
  1321  * @param attr attribute type
  1322  * @return attribute value or UCOL_DEFAULT if the value is default
  1323  * @param status to indicate whether the operation went on smoothly or there were errors
  1324  * @see UColAttribute
  1325  * @see UColAttributeValue
  1326  * @see ucol_setAttribute
  1327  * @internal ICU 3.0
  1328  */
  1329 U_INTERNAL UColAttributeValue  U_EXPORT2
  1330 ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status);
  1332 /** Checks whether two collators are equal. Collators are considered equal if they
  1333  *  sort strings identically. This means that both the current attributes and the
  1334  *  rules must be equivalent. Currently used for RuleBasedCollator::operator==.
  1335  *  @param source first collator
  1336  *  @param target second collator
  1337  *  @return TRUE if the collators are equal, FALSE otherwise
  1338  *  @internal ICU 3.0
  1339  */
  1340 U_INTERNAL UBool U_EXPORT2
  1341 ucol_equals(const UCollator *source, const UCollator *target);
  1343 /** Calculates the set of unsafe code points, given a collator.
  1344  *   A character is unsafe if appending any character to it could alter the ordering significantly.
  1345  *   Collation sorts in normalized order, so anything that is rearranged by normalization can cause this.
  1346  *   For example, if you have a character like a_umlaut and you add a lower_dot to it,
  1347  *   it normalizes to a_lower_dot + umlaut and sorts differently.
  1348  *  @param coll Collator
  1349  *  @param unsafe a fill-in set to receive the unsafe code points
  1350  *  @param status for catching errors
  1351  *  @return number of elements in the set
  1352  *  @internal ICU 3.0
  1353  */
  1354 U_INTERNAL int32_t U_EXPORT2
  1355 ucol_getUnsafeSet( const UCollator *coll,
  1356                   USet *unsafe,
  1357                   UErrorCode *status);
  1359 /** Resets UCA's static pointers. Do not use this unless your static memory can go away.
  1360  * @internal ICU 3.2.1
  1361  */
  1362 U_INTERNAL void U_EXPORT2
  1363 ucol_forgetUCA(void);
  1365 /** Touches all resources needed for instantiating a collator from a short string definition,
  1366  *  thus filling up the cache.
  1367  * @param definition A short string containing a locale and a set of attributes.
  1368  *                   Attributes not explicitly mentioned are left at the default
  1369  *                   state for a locale.
  1370  * @param forceDefaults if FALSE, the settings that are the same as the collator
  1371  *                   default settings will not be applied (for example, setting
  1372  *                   French secondary on a French collator would not be executed).
  1373  *                   If TRUE, all the settings will be applied regardless of the
  1374  *                   collator default value. If the definition
  1375  *                   strings are to be cached, should be set to FALSE.
  1376  * @param parseError if not NULL, structure that will get filled with error's pre
  1377  *                   and post context in case of error.
  1378  * @param status     Error code. Apart from regular error conditions connected to
  1379  *                   instantiating collators (like out of memory or similar), this
  1380  *                   API will return an error if an invalid attribute or attribute/value
  1381  *                   combination is specified.
  1382  * @see ucol_openFromShortString
  1383  * @internal ICU 3.2.1
  1384  */
  1385 U_INTERNAL void U_EXPORT2
  1386 ucol_prepareShortStringOpen( const char *definition,
  1387                           UBool forceDefaults,
  1388                           UParseError *parseError,
  1389                           UErrorCode *status);
  1390 #endif  /* U_HIDE_INTERNAL_API */
  1392 /** Creates a binary image of a collator. This binary image can be stored and
  1393  *  later used to instantiate a collator using ucol_openBinary.
  1394  *  This API supports preflighting.
  1395  *  @param coll Collator whose binary image is created
  1396  *  @param buffer a fill-in buffer to receive the binary image
  1397  *  @param capacity capacity of the destination buffer
  1398  *  @param status for catching errors
  1399  *  @return size of the image
  1400  *  @see ucol_openBinary
  1401  *  @stable ICU 3.2
  1402  */
  1403 U_STABLE int32_t U_EXPORT2
  1404 ucol_cloneBinary(const UCollator *coll,
  1405                  uint8_t *buffer, int32_t capacity,
  1406                  UErrorCode *status);
  1408 /** Opens a collator from a collator binary image created using
  1409  *  ucol_cloneBinary. The binary image used to instantiate the
  1410  *  collator remains owned by the user and should stay around for
  1411  *  the lifetime of the collator. The API also takes a base collator,
  1412  *  which usually should be UCA.
  1413  *  @param bin binary image owned by the user and required through the
  1414  *             lifetime of the collator
  1415  *  @param length size of the image. If negative, the API will try to
  1416  *                figure out the length of the image
  1417  *  @param base fallback collator, usually UCA. Base is required to be
  1418  *              present through the lifetime of the collator. Currently
  1419  *              it cannot be NULL.
  1420  *  @param status for catching errors
  1421  *  @return newly created collator
  1422  *  @see ucol_cloneBinary
  1423  *  @stable ICU 3.2
  1424  */
  1425 U_STABLE UCollator* U_EXPORT2
  1426 ucol_openBinary(const uint8_t *bin, int32_t length, 
  1427                 const UCollator *base, 
  1428                 UErrorCode *status);
  1431 #endif /* #if !UCONFIG_NO_COLLATION */
  1433 #endif

mercurial