1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/unicode/ucol.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1433 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (c) 1996-2013, International Business Machines Corporation and others. 1.7 +* All Rights Reserved. 1.8 +******************************************************************************* 1.9 +*/ 1.10 + 1.11 +#ifndef UCOL_H 1.12 +#define UCOL_H 1.13 + 1.14 +#include "unicode/utypes.h" 1.15 + 1.16 +#if !UCONFIG_NO_COLLATION 1.17 + 1.18 +#include "unicode/unorm.h" 1.19 +#include "unicode/localpointer.h" 1.20 +#include "unicode/parseerr.h" 1.21 +#include "unicode/uloc.h" 1.22 +#include "unicode/uset.h" 1.23 +#include "unicode/uscript.h" 1.24 + 1.25 +/** 1.26 + * \file 1.27 + * \brief C API: Collator 1.28 + * 1.29 + * <h2> Collator C API </h2> 1.30 + * 1.31 + * The C API for Collator performs locale-sensitive 1.32 + * string comparison. You use this service to build 1.33 + * searching and sorting routines for natural language text. 1.34 + * <em>Important: </em>The ICU collation service has been reimplemented 1.35 + * in order to achieve better performance and UCA compliance. 1.36 + * For details, see the 1.37 + * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> 1.38 + * collation design document</a>. 1.39 + * <p> 1.40 + * For more information about the collation service see 1.41 + * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>. 1.42 + * <p> 1.43 + * Collation service provides correct sorting orders for most locales supported in ICU. 1.44 + * If specific data for a locale is not available, the orders eventually falls back 1.45 + * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 1.46 + * <p> 1.47 + * Sort ordering may be customized by providing your own set of rules. 
For more on 1.48 + * this subject see the 1.49 + * <a href="http://icu-project.org/userguide/Collate_Customization.html"> 1.50 + * Collation customization</a> section of the users guide. 1.51 + * <p> 1.52 + * @see UCollationResult 1.53 + * @see UNormalizationMode 1.54 + * @see UCollationStrength 1.55 + * @see UCollationElements 1.56 + */ 1.57 + 1.58 +/** A collator. 1.59 +* For usage in C programs. 1.60 +*/ 1.61 +struct UCollator; 1.62 +/** structure representing a collator object instance 1.63 + * @stable ICU 2.0 1.64 + */ 1.65 +typedef struct UCollator UCollator; 1.66 + 1.67 + 1.68 +/** 1.69 + * UCOL_LESS is returned if source string is compared to be less than target 1.70 + * string in the ucol_strcoll() method. 1.71 + * UCOL_EQUAL is returned if source string is compared to be equal to target 1.72 + * string in the ucol_strcoll() method. 1.73 + * UCOL_GREATER is returned if source string is compared to be greater than 1.74 + * target string in the ucol_strcoll() method. 1.75 + * @see ucol_strcoll() 1.76 + * <p> 1.77 + * Possible values for a comparison result 1.78 + * @stable ICU 2.0 1.79 + */ 1.80 +typedef enum { 1.81 + /** string a == string b */ 1.82 + UCOL_EQUAL = 0, 1.83 + /** string a > string b */ 1.84 + UCOL_GREATER = 1, 1.85 + /** string a < string b */ 1.86 + UCOL_LESS = -1 1.87 +} UCollationResult ; 1.88 + 1.89 + 1.90 +/** Enum containing attribute values for controlling collation behavior. 1.91 + * Here are all the allowable values. Not every attribute can take every value. 
The only 1.92 + * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined 1.93 + * value for that locale 1.94 + * @stable ICU 2.0 1.95 + */ 1.96 +typedef enum { 1.97 + /** accepted by most attributes */ 1.98 + UCOL_DEFAULT = -1, 1.99 + 1.100 + /** Primary collation strength */ 1.101 + UCOL_PRIMARY = 0, 1.102 + /** Secondary collation strength */ 1.103 + UCOL_SECONDARY = 1, 1.104 + /** Tertiary collation strength */ 1.105 + UCOL_TERTIARY = 2, 1.106 + /** Default collation strength */ 1.107 + UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, 1.108 + UCOL_CE_STRENGTH_LIMIT, 1.109 + /** Quaternary collation strength */ 1.110 + UCOL_QUATERNARY=3, 1.111 + /** Identical collation strength */ 1.112 + UCOL_IDENTICAL=15, 1.113 + UCOL_STRENGTH_LIMIT, 1.114 + 1.115 + /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 1.116 + UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 1.117 + & UCOL_DECOMPOSITION_MODE*/ 1.118 + UCOL_OFF = 16, 1.119 + /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 1.120 + UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 1.121 + & UCOL_DECOMPOSITION_MODE*/ 1.122 + UCOL_ON = 17, 1.123 + 1.124 + /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */ 1.125 + UCOL_SHIFTED = 20, 1.126 + /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */ 1.127 + UCOL_NON_IGNORABLE = 21, 1.128 + 1.129 + /** Valid for UCOL_CASE_FIRST - 1.130 + lower case sorts before upper case */ 1.131 + UCOL_LOWER_FIRST = 24, 1.132 + /** upper case sorts before lower case */ 1.133 + UCOL_UPPER_FIRST = 25, 1.134 + 1.135 + UCOL_ATTRIBUTE_VALUE_COUNT 1.136 + 1.137 +} UColAttributeValue; 1.138 + 1.139 +/** 1.140 + * Enum containing the codes for reordering segments of the collation table that are not script 1.141 + * codes. These reordering codes are to be used in conjunction with the script codes. 
1.142 + * @see ucol_getReorderCodes 1.143 + * @see ucol_setReorderCodes 1.144 + * @see ucol_getEquivalentReorderCodes 1.145 + * @see UScriptCode 1.146 + * @stable ICU 4.8 1.147 + */ 1.148 + typedef enum { 1.149 + /** 1.150 + * A special reordering code that is used to specify the default 1.151 + * reordering codes for a locale. 1.152 + * @stable ICU 4.8 1.153 + */ 1.154 + UCOL_REORDER_CODE_DEFAULT = -1, 1.155 + /** 1.156 + * A special reordering code that is used to specify no reordering codes. 1.157 + * @stable ICU 4.8 1.158 + */ 1.159 + UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN, 1.160 + /** 1.161 + * A special reordering code that is used to specify all other codes used for 1.162 + * reordering except for the codes listed as UColReorderCode values and those 1.163 + * listed explicitly in a reordering. 1.164 + * @stable ICU 4.8 1.165 + */ 1.166 + UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN, 1.167 + /** 1.168 + * Characters with the space property. 1.169 + * This is equivalent to the rule value "space". 1.170 + * @stable ICU 4.8 1.171 + */ 1.172 + UCOL_REORDER_CODE_SPACE = 0x1000, 1.173 + /** 1.174 + * The first entry in the enumeration of reordering groups. This is intended for use in 1.175 + * range checking and enumeration of the reorder codes. 1.176 + * @stable ICU 4.8 1.177 + */ 1.178 + UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE, 1.179 + /** 1.180 + * Characters with the punctuation property. 1.181 + * This is equivalent to the rule value "punct". 1.182 + * @stable ICU 4.8 1.183 + */ 1.184 + UCOL_REORDER_CODE_PUNCTUATION = 0x1001, 1.185 + /** 1.186 + * Characters with the symbol property. 1.187 + * This is equivalent to the rule value "symbol". 1.188 + * @stable ICU 4.8 1.189 + */ 1.190 + UCOL_REORDER_CODE_SYMBOL = 0x1002, 1.191 + /** 1.192 + * Characters with the currency property. 1.193 + * This is equivalent to the rule value "currency". 
1.194 + * @stable ICU 4.8 1.195 + */ 1.196 + UCOL_REORDER_CODE_CURRENCY = 0x1003, 1.197 + /** 1.198 + * Characters with the digit property. 1.199 + * This is equivalent to the rule value "digit". 1.200 + * @stable ICU 4.8 1.201 + */ 1.202 + UCOL_REORDER_CODE_DIGIT = 0x1004, 1.203 + /** 1.204 + * The limit of the reorder codes. This is intended for use in range checking 1.205 + * and enumeration of the reorder codes. 1.206 + * @stable ICU 4.8 1.207 + */ 1.208 + UCOL_REORDER_CODE_LIMIT = 0x1005 1.209 +} UColReorderCode; 1.210 + 1.211 +/** 1.212 + * Base letter represents a primary difference. Set comparison 1.213 + * level to UCOL_PRIMARY to ignore secondary and tertiary differences. 1.214 + * Use this to set the strength of a Collator object. 1.215 + * Example of primary difference, "abc" < "abd" 1.216 + * 1.217 + * Diacritical differences on the same base letter represent a secondary 1.218 + * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary 1.219 + * differences. Use this to set the strength of a Collator object. 1.220 + * Example of secondary difference, "ä" >> "a". 1.221 + * 1.222 + * Uppercase and lowercase versions of the same character represents a 1.223 + * tertiary difference. Set comparison level to UCOL_TERTIARY to include 1.224 + * all comparison differences. Use this to set the strength of a Collator 1.225 + * object. 1.226 + * Example of tertiary difference, "abc" <<< "ABC". 1.227 + * 1.228 + * Two characters are considered "identical" when they have the same 1.229 + * unicode spellings. UCOL_IDENTICAL. 1.230 + * For example, "ä" == "ä". 1.231 + * 1.232 + * UCollationStrength is also used to determine the strength of sort keys 1.233 + * generated from UCollator objects 1.234 + * These values can be now found in the UColAttributeValue enum. 1.235 + * @stable ICU 2.0 1.236 + **/ 1.237 +typedef UColAttributeValue UCollationStrength; 1.238 + 1.239 +/** Attributes that collation service understands. 
All the attributes can take UCOL_DEFAULT 1.240 + * value, as well as the values specific to each one. 1.241 + * @stable ICU 2.0 1.242 + */ 1.243 +typedef enum { 1.244 + /** Attribute for direction of secondary weights - used in Canadian French. 1.245 + * Acceptable values are UCOL_ON, which results in secondary weights 1.246 + * being considered backwards and UCOL_OFF which treats secondary 1.247 + * weights in the order they appear. 1.248 + * @stable ICU 2.0 1.249 + */ 1.250 + UCOL_FRENCH_COLLATION, 1.251 + /** Attribute for handling variable elements. 1.252 + * Acceptable values are UCOL_NON_IGNORABLE (default) 1.253 + * which treats all the codepoints with non-ignorable 1.254 + * primary weights in the same way, 1.255 + * and UCOL_SHIFTED which causes codepoints with primary 1.256 + * weights that are equal or below the variable top value 1.257 + * to be ignored on primary level and moved to the quaternary 1.258 + * level. 1.259 + * @stable ICU 2.0 1.260 + */ 1.261 + UCOL_ALTERNATE_HANDLING, 1.262 + /** Controls the ordering of upper and lower case letters. 1.263 + * Acceptable values are UCOL_OFF (default), which orders 1.264 + * upper and lower case letters in accordance to their tertiary 1.265 + * weights, UCOL_UPPER_FIRST which forces upper case letters to 1.266 + * sort before lower case letters, and UCOL_LOWER_FIRST which does 1.267 + * the opposite. 1.268 + * @stable ICU 2.0 1.269 + */ 1.270 + UCOL_CASE_FIRST, 1.271 + /** Controls whether an extra case level (positioned before the third 1.272 + * level) is generated or not. Acceptable values are UCOL_OFF (default), 1.273 + * when case level is not generated, and UCOL_ON which causes the case 1.274 + * level to be generated. Contents of the case level are affected by 1.275 + * the value of UCOL_CASE_FIRST attribute. A simple way to ignore 1.276 + * accent differences in a string is to set the strength to UCOL_PRIMARY 1.277 + * and enable case level. 
1.278 + * @stable ICU 2.0 1.279 + */ 1.280 + UCOL_CASE_LEVEL, 1.281 + /** Controls whether the normalization check and necessary normalizations 1.282 + * are performed. When set to UCOL_OFF (default) no normalization check 1.283 + * is performed. The correctness of the result is guaranteed only if the 1.284 + * input data is in so-called FCD form (see users manual for more info). 1.285 + * When set to UCOL_ON, an incremental check is performed to see whether 1.286 + * the input data is in the FCD form. If the data is not in the FCD form, 1.287 + * incremental NFD normalization is performed. 1.288 + * @stable ICU 2.0 1.289 + */ 1.290 + UCOL_NORMALIZATION_MODE, 1.291 + /** An alias for UCOL_NORMALIZATION_MODE attribute. 1.292 + * @stable ICU 2.0 1.293 + */ 1.294 + UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, 1.295 + /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, 1.296 + * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength 1.297 + * for most locales (except Japanese) is tertiary. Quaternary strength 1.298 + * is useful when combined with shifted setting for alternate handling 1.299 + * attribute and for JIS x 4061 collation, when it is used to distinguish 1.300 + * between Katakana and Hiragana (this is achieved by setting the 1.301 + * UCOL_HIRAGANA_QUATERNARY mode to on). Otherwise, quaternary level 1.302 + * is affected only by the number of non ignorable code points in 1.303 + * the string. Identical strength is rarely useful, as it amounts 1.304 + * to codepoints of the NFD form of the string. 1.305 + * @stable ICU 2.0 1.306 + */ 1.307 + UCOL_STRENGTH, 1.308 +#ifndef U_HIDE_DEPRECATED_API 1.309 + /** When turned on, this attribute positions Hiragana before all 1.310 + * non-ignorables on quaternary level. This is a sneaky way to produce JIS 1.311 + * sort order. 1.312 + * 1.313 + * This attribute is an implementation detail of the CLDR Japanese tailoring. 
1.314 + * The implementation might change to use a different mechanism 1.315 + * to achieve the same Japanese sort order. 1.316 + * Since ICU 50, this attribute is not settable any more via API functions. 1.317 + * @deprecated ICU 50 Implementation detail, cannot be set via API, might be removed from implementation. 1.318 + */ 1.319 + UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1, 1.320 +#endif /* U_HIDE_DEPRECATED_API */ 1.321 + /** When turned on, this attribute generates a collation key 1.322 + * for the numeric value of substrings of digits. 1.323 + * This is a way to get '100' to sort AFTER '2'. Note that the longest 1.324 + * digit substring that can be treated as a single collation element is 1.325 + * 254 digits (not counting leading zeros). If a digit substring is 1.326 + * longer than that, the digits beyond the limit will be treated as a 1.327 + * separate digit substring associated with a separate collation element. 1.328 + * @stable ICU 2.8 1.329 + */ 1.330 + UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, 1.331 + /** 1.332 + * The number of UColAttribute constants. 1.333 + * @stable ICU 2.0 1.334 + */ 1.335 + UCOL_ATTRIBUTE_COUNT 1.336 +} UColAttribute; 1.337 + 1.338 +/** Options for retrieving the rule string 1.339 + * @stable ICU 2.0 1.340 + */ 1.341 +typedef enum { 1.342 + /** 1.343 + * Retrieves the tailoring rules only. 1.344 + * Same as calling the version of getRules() without UColRuleOption. 1.345 + * @stable ICU 2.0 1.346 + */ 1.347 + UCOL_TAILORING_ONLY, 1.348 + /** 1.349 + * Retrieves the "UCA rules" concatenated with the tailoring rules. 1.350 + * The "UCA rules" are an <i>approximation</i> of the root collator's sort order. 1.351 + * They are almost never used or useful at runtime and can be removed from the data. 
1.352 + * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales 1.353 + * @stable ICU 2.0 1.354 + */ 1.355 + UCOL_FULL_RULES 1.356 +} UColRuleOption ; 1.357 + 1.358 +/** 1.359 + * Open a UCollator for comparing strings. 1.360 + * The UCollator pointer is used in all the calls to the Collation 1.361 + * service. When finished, the collator must be disposed of by calling 1.362 + * {@link #ucol_close }. 1.363 + * @param loc The locale containing the required collation rules. 1.364 + * Special values for locales can be passed in - 1.365 + * if NULL is passed for the locale, the default locale 1.366 + * collation rules will be used. If empty string ("") or 1.367 + * "root" are passed, UCA rules will be used. 1.368 + * @param status A pointer to an UErrorCode to receive any errors 1.369 + * @return A pointer to a UCollator, or 0 if an error occurred. 1.370 + * @see ucol_openRules 1.371 + * @see ucol_safeClone 1.372 + * @see ucol_close 1.373 + * @stable ICU 2.0 1.374 + */ 1.375 +U_STABLE UCollator* U_EXPORT2 1.376 +ucol_open(const char *loc, UErrorCode *status); 1.377 + 1.378 +/** 1.379 + * Produce a UCollator instance according to the rules supplied. 1.380 + * The rules are used to change the default ordering, defined in the 1.381 + * UCA in a process called tailoring. The resulting UCollator pointer 1.382 + * can be used in the same way as the one obtained by {@link #ucol_open }. 1.383 + * @param rules A string describing the collation rules. For the syntax 1.384 + * of the rules please see users guide. 1.385 + * @param rulesLength The length of rules, or -1 if null-terminated. 
1.386 + * @param normalizationMode The normalization mode: One of 1.387 + * UCOL_OFF (expect the text to not need normalization), 1.388 + * UCOL_ON (normalize), or 1.389 + * UCOL_DEFAULT (set the mode according to the rules) 1.390 + * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 1.391 + * UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH - can be also set in the rules. 1.392 + * @param parseError A pointer to UParseError to receive information about errors 1.393 + * that occurred during parsing. This argument can currently be set 1.394 + * to NULL, but at the user's own risk. Please provide a real structure. 1.395 + * @param status A pointer to an UErrorCode to receive any errors 1.396 + * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case 1.397 + * of error - please use status argument to check for errors. 1.398 + * @see ucol_open 1.399 + * @see ucol_safeClone 1.400 + * @see ucol_close 1.401 + * @stable ICU 2.0 1.402 + */ 1.403 +U_STABLE UCollator* U_EXPORT2 1.404 +ucol_openRules( const UChar *rules, 1.405 + int32_t rulesLength, 1.406 + UColAttributeValue normalizationMode, 1.407 + UCollationStrength strength, 1.408 + UParseError *parseError, 1.409 + UErrorCode *status); 1.410 + 1.411 +/** 1.412 + * Open a collator defined by a short form string. 1.413 + * The structure and the syntax of the string is defined in the "Naming collators" 1.414 + * section of the users guide: 1.415 + * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators 1.416 + * Attributes are overridden by the subsequent attributes. So, for "S2_S3", final 1.417 + * strength will be 3. 3066bis locale overrides individual locale parts. 1.418 + * The call to this function is equivalent to a call to ucol_open, followed by a 1.419 + * series of calls to ucol_setAttribute and ucol_setVariableTop. 1.420 + * @param definition A short string containing a locale and a set of attributes. 
1.421 + * Attributes not explicitly mentioned are left at the default 1.422 + * state for a locale. 1.423 + * @param parseError if not NULL, structure that will get filled with error's pre 1.424 + * and post context in case of error. 1.425 + * @param forceDefaults if FALSE, the settings that are the same as the collator 1.426 + * default settings will not be applied (for example, setting 1.427 + * French secondary on a French collator would not be executed). 1.428 + * If TRUE, all the settings will be applied regardless of the 1.429 + * collator default value. If the definition 1.430 + * strings are to be cached, this should be set to FALSE. 1.431 + * @param status Error code. Apart from regular error conditions connected to 1.432 + * instantiating collators (like out of memory or similar), this 1.433 + * API will return an error if an invalid attribute or attribute/value 1.434 + * combination is specified. 1.435 + * @return A pointer to a UCollator or 0 if an error occurred (including an 1.436 + * invalid attribute). 1.437 + * @see ucol_open 1.438 + * @see ucol_setAttribute 1.439 + * @see ucol_setVariableTop 1.440 + * @see ucol_getShortDefinitionString 1.441 + * @see ucol_normalizeShortDefinitionString 1.442 + * @stable ICU 3.0 1.443 + * 1.444 + */ 1.445 +U_STABLE UCollator* U_EXPORT2 1.446 +ucol_openFromShortString( const char *definition, 1.447 + UBool forceDefaults, 1.448 + UParseError *parseError, 1.449 + UErrorCode *status); 1.450 + 1.451 +#ifndef U_HIDE_DEPRECATED_API 1.452 +/** 1.453 + * Get a set containing the contractions defined by the collator. The set includes 1.454 + * both the UCA contractions and the contractions defined by the collator. This set 1.455 + * will contain only strings. If a tailoring explicitly suppresses contractions from 1.456 + * the UCA (like Russian), removed contractions will not be in the resulting set. 1.457 + * @param coll collator 1.458 + * @param conts the set to hold the result. 
It gets emptied before 1.459 + * contractions are added. 1.460 + * @param status to hold the error code 1.461 + * @return the size of the contraction set 1.462 + * 1.463 + * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead 1.464 + */ 1.465 +U_DEPRECATED int32_t U_EXPORT2 1.466 +ucol_getContractions( const UCollator *coll, 1.467 + USet *conts, 1.468 + UErrorCode *status); 1.469 +#endif /* U_HIDE_DEPRECATED_API */ 1.470 + 1.471 +/** 1.472 + * Get a set containing the expansions defined by the collator. The set includes 1.473 + * both the UCA expansions and the expansions defined by the tailoring 1.474 + * @param coll collator 1.475 + * @param contractions if not NULL, the set to hold the contractions 1.476 + * @param expansions if not NULL, the set to hold the expansions 1.477 + * @param addPrefixes add the prefix contextual elements to contractions 1.478 + * @param status to hold the error code 1.479 + * 1.480 + * @stable ICU 3.4 1.481 + */ 1.482 +U_STABLE void U_EXPORT2 1.483 +ucol_getContractionsAndExpansions( const UCollator *coll, 1.484 + USet *contractions, USet *expansions, 1.485 + UBool addPrefixes, UErrorCode *status); 1.486 + 1.487 +/** 1.488 + * Close a UCollator. 1.489 + * Once closed, a UCollator should not be used. Every open collator should 1.490 + * be closed. Otherwise, a memory leak will result. 1.491 + * @param coll The UCollator to close. 1.492 + * @see ucol_open 1.493 + * @see ucol_openRules 1.494 + * @see ucol_safeClone 1.495 + * @stable ICU 2.0 1.496 + */ 1.497 +U_STABLE void U_EXPORT2 1.498 +ucol_close(UCollator *coll); 1.499 + 1.500 +#if U_SHOW_CPLUSPLUS_API 1.501 + 1.502 +U_NAMESPACE_BEGIN 1.503 + 1.504 +/** 1.505 + * \class LocalUCollatorPointer 1.506 + * "Smart pointer" class, closes a UCollator via ucol_close(). 1.507 + * For most methods see the LocalPointerBase base class. 
1.508 + * 1.509 + * @see LocalPointerBase 1.510 + * @see LocalPointer 1.511 + * @stable ICU 4.4 1.512 + */ 1.513 +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close); 1.514 + 1.515 +U_NAMESPACE_END 1.516 + 1.517 +#endif 1.518 + 1.519 +/** 1.520 + * Compare two strings. 1.521 + * The strings will be compared using the options already specified. 1.522 + * @param coll The UCollator containing the comparison rules. 1.523 + * @param source The source string. 1.524 + * @param sourceLength The length of source, or -1 if null-terminated. 1.525 + * @param target The target string. 1.526 + * @param targetLength The length of target, or -1 if null-terminated. 1.527 + * @return The result of comparing the strings; one of UCOL_EQUAL, 1.528 + * UCOL_GREATER, UCOL_LESS 1.529 + * @see ucol_greater 1.530 + * @see ucol_greaterOrEqual 1.531 + * @see ucol_equal 1.532 + * @stable ICU 2.0 1.533 + */ 1.534 +U_STABLE UCollationResult U_EXPORT2 1.535 +ucol_strcoll( const UCollator *coll, 1.536 + const UChar *source, 1.537 + int32_t sourceLength, 1.538 + const UChar *target, 1.539 + int32_t targetLength); 1.540 + 1.541 +/** 1.542 +* Compare two strings in UTF-8. 1.543 +* The strings will be compared using the options already specified. 1.544 +* Note: When an input string contains a malformed UTF-8 byte sequence, 1.545 +* this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). 1.546 +* @param coll The UCollator containing the comparison rules. 1.547 +* @param source The source UTF-8 string. 1.548 +* @param sourceLength The length of source, or -1 if null-terminated. 1.549 +* @param target The target UTF-8 string. 1.550 +* @param targetLength The length of target, or -1 if null-terminated. 
1.551 +* @param status A pointer to an UErrorCode to receive any errors 1.552 +* @return The result of comparing the strings; one of UCOL_EQUAL, 1.553 +* UCOL_GREATER, UCOL_LESS 1.554 +* @see ucol_greater 1.555 +* @see ucol_greaterOrEqual 1.556 +* @see ucol_equal 1.557 +* @stable ICU 50 1.558 +*/ 1.559 +U_STABLE UCollationResult U_EXPORT2 1.560 +ucol_strcollUTF8( 1.561 + const UCollator *coll, 1.562 + const char *source, 1.563 + int32_t sourceLength, 1.564 + const char *target, 1.565 + int32_t targetLength, 1.566 + UErrorCode *status); 1.567 + 1.568 +/** 1.569 + * Determine if one string is greater than another. 1.570 + * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER 1.571 + * @param coll The UCollator containing the comparison rules. 1.572 + * @param source The source string. 1.573 + * @param sourceLength The length of source, or -1 if null-terminated. 1.574 + * @param target The target string. 1.575 + * @param targetLength The length of target, or -1 if null-terminated. 1.576 + * @return TRUE if source is greater than target, FALSE otherwise. 1.577 + * @see ucol_strcoll 1.578 + * @see ucol_greaterOrEqual 1.579 + * @see ucol_equal 1.580 + * @stable ICU 2.0 1.581 + */ 1.582 +U_STABLE UBool U_EXPORT2 1.583 +ucol_greater(const UCollator *coll, 1.584 + const UChar *source, int32_t sourceLength, 1.585 + const UChar *target, int32_t targetLength); 1.586 + 1.587 +/** 1.588 + * Determine if one string is greater than or equal to another. 1.589 + * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS 1.590 + * @param coll The UCollator containing the comparison rules. 1.591 + * @param source The source string. 1.592 + * @param sourceLength The length of source, or -1 if null-terminated. 1.593 + * @param target The target string. 1.594 + * @param targetLength The length of target, or -1 if null-terminated. 1.595 + * @return TRUE if source is greater than or equal to target, FALSE otherwise. 
1.596 + * @see ucol_strcoll 1.597 + * @see ucol_greater 1.598 + * @see ucol_equal 1.599 + * @stable ICU 2.0 1.600 + */ 1.601 +U_STABLE UBool U_EXPORT2 1.602 +ucol_greaterOrEqual(const UCollator *coll, 1.603 + const UChar *source, int32_t sourceLength, 1.604 + const UChar *target, int32_t targetLength); 1.605 + 1.606 +/** 1.607 + * Compare two strings for equality. 1.608 + * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL 1.609 + * @param coll The UCollator containing the comparison rules. 1.610 + * @param source The source string. 1.611 + * @param sourceLength The length of source, or -1 if null-terminated. 1.612 + * @param target The target string. 1.613 + * @param targetLength The length of target, or -1 if null-terminated. 1.614 + * @return TRUE if source is equal to target, FALSE otherwise 1.615 + * @see ucol_strcoll 1.616 + * @see ucol_greater 1.617 + * @see ucol_greaterOrEqual 1.618 + * @stable ICU 2.0 1.619 + */ 1.620 +U_STABLE UBool U_EXPORT2 1.621 +ucol_equal(const UCollator *coll, 1.622 + const UChar *source, int32_t sourceLength, 1.623 + const UChar *target, int32_t targetLength); 1.624 + 1.625 +/** 1.626 + * Compare two UTF-8 encoded strings. 1.627 + * The strings will be compared using the options already specified. 1.628 + * @param coll The UCollator containing the comparison rules. 1.629 + * @param sIter The source string iterator. 1.630 + * @param tIter The target string iterator. 1.631 + * @return The result of comparing the strings; one of UCOL_EQUAL, 1.632 + * UCOL_GREATER, UCOL_LESS 1.633 + * @param status A pointer to an UErrorCode to receive any errors 1.634 + * @see ucol_strcoll 1.635 + * @stable ICU 2.6 1.636 + */ 1.637 +U_STABLE UCollationResult U_EXPORT2 1.638 +ucol_strcollIter( const UCollator *coll, 1.639 + UCharIterator *sIter, 1.640 + UCharIterator *tIter, 1.641 + UErrorCode *status); 1.642 + 1.643 +/** 1.644 + * Get the collation strength used in a UCollator. 
1.645 + * The strength influences how strings are compared. 1.646 + * @param coll The UCollator to query. 1.647 + * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 1.648 + * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL 1.649 + * @see ucol_setStrength 1.650 + * @stable ICU 2.0 1.651 + */ 1.652 +U_STABLE UCollationStrength U_EXPORT2 1.653 +ucol_getStrength(const UCollator *coll); 1.654 + 1.655 +/** 1.656 + * Set the collation strength used in a UCollator. 1.657 + * The strength influences how strings are compared. 1.658 + * @param coll The UCollator to set. 1.659 + * @param strength The desired collation strength; one of UCOL_PRIMARY, 1.660 + * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT 1.661 + * @see ucol_getStrength 1.662 + * @stable ICU 2.0 1.663 + */ 1.664 +U_STABLE void U_EXPORT2 1.665 +ucol_setStrength(UCollator *coll, 1.666 + UCollationStrength strength); 1.667 + 1.668 +/** 1.669 + * Retrieves the reordering codes for this collator. 1.670 + * These reordering codes are a combination of UScript codes and UColReorderCode entries. 1.671 + * @param coll The UCollator to query. 1.672 + * @param dest The array to fill with the script ordering. 1.673 + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 1.674 + * will only return the length of the result without writing any of the result string (pre-flighting). 1.675 + * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 1.676 + * failure before the function call. 1.677 + * @return The number of reordering codes written to the dest array. 
1.678 + * @see ucol_setReorderCodes 1.679 + * @see ucol_getEquivalentReorderCodes 1.680 + * @see UScriptCode 1.681 + * @see UColReorderCode 1.682 + * @stable ICU 4.8 1.683 + */ 1.684 +U_STABLE int32_t U_EXPORT2 1.685 +ucol_getReorderCodes(const UCollator* coll, 1.686 + int32_t* dest, 1.687 + int32_t destCapacity, 1.688 + UErrorCode *pErrorCode); 1.689 +/** 1.690 + * Sets the reordering codes for this collator. 1.691 + * Collation reordering allows scripts and some other defined blocks of characters 1.692 + * to be moved relative to each other as a block. This reordering is done on top of 1.693 + * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed 1.694 + * at the start and/or the end of the collation order. These groups are specified using 1.695 + * UScript codes and UColReorderCode entries. 1.696 + * <p>By default, reordering codes specified for the start of the order are placed in the 1.697 + * order given after a group of "special" non-script blocks. These special groups of characters 1.698 + * are space, punctuation, symbol, currency, and digit. These special groups are represented with 1.699 + * UColReorderCode entries. Script groups can be intermingled with 1.700 + * these special non-script blocks if those special blocks are explicitly specified in the reordering. 1.701 + * <p>The special code OTHERS stands for any script that is not explicitly 1.702 + * mentioned in the list of reordering codes given. Anything that is after OTHERS 1.703 + * will go at the very end of the reordering in the order given. 1.704 + * <p>The special reorder code DEFAULT will reset the reordering for this collator 1.705 + * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that 1.706 + * was specified when this collator was created from resource data or from rules. The 1.707 + * DEFAULT code <b>must</b> be the sole code supplied when it is used. 
If not 1.708 + * that will result in a U_ILLEGAL_ARGUMENT_ERROR being set. 1.709 + * <p>The special reorder code NONE will remove any reordering for this collator. 1.710 + * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The 1.711 + * NONE code <b>must</b> be the sole code supplied when it is used. 1.712 + * @param coll The UCollator to set. 1.713 + * @param reorderCodes An array of script codes in the new order. This can be NULL if the 1.714 + * length is also set to 0. An empty array will clear any reordering codes on the collator. 1.715 + * @param reorderCodesLength The length of reorderCodes. 1.716 + * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 1.717 + * failure before the function call. 1.718 + * @see ucol_getReorderCodes 1.719 + * @see ucol_getEquivalentReorderCodes 1.720 + * @see UScriptCode 1.721 + * @see UColReorderCode 1.722 + * @stable ICU 4.8 1.723 + */ 1.724 +U_STABLE void U_EXPORT2 1.725 +ucol_setReorderCodes(UCollator* coll, 1.726 + const int32_t* reorderCodes, 1.727 + int32_t reorderCodesLength, 1.728 + UErrorCode *pErrorCode); 1.729 + 1.730 +/** 1.731 + * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder 1.732 + * codes will be grouped and must reorder together. 1.733 + * @param reorderCode The reorder code to determine equivalence for. 1.734 + * @param dest The array to fill with the script ordering. 1.735 + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 1.736 + * will only return the length of the result without writing any of the result string (pre-flighting). 1.737 + * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate 1.738 + * a failure before the function call. 1.739 + * @return The number of reordering codes written to the dest array. 
1.740 + * @see ucol_setReorderCodes 1.741 + * @see ucol_getReorderCodes 1.742 + * @see UScriptCode 1.743 + * @see UColReorderCode 1.744 + * @stable ICU 4.8 1.745 + */ 1.746 +U_STABLE int32_t U_EXPORT2 1.747 +ucol_getEquivalentReorderCodes(int32_t reorderCode, 1.748 + int32_t* dest, 1.749 + int32_t destCapacity, 1.750 + UErrorCode *pErrorCode); 1.751 + 1.752 +/** 1.753 + * Get the display name for a UCollator. 1.754 + * The display name is suitable for presentation to a user. 1.755 + * @param objLoc The locale of the collator in question. 1.756 + * @param dispLoc The locale for display. 1.757 + * @param result A pointer to a buffer to receive the display name. 1.758 + * @param resultLength The maximum size of result. 1.759 + * @param status A pointer to a UErrorCode to receive any errors 1.760 + * @return The total buffer size needed; if greater than resultLength, 1.761 + * the output was truncated. 1.762 + * @stable ICU 2.0 1.763 + */ 1.764 +U_STABLE int32_t U_EXPORT2 1.765 +ucol_getDisplayName( const char *objLoc, 1.766 + const char *dispLoc, 1.767 + UChar *result, 1.768 + int32_t resultLength, 1.769 + UErrorCode *status); 1.770 + 1.771 +/** 1.772 + * Get a locale for which collation rules are available. 1.773 + * A UCollator in a locale returned by this function will perform the correct 1.774 + * collation for the locale. 1.775 + * @param localeIndex The index of the desired locale. 1.776 + * @return A locale for which collation rules are available, or 0 if none. 1.777 + * @see ucol_countAvailable 1.778 + * @stable ICU 2.0 1.779 + */ 1.780 +U_STABLE const char* U_EXPORT2 1.781 +ucol_getAvailable(int32_t localeIndex); 1.782 + 1.783 +/** 1.784 + * Determine how many locales have collation rules available. 1.785 + * This function is most useful as determining the loop ending condition for 1.786 + * calls to {@link #ucol_getAvailable }. 1.787 + * @return The number of locales for which collation rules are available.
1.788 + * @see ucol_getAvailable 1.789 + * @stable ICU 2.0 1.790 + */ 1.791 +U_STABLE int32_t U_EXPORT2 1.792 +ucol_countAvailable(void); 1.793 + 1.794 +#if !UCONFIG_NO_SERVICE 1.795 +/** 1.796 + * Create a string enumerator of all locales for which a valid 1.797 + * collator may be opened. 1.798 + * @param status input-output error code 1.799 + * @return a string enumeration over locale strings. The caller is 1.800 + * responsible for closing the result. 1.801 + * @stable ICU 3.0 1.802 + */ 1.803 +U_STABLE UEnumeration* U_EXPORT2 1.804 +ucol_openAvailableLocales(UErrorCode *status); 1.805 +#endif 1.806 + 1.807 +/** 1.808 + * Create a string enumerator of all possible keywords that are relevant to 1.809 + * collation. At this point, the only recognized keyword for this 1.810 + * service is "collation". 1.811 + * @param status input-output error code 1.812 + * @return a string enumeration over keyword strings. The caller is 1.813 + * responsible for closing the result. 1.814 + * @stable ICU 3.0 1.815 + */ 1.816 +U_STABLE UEnumeration* U_EXPORT2 1.817 +ucol_getKeywords(UErrorCode *status); 1.818 + 1.819 +/** 1.820 + * Given a keyword, create a string enumeration of all values 1.821 + * for that keyword that are currently in use. 1.822 + * @param keyword a particular keyword as enumerated by 1.823 + * ucol_getKeywords. If any other keyword is passed in, *status is set 1.824 + * to U_ILLEGAL_ARGUMENT_ERROR. 1.825 + * @param status input-output error code 1.826 + * @return a string enumeration over collation keyword values, or NULL 1.827 + * upon error. The caller is responsible for closing the result. 1.828 + * @stable ICU 3.0 1.829 + */ 1.830 +U_STABLE UEnumeration* U_EXPORT2 1.831 +ucol_getKeywordValues(const char *keyword, UErrorCode *status); 1.832 + 1.833 +/** 1.834 + * Given a key and a locale, returns an enumeration of string values in a preferred 1.835 + * order that would make a difference.
These are all and only those values where 1.836 + * the open (creation) of the service with the locale formed from the input locale 1.837 + * plus input keyword and that value has different behavior than creation with the 1.838 + * input locale alone. 1.839 + * @param key one of the keys supported by this service. For now, only 1.840 + * "collation" is supported. 1.841 + * @param locale the locale 1.842 + * @param commonlyUsed if set to true it will return only commonly used values 1.843 + * with the given locale in preferred order. Otherwise, 1.844 + * it will return all the available values for the locale. 1.845 + * @param status error status 1.846 + * @return a string enumeration over keyword values for the given key and the locale. 1.847 + * @stable ICU 4.2 1.848 + */ 1.849 +U_STABLE UEnumeration* U_EXPORT2 1.850 +ucol_getKeywordValuesForLocale(const char* key, 1.851 + const char* locale, 1.852 + UBool commonlyUsed, 1.853 + UErrorCode* status); 1.854 + 1.855 +/** 1.856 + * Return the functionally equivalent locale for the given 1.857 + * requested locale, with respect to given keyword, for the 1.858 + * collation service. If two locales return the same result, then 1.859 + * collators instantiated for these locales will behave 1.860 + * equivalently. The converse is not always true; two collators 1.861 + * may in fact be equivalent, but return different results, due to 1.862 + * internal details. The return result has no other meaning than 1.863 + * that stated above, and implies nothing as to the relationship 1.864 + * between the two locales. This is intended for use by 1.865 + * applications who wish to cache collators, or otherwise reuse 1.866 + * collators when possible. The functional equivalent may change 1.867 + * over time. For more information, please see the <a 1.868 + * href="http://icu-project.org/userguide/locale.html#services"> 1.869 + * Locales and Services</a> section of the ICU User Guide.
1.870 + * @param result fillin for the functionally equivalent locale 1.871 + * @param resultCapacity capacity of the fillin buffer 1.872 + * @param keyword a particular keyword as enumerated by 1.873 + * ucol_getKeywords. 1.874 + * @param locale the requested locale 1.875 + * @param isAvailable if non-NULL, pointer to a fillin parameter that 1.876 + * indicates whether the requested locale was 'available' to the 1.877 + * collation service. A locale is defined as 'available' if it 1.878 + * physically exists within the collation locale data. 1.879 + * @param status pointer to input-output error code 1.880 + * @return the actual buffer size needed for the locale. If greater 1.881 + * than resultCapacity, the returned full name will be truncated and 1.882 + * an error code will be returned. 1.883 + * @stable ICU 3.0 1.884 + */ 1.885 +U_STABLE int32_t U_EXPORT2 1.886 +ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 1.887 + const char* keyword, const char* locale, 1.888 + UBool* isAvailable, UErrorCode* status); 1.889 + 1.890 +/** 1.891 + * Get the collation tailoring rules from a UCollator. 1.892 + * The rules will follow the rule syntax. 1.893 + * @param coll The UCollator to query. 1.894 + * @param length Output parameter; presumably receives the length of the returned rules (TODO confirm against the implementation). 1.895 + * @return The collation tailoring rules. 1.896 + * @stable ICU 2.0 1.897 + */ 1.898 +U_STABLE const UChar* U_EXPORT2 1.899 +ucol_getRules( const UCollator *coll, 1.900 + int32_t *length); 1.901 + 1.902 +/** Get the short definition string for a collator. This API harvests the collator's 1.903 + * locale and the attribute set and produces a string that can be used for opening 1.904 + * a collator with the same properties using the ucol_openFromShortString API. 1.905 + * This string will be normalized.
1.906 + * The structure and the syntax of the string is defined in the "Naming collators" 1.907 + * section of the users guide: 1.908 + * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators 1.909 + * This API supports preflighting. 1.910 + * @param coll a collator 1.911 + * @param locale a locale that will appear as a collators locale in the resulting 1.912 + * short string definition. If NULL, the locale will be harvested 1.913 + * from the collator. 1.914 + * @param buffer space to hold the resulting string 1.915 + * @param capacity capacity of the buffer 1.916 + * @param status for returning errors. All the preflighting errors are featured 1.917 + * @return length of the resulting string 1.918 + * @see ucol_openFromShortString 1.919 + * @see ucol_normalizeShortDefinitionString 1.920 + * @stable ICU 3.0 1.921 + */ 1.922 +U_STABLE int32_t U_EXPORT2 1.923 +ucol_getShortDefinitionString(const UCollator *coll, 1.924 + const char *locale, 1.925 + char *buffer, 1.926 + int32_t capacity, 1.927 + UErrorCode *status); 1.928 + 1.929 +/** Verifies and normalizes short definition string. 1.930 + * Normalized short definition string has all the options sorted by the argument name, 1.931 + * so that equivalent definition strings are the same. 1.932 + * This API supports preflighting. 1.933 + * @param source definition string 1.934 + * @param destination space to hold the resulting string 1.935 + * @param capacity capacity of the buffer 1.936 + * @param parseError if not NULL, structure that will get filled with error's pre 1.937 + * and post context in case of error. 1.938 + * @param status Error code. This API will return an error if an invalid attribute 1.939 + * or attribute/value combination is specified. All the preflighting 1.940 + * errors are also featured 1.941 + * @return length of the resulting normalized string.
1.942 + * 1.943 + * @see ucol_openFromShortString 1.944 + * @see ucol_getShortDefinitionString 1.945 + * 1.946 + * @stable ICU 3.0 1.947 + */ 1.948 + 1.949 +U_STABLE int32_t U_EXPORT2 1.950 +ucol_normalizeShortDefinitionString(const char *source, 1.951 + char *destination, 1.952 + int32_t capacity, 1.953 + UParseError *parseError, 1.954 + UErrorCode *status); 1.955 + 1.956 + 1.957 +/** 1.958 + * Get a sort key for a string from a UCollator. 1.959 + * Sort keys may be compared using <TT>strcmp</TT>. 1.960 + * 1.961 + * Like ICU functions that write to an output buffer, the buffer contents 1.962 + * are undefined if the buffer capacity (resultLength parameter) is too small. 1.963 + * Unlike ICU functions that write a string to an output buffer, 1.964 + * the terminating zero byte is counted in the sort key length. 1.965 + * @param coll The UCollator containing the collation rules. 1.966 + * @param source The string to transform. 1.967 + * @param sourceLength The length of source, or -1 if null-terminated. 1.968 + * @param result A pointer to a buffer to receive the sort key. 1.969 + * @param resultLength The maximum size of result. 1.970 + * @return The size needed to fully store the sort key. 1.971 + * If there was an internal error generating the sort key, 1.972 + * a zero value is returned. 1.973 + * @see ucol_keyHashCode 1.974 + * @stable ICU 2.0 1.975 + */ 1.976 +U_STABLE int32_t U_EXPORT2 1.977 +ucol_getSortKey(const UCollator *coll, 1.978 + const UChar *source, 1.979 + int32_t sourceLength, 1.980 + uint8_t *result, 1.981 + int32_t resultLength); 1.982 + 1.983 + 1.984 +/** Gets the next count bytes of a sort key. Caller needs 1.985 + * to preserve state array between calls and to provide 1.986 + * the same type of UCharIterator set with the same string. 1.987 + * The destination buffer provided must be big enough to store 1.988 + * the number of requested bytes.
1.989 + * 1.990 + * The generated sort key may or may not be compatible with 1.991 + * sort keys generated using ucol_getSortKey(). 1.992 + * @param coll The UCollator containing the collation rules. 1.993 + * @param iter UCharIterator containing the string we need 1.994 + * the sort key to be calculated for. 1.995 + * @param state Opaque state of sortkey iteration. 1.996 + * @param dest Buffer to hold the resulting sortkey part 1.997 + * @param count number of sort key bytes required. 1.998 + * @param status error code indicator. 1.999 + * @return the actual number of bytes of a sortkey. It can be 1.1000 + * smaller than count if we have reached the end of 1.1001 + * the sort key. 1.1002 + * @stable ICU 2.6 1.1003 + */ 1.1004 +U_STABLE int32_t U_EXPORT2 1.1005 +ucol_nextSortKeyPart(const UCollator *coll, 1.1006 + UCharIterator *iter, 1.1007 + uint32_t state[2], 1.1008 + uint8_t *dest, int32_t count, 1.1009 + UErrorCode *status); 1.1010 + 1.1011 +/** enum that is taken by ucol_getBound API 1.1012 + * See below for explanation 1.1013 + * do not change the values assigned to the 1.1014 + * members of this enum. Underlying code 1.1015 + * depends on them having these numbers 1.1016 + * @stable ICU 2.0 1.1017 + */ 1.1018 +typedef enum { 1.1019 + /** lower bound */ 1.1020 + UCOL_BOUND_LOWER = 0, 1.1021 + /** upper bound that will match strings of exact size */ 1.1022 + UCOL_BOUND_UPPER = 1, 1.1023 + /** upper bound that will match all the strings that have the same initial substring as the given string */ 1.1024 + UCOL_BOUND_UPPER_LONG = 2, 1.1025 + /** number of bound modes defined above (implicitly 3) */ UCOL_BOUND_VALUE_COUNT 1.1026 +} UColBoundMode; 1.1027 + 1.1028 +/** 1.1029 + * Produce a bound for a given sortkey and a number of levels. 1.1030 + * Return value is always the number of bytes needed, regardless of 1.1031 + * whether the result buffer was big enough or even valid.<br> 1.1032 + * Resulting bounds can be used to produce a range of strings that are 1.1033 + * between upper and lower bounds.
For example, if bounds are produced 1.1034 + * for a sortkey of string "smith", strings between upper and lower 1.1035 + * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> 1.1036 + * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER 1.1037 + * is produced, strings matched would be as above. However, if bound 1.1038 + * produced using UCOL_BOUND_UPPER_LONG is used, the above example will 1.1039 + * also match "Smithsonian" and similar.<br> 1.1040 + * For more on usage, see example in cintltst/capitst.c in procedure 1.1041 + * TestBounds. 1.1042 + * Sort keys may be compared using <TT>strcmp</TT>. 1.1043 + * @param source The source sortkey. 1.1044 + * @param sourceLength The length of source, or -1 if null-terminated. 1.1045 + * (If an unmodified sortkey is passed, it is always null 1.1046 + * terminated). 1.1047 + * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 1.1048 + * produces a lower inclusive bound, UCOL_BOUND_UPPER, that 1.1049 + * produces upper bound that matches strings of the same length 1.1050 + * or UCOL_BOUND_UPPER_LONG that matches strings that have the 1.1051 + * same starting substring as the source string. 1.1052 + * @param noOfLevels Number of levels required in the resulting bound (for most 1.1053 + * uses, the recommended value is 1). See users guide for 1.1054 + * explanation on number of levels a sortkey can have. 1.1055 + * @param result A pointer to a buffer to receive the resulting sortkey. 1.1056 + * @param resultLength The maximum size of result. 1.1057 + * @param status Used for returning error code if something went wrong. If the 1.1058 + * number of levels requested is higher than the number of levels 1.1059 + * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 1.1060 + * issued. 1.1061 + * @return The size needed to fully store the bound.
1.1062 + * @see ucol_keyHashCode 1.1063 + * @stable ICU 2.1 1.1064 + */ 1.1065 +U_STABLE int32_t U_EXPORT2 1.1066 +ucol_getBound(const uint8_t *source, 1.1067 + int32_t sourceLength, 1.1068 + UColBoundMode boundType, 1.1069 + uint32_t noOfLevels, 1.1070 + uint8_t *result, 1.1071 + int32_t resultLength, 1.1072 + UErrorCode *status); 1.1073 + 1.1074 +/** 1.1075 + * Gets the version information for a Collator. Version is currently 1.1076 + * an opaque 32-bit number which depends, among other things, on major 1.1077 + * versions of the collator tailoring and UCA. 1.1078 + * @param coll The UCollator to query. 1.1079 + * @param info the version # information, the result will be filled in 1.1080 + * @stable ICU 2.0 1.1081 + */ 1.1082 +U_STABLE void U_EXPORT2 1.1083 +ucol_getVersion(const UCollator* coll, UVersionInfo info); 1.1084 + 1.1085 +/** 1.1086 + * Gets the UCA version information for a Collator. Version is the 1.1087 + * UCA version number (3.1.1, 4.0). 1.1088 + * @param coll The UCollator to query. 1.1089 + * @param info the version # information, the result will be filled in 1.1090 + * @stable ICU 2.8 1.1091 + */ 1.1092 +U_STABLE void U_EXPORT2 1.1093 +ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); 1.1094 + 1.1095 +/** 1.1096 + * Merges two sort keys. The levels are merged with their corresponding counterparts 1.1097 + * (primaries with primaries, secondaries with secondaries etc.). Between the values 1.1098 + * from the same level a separator is inserted. 1.1099 + * 1.1100 + * This is useful, for example, for combining sort keys from first and last names 1.1101 + * to sort such pairs. 1.1102 + * It is possible to merge multiple sort keys by consecutively merging 1.1103 + * another one with the intermediate result. 1.1104 + * 1.1105 + * The length of the merge result is the sum of the lengths of the input sort keys.
1.1106 + * 1.1107 + * Example (uncompressed): 1.1108 + * <pre>191B1D 01 050505 01 910505 00 1.1109 + * 1F2123 01 050505 01 910505 00</pre> 1.1110 + * will be merged as 1.1111 + * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre> 1.1112 + * 1.1113 + * If the destination buffer is not big enough, then its contents are undefined. 1.1114 + * If any of source lengths are zero or any of the source pointers are NULL/undefined, 1.1115 + * the result is of size zero. 1.1116 + * 1.1117 + * @param src1 the first sort key 1.1118 + * @param src1Length the length of the first sort key, including the zero byte at the end; 1.1119 + * can be -1 if the function is to find the length 1.1120 + * @param src2 the second sort key 1.1121 + * @param src2Length the length of the second sort key, including the zero byte at the end; 1.1122 + * can be -1 if the function is to find the length 1.1123 + * @param dest the buffer where the merged sort key is written, 1.1124 + * can be NULL if destCapacity==0 1.1125 + * @param destCapacity the number of bytes in the dest buffer 1.1126 + * @return the length of the merged sort key, src1Length+src2Length; 1.1127 + * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), 1.1128 + * in which cases the contents of dest is undefined 1.1129 + * @stable ICU 2.0 1.1130 + */ 1.1131 +U_STABLE int32_t U_EXPORT2 1.1132 +ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, 1.1133 + const uint8_t *src2, int32_t src2Length, 1.1134 + uint8_t *dest, int32_t destCapacity); 1.1135 + 1.1136 +/** 1.1137 + * Universal attribute setter 1.1138 + * @param coll collator whose attributes are to be changed 1.1139 + * @param attr attribute type 1.1140 + * @param value attribute value 1.1141 + * @param status to indicate whether the operation went on smoothly or there were errors 1.1142 + * @see UColAttribute 1.1143 + * @see UColAttributeValue 1.1144 + * @see ucol_getAttribute 1.1145 + * @stable ICU 2.0 1.1146 + */
1.1147 +U_STABLE void U_EXPORT2 1.1148 +ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); 1.1149 + 1.1150 +/** 1.1151 + * Universal attribute getter 1.1152 + * @param coll collator whose attributes are to be queried 1.1153 + * @param attr attribute type 1.1154 + * @param status to indicate whether the operation went on smoothly or there were errors 1.1155 + * @return attribute value 1.1156 + * @see UColAttribute 1.1157 + * @see UColAttributeValue 1.1158 + * @see ucol_setAttribute 1.1159 + * @stable ICU 2.0 1.1160 + */ 1.1161 +U_STABLE UColAttributeValue U_EXPORT2 1.1162 +ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status); 1.1163 + 1.1164 +/** Variable top 1.1165 + * is a two byte primary value which causes all the codepoints with primary values that 1.1166 + * are less or equal than the variable top to be shifted when alternate handling is set 1.1167 + * to UCOL_SHIFTED. 1.1168 + * Sets the variable top to a collation element value of a string supplied. 1.1169 + * @param coll collator which variable top needs to be changed 1.1170 + * @param varTop one or more (if contraction) UChars to which the variable top should be set 1.1171 + * @param len length of variable top string. If -1 it is considered to be zero terminated. 1.1172 + * @param status error code. If error code is set, the return value is undefined. 1.1173 + * Errors set by this function are: <br> 1.1174 + * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such 1.1175 + * a contraction<br> 1.1176 + * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes 1.1177 + * @return a 32 bit value containing the value of the variable top in upper 16 bits.
1.1178 + * Lower 16 bits are undefined 1.1179 + * @see ucol_getVariableTop 1.1180 + * @see ucol_restoreVariableTop 1.1181 + * @stable ICU 2.0 1.1182 + */ 1.1183 +U_STABLE uint32_t U_EXPORT2 1.1184 +ucol_setVariableTop(UCollator *coll, 1.1185 + const UChar *varTop, int32_t len, 1.1186 + UErrorCode *status); 1.1187 + 1.1188 +/** 1.1189 + * Gets the variable top value of a Collator. 1.1190 + * Lower 16 bits are undefined and should be ignored. 1.1191 + * @param coll collator which variable top needs to be retrieved 1.1192 + * @param status error code (not changed by function). If error code is set, 1.1193 + * the return value is undefined. 1.1194 + * @return the variable top value of a Collator. 1.1195 + * @see ucol_setVariableTop 1.1196 + * @see ucol_restoreVariableTop 1.1197 + * @stable ICU 2.0 1.1198 + */ 1.1199 +U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); 1.1200 + 1.1201 +/** 1.1202 + * Sets the variable top to a collation element value supplied. Variable top is 1.1203 + * set to the upper 16 bits. 1.1204 + * Lower 16 bits are ignored. 1.1205 + * @param coll collator which variable top needs to be changed 1.1206 + * @param varTop CE value, as returned by ucol_setVariableTop or ucol_getVariableTop 1.1207 + * @param status error code (not changed by function) 1.1208 + * @see ucol_getVariableTop 1.1209 + * @see ucol_setVariableTop 1.1210 + * @stable ICU 2.0 1.1211 + */ 1.1212 +U_STABLE void U_EXPORT2 1.1213 +ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); 1.1214 + 1.1215 +/** 1.1216 + * Thread safe cloning operation. The result is a clone of a given collator. 1.1217 + * @param coll collator to be cloned 1.1218 + * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> 1.1219 + * user allocated space for the new clone. 1.1220 + * If NULL new memory will be allocated. 1.1221 + * If buffer is not large enough, new memory will be allocated.
1.1222 + * Clients can use the U_COL_SAFECLONE_BUFFERSIZE. 1.1223 + * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> 1.1224 + * pointer to size of allocated space. 1.1225 + * If *pBufferSize == 0, a sufficient size for use in cloning will 1.1226 + * be returned ('pre-flighting') 1.1227 + * If *pBufferSize is not enough for a stack-based safe clone, 1.1228 + * new memory will be allocated. 1.1229 + * @param status to indicate whether the operation went on smoothly or there were errors 1.1230 + * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any 1.1231 + * allocations were necessary. 1.1232 + * @return pointer to the new clone 1.1233 + * @see ucol_open 1.1234 + * @see ucol_openRules 1.1235 + * @see ucol_close 1.1236 + * @stable ICU 2.0 1.1237 + */ 1.1238 +U_STABLE UCollator* U_EXPORT2 1.1239 +ucol_safeClone(const UCollator *coll, 1.1240 + void *stackBuffer, 1.1241 + int32_t *pBufferSize, 1.1242 + UErrorCode *status); 1.1243 + 1.1244 +#ifndef U_HIDE_DEPRECATED_API 1.1245 + 1.1246 +/** default memory size for the new clone. 1.1247 + * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer. 1.1248 + */ 1.1249 +#define U_COL_SAFECLONE_BUFFERSIZE 1 1.1250 + 1.1251 +#endif /* U_HIDE_DEPRECATED_API */ 1.1252 + 1.1253 +/** 1.1254 + * Returns current rules. Delta defines whether full rules are returned or just the tailoring. 1.1255 + * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough 1.1256 + * to store rules, will store up to available space. 1.1257 + * 1.1258 + * ucol_getRules() should normally be used instead. 1.1259 + * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales 1.1260 + * @param coll collator to get the rules from 1.1261 + * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 1.1262 + * @param buffer buffer to store the result in. If NULL, you'll get no rules.
1.1263 + * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in. 1.1264 + * @return the number of UChars needed to store the rules 1.1265 + * @stable ICU 2.0 1.1266 + * @see UCOL_FULL_RULES 1.1267 + */ 1.1268 +U_STABLE int32_t U_EXPORT2 1.1269 +ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen); 1.1270 + 1.1271 +#ifndef U_HIDE_DEPRECATED_API 1.1272 +/** 1.1273 + * gets the locale name of the collator. If the collator 1.1274 + * is instantiated from the rules, then this function returns 1.1275 + * NULL. 1.1276 + * @param coll The UCollator for which the locale is needed 1.1277 + * @param type You can choose between requested, valid and actual 1.1278 + * locale. For description see the definition of 1.1279 + * ULocDataLocaleType in uloc.h 1.1280 + * @param status error code of the operation 1.1281 + * @return real locale name from which the collation data comes. 1.1282 + * If the collator was instantiated from rules, returns 1.1283 + * NULL. 1.1284 + * @deprecated ICU 2.8 Use ucol_getLocaleByType instead 1.1285 + */ 1.1286 +U_DEPRECATED const char * U_EXPORT2 1.1287 +ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1.1288 +#endif /* U_HIDE_DEPRECATED_API */ 1.1289 + 1.1290 +/** 1.1291 + * gets the locale name of the collator. If the collator 1.1292 + * is instantiated from the rules, then this function returns 1.1293 + * NULL. 1.1294 + * @param coll The UCollator for which the locale is needed 1.1295 + * @param type You can choose between requested, valid and actual 1.1296 + * locale. For description see the definition of 1.1297 + * ULocDataLocaleType in uloc.h 1.1298 + * @param status error code of the operation 1.1299 + * @return real locale name from which the collation data comes. 1.1300 + * If the collator was instantiated from rules, returns 1.1301 + * NULL.
1.1302 + * @stable ICU 2.8 1.1303 + */ 1.1304 +U_STABLE const char * U_EXPORT2 1.1305 +ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1.1306 + 1.1307 +/** 1.1308 + * Get a Unicode set that contains all the characters and sequences tailored in 1.1309 + * this collator. The result must be disposed of by using uset_close. 1.1310 + * @param coll The UCollator for which we want to get tailored chars 1.1311 + * @param status error code of the operation 1.1312 + * @return a pointer to a newly created USet. Must be disposed of by using uset_close 1.1313 + * @see ucol_openRules 1.1314 + * @see uset_close 1.1315 + * @stable ICU 2.4 1.1316 + */ 1.1317 +U_STABLE USet * U_EXPORT2 1.1318 +ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); 1.1319 + 1.1320 +#ifndef U_HIDE_INTERNAL_API 1.1321 +/** 1.1322 + * Universal attribute getter that returns UCOL_DEFAULT if the value is default 1.1323 + * @param coll collator whose attributes are to be queried 1.1324 + * @param attr attribute type 1.1325 + * @param status to indicate whether the operation went on smoothly or there were errors 1.1326 + * @return attribute value or UCOL_DEFAULT if the value is default 1.1327 + * @see UColAttribute 1.1328 + * @see UColAttributeValue 1.1329 + * @see ucol_setAttribute 1.1330 + * @internal ICU 3.0 1.1331 + */ 1.1332 +U_INTERNAL UColAttributeValue U_EXPORT2 1.1333 +ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status); 1.1334 + 1.1335 +/** Check whether two collators are equal. Collators are considered equal if they 1.1336 + * will sort strings the same. This means that both the current attributes and the 1.1337 + * rules must be equivalent. Currently used for RuleBasedCollator::operator==.
1.1338 + * @param source first collator 1.1339 + * @param target second collator 1.1340 + * @return TRUE if the two collators are equal, FALSE otherwise 1.1341 + * @internal ICU 3.0 1.1342 + */ 1.1343 +U_INTERNAL UBool U_EXPORT2 1.1344 +ucol_equals(const UCollator *source, const UCollator *target); 1.1345 + 1.1346 +/** Calculates the set of unsafe code points, given a collator. 1.1347 + * A character is unsafe if you could append any character and cause the ordering to alter significantly. 1.1348 + * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. 1.1349 + * Thus if you have a character like a_umlaut, and you add a lower_dot to it, 1.1350 + * then it normalizes to a_lower_dot + umlaut, and sorts differently. 1.1351 + * @param coll Collator 1.1352 + * @param unsafe a fill-in set to receive the unsafe points 1.1353 + * @param status for catching errors 1.1354 + * @return number of elements in the set 1.1355 + * @internal ICU 3.0 1.1356 + */ 1.1357 +U_INTERNAL int32_t U_EXPORT2 1.1358 +ucol_getUnsafeSet( const UCollator *coll, 1.1359 + USet *unsafe, 1.1360 + UErrorCode *status); 1.1361 + 1.1362 +/** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away. 1.1363 + * @internal ICU 3.2.1 1.1364 + */ 1.1365 +U_INTERNAL void U_EXPORT2 1.1366 +ucol_forgetUCA(void); 1.1367 + 1.1368 +/** Touches all resources needed for instantiating a collator from a short string definition, 1.1369 + * thus filling up the cache. 1.1370 + * @param definition A short string containing a locale and a set of attributes. 1.1371 + * Attributes not explicitly mentioned are left at the default 1.1372 + * state for a locale. 1.1373 + * @param parseError if not NULL, structure that will get filled with error's pre 1.1374 + * and post context in case of error.
1.1375 + * @param forceDefaults if FALSE, the settings that are the same as the collator 1.1376 + * default settings will not be applied (for example, setting 1.1377 + * French secondary on a French collator would not be executed). 1.1378 + * If TRUE, all the settings will be applied regardless of the 1.1379 + * collator default value. If the definition 1.1380 + * strings are to be cached, should be set to FALSE. 1.1381 + * @param status Error code. Apart from regular error conditions connected to 1.1382 + * instantiating collators (like out of memory or similar), this 1.1383 + * API will return an error if an invalid attribute or attribute/value 1.1384 + * combination is specified. 1.1385 + * @see ucol_openFromShortString 1.1386 + * @internal ICU 3.2.1 1.1387 + */ 1.1388 +U_INTERNAL void U_EXPORT2 1.1389 +ucol_prepareShortStringOpen( const char *definition, 1.1390 + UBool forceDefaults, 1.1391 + UParseError *parseError, 1.1392 + UErrorCode *status); 1.1393 +#endif /* U_HIDE_INTERNAL_API */ 1.1394 + 1.1395 +/** Creates a binary image of a collator. This binary image can be stored and 1.1396 + * later used to instantiate a collator using ucol_openBinary. 1.1397 + * This API supports preflighting. 1.1398 + * @param coll Collator 1.1399 + * @param buffer a fill-in buffer to receive the binary image 1.1400 + * @param capacity capacity of the destination buffer 1.1401 + * @param status for catching errors 1.1402 + * @return size of the image 1.1403 + * @see ucol_openBinary 1.1404 + * @stable ICU 3.2 1.1405 + */ 1.1406 +U_STABLE int32_t U_EXPORT2 1.1407 +ucol_cloneBinary(const UCollator *coll, 1.1408 + uint8_t *buffer, int32_t capacity, 1.1409 + UErrorCode *status); 1.1410 + 1.1411 +/** Opens a collator from a collator binary image created using 1.1412 + * ucol_cloneBinary. Binary image used in instantiation of the 1.1413 + * collator remains owned by the user and should stay around for 1.1414 + * the lifetime of the collator.
The API also takes a base collator 1.1415 + * which usually should be UCA. 1.1416 + * @param bin binary image owned by the user and required through the 1.1417 + * lifetime of the collator 1.1418 + * @param length size of the image. If negative, the API will try to 1.1419 + * figure out the length of the image 1.1420 + * @param base fallback collator, usually UCA. Base is required to be 1.1421 + * present through the lifetime of the collator. Currently 1.1422 + * it cannot be NULL. 1.1423 + * @param status for catching errors 1.1424 + * @return newly created collator 1.1425 + * @see ucol_cloneBinary 1.1426 + * @stable ICU 3.2 1.1427 + */ 1.1428 +U_STABLE UCollator* U_EXPORT2 1.1429 +ucol_openBinary(const uint8_t *bin, int32_t length, 1.1430 + const UCollator *base, 1.1431 + UErrorCode *status); 1.1432 + 1.1433 + 1.1434 +#endif /* #if !UCONFIG_NO_COLLATION */ 1.1435 + 1.1436 +#endif