Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * Copyright (c) 1996-2013, International Business Machines Corporation and others. |
michael@0 | 4 | * All Rights Reserved. |
michael@0 | 5 | ******************************************************************************* |
michael@0 | 6 | */ |
michael@0 | 7 | |
michael@0 | 8 | #ifndef UCOL_H |
michael@0 | 9 | #define UCOL_H |
michael@0 | 10 | |
michael@0 | 11 | #include "unicode/utypes.h" |
michael@0 | 12 | |
michael@0 | 13 | #if !UCONFIG_NO_COLLATION |
michael@0 | 14 | |
michael@0 | 15 | #include "unicode/unorm.h" |
michael@0 | 16 | #include "unicode/localpointer.h" |
michael@0 | 17 | #include "unicode/parseerr.h" |
michael@0 | 18 | #include "unicode/uloc.h" |
michael@0 | 19 | #include "unicode/uset.h" |
michael@0 | 20 | #include "unicode/uscript.h" |
michael@0 | 21 | |
michael@0 | 22 | /** |
michael@0 | 23 | * \file |
michael@0 | 24 | * \brief C API: Collator |
michael@0 | 25 | * |
michael@0 | 26 | * <h2> Collator C API </h2> |
michael@0 | 27 | * |
michael@0 | 28 | * The C API for Collator performs locale-sensitive |
michael@0 | 29 | * string comparison. You use this service to build |
michael@0 | 30 | * searching and sorting routines for natural language text. |
michael@0 | 31 | * <em>Important: </em>The ICU collation service has been reimplemented |
michael@0 | 32 | * in order to achieve better performance and UCA compliance. |
michael@0 | 33 | * For details, see the |
michael@0 | 34 | * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> |
michael@0 | 35 | * collation design document</a>. |
michael@0 | 36 | * <p> |
michael@0 | 37 | * For more information about the collation service see |
michael@0 | 38 | * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>. |
michael@0 | 39 | * <p> |
michael@0 | 40 | * Collation service provides correct sorting orders for most locales supported in ICU. |
michael@0 | 41 | * If specific data for a locale is not available, the order eventually falls back |
michael@0 | 42 | * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. |
michael@0 | 43 | * <p> |
michael@0 | 44 | * Sort ordering may be customized by providing your own set of rules. For more on |
michael@0 | 45 | * this subject see the |
michael@0 | 46 | * <a href="http://icu-project.org/userguide/Collate_Customization.html"> |
michael@0 | 47 | * Collation customization</a> section of the users guide. |
michael@0 | 48 | * <p> |
michael@0 | 49 | * @see UCollationResult |
michael@0 | 50 | * @see UNormalizationMode |
michael@0 | 51 | * @see UCollationStrength |
michael@0 | 52 | * @see UCollationElements |
michael@0 | 53 | */ |
michael@0 | 54 | |
michael@0 | 55 | /** A collator. This is a forward declaration only; no members are defined at this point. |
michael@0 | 56 | * For usage in C programs, where the functions declared below take it by pointer. |
michael@0 | 57 | */ |
michael@0 | 58 | struct UCollator; |
michael@0 | 59 | /** Structure representing a collator object instance. |
michael@0 | 60 | * @stable ICU 2.0 |
michael@0 | 61 | */ |
michael@0 | 62 | typedef struct UCollator UCollator; |
michael@0 | 63 | |
michael@0 | 64 | |
michael@0 | 65 | /** |
michael@0 | 66 | * Possible values for a comparison result, as returned by ucol_strcoll(). |
michael@0 | 67 | * <p> |
michael@0 | 68 | * UCOL_LESS is returned if the source string compares less than the target |
michael@0 | 69 | * string. |
michael@0 | 70 | * UCOL_EQUAL is returned if the source string compares equal to the target |
michael@0 | 71 | * string. |
michael@0 | 72 | * UCOL_GREATER is returned if the source string compares greater than the |
michael@0 | 73 | * target string. |
michael@0 | 74 | * @see ucol_strcoll() |
michael@0 | 75 | * @stable ICU 2.0 |
michael@0 | 76 | */ |
michael@0 | 77 | typedef enum { |
michael@0 | 78 | /** source string == target string */ |
michael@0 | 79 | UCOL_EQUAL = 0, |
michael@0 | 80 | /** source string > target string */ |
michael@0 | 81 | UCOL_GREATER = 1, |
michael@0 | 82 | /** source string < target string */ |
michael@0 | 83 | UCOL_LESS = -1 |
michael@0 | 84 | } UCollationResult ; |
michael@0 | 85 | |
michael@0 | 86 | |
michael@0 | 87 | /** Enum containing attribute values for controlling collation behavior. |
michael@0 | 88 | * Here are all the allowable values. Not every attribute can take every value. The only |
michael@0 | 89 | * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined |
michael@0 | 90 | * value for that locale. |
michael@0 | 91 | * @stable ICU 2.0 |
michael@0 | 92 | */ |
michael@0 | 93 | typedef enum { |
michael@0 | 94 | /** Resets an attribute to the locale's predefined value; accepted by most attributes */ |
michael@0 | 95 | UCOL_DEFAULT = -1, |
michael@0 | 96 | |
michael@0 | 97 | /** Primary collation strength */ |
michael@0 | 98 | UCOL_PRIMARY = 0, |
michael@0 | 99 | /** Secondary collation strength */ |
michael@0 | 100 | UCOL_SECONDARY = 1, |
michael@0 | 101 | /** Tertiary collation strength */ |
michael@0 | 102 | UCOL_TERTIARY = 2, |
michael@0 | 103 | /** Default collation strength (an alias for UCOL_TERTIARY) */ |
michael@0 | 104 | UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, |
michael@0 | 105 | UCOL_CE_STRENGTH_LIMIT, /**< Implicit value 3 (UCOL_TERTIARY + 1), the same value as UCOL_QUATERNARY */ |
michael@0 | 106 | /** Quaternary collation strength */ |
michael@0 | 107 | UCOL_QUATERNARY=3, |
michael@0 | 108 | /** Identical collation strength */ |
michael@0 | 109 | UCOL_IDENTICAL=15, |
michael@0 | 110 | UCOL_STRENGTH_LIMIT, /**< Implicit value 16 (UCOL_IDENTICAL + 1), the same value as UCOL_OFF */ |
michael@0 | 111 | |
michael@0 | 112 | /** Turn the feature off - works for UCOL_FRENCH_COLLATION, |
michael@0 | 113 | UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE |
michael@0 | 114 | & UCOL_DECOMPOSITION_MODE; also listed as an acceptable value of UCOL_CASE_FIRST */ |
michael@0 | 115 | UCOL_OFF = 16, |
michael@0 | 116 | /** Turn the feature on - works for UCOL_FRENCH_COLLATION, |
michael@0 | 117 | UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE |
michael@0 | 118 | & UCOL_DECOMPOSITION_MODE */ |
michael@0 | 119 | UCOL_ON = 17, |
michael@0 | 120 | |
michael@0 | 121 | /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */ |
michael@0 | 122 | UCOL_SHIFTED = 20, |
michael@0 | 123 | /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */ |
michael@0 | 124 | UCOL_NON_IGNORABLE = 21, |
michael@0 | 125 | |
michael@0 | 126 | /** Valid for UCOL_CASE_FIRST - |
michael@0 | 127 | lower case sorts before upper case */ |
michael@0 | 128 | UCOL_LOWER_FIRST = 24, |
michael@0 | 129 | /** Valid for UCOL_CASE_FIRST - upper case sorts before lower case */ |
michael@0 | 130 | UCOL_UPPER_FIRST = 25, |
michael@0 | 131 | |
michael@0 | 132 | UCOL_ATTRIBUTE_VALUE_COUNT /**< Implicit value 26 (UCOL_UPPER_FIRST + 1); the values above are not contiguous, so this is not the number of enumerators */ |
michael@0 | 133 | |
michael@0 | 134 | } UColAttributeValue; |
michael@0 | 135 | |
michael@0 | 136 | /** |
michael@0 | 137 | * Enum containing the codes for reordering segments of the collation table that are not script |
michael@0 | 138 | * codes. These reordering codes are to be used in conjunction with the script codes. |
michael@0 | 139 | * @see ucol_getReorderCodes |
michael@0 | 140 | * @see ucol_setReorderCodes |
michael@0 | 141 | * @see ucol_getEquivalentReorderCodes |
michael@0 | 142 | * @see UScriptCode |
michael@0 | 143 | * @stable ICU 4.8 |
michael@0 | 144 | */ |
michael@0 | 145 | typedef enum { |
michael@0 | 146 | /** |
michael@0 | 147 | * A special reordering code that is used to specify the default |
michael@0 | 148 | * reordering codes for a locale. |
michael@0 | 149 | * @stable ICU 4.8 |
michael@0 | 150 | */ |
michael@0 | 151 | UCOL_REORDER_CODE_DEFAULT = -1, |
michael@0 | 152 | /** |
michael@0 | 153 | * A special reordering code that is used to specify no reordering codes. NOTE(review): defined with the same value (USCRIPT_UNKNOWN) as UCOL_REORDER_CODE_OTHERS - confirm this is intended. |
michael@0 | 154 | * @stable ICU 4.8 |
michael@0 | 155 | */ |
michael@0 | 156 | UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN, |
michael@0 | 157 | /** |
michael@0 | 158 | * A special reordering code that is used to specify all other codes used for |
michael@0 | 159 | * reordering except for the codes listed as UColReorderCode values and those |
michael@0 | 160 | * listed explicitly in a reordering. |
michael@0 | 161 | * @stable ICU 4.8 |
michael@0 | 162 | */ |
michael@0 | 163 | UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN, |
michael@0 | 164 | /** |
michael@0 | 165 | * Characters with the space property. |
michael@0 | 166 | * This is equivalent to the rule value "space". |
michael@0 | 167 | * @stable ICU 4.8 |
michael@0 | 168 | */ |
michael@0 | 169 | UCOL_REORDER_CODE_SPACE = 0x1000, |
michael@0 | 170 | /** |
michael@0 | 171 | * The first entry in the enumeration of reordering groups (an alias for |
michael@0 | 172 | * UCOL_REORDER_CODE_SPACE). Intended for use in range checking and enumeration of the reorder codes. |
michael@0 | 173 | * @stable ICU 4.8 |
michael@0 | 174 | */ |
michael@0 | 175 | UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE, |
michael@0 | 176 | /** |
michael@0 | 177 | * Characters with the punctuation property. |
michael@0 | 178 | * This is equivalent to the rule value "punct". |
michael@0 | 179 | * @stable ICU 4.8 |
michael@0 | 180 | */ |
michael@0 | 181 | UCOL_REORDER_CODE_PUNCTUATION = 0x1001, |
michael@0 | 182 | /** |
michael@0 | 183 | * Characters with the symbol property. |
michael@0 | 184 | * This is equivalent to the rule value "symbol". |
michael@0 | 185 | * @stable ICU 4.8 |
michael@0 | 186 | */ |
michael@0 | 187 | UCOL_REORDER_CODE_SYMBOL = 0x1002, |
michael@0 | 188 | /** |
michael@0 | 189 | * Characters with the currency property. |
michael@0 | 190 | * This is equivalent to the rule value "currency". |
michael@0 | 191 | * @stable ICU 4.8 |
michael@0 | 192 | */ |
michael@0 | 193 | UCOL_REORDER_CODE_CURRENCY = 0x1003, |
michael@0 | 194 | /** |
michael@0 | 195 | * Characters with the digit property. |
michael@0 | 196 | * This is equivalent to the rule value "digit". |
michael@0 | 197 | * @stable ICU 4.8 |
michael@0 | 198 | */ |
michael@0 | 199 | UCOL_REORDER_CODE_DIGIT = 0x1004, |
michael@0 | 200 | /** |
michael@0 | 201 | * The limit of the reorder codes (one past UCOL_REORDER_CODE_DIGIT). This is intended |
michael@0 | 202 | * for use in range checking and enumeration of the reorder codes. |
michael@0 | 203 | * @stable ICU 4.8 |
michael@0 | 204 | */ |
michael@0 | 205 | UCOL_REORDER_CODE_LIMIT = 0x1005 |
michael@0 | 206 | } UColReorderCode; |
michael@0 | 207 | |
michael@0 | 208 | /** |
michael@0 | 209 | * Base letter represents a primary difference. Set comparison |
michael@0 | 210 | * level to UCOL_PRIMARY to ignore secondary and tertiary differences. |
michael@0 | 211 | * Use this to set the strength of a Collator object. |
michael@0 | 212 | * Example of primary difference, "abc" < "abd" |
michael@0 | 213 | * |
michael@0 | 214 | * Diacritical differences on the same base letter represent a secondary |
michael@0 | 215 | * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary |
michael@0 | 216 | * differences. Use this to set the strength of a Collator object. |
michael@0 | 217 | * Example of secondary difference, "ä" >> "a". |
michael@0 | 218 | * |
michael@0 | 219 | * Uppercase and lowercase versions of the same character represent a |
michael@0 | 220 | * tertiary difference. Set comparison level to UCOL_TERTIARY to include |
michael@0 | 221 | * all comparison differences. Use this to set the strength of a Collator |
michael@0 | 222 | * object. |
michael@0 | 223 | * Example of tertiary difference, "abc" <<< "ABC". |
michael@0 | 224 | * |
michael@0 | 225 | * Two characters are considered "identical" when they have the same |
michael@0 | 226 | * Unicode spellings. Use UCOL_IDENTICAL for this level. |
michael@0 | 227 | * For example, "ä" == "ä". |
michael@0 | 228 | * |
michael@0 | 229 | * UCollationStrength is also used to determine the strength of sort keys |
michael@0 | 230 | * generated from UCollator objects. |
michael@0 | 231 | * These values can now be found in the UColAttributeValue enum. |
michael@0 | 232 | * @stable ICU 2.0 |
michael@0 | 233 | **/ |
michael@0 | 234 | typedef UColAttributeValue UCollationStrength; |
michael@0 | 235 | |
michael@0 | 236 | /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT |
michael@0 | 237 | * value, as well as the values specific to each one. |
michael@0 | 238 | * @stable ICU 2.0 |
michael@0 | 239 | */ |
michael@0 | 240 | typedef enum { |
michael@0 | 241 | /** Attribute for direction of secondary weights - used in Canadian French. |
michael@0 | 242 | * Acceptable values are UCOL_ON, which results in secondary weights |
michael@0 | 243 | * being considered backwards and UCOL_OFF which treats secondary |
michael@0 | 244 | * weights in the order they appear. |
michael@0 | 245 | * @stable ICU 2.0 |
michael@0 | 246 | */ |
michael@0 | 247 | UCOL_FRENCH_COLLATION, |
michael@0 | 248 | /** Attribute for handling variable elements. |
michael@0 | 249 | * Acceptable values are UCOL_NON_IGNORABLE (default) |
michael@0 | 250 | * which treats all the codepoints with non-ignorable |
michael@0 | 251 | * primary weights in the same way, |
michael@0 | 252 | * and UCOL_SHIFTED which causes codepoints with primary |
michael@0 | 253 | * weights that are equal or below the variable top value |
michael@0 | 254 | * to be ignored on primary level and moved to the quaternary |
michael@0 | 255 | * level. |
michael@0 | 256 | * @stable ICU 2.0 |
michael@0 | 257 | */ |
michael@0 | 258 | UCOL_ALTERNATE_HANDLING, |
michael@0 | 259 | /** Controls the ordering of upper and lower case letters. |
michael@0 | 260 | * Acceptable values are UCOL_OFF (default), which orders |
michael@0 | 261 | * upper and lower case letters in accordance to their tertiary |
michael@0 | 262 | * weights, UCOL_UPPER_FIRST which forces upper case letters to |
michael@0 | 263 | * sort before lower case letters, and UCOL_LOWER_FIRST which does |
michael@0 | 264 | * the opposite. |
michael@0 | 265 | * @stable ICU 2.0 |
michael@0 | 266 | */ |
michael@0 | 267 | UCOL_CASE_FIRST, |
michael@0 | 268 | /** Controls whether an extra case level (positioned before the third |
michael@0 | 269 | * level) is generated or not. Acceptable values are UCOL_OFF (default), |
michael@0 | 270 | * when case level is not generated, and UCOL_ON which causes the case |
michael@0 | 271 | * level to be generated. Contents of the case level are affected by |
michael@0 | 272 | * the value of UCOL_CASE_FIRST attribute. A simple way to ignore |
michael@0 | 273 | * accent differences in a string is to set the strength to UCOL_PRIMARY |
michael@0 | 274 | * and enable case level. |
michael@0 | 275 | * @stable ICU 2.0 |
michael@0 | 276 | */ |
michael@0 | 277 | UCOL_CASE_LEVEL, |
michael@0 | 278 | /** Controls whether the normalization check and necessary normalizations |
michael@0 | 279 | * are performed. When set to UCOL_OFF (default) no normalization check |
michael@0 | 280 | * is performed. The correctness of the result is guaranteed only if the |
michael@0 | 281 | * input data is in so-called FCD form (see users manual for more info). |
michael@0 | 282 | * When set to UCOL_ON, an incremental check is performed to see whether |
michael@0 | 283 | * the input data is in the FCD form. If the data is not in the FCD form, |
michael@0 | 284 | * incremental NFD normalization is performed. |
michael@0 | 285 | * @stable ICU 2.0 |
michael@0 | 286 | */ |
michael@0 | 287 | UCOL_NORMALIZATION_MODE, |
michael@0 | 288 | /** An alias for UCOL_NORMALIZATION_MODE attribute. |
michael@0 | 289 | * @stable ICU 2.0 |
michael@0 | 290 | */ |
michael@0 | 291 | UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, |
michael@0 | 292 | /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, |
michael@0 | 293 | * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength |
michael@0 | 294 | * for most locales (except Japanese) is tertiary. Quaternary strength |
michael@0 | 295 | * is useful when combined with shifted setting for alternate handling |
michael@0 | 296 | * attribute and for JIS X 4061 collation, when it is used to distinguish |
michael@0 | 297 | * between Katakana and Hiragana (this is achieved by setting the |
michael@0 | 298 | * UCOL_HIRAGANA_QUATERNARY_MODE attribute to on). Otherwise, quaternary level |
michael@0 | 299 | * is affected only by the number of non ignorable code points in |
michael@0 | 300 | * the string. Identical strength is rarely useful, as it amounts |
michael@0 | 301 | * to codepoints of the NFD form of the string. |
michael@0 | 302 | * @stable ICU 2.0 |
michael@0 | 303 | */ |
michael@0 | 304 | UCOL_STRENGTH, |
michael@0 | 305 | #ifndef U_HIDE_DEPRECATED_API |
michael@0 | 306 | /** When turned on, this attribute positions Hiragana before all |
michael@0 | 307 | * non-ignorables on quaternary level. This is a sneaky way to produce JIS |
michael@0 | 308 | * sort order. |
michael@0 | 309 | * |
michael@0 | 310 | * This attribute is an implementation detail of the CLDR Japanese tailoring. |
michael@0 | 311 | * The implementation might change to use a different mechanism |
michael@0 | 312 | * to achieve the same Japanese sort order. |
michael@0 | 313 | * Since ICU 50, this attribute is not settable any more via API functions. |
michael@0 | 314 | * @deprecated ICU 50 Implementation detail, cannot be set via API, might be removed from implementation. |
michael@0 | 315 | */ |
michael@0 | 316 | UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1, |
michael@0 | 317 | #endif /* U_HIDE_DEPRECATED_API */ |
michael@0 | 318 | /** When turned on, this attribute generates a collation key |
michael@0 | 319 | * for the numeric value of substrings of digits. |
michael@0 | 320 | * This is a way to get '100' to sort AFTER '2'. Note that the longest |
michael@0 | 321 | * digit substring that can be treated as a single collation element is |
michael@0 | 322 | * 254 digits (not counting leading zeros); digits beyond that limit are treated |
michael@0 | 323 | * as a separate digit substring associated with a separate collation element. |
michael@0 | 324 | * The value is spelled out as UCOL_STRENGTH + 2, so it is the same whether or not U_HIDE_DEPRECATED_API hides the enumerator above. |
michael@0 | 325 | * @stable ICU 2.8 |
michael@0 | 326 | */ |
michael@0 | 327 | UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, |
michael@0 | 328 | /** |
michael@0 | 329 | * The number of UColAttribute constants (implicit value UCOL_NUMERIC_COLLATION + 1). |
michael@0 | 330 | * @stable ICU 2.0 |
michael@0 | 331 | */ |
michael@0 | 332 | UCOL_ATTRIBUTE_COUNT |
michael@0 | 333 | } UColAttribute; |
michael@0 | 334 | |
michael@0 | 335 | /** Options for retrieving the rule string. |
michael@0 | 336 | * @stable ICU 2.0 |
michael@0 | 337 | */ |
michael@0 | 338 | typedef enum { |
michael@0 | 339 | /** |
michael@0 | 340 | * Retrieves the tailoring rules only (implicit value 0). |
michael@0 | 341 | * Same as calling the version of getRules() without UColRuleOption. |
michael@0 | 342 | * @stable ICU 2.0 |
michael@0 | 343 | */ |
michael@0 | 344 | UCOL_TAILORING_ONLY, |
michael@0 | 345 | /** |
michael@0 | 346 | * Retrieves the "UCA rules" concatenated with the tailoring rules (implicit value 1). |
michael@0 | 347 | * The "UCA rules" are an <i>approximation</i> of the root collator's sort order. |
michael@0 | 348 | * They are almost never used or useful at runtime and can be removed from the data. |
michael@0 | 349 | * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales |
michael@0 | 350 | * @stable ICU 2.0 |
michael@0 | 351 | */ |
michael@0 | 352 | UCOL_FULL_RULES |
michael@0 | 353 | } UColRuleOption ; |
michael@0 | 354 | |
michael@0 | 355 | /** |
michael@0 | 356 | * Open a UCollator for comparing strings. |
michael@0 | 357 | * The UCollator pointer is used in all the calls to the Collation |
michael@0 | 358 | * service. When finished, the collator must be disposed of by calling |
michael@0 | 359 | * {@link #ucol_close }. |
michael@0 | 360 | * @param loc The locale containing the required collation rules. |
michael@0 | 361 | * Special values for locales can be passed in - |
michael@0 | 362 | * if NULL is passed for the locale, the default locale |
michael@0 | 363 | * collation rules will be used. If empty string ("") or |
michael@0 | 364 | * "root" is passed, UCA rules will be used. |
michael@0 | 365 | * @param status A pointer to a UErrorCode to receive any errors |
michael@0 | 366 | * @return A pointer to a UCollator, or NULL (0) if an error occurred. |
michael@0 | 367 | * @see ucol_openRules |
michael@0 | 368 | * @see ucol_safeClone |
michael@0 | 369 | * @see ucol_close |
michael@0 | 370 | * @stable ICU 2.0 |
michael@0 | 371 | */ |
michael@0 | 372 | U_STABLE UCollator* U_EXPORT2 |
michael@0 | 373 | ucol_open(const char *loc, UErrorCode *status); |
michael@0 | 374 | |
michael@0 | 375 | /** |
michael@0 | 376 | * Produce a UCollator instance according to the rules supplied. |
michael@0 | 377 | * The rules are used to change the default ordering, defined in the |
michael@0 | 378 | * UCA in a process called tailoring. The resulting UCollator pointer |
michael@0 | 379 | * can be used in the same way as the one obtained by {@link #ucol_open }. |
michael@0 | 380 | * @param rules A string describing the collation rules. For the syntax |
michael@0 | 381 | * of the rules please see users guide. |
michael@0 | 382 | * @param rulesLength The length of rules, or -1 if null-terminated. |
michael@0 | 383 | * @param normalizationMode The normalization mode: One of |
michael@0 | 384 | * UCOL_OFF (expect the text to not need normalization), |
michael@0 | 385 | * UCOL_ON (normalize), or |
michael@0 | 386 | * UCOL_DEFAULT (set the mode according to the rules) |
michael@0 | 387 | * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, |
michael@0 | 388 | * UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH - can be also set in the rules. |
michael@0 | 389 | * @param parseError A pointer to UParseError to receive information about errors |
michael@0 | 390 | * that occurred during parsing. This argument can currently be set |
michael@0 | 391 | * to NULL, but at the user's own risk. Please provide a real structure. |
michael@0 | 392 | * @param status A pointer to a UErrorCode to receive any errors |
michael@0 | 393 | * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case |
michael@0 | 394 | * of error - please use status argument to check for errors. |
michael@0 | 395 | * @see ucol_open |
michael@0 | 396 | * @see ucol_safeClone |
michael@0 | 397 | * @see ucol_close |
michael@0 | 398 | * @stable ICU 2.0 |
michael@0 | 399 | */ |
michael@0 | 400 | U_STABLE UCollator* U_EXPORT2 |
michael@0 | 401 | ucol_openRules( const UChar *rules, |
michael@0 | 402 | int32_t rulesLength, |
michael@0 | 403 | UColAttributeValue normalizationMode, |
michael@0 | 404 | UCollationStrength strength, |
michael@0 | 405 | UParseError *parseError, |
michael@0 | 406 | UErrorCode *status); |
michael@0 | 407 | |
michael@0 | 408 | /** |
michael@0 | 409 | * Open a collator defined by a short form string. |
michael@0 | 410 | * The structure and the syntax of the string is defined in the "Naming collators" |
michael@0 | 411 | * section of the users guide: |
michael@0 | 412 | * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators |
michael@0 | 413 | * Attributes are overridden by the subsequent attributes. So, for "S2_S3", final |
michael@0 | 414 | * strength will be 3. 3066bis locale overrides individual locale parts. |
michael@0 | 415 | * The call to this function is equivalent to a call to ucol_open, followed by a |
michael@0 | 416 | * series of calls to ucol_setAttribute and ucol_setVariableTop. |
michael@0 | 417 | * @param definition A short string containing a locale and a set of attributes. |
michael@0 | 418 | * Attributes not explicitly mentioned are left at the default |
michael@0 | 419 | * state for a locale. |
michael@0 | 420 | * @param forceDefaults if FALSE, the settings that are the same as the collator |
michael@0 | 421 | * default settings will not be applied (for example, setting |
michael@0 | 422 | * French secondary on a French collator would not be executed). |
michael@0 | 423 | * If TRUE, all the settings will be applied regardless of the |
michael@0 | 424 | * collator default value. If the definition |
michael@0 | 425 | * strings are to be cached, should be set to FALSE. |
michael@0 | 426 | * @param parseError if not NULL, structure that will get filled with error's pre |
michael@0 | 427 | * and post context in case of error. |
michael@0 | 428 | * @param status Error code. Apart from regular error conditions connected to |
michael@0 | 429 | * instantiating collators (like out of memory or similar), this |
michael@0 | 430 | * API will return an error if an invalid attribute or attribute/value |
michael@0 | 431 | * combination is specified. |
michael@0 | 432 | * @return A pointer to a UCollator or 0 if an error occurred (including an |
michael@0 | 433 | * invalid attribute). |
michael@0 | 434 | * @see ucol_open |
michael@0 | 435 | * @see ucol_setAttribute |
michael@0 | 436 | * @see ucol_setVariableTop |
michael@0 | 437 | * @see ucol_getShortDefinitionString |
michael@0 | 438 | * @see ucol_normalizeShortDefinitionString |
michael@0 | 439 | * @stable ICU 3.0 |
michael@0 | 440 | * |
michael@0 | 441 | */ |
michael@0 | 442 | U_STABLE UCollator* U_EXPORT2 |
michael@0 | 443 | ucol_openFromShortString( const char *definition, |
michael@0 | 444 | UBool forceDefaults, |
michael@0 | 445 | UParseError *parseError, |
michael@0 | 446 | UErrorCode *status); |
michael@0 | 447 | |
michael@0 | 448 | #ifndef U_HIDE_DEPRECATED_API |
michael@0 | 449 | /** |
michael@0 | 450 | * Get a set containing the contractions defined by the collator. The set includes |
michael@0 | 451 | * both the UCA contractions and the contractions defined by the collator. This set |
michael@0 | 452 | * will contain only strings. If a tailoring explicitly suppresses contractions from |
michael@0 | 453 | * the UCA (as Russian does), the removed contractions will not be in the resulting set. |
michael@0 | 454 | * @param coll collator |
michael@0 | 455 | * @param conts the set to hold the result. It gets emptied before |
michael@0 | 456 | * contractions are added. |
michael@0 | 457 | * @param status to hold the error code |
michael@0 | 458 | * @return the size of the contraction set |
michael@0 | 459 | * |
michael@0 | 460 | * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead |
michael@0 | 461 | */ |
michael@0 | 462 | U_DEPRECATED int32_t U_EXPORT2 |
michael@0 | 463 | ucol_getContractions( const UCollator *coll, |
michael@0 | 464 | USet *conts, |
michael@0 | 465 | UErrorCode *status); |
michael@0 | 466 | #endif /* U_HIDE_DEPRECATED_API */ |
michael@0 | 467 | |
michael@0 | 468 | /** |
michael@0 | 469 | * Get sets containing the contractions and/or expansions defined by the collator. The sets |
michael@0 | 470 | * include both the UCA expansions and the expansions defined by the tailoring. |
michael@0 | 471 | * @param coll collator |
michael@0 | 472 | * @param contractions if not NULL, the set to hold the contractions |
michael@0 | 473 | * @param expansions if not NULL, the set to hold the expansions |
michael@0 | 474 | * @param addPrefixes add the prefix contextual elements to contractions |
michael@0 | 475 | * @param status to hold the error code |
michael@0 | 476 | * |
michael@0 | 477 | * @stable ICU 3.4 |
michael@0 | 478 | */ |
michael@0 | 479 | U_STABLE void U_EXPORT2 |
michael@0 | 480 | ucol_getContractionsAndExpansions( const UCollator *coll, |
michael@0 | 481 | USet *contractions, USet *expansions, |
michael@0 | 482 | UBool addPrefixes, UErrorCode *status); |
michael@0 | 483 | |
michael@0 | 484 | /** |
michael@0 | 485 | * Close a UCollator. |
michael@0 | 486 | * Once closed, a UCollator must not be used again. Every opened collator should |
michael@0 | 487 | * be closed; otherwise, a memory leak will result. |
michael@0 | 488 | * @param coll The UCollator to close. |
michael@0 | 489 | * @see ucol_open |
michael@0 | 490 | * @see ucol_openRules |
michael@0 | 491 | * @see ucol_safeClone |
michael@0 | 492 | * @stable ICU 2.0 |
michael@0 | 493 | */ |
michael@0 | 494 | U_STABLE void U_EXPORT2 |
michael@0 | 495 | ucol_close(UCollator *coll); |
michael@0 | 496 | |
michael@0 | 497 | #if U_SHOW_CPLUSPLUS_API |
michael@0 | 498 | |
michael@0 | 499 | U_NAMESPACE_BEGIN |
michael@0 | 500 | |
michael@0 | 501 | /** |
michael@0 | 502 | * \class LocalUCollatorPointer |
michael@0 | 503 | * "Smart pointer" class, closes a UCollator via ucol_close(). |
michael@0 | 504 | * Only declared when U_SHOW_CPLUSPLUS_API is set. For most methods see the LocalPointerBase base class. |
michael@0 | 505 | * |
michael@0 | 506 | * @see LocalPointerBase |
michael@0 | 507 | * @see LocalPointer |
michael@0 | 508 | * @stable ICU 4.4 |
michael@0 | 509 | */ |
michael@0 | 510 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close); |
michael@0 | 511 | |
michael@0 | 512 | U_NAMESPACE_END |
michael@0 | 513 | |
michael@0 | 514 | #endif /* U_SHOW_CPLUSPLUS_API */ |
michael@0 | 515 | |
michael@0 | 516 | /** |
michael@0 | 517 | * Compare two strings. |
michael@0 | 518 | * The strings will be compared using the options already specified. |
michael@0 | 519 | * @param coll The UCollator containing the comparison rules. |
michael@0 | 520 | * @param source The source string. |
michael@0 | 521 | * @param sourceLength The length of source, or -1 if null-terminated. |
michael@0 | 522 | * @param target The target string. |
michael@0 | 523 | * @param targetLength The length of target, or -1 if null-terminated. |
michael@0 | 524 | * @return The result of comparing source to target; one of UCOL_EQUAL, |
michael@0 | 525 | * UCOL_GREATER, UCOL_LESS (see UCollationResult) |
michael@0 | 526 | * @see ucol_greater |
michael@0 | 527 | * @see ucol_greaterOrEqual |
michael@0 | 528 | * @see ucol_equal |
michael@0 | 529 | * @stable ICU 2.0 |
michael@0 | 530 | */ |
michael@0 | 531 | U_STABLE UCollationResult U_EXPORT2 |
michael@0 | 532 | ucol_strcoll( const UCollator *coll, |
michael@0 | 533 | const UChar *source, |
michael@0 | 534 | int32_t sourceLength, |
michael@0 | 535 | const UChar *target, |
michael@0 | 536 | int32_t targetLength); |
michael@0 | 537 | |
michael@0 | 538 | /** |
michael@0 | 539 | * Compare two strings in UTF-8. |
michael@0 | 540 | * The strings will be compared using the options already specified. |
michael@0 | 541 | * Note: When an input string contains a malformed UTF-8 byte sequence, |
michael@0 | 542 | * this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). |
michael@0 | 543 | * @param coll The UCollator containing the comparison rules. |
michael@0 | 544 | * @param source The source UTF-8 string. |
michael@0 | 545 | * @param sourceLength The length of source, or -1 if null-terminated. |
michael@0 | 546 | * @param target The target UTF-8 string. |
michael@0 | 547 | * @param targetLength The length of target, or -1 if null-terminated. |
michael@0 | 548 | * @param status A pointer to a UErrorCode to receive any errors |
michael@0 | 549 | * @return The result of comparing the strings; one of UCOL_EQUAL, |
michael@0 | 550 | * UCOL_GREATER, UCOL_LESS |
michael@0 | 551 | * @see ucol_greater |
michael@0 | 552 | * @see ucol_greaterOrEqual |
michael@0 | 553 | * @see ucol_equal |
michael@0 | 554 | * @stable ICU 50 |
michael@0 | 555 | */ |
michael@0 | 556 | U_STABLE UCollationResult U_EXPORT2 |
michael@0 | 557 | ucol_strcollUTF8( |
michael@0 | 558 | const UCollator *coll, |
michael@0 | 559 | const char *source, |
michael@0 | 560 | int32_t sourceLength, |
michael@0 | 561 | const char *target, |
michael@0 | 562 | int32_t targetLength, |
michael@0 | 563 | UErrorCode *status); |
michael@0 | 564 | |
michael@0 | 565 | /** |
michael@0 | 566 | * Determine if one string is greater than another. |
michael@0 | 567 | * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER. |
michael@0 | 568 | * @param coll The UCollator containing the comparison rules. |
michael@0 | 569 | * @param source The source string. |
michael@0 | 570 | * @param sourceLength The length of source, or -1 if null-terminated. |
michael@0 | 571 | * @param target The target string. |
michael@0 | 572 | * @param targetLength The length of target, or -1 if null-terminated. |
michael@0 | 573 | * @return TRUE if source is greater than target, FALSE otherwise. |
michael@0 | 574 | * @see ucol_strcoll |
michael@0 | 575 | * @see ucol_greaterOrEqual |
michael@0 | 576 | * @see ucol_equal |
michael@0 | 577 | * @stable ICU 2.0 |
michael@0 | 578 | */ |
michael@0 | 579 | U_STABLE UBool U_EXPORT2 |
michael@0 | 580 | ucol_greater(const UCollator *coll, |
michael@0 | 581 | const UChar *source, int32_t sourceLength, |
michael@0 | 582 | const UChar *target, int32_t targetLength); |
michael@0 | 583 | |
michael@0 | 584 | /** |
michael@0 | 585 | * Determine if one string is greater than or equal to another. |
michael@0 | 586 | * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS |
michael@0 | 587 | * @param coll The UCollator containing the comparison rules. |
michael@0 | 588 | * @param source The source string. |
michael@0 | 589 | * @param sourceLength The length of source, or -1 if null-terminated. |
michael@0 | 590 | * @param target The target string. |
michael@0 | 591 | * @param targetLength The length of target, or -1 if null-terminated. |
michael@0 | 592 | * @return TRUE if source is greater than or equal to target, FALSE otherwise. |
michael@0 | 593 | * @see ucol_strcoll |
michael@0 | 594 | * @see ucol_greater |
michael@0 | 595 | * @see ucol_equal |
michael@0 | 596 | * @stable ICU 2.0 |
michael@0 | 597 | */ |
michael@0 | 598 | U_STABLE UBool U_EXPORT2 |
michael@0 | 599 | ucol_greaterOrEqual(const UCollator *coll, |
michael@0 | 600 | const UChar *source, int32_t sourceLength, |
michael@0 | 601 | const UChar *target, int32_t targetLength); |
michael@0 | 602 | |
michael@0 | 603 | /** |
michael@0 | 604 | * Compare two strings for equality. |
michael@0 | 605 | * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL |
michael@0 | 606 | * @param coll The UCollator containing the comparison rules. |
michael@0 | 607 | * @param source The source string. |
michael@0 | 608 | * @param sourceLength The length of source, or -1 if null-terminated. |
michael@0 | 609 | * @param target The target string. |
michael@0 | 610 | * @param targetLength The length of target, or -1 if null-terminated. |
michael@0 | 611 | * @return TRUE if source is equal to target, FALSE otherwise |
michael@0 | 612 | * @see ucol_strcoll |
michael@0 | 613 | * @see ucol_greater |
michael@0 | 614 | * @see ucol_greaterOrEqual |
michael@0 | 615 | * @stable ICU 2.0 |
michael@0 | 616 | */ |
michael@0 | 617 | U_STABLE UBool U_EXPORT2 |
michael@0 | 618 | ucol_equal(const UCollator *coll, |
michael@0 | 619 | const UChar *source, int32_t sourceLength, |
michael@0 | 620 | const UChar *target, int32_t targetLength); |
michael@0 | 621 | |
michael@0 | 622 | /** |
michael@0 | 623 | * Compare two strings supplied through UCharIterators. |
michael@0 | 624 | * The strings will be compared using the options already specified. |
michael@0 | 625 | * @param coll The UCollator containing the comparison rules. |
michael@0 | 626 | * @param sIter The source string iterator. |
michael@0 | 627 | * @param tIter The target string iterator. |
michael@0 | 628 | * @return The result of comparing the strings; one of UCOL_EQUAL, |
michael@0 | 629 | * UCOL_GREATER, UCOL_LESS |
michael@0 | 630 | * @param status A pointer to an UErrorCode to receive any errors |
michael@0 | 631 | * @see ucol_strcoll |
michael@0 | 632 | * @stable ICU 2.6 |
michael@0 | 633 | */ |
michael@0 | 634 | U_STABLE UCollationResult U_EXPORT2 |
michael@0 | 635 | ucol_strcollIter( const UCollator *coll, |
michael@0 | 636 | UCharIterator *sIter, |
michael@0 | 637 | UCharIterator *tIter, |
michael@0 | 638 | UErrorCode *status); |
michael@0 | 639 | |
michael@0 | 640 | /** |
michael@0 | 641 | * Get the collation strength used in a UCollator. |
michael@0 | 642 | * The strength influences how strings are compared. |
michael@0 | 643 | * @param coll The UCollator to query. |
michael@0 | 644 | * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, |
michael@0 | 645 | * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL |
michael@0 | 646 | * @see ucol_setStrength |
michael@0 | 647 | * @stable ICU 2.0 |
michael@0 | 648 | */ |
michael@0 | 649 | U_STABLE UCollationStrength U_EXPORT2 |
michael@0 | 650 | ucol_getStrength(const UCollator *coll); |
michael@0 | 651 | |
michael@0 | 652 | /** |
michael@0 | 653 | * Set the collation strength used in a UCollator. |
michael@0 | 654 | * The strength influences how strings are compared. |
michael@0 | 655 | * @param coll The UCollator to set. |
michael@0 | 656 | * @param strength The desired collation strength; one of UCOL_PRIMARY, |
michael@0 | 657 | * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT |
michael@0 | 658 | * @see ucol_getStrength |
michael@0 | 659 | * @stable ICU 2.0 |
michael@0 | 660 | */ |
michael@0 | 661 | U_STABLE void U_EXPORT2 |
michael@0 | 662 | ucol_setStrength(UCollator *coll, |
michael@0 | 663 | UCollationStrength strength); |
michael@0 | 664 | |
michael@0 | 665 | /** |
michael@0 | 666 | * Retrieves the reordering codes for this collator. |
michael@0 | 667 | * These reordering codes are a combination of UScript codes and UColReorderCode entries. |
michael@0 | 668 | * @param coll The UCollator to query. |
michael@0 | 669 | * @param dest The array to fill with the script ordering. |
michael@0 | 670 | * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function |
michael@0 | 671 | * will only return the length of the result without writing any of the result string (pre-flighting). |
michael@0 | 672 | * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a |
michael@0 | 673 | * failure before the function call. |
michael@0 | 674 | * @return The number of reordering codes written to the dest array. |
michael@0 | 675 | * @see ucol_setReorderCodes |
michael@0 | 676 | * @see ucol_getEquivalentReorderCodes |
michael@0 | 677 | * @see UScriptCode |
michael@0 | 678 | * @see UColReorderCode |
michael@0 | 679 | * @stable ICU 4.8 |
michael@0 | 680 | */ |
michael@0 | 681 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 682 | ucol_getReorderCodes(const UCollator* coll, |
michael@0 | 683 | int32_t* dest, |
michael@0 | 684 | int32_t destCapacity, |
michael@0 | 685 | UErrorCode *pErrorCode); |
michael@0 | 686 | /** |
michael@0 | 687 | * Sets the reordering codes for this collator. |
michael@0 | 688 | * Collation reordering allows scripts and some other defined blocks of characters |
michael@0 | 689 | * to be moved relative to each other as a block. This reordering is done on top of |
michael@0 | 690 | * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed |
michael@0 | 691 | * at the start and/or the end of the collation order. These groups are specified using |
michael@0 | 692 | * UScript codes and UColReorderCode entries. |
michael@0 | 693 | * <p>By default, reordering codes specified for the start of the order are placed in the |
michael@0 | 694 | * order given after a group of "special" non-script blocks. These special groups of characters |
michael@0 | 695 | * are space, punctuation, symbol, currency, and digit. These special groups are represented with |
michael@0 | 696 | * UColReorderCode entries. Script groups can be intermingled with |
michael@0 | 697 | * these special non-script blocks if those special blocks are explicitly specified in the reordering. |
michael@0 | 698 | * <p>The special code OTHERS stands for any script that is not explicitly |
michael@0 | 699 | * mentioned in the list of reordering codes given. Anything that is after OTHERS |
michael@0 | 700 | * will go at the very end of the reordering in the order given. |
michael@0 | 701 | * <p>The special reorder code DEFAULT will reset the reordering for this collator |
michael@0 | 702 | * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that |
michael@0 | 703 | * was specified when this collator was created from resource data or from rules. The |
michael@0 | 704 | * DEFAULT code <b>must</b> be the sole code supplied when it is used. If it is not, |
michael@0 | 705 | * a U_ILLEGAL_ARGUMENT_ERROR will be set. |
michael@0 | 706 | * <p>The special reorder code NONE will remove any reordering for this collator. |
michael@0 | 707 | * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The |
michael@0 | 708 | * NONE code <b>must</b> be the sole code supplied when it is used. |
michael@0 | 709 | * @param coll The UCollator to set. |
michael@0 | 710 | * @param reorderCodes An array of script codes in the new order. This can be NULL if the |
michael@0 | 711 | * length is also set to 0. An empty array will clear any reordering codes on the collator. |
michael@0 | 712 | * @param reorderCodesLength The length of reorderCodes. |
michael@0 | 713 | * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a |
michael@0 | 714 | * failure before the function call. |
michael@0 | 715 | * @see ucol_getReorderCodes |
michael@0 | 716 | * @see ucol_getEquivalentReorderCodes |
michael@0 | 717 | * @see UScriptCode |
michael@0 | 718 | * @see UColReorderCode |
michael@0 | 719 | * @stable ICU 4.8 |
michael@0 | 720 | */ |
michael@0 | 721 | U_STABLE void U_EXPORT2 |
michael@0 | 722 | ucol_setReorderCodes(UCollator* coll, |
michael@0 | 723 | const int32_t* reorderCodes, |
michael@0 | 724 | int32_t reorderCodesLength, |
michael@0 | 725 | UErrorCode *pErrorCode); |
michael@0 | 726 | |
michael@0 | 727 | /** |
michael@0 | 728 | * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder |
michael@0 | 729 | * codes will be grouped and must reorder together. |
michael@0 | 730 | * @param reorderCode The reorder code to determine equivalence for. |
michael@0 | 731 | * @param dest The array to fill with the script ordering. |
michael@0 | 732 | * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function |
michael@0 | 733 | * will only return the length of the result without writing any of the result string (pre-flighting). |
michael@0 | 734 | * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate |
michael@0 | 735 | * a failure before the function call. |
michael@0 | 736 | * @return The number of reordering codes written to the dest array. |
michael@0 | 737 | * @see ucol_setReorderCodes |
michael@0 | 738 | * @see ucol_getReorderCodes |
michael@0 | 739 | * @see UScriptCode |
michael@0 | 740 | * @see UColReorderCode |
michael@0 | 741 | * @stable ICU 4.8 |
michael@0 | 742 | */ |
michael@0 | 743 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 744 | ucol_getEquivalentReorderCodes(int32_t reorderCode, |
michael@0 | 745 | int32_t* dest, |
michael@0 | 746 | int32_t destCapacity, |
michael@0 | 747 | UErrorCode *pErrorCode); |
michael@0 | 748 | |
michael@0 | 749 | /** |
michael@0 | 750 | * Get the display name for a UCollator. |
michael@0 | 751 | * The display name is suitable for presentation to a user. |
michael@0 | 752 | * @param objLoc The locale of the collator in question. |
michael@0 | 753 | * @param dispLoc The locale for display. |
michael@0 | 754 | * @param result A pointer to a buffer to receive the attribute. |
michael@0 | 755 | * @param resultLength The maximum size of result. |
michael@0 | 756 | * @param status A pointer to an UErrorCode to receive any errors |
michael@0 | 757 | * @return The total buffer size needed; if greater than resultLength, |
michael@0 | 758 | * the output was truncated. |
michael@0 | 759 | * @stable ICU 2.0 |
michael@0 | 760 | */ |
michael@0 | 761 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 762 | ucol_getDisplayName( const char *objLoc, |
michael@0 | 763 | const char *dispLoc, |
michael@0 | 764 | UChar *result, |
michael@0 | 765 | int32_t resultLength, |
michael@0 | 766 | UErrorCode *status); |
michael@0 | 767 | |
michael@0 | 768 | /** |
michael@0 | 769 | * Get a locale for which collation rules are available. |
michael@0 | 770 | * A UCollator in a locale returned by this function will perform the correct |
michael@0 | 771 | * collation for the locale. |
michael@0 | 772 | * @param localeIndex The index of the desired locale. |
michael@0 | 773 | * @return A locale for which collation rules are available, or 0 if none. |
michael@0 | 774 | * @see ucol_countAvailable |
michael@0 | 775 | * @stable ICU 2.0 |
michael@0 | 776 | */ |
michael@0 | 777 | U_STABLE const char* U_EXPORT2 |
michael@0 | 778 | ucol_getAvailable(int32_t localeIndex); |
michael@0 | 779 | |
michael@0 | 780 | /** |
michael@0 | 781 | * Determine how many locales have collation rules available. |
michael@0 | 782 | * This function is most useful for determining the loop ending condition for |
michael@0 | 783 | * calls to {@link #ucol_getAvailable }. |
michael@0 | 784 | * @return The number of locales for which collation rules are available. |
michael@0 | 785 | * @see ucol_getAvailable |
michael@0 | 786 | * @stable ICU 2.0 |
michael@0 | 787 | */ |
michael@0 | 788 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 789 | ucol_countAvailable(void); |
michael@0 | 790 | |
michael@0 | 791 | #if !UCONFIG_NO_SERVICE |
michael@0 | 792 | /** |
michael@0 | 793 | * Create a string enumerator of all locales for which a valid |
michael@0 | 794 | * collator may be opened. |
michael@0 | 795 | * @param status input-output error code |
michael@0 | 796 | * @return a string enumeration over locale strings. The caller is |
michael@0 | 797 | * responsible for closing the result. |
michael@0 | 798 | * @stable ICU 3.0 |
michael@0 | 799 | */ |
michael@0 | 800 | U_STABLE UEnumeration* U_EXPORT2 |
michael@0 | 801 | ucol_openAvailableLocales(UErrorCode *status); |
michael@0 | 802 | #endif |
michael@0 | 803 | |
michael@0 | 804 | /** |
michael@0 | 805 | * Create a string enumerator of all possible keywords that are relevant to |
michael@0 | 806 | * collation. At this point, the only recognized keyword for this |
michael@0 | 807 | * service is "collation". |
michael@0 | 808 | * @param status input-output error code |
michael@0 | 809 | * @return a string enumeration over collation keywords. The caller is |
michael@0 | 810 | * responsible for closing the result. |
michael@0 | 811 | * @stable ICU 3.0 |
michael@0 | 812 | */ |
michael@0 | 813 | U_STABLE UEnumeration* U_EXPORT2 |
michael@0 | 814 | ucol_getKeywords(UErrorCode *status); |
michael@0 | 815 | |
michael@0 | 816 | /** |
michael@0 | 817 | * Given a keyword, create a string enumeration of all values |
michael@0 | 818 | * for that keyword that are currently in use. |
michael@0 | 819 | * @param keyword a particular keyword as enumerated by |
michael@0 | 820 | * ucol_getKeywords. If any other keyword is passed in, *status is set |
michael@0 | 821 | * to U_ILLEGAL_ARGUMENT_ERROR. |
michael@0 | 822 | * @param status input-output error code |
michael@0 | 823 | * @return a string enumeration over collation keyword values, or NULL |
michael@0 | 824 | * upon error. The caller is responsible for closing the result. |
michael@0 | 825 | * @stable ICU 3.0 |
michael@0 | 826 | */ |
michael@0 | 827 | U_STABLE UEnumeration* U_EXPORT2 |
michael@0 | 828 | ucol_getKeywordValues(const char *keyword, UErrorCode *status); |
michael@0 | 829 | |
michael@0 | 830 | /** |
michael@0 | 831 | * Given a key and a locale, returns an array of string values in a preferred |
michael@0 | 832 | * order that would make a difference. These are all and only those values where |
michael@0 | 833 | * the open (creation) of the service with the locale formed from the input locale |
michael@0 | 834 | * plus input keyword and that value has different behavior than creation with the |
michael@0 | 835 | * input locale alone. |
michael@0 | 836 | * @param key one of the keys supported by this service. For now, only |
michael@0 | 837 | * "collation" is supported. |
michael@0 | 838 | * @param locale the locale |
michael@0 | 839 | * @param commonlyUsed if set to true it will return only commonly used values |
michael@0 | 840 | * with the given locale in preferred order. Otherwise, |
michael@0 | 841 | * it will return all the available values for the locale. |
michael@0 | 842 | * @param status error status |
michael@0 | 843 | * @return a string enumeration over keyword values for the given key and the locale. |
michael@0 | 844 | * @stable ICU 4.2 |
michael@0 | 845 | */ |
michael@0 | 846 | U_STABLE UEnumeration* U_EXPORT2 |
michael@0 | 847 | ucol_getKeywordValuesForLocale(const char* key, |
michael@0 | 848 | const char* locale, |
michael@0 | 849 | UBool commonlyUsed, |
michael@0 | 850 | UErrorCode* status); |
michael@0 | 851 | |
michael@0 | 852 | /** |
michael@0 | 853 | * Return the functionally equivalent locale for the given |
michael@0 | 854 | * requested locale, with respect to given keyword, for the |
michael@0 | 855 | * collation service. If two locales return the same result, then |
michael@0 | 856 | * collators instantiated for these locales will behave |
michael@0 | 857 | * equivalently. The converse is not always true; two collators |
michael@0 | 858 | * may in fact be equivalent, but return different results, due to |
michael@0 | 859 | * internal details. The return result has no other meaning than |
michael@0 | 860 | * that stated above, and implies nothing as to the relationship |
michael@0 | 861 | * between the two locales. This is intended for use by |
michael@0 | 862 | * applications who wish to cache collators, or otherwise reuse |
michael@0 | 863 | * collators when possible. The functional equivalent may change |
michael@0 | 864 | * over time. For more information, please see the <a |
michael@0 | 865 | * href="http://icu-project.org/userguide/locale.html#services"> |
michael@0 | 866 | * Locales and Services</a> section of the ICU User Guide. |
michael@0 | 867 | * @param result fillin for the functionally equivalent locale |
michael@0 | 868 | * @param resultCapacity capacity of the fillin buffer |
michael@0 | 869 | * @param keyword a particular keyword as enumerated by |
michael@0 | 870 | * ucol_getKeywords. |
michael@0 | 871 | * @param locale the requested locale |
michael@0 | 872 | * @param isAvailable if non-NULL, pointer to a fillin parameter that |
michael@0 | 873 | * indicates whether the requested locale was 'available' to the |
michael@0 | 874 | * collation service. A locale is defined as 'available' if it |
michael@0 | 875 | * physically exists within the collation locale data. |
michael@0 | 876 | * @param status pointer to input-output error code |
michael@0 | 877 | * @return the actual buffer size needed for the locale. If greater |
michael@0 | 878 | * than resultCapacity, the returned full name will be truncated and |
michael@0 | 879 | * an error code will be returned. |
michael@0 | 880 | * @stable ICU 3.0 |
michael@0 | 881 | */ |
michael@0 | 882 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 883 | ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, |
michael@0 | 884 | const char* keyword, const char* locale, |
michael@0 | 885 | UBool* isAvailable, UErrorCode* status); |
michael@0 | 886 | |
michael@0 | 887 | /** |
michael@0 | 888 | * Get the collation tailoring rules from a UCollator. |
michael@0 | 889 | * The rules will follow the rule syntax. |
michael@0 | 890 | * @param coll The UCollator to query. |
michael@0 | 891 | * @param length Output parameter that receives the length of the returned rules. |
michael@0 | 892 | * @return The collation tailoring rules. |
michael@0 | 893 | * @stable ICU 2.0 |
michael@0 | 894 | */ |
michael@0 | 895 | U_STABLE const UChar* U_EXPORT2 |
michael@0 | 896 | ucol_getRules( const UCollator *coll, |
michael@0 | 897 | int32_t *length); |
michael@0 | 898 | |
michael@0 | 899 | /** Get the short definition string for a collator. This API harvests the collator's |
michael@0 | 900 | * locale and the attribute set and produces a string that can be used for opening |
michael@0 | 901 | * a collator with the same properties using the ucol_openFromShortString API. |
michael@0 | 902 | * This string will be normalized. |
michael@0 | 903 | * The structure and the syntax of the string is defined in the "Naming collators" |
michael@0 | 904 | * section of the users guide: |
michael@0 | 905 | * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators |
michael@0 | 906 | * This API supports preflighting. |
michael@0 | 907 | * @param coll a collator |
michael@0 | 908 | * @param locale a locale that will appear as the collator's locale in the resulting |
michael@0 | 909 | * short string definition. If NULL, the locale will be harvested |
michael@0 | 910 | * from the collator. |
michael@0 | 911 | * @param buffer space to hold the resulting string |
michael@0 | 912 | * @param capacity capacity of the buffer |
michael@0 | 913 | * @param status for returning errors. All the preflighting errors are featured |
michael@0 | 914 | * @return length of the resulting string |
michael@0 | 915 | * @see ucol_openFromShortString |
michael@0 | 916 | * @see ucol_normalizeShortDefinitionString |
michael@0 | 917 | * @stable ICU 3.0 |
michael@0 | 918 | */ |
michael@0 | 919 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 920 | ucol_getShortDefinitionString(const UCollator *coll, |
michael@0 | 921 | const char *locale, |
michael@0 | 922 | char *buffer, |
michael@0 | 923 | int32_t capacity, |
michael@0 | 924 | UErrorCode *status); |
michael@0 | 925 | |
michael@0 | 926 | /** Verifies and normalizes short definition string. |
michael@0 | 927 | * Normalized short definition string has all the options sorted by the argument name, |
michael@0 | 928 | * so that equivalent definition strings are the same. |
michael@0 | 929 | * This API supports preflighting. |
michael@0 | 930 | * @param source definition string |
michael@0 | 931 | * @param destination space to hold the resulting string |
michael@0 | 932 | * @param capacity capacity of the buffer |
michael@0 | 933 | * @param parseError if not NULL, structure that will get filled with error's pre |
michael@0 | 934 | * and post context in case of error. |
michael@0 | 935 | * @param status Error code. This API will return an error if an invalid attribute |
michael@0 | 936 | * or attribute/value combination is specified. All the preflighting |
michael@0 | 937 | * errors are also featured |
michael@0 | 938 | * @return length of the resulting normalized string. |
michael@0 | 939 | * |
michael@0 | 940 | * @see ucol_openFromShortString |
michael@0 | 941 | * @see ucol_getShortDefinitionString |
michael@0 | 942 | * |
michael@0 | 943 | * @stable ICU 3.0 |
michael@0 | 944 | */ |
michael@0 | 945 | |
michael@0 | 946 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 947 | ucol_normalizeShortDefinitionString(const char *source, |
michael@0 | 948 | char *destination, |
michael@0 | 949 | int32_t capacity, |
michael@0 | 950 | UParseError *parseError, |
michael@0 | 951 | UErrorCode *status); |
michael@0 | 952 | |
michael@0 | 953 | |
michael@0 | 954 | /** |
michael@0 | 955 | * Get a sort key for a string from a UCollator. |
michael@0 | 956 | * Sort keys may be compared using <TT>strcmp</TT>. |
michael@0 | 957 | * |
michael@0 | 958 | * Like ICU functions that write to an output buffer, the buffer contents |
michael@0 | 959 | * are undefined if the buffer capacity (resultLength parameter) is too small. |
michael@0 | 960 | * Unlike ICU functions that write a string to an output buffer, |
michael@0 | 961 | * the terminating zero byte is counted in the sort key length. |
michael@0 | 962 | * @param coll The UCollator containing the collation rules. |
michael@0 | 963 | * @param source The string to transform. |
michael@0 | 964 | * @param sourceLength The length of source, or -1 if null-terminated. |
michael@0 | 965 | * @param result A pointer to a buffer to receive the attribute. |
michael@0 | 966 | * @param resultLength The maximum size of result. |
michael@0 | 967 | * @return The size needed to fully store the sort key. |
michael@0 | 968 | * If there was an internal error generating the sort key, |
michael@0 | 969 | * a zero value is returned. |
michael@0 | 970 | * @see ucol_keyHashCode |
michael@0 | 971 | * @stable ICU 2.0 |
michael@0 | 972 | */ |
michael@0 | 973 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 974 | ucol_getSortKey(const UCollator *coll, |
michael@0 | 975 | const UChar *source, |
michael@0 | 976 | int32_t sourceLength, |
michael@0 | 977 | uint8_t *result, |
michael@0 | 978 | int32_t resultLength); |
michael@0 | 979 | |
michael@0 | 980 | |
michael@0 | 981 | /** Gets the next count bytes of a sort key. Caller needs |
michael@0 | 982 | * to preserve state array between calls and to provide |
michael@0 | 983 | * the same type of UCharIterator set with the same string. |
michael@0 | 984 | * The destination buffer provided must be big enough to store |
michael@0 | 985 | * the number of requested bytes. |
michael@0 | 986 | * |
michael@0 | 987 | * The generated sort key may or may not be compatible with |
michael@0 | 988 | * sort keys generated using ucol_getSortKey(). |
michael@0 | 989 | * @param coll The UCollator containing the collation rules. |
michael@0 | 990 | * @param iter UCharIterator containing the string we need |
michael@0 | 991 | * the sort key to be calculated for. |
michael@0 | 992 | * @param state Opaque state of sortkey iteration. |
michael@0 | 993 | * @param dest Buffer to hold the resulting sortkey part |
michael@0 | 994 | * @param count number of sort key bytes required. |
michael@0 | 995 | * @param status error code indicator. |
michael@0 | 996 | * @return the actual number of bytes of a sortkey. It can be |
michael@0 | 997 | * smaller than count if we have reached the end of |
michael@0 | 998 | * the sort key. |
michael@0 | 999 | * @stable ICU 2.6 |
michael@0 | 1000 | */ |
michael@0 | 1001 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 1002 | ucol_nextSortKeyPart(const UCollator *coll, |
michael@0 | 1003 | UCharIterator *iter, |
michael@0 | 1004 | uint32_t state[2], |
michael@0 | 1005 | uint8_t *dest, int32_t count, |
michael@0 | 1006 | UErrorCode *status); |
michael@0 | 1007 | |
michael@0 | 1008 | /** Bound type selector that is taken by the ucol_getBound API. |
michael@0 | 1009 | * See the ucol_getBound documentation below for an explanation of each mode. |
michael@0 | 1010 | * Do not change the values assigned to the |
michael@0 | 1011 | * members of this enum: the underlying code |
michael@0 | 1012 | * depends on them having these numbers. |
michael@0 | 1013 | * @stable ICU 2.0 |
michael@0 | 1014 | */ |
michael@0 | 1015 | typedef enum { |
michael@0 | 1016 | /** lower bound */ |
michael@0 | 1017 | UCOL_BOUND_LOWER = 0, |
michael@0 | 1018 | /** upper bound that will match strings of exact size */ |
michael@0 | 1019 | UCOL_BOUND_UPPER = 1, |
michael@0 | 1020 | /** upper bound that will match all the strings that have the same initial substring as the given string */ |
michael@0 | 1021 | UCOL_BOUND_UPPER_LONG = 2, |
michael@0 | 1022 | UCOL_BOUND_VALUE_COUNT /**< implicitly 3: the number of bound modes listed above */ |
michael@0 | 1023 | } UColBoundMode; |
michael@0 | 1024 | |
michael@0 | 1025 | /** |
michael@0 | 1026 | * Produce a bound for a given sortkey and a number of levels. |
michael@0 | 1027 | * Return value is always the number of bytes needed, regardless of |
michael@0 | 1028 | * whether the result buffer was big enough or even valid.<br> |
michael@0 | 1029 | * Resulting bounds can be used to produce a range of strings that are |
michael@0 | 1030 | * between upper and lower bounds. For example, if bounds are produced |
michael@0 | 1031 | * for a sortkey of string "smith", strings between upper and lower |
michael@0 | 1032 | * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> |
michael@0 | 1033 | * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER |
michael@0 | 1034 | * is produced, strings matched would be as above. However, if bound |
michael@0 | 1035 | * produced using UCOL_BOUND_UPPER_LONG is used, the above example will |
michael@0 | 1036 | * also match "Smithsonian" and similar.<br> |
michael@0 | 1037 | * For more on usage, see example in cintltst/capitst.c in procedure |
michael@0 | 1038 | * TestBounds. |
michael@0 | 1039 | * Sort keys may be compared using <TT>strcmp</TT>. |
michael@0 | 1040 | * @param source The source sortkey. |
michael@0 | 1041 | * @param sourceLength The length of source, or -1 if null-terminated. |
michael@0 | 1042 | * (If an unmodified sortkey is passed, it is always null |
michael@0 | 1043 | * terminated). |
michael@0 | 1044 | * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which |
michael@0 | 1045 | * produces a lower inclusive bound, UCOL_BOUND_UPPER, that |
michael@0 | 1046 | * produces upper bound that matches strings of the same length |
michael@0 | 1047 | * or UCOL_BOUND_UPPER_LONG that matches strings that have the |
michael@0 | 1048 | * same starting substring as the source string. |
michael@0 | 1049 | * @param noOfLevels Number of levels required in the resulting bound (for most |
michael@0 | 1050 | * uses, the recommended value is 1). See users guide for |
michael@0 | 1051 | * explanation on number of levels a sortkey can have. |
michael@0 | 1052 | * @param result A pointer to a buffer to receive the resulting sortkey. |
michael@0 | 1053 | * @param resultLength The maximum size of result. |
michael@0 | 1054 | * @param status Used for returning error code if something went wrong. If the |
michael@0 | 1055 | * number of levels requested is higher than the number of levels |
michael@0 | 1056 | * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is |
michael@0 | 1057 | * issued. |
michael@0 | 1058 | * @return The size needed to fully store the bound. |
michael@0 | 1059 | * @see ucol_keyHashCode |
michael@0 | 1060 | * @stable ICU 2.1 |
michael@0 | 1061 | */ |
michael@0 | 1062 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 1063 | ucol_getBound(const uint8_t *source, |
michael@0 | 1064 | int32_t sourceLength, |
michael@0 | 1065 | UColBoundMode boundType, |
michael@0 | 1066 | uint32_t noOfLevels, |
michael@0 | 1067 | uint8_t *result, |
michael@0 | 1068 | int32_t resultLength, |
michael@0 | 1069 | UErrorCode *status); |
michael@0 | 1070 | |
michael@0 | 1071 | /** |
michael@0 | 1072 | * Gets the version information for a Collator. Version is currently |
michael@0 | 1073 | * an opaque 32-bit number which depends, among other things, on major |
michael@0 | 1074 | * versions of the collator tailoring and UCA. |
michael@0 | 1075 | * @param coll The UCollator to query. |
michael@0 | 1076 | * @param info the version # information, the result will be filled in |
michael@0 | 1077 | * @stable ICU 2.0 |
michael@0 | 1078 | */ |
michael@0 | 1079 | U_STABLE void U_EXPORT2 |
michael@0 | 1080 | ucol_getVersion(const UCollator* coll, UVersionInfo info); |
michael@0 | 1081 | |
michael@0 | 1082 | /** |
michael@0 | 1083 | * Gets the UCA version information for a Collator. Version is the |
michael@0 | 1084 | * UCA version number (3.1.1, 4.0). |
michael@0 | 1085 | * @param coll The UCollator to query. |
michael@0 | 1086 | * @param info the version # information, the result will be filled in |
michael@0 | 1087 | * @stable ICU 2.8 |
michael@0 | 1088 | */ |
michael@0 | 1089 | U_STABLE void U_EXPORT2 |
michael@0 | 1090 | ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); |
michael@0 | 1091 | |
michael@0 | 1092 | /** |
michael@0 | 1093 | * Merges two sort keys. The levels are merged with their corresponding counterparts |
michael@0 | 1094 | * (primaries with primaries, secondaries with secondaries etc.). Between the values |
michael@0 | 1095 | * from the same level a separator is inserted. |
michael@0 | 1096 | * |
michael@0 | 1097 | * This is useful, for example, for combining sort keys from first and last names |
michael@0 | 1098 | * to sort such pairs. |
michael@0 | 1099 | * It is possible to merge multiple sort keys by consecutively merging |
michael@0 | 1100 | * another one with the intermediate result. |
michael@0 | 1101 | * |
michael@0 | 1102 | * The length of the merge result is the sum of the lengths of the input sort keys. |
michael@0 | 1103 | * |
michael@0 | 1104 | * Example (uncompressed): |
michael@0 | 1105 | * <pre>191B1D 01 050505 01 910505 00 |
michael@0 | 1106 | * 1F2123 01 050505 01 910505 00</pre> |
michael@0 | 1107 | * will be merged as |
michael@0 | 1108 | * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre> |
michael@0 | 1109 | * |
michael@0 | 1110 | * If the destination buffer is not big enough, then its contents are undefined. |
michael@0 | 1111 | * If any of the source lengths is zero or any of the source pointers is NULL/undefined, |
michael@0 | 1112 | * the result is of size zero. |
michael@0 | 1113 | * |
michael@0 | 1114 | * @param src1 the first sort key |
michael@0 | 1115 | * @param src1Length the length of the first sort key, including the zero byte at the end; |
michael@0 | 1116 | * can be -1 if the function is to find the length |
michael@0 | 1117 | * @param src2 the second sort key |
michael@0 | 1118 | * @param src2Length the length of the second sort key, including the zero byte at the end; |
michael@0 | 1119 | * can be -1 if the function is to find the length |
michael@0 | 1120 | * @param dest the buffer where the merged sort key is written, |
michael@0 | 1121 | * can be NULL if destCapacity==0 |
michael@0 | 1122 | * @param destCapacity the number of bytes in the dest buffer |
michael@0 | 1123 | * @return the length of the merged sort key, src1Length+src2Length; |
michael@0 | 1124 | * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), |
michael@0 | 1125 | * in which cases the contents of dest are undefined |
michael@0 | 1126 | * @stable ICU 2.0 |
michael@0 | 1127 | */ |
michael@0 | 1128 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 1129 | ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, |
michael@0 | 1130 | const uint8_t *src2, int32_t src2Length, |
michael@0 | 1131 | uint8_t *dest, int32_t destCapacity); |
michael@0 | 1132 | |
michael@0 | 1133 | /** |
michael@0 | 1134 | * Universal attribute setter. |
michael@0 | 1135 | * @param coll collator whose attributes are to be changed |
michael@0 | 1136 | * @param attr attribute type |
michael@0 | 1137 | * @param value attribute value |
michael@0 | 1138 | * @param status to indicate whether the operation went smoothly or there were errors |
michael@0 | 1139 | * @see UColAttribute |
michael@0 | 1140 | * @see UColAttributeValue |
michael@0 | 1141 | * @see ucol_getAttribute |
michael@0 | 1142 | * @stable ICU 2.0 |
michael@0 | 1143 | */ |
michael@0 | 1144 | U_STABLE void U_EXPORT2 |
michael@0 | 1145 | ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); |
michael@0 | 1146 | |
michael@0 | 1147 | /** |
michael@0 | 1148 | * Universal attribute getter. |
michael@0 | 1149 | * @param coll collator whose attributes are to be queried |
michael@0 | 1150 | * @param attr attribute type |
michael@0 | 1151 | * @param status to indicate whether the operation went smoothly or there were errors |
michael@0 | 1152 | * @return attribute value |
michael@0 | 1153 | * @see UColAttribute |
michael@0 | 1154 | * @see UColAttributeValue |
michael@0 | 1155 | * @see ucol_setAttribute |
michael@0 | 1156 | * @stable ICU 2.0 |
michael@0 | 1157 | */ |
michael@0 | 1158 | U_STABLE UColAttributeValue U_EXPORT2 |
michael@0 | 1159 | ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status); |
michael@0 | 1160 | |
michael@0 | 1161 | /** Variable top |
michael@0 | 1162 | * is a two byte primary value which causes all the codepoints with primary values that |
michael@0 | 1163 | * are less than or equal to the variable top to be shifted when alternate handling is set |
michael@0 | 1164 | * to UCOL_SHIFTED. |
michael@0 | 1165 | * Sets the variable top to a collation element value of a string supplied. |
michael@0 | 1166 | * @param coll collator whose variable top needs to be changed |
michael@0 | 1167 | * @param varTop one or more (if contraction) UChars to which the variable top should be set |
michael@0 | 1168 | * @param len length of variable top string. If -1 it is considered to be zero terminated. |
michael@0 | 1169 | * @param status error code. If error code is set, the return value is undefined. |
michael@0 | 1170 | * Errors set by this function are: <br> |
michael@0 | 1171 | * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such |
michael@0 | 1172 | * contraction<br> |
michael@0 | 1173 | * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes |
michael@0 | 1174 | * @return a 32 bit value containing the value of the variable top in upper 16 bits. |
michael@0 | 1175 | * Lower 16 bits are undefined. |
michael@0 | 1176 | * @see ucol_getVariableTop |
michael@0 | 1177 | * @see ucol_restoreVariableTop |
michael@0 | 1178 | * @stable ICU 2.0 |
michael@0 | 1179 | */ |
michael@0 | 1180 | U_STABLE uint32_t U_EXPORT2 |
michael@0 | 1181 | ucol_setVariableTop(UCollator *coll, |
michael@0 | 1182 | const UChar *varTop, int32_t len, |
michael@0 | 1183 | UErrorCode *status); |
michael@0 | 1184 | |
michael@0 | 1185 | /** |
michael@0 | 1186 | * Gets the variable top value of a Collator. |
michael@0 | 1187 | * Lower 16 bits are undefined and should be ignored. |
michael@0 | 1188 | * @param coll collator whose variable top needs to be retrieved |
michael@0 | 1189 | * @param status error code (not changed by function). If error code is set, |
michael@0 | 1190 | * the return value is undefined. |
michael@0 | 1191 | * @return the variable top value of a Collator. |
michael@0 | 1192 | * @see ucol_setVariableTop |
michael@0 | 1193 | * @see ucol_restoreVariableTop |
michael@0 | 1194 | * @stable ICU 2.0 |
michael@0 | 1195 | */ |
michael@0 | 1196 | U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); |
michael@0 | 1197 | |
michael@0 | 1198 | /** |
michael@0 | 1199 | * Sets the variable top to a collation element value supplied. Variable top is |
michael@0 | 1200 | * set to the upper 16 bits. |
michael@0 | 1201 | * Lower 16 bits are ignored. |
michael@0 | 1202 | * @param coll collator whose variable top needs to be changed |
michael@0 | 1203 | * @param varTop CE value, as returned by ucol_setVariableTop or ucol_getVariableTop |
michael@0 | 1204 | * @param status error code (not changed by function) |
michael@0 | 1205 | * @see ucol_getVariableTop |
michael@0 | 1206 | * @see ucol_setVariableTop |
michael@0 | 1207 | * @stable ICU 2.0 |
michael@0 | 1208 | */ |
michael@0 | 1209 | U_STABLE void U_EXPORT2 |
michael@0 | 1210 | ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); |
michael@0 | 1211 | |
michael@0 | 1212 | /** |
michael@0 | 1213 | * Thread-safe cloning operation. The result is a clone of a given collator. |
michael@0 | 1214 | * @param coll collator to be cloned |
michael@0 | 1215 | * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> |
michael@0 | 1216 | * User-allocated space for the new clone. |
michael@0 | 1217 | * If NULL, new memory will be allocated. |
michael@0 | 1218 | * If buffer is not large enough, new memory will be allocated. |
michael@0 | 1219 | * Clients can use the U_COL_SAFECLONE_BUFFERSIZE. |
michael@0 | 1220 | * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> |
michael@0 | 1221 | * Pointer to size of allocated space. |
michael@0 | 1222 | * If *pBufferSize == 0, a sufficient size for use in cloning will |
michael@0 | 1223 | * be returned ('pre-flighting'). |
michael@0 | 1224 | * If *pBufferSize is not enough for a stack-based safe clone, |
michael@0 | 1225 | * new memory will be allocated. |
michael@0 | 1226 | * @param status to indicate whether the operation went smoothly or there were errors. |
michael@0 | 1227 | * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any |
michael@0 | 1228 | * allocations were necessary. |
michael@0 | 1229 | * @return pointer to the new clone |
michael@0 | 1230 | * @see ucol_open |
michael@0 | 1231 | * @see ucol_openRules |
michael@0 | 1232 | * @see ucol_close |
michael@0 | 1233 | * @stable ICU 2.0 |
michael@0 | 1234 | */ |
michael@0 | 1235 | U_STABLE UCollator* U_EXPORT2 |
michael@0 | 1236 | ucol_safeClone(const UCollator *coll, |
michael@0 | 1237 | void *stackBuffer, |
michael@0 | 1238 | int32_t *pBufferSize, |
michael@0 | 1239 | UErrorCode *status); |
michael@0 | 1240 | |
michael@0 | 1241 | #ifndef U_HIDE_DEPRECATED_API |
michael@0 | 1242 | |
michael@0 | 1243 | /** Default memory size for the new clone. |
michael@0 | 1244 | * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer. |
michael@0 | 1245 | */ |
michael@0 | 1246 | #define U_COL_SAFECLONE_BUFFERSIZE 1 |
michael@0 | 1247 | |
michael@0 | 1248 | #endif /* U_HIDE_DEPRECATED_API */ |
michael@0 | 1249 | |
michael@0 | 1250 | /** |
michael@0 | 1251 | * Returns current rules. Delta defines whether full rules are returned or just the tailoring. |
michael@0 | 1252 | * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough |
michael@0 | 1253 | * to store rules, will store up to available space. |
michael@0 | 1254 | * |
michael@0 | 1255 | * ucol_getRules() should normally be used instead. |
michael@0 | 1256 | * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales |
michael@0 | 1257 | * @param coll collator to get the rules from |
michael@0 | 1258 | * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. |
michael@0 | 1259 | * @param buffer buffer to store the result in. If NULL, you'll get no rules. |
michael@0 | 1260 | * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in. |
michael@0 | 1261 | * @return number of UChars needed to store the rules |
michael@0 | 1262 | * @stable ICU 2.0 |
michael@0 | 1263 | * @see UCOL_FULL_RULES |
michael@0 | 1264 | */ |
michael@0 | 1265 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 1266 | ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen); |
michael@0 | 1267 | |
michael@0 | 1268 | #ifndef U_HIDE_DEPRECATED_API |
michael@0 | 1269 | /** |
michael@0 | 1270 | * Gets the locale name of the collator. If the collator |
michael@0 | 1271 | * is instantiated from the rules, then this function returns |
michael@0 | 1272 | * NULL. |
michael@0 | 1273 | * @param coll The UCollator for which the locale is needed |
michael@0 | 1274 | * @param type You can choose between requested, valid and actual |
michael@0 | 1275 | * locale. For a description see the definition of |
michael@0 | 1276 | * ULocDataLocaleType in uloc.h |
michael@0 | 1277 | * @param status error code of the operation |
michael@0 | 1278 | * @return real locale name from which the collation data comes. |
michael@0 | 1279 | * If the collator was instantiated from rules, returns |
michael@0 | 1280 | * NULL. |
michael@0 | 1281 | * @deprecated ICU 2.8 Use ucol_getLocaleByType instead |
michael@0 | 1282 | */ |
michael@0 | 1283 | U_DEPRECATED const char * U_EXPORT2 |
michael@0 | 1284 | ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); |
michael@0 | 1285 | #endif /* U_HIDE_DEPRECATED_API */ |
michael@0 | 1286 | |
michael@0 | 1287 | /** |
michael@0 | 1288 | * Gets the locale name of the collator. If the collator |
michael@0 | 1289 | * is instantiated from the rules, then this function returns |
michael@0 | 1290 | * NULL. |
michael@0 | 1291 | * @param coll The UCollator for which the locale is needed |
michael@0 | 1292 | * @param type You can choose between requested, valid and actual |
michael@0 | 1293 | * locale. For a description see the definition of |
michael@0 | 1294 | * ULocDataLocaleType in uloc.h |
michael@0 | 1295 | * @param status error code of the operation |
michael@0 | 1296 | * @return real locale name from which the collation data comes. |
michael@0 | 1297 | * If the collator was instantiated from rules, returns |
michael@0 | 1298 | * NULL. |
michael@0 | 1299 | * @stable ICU 2.8 |
michael@0 | 1300 | */ |
michael@0 | 1301 | U_STABLE const char * U_EXPORT2 |
michael@0 | 1302 | ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); |
michael@0 | 1303 | |
michael@0 | 1304 | /** |
michael@0 | 1305 | * Get a Unicode set that contains all the characters and sequences tailored in |
michael@0 | 1306 | * this collator. The result must be disposed of by using uset_close. |
michael@0 | 1307 | * @param coll The UCollator for which we want to get tailored chars |
michael@0 | 1308 | * @param status error code of the operation |
michael@0 | 1309 | * @return a pointer to newly created USet. Must be disposed of by using uset_close |
michael@0 | 1310 | * @see ucol_openRules |
michael@0 | 1311 | * @see uset_close |
michael@0 | 1312 | * @stable ICU 2.4 |
michael@0 | 1313 | */ |
michael@0 | 1314 | U_STABLE USet * U_EXPORT2 |
michael@0 | 1315 | ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); |
michael@0 | 1316 | |
michael@0 | 1317 | #ifndef U_HIDE_INTERNAL_API |
michael@0 | 1318 | /** |
michael@0 | 1319 | * Universal attribute getter that returns UCOL_DEFAULT if the value is default. |
michael@0 | 1320 | * @param coll collator whose attributes are to be queried |
michael@0 | 1321 | * @param attr attribute type |
michael@0 | 1322 | * @param status to indicate whether the operation went smoothly or there were errors |
michael@0 | 1323 | * @return attribute value or UCOL_DEFAULT if the value is default |
michael@0 | 1324 | * @see UColAttribute |
michael@0 | 1325 | * @see UColAttributeValue |
michael@0 | 1326 | * @see ucol_setAttribute |
michael@0 | 1327 | * @internal ICU 3.0 |
michael@0 | 1328 | */ |
michael@0 | 1329 | U_INTERNAL UColAttributeValue U_EXPORT2 |
michael@0 | 1330 | ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status); |
michael@0 | 1331 | |
michael@0 | 1332 | /** Check whether two collators are equal. Collators are considered equal if they |
michael@0 | 1333 | * will sort strings the same way. This means that both the current attributes and the |
michael@0 | 1334 | * rules must be equivalent. Currently used for RuleBasedCollator::operator==. |
michael@0 | 1335 | * @param source first collator |
michael@0 | 1336 | * @param target second collator |
michael@0 | 1337 | * @return TRUE or FALSE |
michael@0 | 1338 | * @internal ICU 3.0 |
michael@0 | 1339 | */ |
michael@0 | 1340 | U_INTERNAL UBool U_EXPORT2 |
michael@0 | 1341 | ucol_equals(const UCollator *source, const UCollator *target); |
michael@0 | 1342 | |
michael@0 | 1343 | /** Calculates the set of unsafe code points, given a collator. |
michael@0 | 1344 | * A character is unsafe if you could append any character and cause the ordering to alter significantly. |
michael@0 | 1345 | * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. |
michael@0 | 1346 | * Thus if you have a character like a_umlaut, and you add a lower_dot to it, |
michael@0 | 1347 | * then it normalizes to a_lower_dot + umlaut, and sorts differently. |
michael@0 | 1348 | * @param coll Collator for which the unsafe set is calculated |
michael@0 | 1349 | * @param unsafe a fill-in set to receive the unsafe code points |
michael@0 | 1350 | * @param status for catching errors |
michael@0 | 1351 | * @return number of elements in the set |
michael@0 | 1352 | * @internal ICU 3.0 |
michael@0 | 1353 | */ |
michael@0 | 1354 | U_INTERNAL int32_t U_EXPORT2 |
michael@0 | 1355 | ucol_getUnsafeSet( const UCollator *coll, |
michael@0 | 1356 | USet *unsafe, |
michael@0 | 1357 | UErrorCode *status); |
michael@0 | 1358 | |
michael@0 | 1359 | /** Resets UCA's static pointers. Do not use this unless your static memory can go away. |
michael@0 | 1360 | * @internal ICU 3.2.1 |
michael@0 | 1361 | */ |
michael@0 | 1362 | U_INTERNAL void U_EXPORT2 |
michael@0 | 1363 | ucol_forgetUCA(void); |
michael@0 | 1364 | |
michael@0 | 1365 | /** Touches all resources needed for instantiating a collator from a short string definition, |
michael@0 | 1366 | * thus filling up the cache. |
michael@0 | 1367 | * @param definition A short string containing a locale and a set of attributes. |
michael@0 | 1368 | * Attributes not explicitly mentioned are left at the default |
michael@0 | 1369 | * state for a locale. |
michael@0 | 1370 | * @param forceDefaults if FALSE, the settings that are the same as the collator |
michael@0 | 1371 | * default settings will not be applied (for example, setting |
michael@0 | 1372 | * French secondary on a French collator would not be executed). |
michael@0 | 1373 | * If TRUE, all the settings will be applied regardless of the |
michael@0 | 1374 | * collator default value. If the definition |
michael@0 | 1375 | * strings are to be cached, should be set to FALSE. |
michael@0 | 1376 | * @param parseError if not NULL, structure that will get filled with error's pre |
michael@0 | 1377 | * and post context in case of error. |
michael@0 | 1378 | * @param status Error code. Apart from regular error conditions connected to |
michael@0 | 1379 | * instantiating collators (like out of memory or similar), this |
michael@0 | 1380 | * API will return an error if an invalid attribute or attribute/value |
michael@0 | 1381 | * combination is specified. |
michael@0 | 1382 | * @see ucol_openFromShortString |
michael@0 | 1383 | * @internal ICU 3.2.1 |
michael@0 | 1384 | */ |
michael@0 | 1385 | U_INTERNAL void U_EXPORT2 |
michael@0 | 1386 | ucol_prepareShortStringOpen( const char *definition, |
michael@0 | 1387 | UBool forceDefaults, |
michael@0 | 1388 | UParseError *parseError, |
michael@0 | 1389 | UErrorCode *status); |
michael@0 | 1390 | #endif /* U_HIDE_INTERNAL_API */ |
michael@0 | 1391 | |
michael@0 | 1392 | /** Creates a binary image of a collator. This binary image can be stored and |
michael@0 | 1393 | * later used to instantiate a collator using ucol_openBinary. |
michael@0 | 1394 | * This API supports preflighting. |
michael@0 | 1395 | * @param coll Collator to create the binary image from |
michael@0 | 1396 | * @param buffer a fill-in buffer to receive the binary image |
michael@0 | 1397 | * @param capacity capacity of the destination buffer |
michael@0 | 1398 | * @param status for catching errors |
michael@0 | 1399 | * @return size of the image |
michael@0 | 1400 | * @see ucol_openBinary |
michael@0 | 1401 | * @stable ICU 3.2 |
michael@0 | 1402 | */ |
michael@0 | 1403 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 1404 | ucol_cloneBinary(const UCollator *coll, |
michael@0 | 1405 | uint8_t *buffer, int32_t capacity, |
michael@0 | 1406 | UErrorCode *status); |
michael@0 | 1407 | |
michael@0 | 1408 | /** Opens a collator from a collator binary image created using |
michael@0 | 1409 | * ucol_cloneBinary. The binary image used in instantiation of the |
michael@0 | 1410 | * collator remains owned by the user and should stay around for |
michael@0 | 1411 | * the lifetime of the collator. The API also takes a base collator |
michael@0 | 1412 | * which usually should be UCA. |
michael@0 | 1413 | * @param bin binary image owned by the user and required through the |
michael@0 | 1414 | * lifetime of the collator |
michael@0 | 1415 | * @param length size of the image. If negative, the API will try to |
michael@0 | 1416 | * figure out the length of the image |
michael@0 | 1417 | * @param base fallback collator, usually UCA. Base is required to be |
michael@0 | 1418 | * present through the lifetime of the collator. Currently |
michael@0 | 1419 | * it cannot be NULL. |
michael@0 | 1420 | * @param status for catching errors |
michael@0 | 1421 | * @return newly created collator |
michael@0 | 1422 | * @see ucol_cloneBinary |
michael@0 | 1423 | * @stable ICU 3.2 |
michael@0 | 1424 | */ |
michael@0 | 1425 | U_STABLE UCollator* U_EXPORT2 |
michael@0 | 1426 | ucol_openBinary(const uint8_t *bin, int32_t length, |
michael@0 | 1427 | const UCollator *base, |
michael@0 | 1428 | UErrorCode *status); |
michael@0 | 1429 | |
michael@0 | 1430 | |
michael@0 | 1431 | #endif /* #if !UCONFIG_NO_COLLATION */ |
michael@0 | 1432 | |
michael@0 | 1433 | #endif |