intl/icu/source/i18n/unicode/ucoleitr.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2001-2011, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 *
michael@0 7 * File ucoleitr.h
michael@0 8 *
michael@0 9 * Modification History:
michael@0 10 *
michael@0 11 * Date Name Description
michael@0 12 * 02/15/2001 synwee Modified all methods to process its own function
michael@0 13 * instead of calling the equivalent c++ api (coleitr.h)
michael@0 14 *******************************************************************************/
michael@0 15
michael@0 16 #ifndef UCOLEITR_H
michael@0 17 #define UCOLEITR_H
michael@0 18
michael@0 19 #include "unicode/utypes.h"
michael@0 20
michael@0 21 #if !UCONFIG_NO_COLLATION
michael@0 22
michael@0 23 /**
michael@0 24 * This indicates that an error has occurred during processing or that there
michael@0 25 * are no more CEs to be returned.
michael@0 26 * @stable ICU 2.0
michael@0 27 */
michael@0 28 #define UCOL_NULLORDER ((int32_t)0xFFFFFFFF)
michael@0 29
michael@0 30 #ifndef U_HIDE_INTERNAL_API
michael@0 31 /**
michael@0 32 * This indicates an error has occurred during processing or there are no more CEs
michael@0 33 * to be returned.
michael@0 34 *
michael@0 35 * @internal
michael@0 36 */
michael@0 37 #define UCOL_PROCESSED_NULLORDER ((int64_t)U_INT64_MAX)
michael@0 38 #endif /* U_HIDE_INTERNAL_API */
michael@0 39
michael@0 40 #include "unicode/ucol.h"
michael@0 41
michael@0 42 /**
michael@0 43 * The UCollationElements struct.
michael@0 44 * For usage in C programs.
michael@0 45 * @stable ICU 2.0
michael@0 46 */
michael@0 47 typedef struct UCollationElements UCollationElements;
michael@0 48
michael@0 49 /**
michael@0 50 * \file
michael@0 51 * \brief C API: UCollationElements
michael@0 52 *
michael@0 53 * The UCollationElements API is used as an iterator to walk through each
michael@0 54 * character of an international string. Use the iterator to return the
michael@0 55 * ordering priority of the positioned character. The ordering priority of a
michael@0 56 * character, which we refer to as a key, defines how a character is collated
michael@0 57 * in the given collation object.
michael@0 58 * For example, consider the following in Spanish:
michael@0 59 * <pre>
michael@0 60 * . "ca" -> the first key is key('c') and second key is key('a').
michael@0 61 * . "cha" -> the first key is key('ch') and second key is key('a').
michael@0 62 * </pre>
michael@0 63 * And in German,
michael@0 64 * <pre>
michael@0 65 * . "<ae ligature>b"-> the first key is key('a'), the second key is key('e'), and
michael@0 66 * . the third key is key('b').
michael@0 67 * </pre>
michael@0 68 * <p>Example of the iterator usage: (without error checking)
michael@0 69 * <pre>
michael@0 70 * . void CollationElementIterator_Example()
michael@0 71 * . {
michael@0 72 * . UChar *s;
michael@0 73 * . int32_t order, primaryOrder;
michael@0 74 * . UCollationElements *c;
michael@0 75 * . UCollator *coll;
michael@0 76 * . UErrorCode success = U_ZERO_ERROR;
michael@0 77 * . s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) );
michael@0 78 * . u_uastrcpy(s, "This is a test");
michael@0 79 * . coll = ucol_open(NULL, &success);
michael@0 80 * . c = ucol_openElements(coll, s, u_strlen(s), &success);
michael@0 81 * . order = ucol_next(c, &success);
michael@0 82 * . ucol_reset(c);
michael@0 83 * . order = ucol_previous(c, &success);
michael@0 84 * . free(s);
michael@0 85 * . ucol_close(coll);
michael@0 86 * . ucol_closeElements(c);
michael@0 87 * . }
michael@0 88 * </pre>
michael@0 89 * <p>
michael@0 90 * ucol_next() returns the collation order of the next character.
michael@0 91 * ucol_previous() returns the collation order of the previous character.
michael@0 92 * The Collation Element Iterator moves only in one direction between calls to
michael@0 93 * ucol_reset(). That is, ucol_next() and ucol_previous() cannot be mixed.
michael@0 94 * Whenever ucol_previous() is to be called after ucol_next() or vice versa,
michael@0 95 * ucol_reset() has to be called first to reset the status, shifting pointers to
michael@0 96 * either the end or the start of the string. Hence at the next call of
michael@0 97 * ucol_previous() or ucol_next(), the last or first collation order, respectively, will be returned.
michael@0 98 * If a change of direction is done without a ucol_reset(), the result is
michael@0 99 * undefined.
michael@0 100 * The result of a forward iterate (ucol_next) and reversed result of the
michael@0 101 * backward iterate (ucol_previous) on the same string are equivalent, if
michael@0 102 * collation orders with the value UCOL_IGNORABLE are ignored.
michael@0 103 * Character based on the comparison level of the collator. A collation order
michael@0 104 * consists of primary order, secondary order and tertiary order. The data
michael@0 105 * type of the collation order is <strong>int32_t</strong>.
michael@0 106 *
michael@0 107 * @see UCollator
michael@0 108 */
michael@0 109
michael@0 110 /**
michael@0 111 * Open the collation elements for a string.
michael@0 112 *
michael@0 113 * @param coll The collator containing the desired collation rules.
michael@0 114 * @param text The text to iterate over.
michael@0 115 * @param textLength The number of characters in text, or -1 if null-terminated
michael@0 116 * @param status A pointer to an UErrorCode to receive any errors.
michael@0 117 * @return a struct containing collation element information
michael@0 118 * @stable ICU 2.0
michael@0 119 */
michael@0 120 U_STABLE UCollationElements* U_EXPORT2
michael@0 121 ucol_openElements(const UCollator *coll,
michael@0 122 const UChar *text,
michael@0 123 int32_t textLength,
michael@0 124 UErrorCode *status);
michael@0 125
michael@0 126
michael@0 127 /**
michael@0 128 * get a hash code for a key... Not very useful!
michael@0 129 * @param key the given key.
michael@0 130 * @param length the size of the key array.
michael@0 131 * @return the hash code.
michael@0 132 * @stable ICU 2.0
michael@0 133 */
michael@0 134 U_STABLE int32_t U_EXPORT2
michael@0 135 ucol_keyHashCode(const uint8_t* key, int32_t length);
michael@0 136
michael@0 137 /**
michael@0 138 * Close a UCollationElements.
michael@0 139 * Once closed, a UCollationElements may no longer be used.
michael@0 140 * @param elems The UCollationElements to close.
michael@0 141 * @stable ICU 2.0
michael@0 142 */
michael@0 143 U_STABLE void U_EXPORT2
michael@0 144 ucol_closeElements(UCollationElements *elems);
michael@0 145
michael@0 146 /**
michael@0 147 * Reset the collation elements to their initial state.
michael@0 148 * This will move the 'cursor' to the beginning of the text.
michael@0 149 * Property settings for collation will be reset to the current status.
michael@0 150 * @param elems The UCollationElements to reset.
michael@0 151 * @see ucol_next
michael@0 152 * @see ucol_previous
michael@0 153 * @stable ICU 2.0
michael@0 154 */
michael@0 155 U_STABLE void U_EXPORT2
michael@0 156 ucol_reset(UCollationElements *elems);
michael@0 157
michael@0 158 #ifndef U_HIDE_INTERNAL_API
michael@0 159 /**
michael@0 160 * Set the collation elements to use implicit ordering for Han
michael@0 161 * even if they've been tailored. This will also force Hangul
michael@0 162 * syllables to be ordered by decomposing them to their component
michael@0 163 * Jamo.
michael@0 164 *
michael@0 165 * @param elems The UCollationElements containing the text.
michael@0 166 * @param status A pointer to a UErrorCode to receive any errors.
michael@0 167 *
michael@0 168 * @internal
michael@0 169 */
michael@0 170 U_INTERNAL void U_EXPORT2
michael@0 171 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status);
michael@0 172 #endif /* U_HIDE_INTERNAL_API */
michael@0 173
michael@0 174 /**
michael@0 175 * Get the ordering priority of the next collation element in the text.
michael@0 176 * A single character may contain more than one collation element.
michael@0 177 * @param elems The UCollationElements containing the text.
michael@0 178 * @param status A pointer to an UErrorCode to receive any errors.
michael@0 179 * @return The next collation elements ordering, otherwise returns UCOL_NULLORDER
michael@0 180 * if an error has occurred or if the end of string has been reached
michael@0 181 * @stable ICU 2.0
michael@0 182 */
michael@0 183 U_STABLE int32_t U_EXPORT2
michael@0 184 ucol_next(UCollationElements *elems, UErrorCode *status);
michael@0 185
michael@0 186 /**
michael@0 187 * Get the ordering priority of the previous collation element in the text.
michael@0 188 * A single character may contain more than one collation element.
michael@0 189 * Note that internally a stack is used to store buffered collation elements.
michael@0 190 * It is very rare that the stack will overflow, however if such a case is
michael@0 191 * encountered, the problem can be solved by increasing the size
michael@0 192 * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
michael@0 193 * @param elems The UCollationElements containing the text.
michael@0 194 * @param status A pointer to an UErrorCode to receive any errors. Notably
michael@0 195 * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
michael@0 196 * buffer has been exhausted.
michael@0 197 * @return The previous collation elements ordering, otherwise returns
michael@0 198 * UCOL_NULLORDER if an error has occurred or if the start of string has
michael@0 199 * been reached.
michael@0 200 * @stable ICU 2.0
michael@0 201 */
michael@0 202 U_STABLE int32_t U_EXPORT2
michael@0 203 ucol_previous(UCollationElements *elems, UErrorCode *status);
michael@0 204
michael@0 205 #ifndef U_HIDE_INTERNAL_API
michael@0 206 /**
michael@0 207 * Get the processed ordering priority of the next collation element in the text.
michael@0 208 * A single character may contain more than one collation element.
michael@0 209 *
michael@0 210 * @param elems The UCollationElements containing the text.
michael@0 211 * @param ixLow a pointer to an int32_t to receive the iterator index before fetching the CE.
michael@0 212 * @param ixHigh a pointer to an int32_t to receive the iterator index after fetching the CE.
michael@0 213 * @param status A pointer to an UErrorCode to receive any errors.
michael@0 214 * @return The next collation elements ordering, otherwise returns UCOL_PROCESSED_NULLORDER
michael@0 215 * if an error has occurred or if the end of string has been reached
michael@0 216 *
michael@0 217 * @internal
michael@0 218 */
michael@0 219 U_INTERNAL int64_t U_EXPORT2
michael@0 220 ucol_nextProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
michael@0 221
michael@0 222 /**
michael@0 223 * Get the processed ordering priority of the previous collation element in the text.
michael@0 224 * A single character may contain more than one collation element.
michael@0 225 * Note that internally a stack is used to store buffered collation elements.
michael@0 226 * It is very rare that the stack will overflow, however if such a case is
michael@0 227 * encountered, the problem can be solved by increasing the size
michael@0 228 * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
michael@0 229 *
michael@0 230 * @param elems The UCollationElements containing the text.
michael@0 231 * @param ixLow A pointer to an int32_t to receive the iterator index after fetching the CE
michael@0 232 * @param ixHigh A pointer to an int32_t to receive the iterator index before fetching the CE
michael@0 233 * @param status A pointer to an UErrorCode to receive any errors. Notably
michael@0 234 * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
michael@0 235 * buffer has been exhausted.
michael@0 236 * @return The previous collation elements ordering, otherwise returns
michael@0 237 * UCOL_PROCESSED_NULLORDER if an error has occurred or if the start of
michael@0 238 * string has been reached.
michael@0 239 *
michael@0 240 * @internal
michael@0 241 */
michael@0 242 U_INTERNAL int64_t U_EXPORT2
michael@0 243 ucol_previousProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
michael@0 244 #endif /* U_HIDE_INTERNAL_API */
michael@0 245
michael@0 246 /**
michael@0 247 * Get the maximum length of any expansion sequences that end with the
michael@0 248 * specified comparison order.
michael@0 249 * This is useful for .... ?
michael@0 250 * @param elems The UCollationElements containing the text.
michael@0 251 * @param order A collation order returned by previous or next.
michael@0 252 * @return maximum size of the expansion sequences ending with the collation
michael@0 253 * element or 1 if collation element does not occur at the end of any
michael@0 254 * expansion sequence
michael@0 255 * @stable ICU 2.0
michael@0 256 */
michael@0 257 U_STABLE int32_t U_EXPORT2
michael@0 258 ucol_getMaxExpansion(const UCollationElements *elems, int32_t order);
michael@0 259
michael@0 260 /**
michael@0 261 * Set the text containing the collation elements.
michael@0 262 * Property settings for collation will remain the same.
michael@0 263 * In order to reset the iterator to the current collation property settings,
michael@0 264 * ucol_reset() has to be called.
michael@0 265 * @param elems The UCollationElements to set.
michael@0 266 * @param text The source text containing the collation elements.
michael@0 267 * @param textLength The length of text, or -1 if null-terminated.
michael@0 268 * @param status A pointer to an UErrorCode to receive any errors.
michael@0 269 * @see ucol_getText
michael@0 270 * @stable ICU 2.0
michael@0 271 */
michael@0 272 U_STABLE void U_EXPORT2
michael@0 273 ucol_setText( UCollationElements *elems,
michael@0 274 const UChar *text,
michael@0 275 int32_t textLength,
michael@0 276 UErrorCode *status);
michael@0 277
michael@0 278 /**
michael@0 279 * Get the offset of the current source character.
michael@0 280 * This is an offset into the text of the character containing the current
michael@0 281 * collation elements.
michael@0 282 * @param elems The UCollationElements to query.
michael@0 283 * @return The offset of the current source character.
michael@0 284 * @see ucol_setOffset
michael@0 285 * @stable ICU 2.0
michael@0 286 */
michael@0 287 U_STABLE int32_t U_EXPORT2
michael@0 288 ucol_getOffset(const UCollationElements *elems);
michael@0 289
michael@0 290 /**
michael@0 291 * Set the offset of the current source character.
michael@0 292 * This is an offset into the text of the character to be processed.
michael@0 293 * Property settings for collation will remain the same.
michael@0 294 * In order to reset the iterator to the current collation property settings,
michael@0 295 * ucol_reset() has to be called.
michael@0 296 * @param elems The UCollationElements to set.
michael@0 297 * @param offset The desired character offset.
michael@0 298 * @param status A pointer to an UErrorCode to receive any errors.
michael@0 299 * @see ucol_getOffset
michael@0 300 * @stable ICU 2.0
michael@0 301 */
michael@0 302 U_STABLE void U_EXPORT2
michael@0 303 ucol_setOffset(UCollationElements *elems,
michael@0 304 int32_t offset,
michael@0 305 UErrorCode *status);
michael@0 306
michael@0 307 /**
michael@0 308 * Get the primary order of a collation order.
michael@0 309 * @param order the collation order
michael@0 310 * @return the primary order of a collation order.
michael@0 311 * @stable ICU 2.6
michael@0 312 */
michael@0 313 U_STABLE int32_t U_EXPORT2
michael@0 314 ucol_primaryOrder (int32_t order);
michael@0 315
michael@0 316 /**
michael@0 317 * Get the secondary order of a collation order.
michael@0 318 * @param order the collation order
michael@0 319 * @return the secondary order of a collation order.
michael@0 320 * @stable ICU 2.6
michael@0 321 */
michael@0 322 U_STABLE int32_t U_EXPORT2
michael@0 323 ucol_secondaryOrder (int32_t order);
michael@0 324
michael@0 325 /**
michael@0 326 * Get the tertiary order of a collation order.
michael@0 327 * @param order the collation order
michael@0 328 * @return the tertiary order of a collation order.
michael@0 329 * @stable ICU 2.6
michael@0 330 */
michael@0 331 U_STABLE int32_t U_EXPORT2
michael@0 332 ucol_tertiaryOrder (int32_t order);
michael@0 333
michael@0 334 #endif /* #if !UCONFIG_NO_COLLATION */
michael@0 335
michael@0 336 #endif

mercurial