michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2001-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * michael@0: * File ucoleitr.cpp michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 02/15/2001 synwee Modified all methods to process its own function michael@0: * instead of calling the equivalent c++ api (coleitr.h) michael@0: *******************************************************************************/ michael@0: michael@0: #ifndef UCOLEITR_H michael@0: #define UCOLEITR_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: /** michael@0: * This indicates an error has occurred during processing or there are no more CEs michael@0: * to be returned. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: #define UCOL_NULLORDER ((int32_t)0xFFFFFFFF) michael@0: michael@0: #ifndef U_HIDE_INTERNAL_API michael@0: /** michael@0: * This indicates an error has occurred during processing or there are no more CEs michael@0: * to be returned. michael@0: * michael@0: * @internal michael@0: */ michael@0: #define UCOL_PROCESSED_NULLORDER ((int64_t)U_INT64_MAX) michael@0: #endif /* U_HIDE_INTERNAL_API */ michael@0: michael@0: #include "unicode/ucol.h" michael@0: michael@0: /** michael@0: * The UCollationElements struct. michael@0: * For usage in C programs. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: typedef struct UCollationElements UCollationElements; michael@0: michael@0: /** michael@0: * \file michael@0: * \brief C API: UCollationElements michael@0: * michael@0: * The UCollationElements API is used as an iterator to walk through each michael@0: * character of an international string. 
Use the iterator to return the michael@0: * ordering priority of the positioned character. The ordering priority of a michael@0: * character, which we refer to as a key, defines how a character is collated michael@0: * in the given collation object. michael@0: * For example, consider the following in Spanish: michael@0: *
michael@0:  * .       "ca" -> the first key is key('c') and second key is key('a').
michael@0:  * .       "cha" -> the first key is key('ch') and second key is key('a').
michael@0:  * 
michael@0: * And in German, michael@0: *
michael@0:  * .       "æb"-> the first key is key('a'), the second key is key('e'), and
michael@0:  * .       the third key is key('b').
michael@0:  * 
michael@0: *

Example of the iterator usage: (without error checking) michael@0: *

michael@0:  * .  void CollationElementIterator_Example()
michael@0:  * .  {
michael@0:  * .      UChar *s;
michael@0:  * .      int32_t order, primaryOrder;
michael@0:  * .      UCollationElements *c;
michael@0:  * .      UCollator *coll;
michael@0:  * .      UErrorCode success = U_ZERO_ERROR;
michael@0:  * .      s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) );
michael@0:  * .      u_uastrcpy(s, "This is a test");
michael@0:  * .      coll = ucol_open(NULL, &success);
michael@0:  * .      c = ucol_openElements(coll, s, u_strlen(s), &success);
michael@0:  * .      order = ucol_next(c, &success);
michael@0:  * .      ucol_reset(c);
michael@0:  * .      order = ucol_previous(c, &success);
michael@0:  * .      free(s);
michael@0:  * .      ucol_close(coll);
michael@0:  * .      ucol_closeElements(c);
michael@0:  * .  }
michael@0:  * 
michael@0: *

michael@0: * ucol_next() returns the collation order of the next character. michael@0: * ucol_previous() returns the collation order of the previous character. michael@0: * The Collation Element Iterator moves only in one direction between calls to michael@0: * ucol_reset(). That is, calls to ucol_next() and ucol_previous() cannot be mixed. michael@0: * Whenever ucol_previous() is to be called after ucol_next() or vice versa, michael@0: * ucol_reset() has to be called first to reset the status, shifting pointers to michael@0: * either the end or the start of the string. Hence at the next call of michael@0: * ucol_previous() or ucol_next(), the last or first collation order, respectively, will be returned. michael@0: * If a change of direction is done without a ucol_reset(), the result is michael@0: * undefined. michael@0: * The result of a forward iterate (ucol_next) and reversed result of the michael@0: * backward iterate (ucol_previous) on the same string are equivalent, if michael@0: * collation orders with the value UCOL_IGNORABLE are ignored. michael@0: * Character based on the comparison level of the collator. A collation order michael@0: * consists of primary order, secondary order and tertiary order. The data michael@0: * type of the collation order is int32_t. michael@0: * michael@0: * @see UCollator michael@0: */ michael@0: michael@0: /** michael@0: * Open the collation elements for a string. michael@0: * michael@0: * @param coll The collator containing the desired collation rules. michael@0: * @param text The text to iterate over. michael@0: * @param textLength The number of characters in text, or -1 if null-terminated michael@0: * @param status A pointer to an UErrorCode to receive any errors. 
michael@0: * @return a struct containing collation element information michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE UCollationElements* U_EXPORT2 michael@0: ucol_openElements(const UCollator *coll, michael@0: const UChar *text, michael@0: int32_t textLength, michael@0: UErrorCode *status); michael@0: michael@0: michael@0: /** michael@0: * Get a hash code for a key... Not very useful! michael@0: * @param key the given key. michael@0: * @param length the size of the key array. michael@0: * @return the hash code. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE int32_t U_EXPORT2 michael@0: ucol_keyHashCode(const uint8_t* key, int32_t length); michael@0: michael@0: /** michael@0: * Close a UCollationElements. michael@0: * Once closed, a UCollationElements may no longer be used. michael@0: * @param elems The UCollationElements to close. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 michael@0: ucol_closeElements(UCollationElements *elems); michael@0: michael@0: /** michael@0: * Reset the collation elements to their initial state. michael@0: * This will move the 'cursor' to the beginning of the text. michael@0: * Property settings for collation will be reset to the current status. michael@0: * @param elems The UCollationElements to reset. michael@0: * @see ucol_next michael@0: * @see ucol_previous michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 michael@0: ucol_reset(UCollationElements *elems); michael@0: michael@0: #ifndef U_HIDE_INTERNAL_API michael@0: /** michael@0: * Set the collation elements to use implicit ordering for Han michael@0: * even if they've been tailored. This will also force Hangul michael@0: * syllables to be ordered by decomposing them to their component michael@0: * Jamo. michael@0: * michael@0: * @param elems The UCollationElements containing the text. michael@0: * @param status A pointer to a UErrorCode to receive any errors. 
michael@0: * michael@0: * @internal michael@0: */ michael@0: U_INTERNAL void U_EXPORT2 michael@0: ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status); michael@0: #endif /* U_HIDE_INTERNAL_API */ michael@0: michael@0: /** michael@0: * Get the ordering priority of the next collation element in the text. michael@0: * A single character may contain more than one collation element. michael@0: * @param elems The UCollationElements containing the text. michael@0: * @param status A pointer to an UErrorCode to receive any errors. michael@0: * @return The next collation elements ordering, otherwise returns UCOL_NULLORDER michael@0: * if an error has occurred or if the end of string has been reached michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE int32_t U_EXPORT2 michael@0: ucol_next(UCollationElements *elems, UErrorCode *status); michael@0: michael@0: /** michael@0: * Get the ordering priority of the previous collation element in the text. michael@0: * A single character may contain more than one collation element. michael@0: * Note that internally a stack is used to store buffered collation elements. michael@0: * It is very rare that the stack will overflow, however if such a case is michael@0: * encountered, the problem can be solved by increasing the value of michael@0: * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h. michael@0: * @param elems The UCollationElements containing the text. michael@0: * @param status A pointer to an UErrorCode to receive any errors. Notably michael@0: * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack michael@0: * buffer has been exhausted. michael@0: * @return The previous collation elements ordering, otherwise returns michael@0: * UCOL_NULLORDER if an error has occurred or if the start of string has michael@0: * been reached. 
michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE int32_t U_EXPORT2 michael@0: ucol_previous(UCollationElements *elems, UErrorCode *status); michael@0: michael@0: #ifndef U_HIDE_INTERNAL_API michael@0: /** michael@0: * Get the processed ordering priority of the next collation element in the text. michael@0: * A single character may contain more than one collation element. michael@0: * michael@0: * @param elems The UCollationElements containing the text. michael@0: * @param ixLow A pointer to an int32_t to receive the iterator index before fetching the CE. michael@0: * @param ixHigh A pointer to an int32_t to receive the iterator index after fetching the CE. michael@0: * @param status A pointer to an UErrorCode to receive any errors. michael@0: * @return The next collation elements ordering, otherwise returns UCOL_PROCESSED_NULLORDER michael@0: * if an error has occurred or if the end of string has been reached michael@0: * michael@0: * @internal michael@0: */ michael@0: U_INTERNAL int64_t U_EXPORT2 michael@0: ucol_nextProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status); michael@0: michael@0: /** michael@0: * Get the processed ordering priority of the previous collation element in the text. michael@0: * A single character may contain more than one collation element. michael@0: * Note that internally a stack is used to store buffered collation elements. michael@0: * It is very rare that the stack will overflow, however if such a case is michael@0: * encountered, the problem can be solved by increasing the value of michael@0: * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h. michael@0: * michael@0: * @param elems The UCollationElements containing the text. 
michael@0: * @param ixLow A pointer to an int32_t to receive the iterator index after fetching the CE michael@0: * @param ixHigh A pointer to an int32_t to receive the iterator index before fetching the CE michael@0: * @param status A pointer to an UErrorCode to receive any errors. Notably michael@0: * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack michael@0: * buffer has been exhausted. michael@0: * @return The previous collation elements ordering, otherwise returns michael@0: * UCOL_PROCESSED_NULLORDER if an error has occurred or if the start of michael@0: * string has been reached. michael@0: * michael@0: * @internal michael@0: */ michael@0: U_INTERNAL int64_t U_EXPORT2 michael@0: ucol_previousProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status); michael@0: #endif /* U_HIDE_INTERNAL_API */ michael@0: michael@0: /** michael@0: * Get the maximum length of any expansion sequences that end with the michael@0: * specified comparison order. michael@0: * This is useful for .... ? michael@0: * @param elems The UCollationElements containing the text. michael@0: * @param order A collation order returned by ucol_previous() or ucol_next(). michael@0: * @return maximum size of the expansion sequences ending with the collation michael@0: * element or 1 if collation element does not occur at the end of any michael@0: * expansion sequence michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE int32_t U_EXPORT2 michael@0: ucol_getMaxExpansion(const UCollationElements *elems, int32_t order); michael@0: michael@0: /** michael@0: * Set the text containing the collation elements. michael@0: * Property settings for collation will remain the same. michael@0: * In order to reset the iterator to the current collation property settings, michael@0: * ucol_reset() has to be called. michael@0: * @param elems The UCollationElements to set. michael@0: * @param text The source text containing the collation elements. 
michael@0: * @param textLength The length of text, or -1 if null-terminated. michael@0: * @param status A pointer to an UErrorCode to receive any errors. michael@0: * @see ucol_getText michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 michael@0: ucol_setText( UCollationElements *elems, michael@0: const UChar *text, michael@0: int32_t textLength, michael@0: UErrorCode *status); michael@0: michael@0: /** michael@0: * Get the offset of the current source character. michael@0: * This is an offset into the text of the character containing the current michael@0: * collation elements. michael@0: * @param elems The UCollationElements to query. michael@0: * @return The offset of the current source character. michael@0: * @see ucol_setOffset michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE int32_t U_EXPORT2 michael@0: ucol_getOffset(const UCollationElements *elems); michael@0: michael@0: /** michael@0: * Set the offset of the current source character. michael@0: * This is an offset into the text of the character to be processed. michael@0: * Property settings for collation will remain the same. michael@0: * In order to reset the iterator to the current collation property settings, michael@0: * ucol_reset() has to be called. michael@0: * @param elems The UCollationElements to set. michael@0: * @param offset The desired character offset. michael@0: * @param status A pointer to an UErrorCode to receive any errors. michael@0: * @see ucol_getOffset michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 michael@0: ucol_setOffset(UCollationElements *elems, michael@0: int32_t offset, michael@0: UErrorCode *status); michael@0: michael@0: /** michael@0: * Get the primary order of a collation order. michael@0: * @param order the collation order michael@0: * @return the primary order of the given collation order. 
michael@0: * @stable ICU 2.6 michael@0: */ michael@0: U_STABLE int32_t U_EXPORT2 michael@0: ucol_primaryOrder (int32_t order); michael@0: michael@0: /** michael@0: * Get the secondary order of a collation order. michael@0: * @param order the collation order michael@0: * @return the secondary order of the given collation order. michael@0: * @stable ICU 2.6 michael@0: */ michael@0: U_STABLE int32_t U_EXPORT2 michael@0: ucol_secondaryOrder (int32_t order); michael@0: michael@0: /** michael@0: * Get the tertiary order of a collation order. michael@0: * @param order the collation order michael@0: * @return the tertiary order of the given collation order. michael@0: * @stable ICU 2.6 michael@0: */ michael@0: U_STABLE int32_t U_EXPORT2 michael@0: ucol_tertiaryOrder (int32_t order); michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */ michael@0: michael@0: #endif /* UCOLEITR_H */