michael@0: /* michael@0: ****************************************************************************** michael@0: * Copyright (C) 1997-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ****************************************************************************** michael@0: */ michael@0: michael@0: /** michael@0: * \file michael@0: * \brief C++ API: Collation Element Iterator. michael@0: */ michael@0: michael@0: /** michael@0: * File coleitr.h michael@0: * michael@0: * michael@0: * michael@0: * Created by: Helena Shih michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * michael@0: * 8/18/97 helena Added internal API documentation. michael@0: * 08/03/98 erm Synched with 1.2 version CollationElementIterator.java michael@0: * 12/10/99 aliu Ported Thai collation support from Java. michael@0: * 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h) michael@0: * 02/19/01 swquek Removed CollationElementsIterator() since it is michael@0: * private constructor and no calls are made to it michael@0: */ michael@0: michael@0: #ifndef COLEITR_H michael@0: #define COLEITR_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: #include "unicode/uobject.h" michael@0: #include "unicode/tblcoll.h" michael@0: #include "unicode/ucoleitr.h" michael@0: michael@0: /** michael@0: * The UCollationElements struct. michael@0: * For usage in C programs. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: typedef struct UCollationElements UCollationElements; michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: /** michael@0: * The CollationElementIterator class is used as an iterator to walk through michael@0: * each character of an international string. Use the iterator to return the michael@0: * ordering priority of the positioned character. The ordering priority of a michael@0: * character, which we refer to as a key, defines how a character is collated in michael@0: * the given collation object. michael@0: * For example, consider the following in Spanish: michael@0: *
michael@0: * "ca" -> the first key is key('c') and second key is key('a'). michael@0: * "cha" -> the first key is key('ch') and second key is key('a').michael@0: * And in German, michael@0: *
\htmlonly "æb"-> the first key is key('a'), the second key is key('e'), and michael@0: * the third key is key('b'). \endhtmlonlymichael@0: * The key of a character, is an integer composed of primary order(short), michael@0: * secondary order(char), and tertiary order(char). Java strictly defines the michael@0: * size and signedness of its primitive data types. Therefore, the static michael@0: * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return michael@0: * int32_t to ensure the correctness of the key value. michael@0: *
Example of the iterator usage: (without error checking) michael@0: *
michael@0: * \code michael@0: * void CollationElementIterator_Example() michael@0: * { michael@0: * UnicodeString str = "This is a test"; michael@0: * UErrorCode success = U_ZERO_ERROR; michael@0: * RuleBasedCollator* rbc = michael@0: * (RuleBasedCollator*) RuleBasedCollator::createInstance(success); michael@0: * CollationElementIterator* c = michael@0: * rbc->createCollationElementIterator( str ); michael@0: * int32_t order = c->next(success); michael@0: * c->reset(); michael@0: * order = c->previous(success); michael@0: * delete c; michael@0: * delete rbc; michael@0: * } michael@0: * \endcode michael@0: *michael@0: *
michael@0: * The method next() returns the collation order of the next character based on michael@0: * the comparison level of the collator. The method previous() returns the michael@0: * collation order of the previous character based on the comparison level of michael@0: * the collator. The Collation Element Iterator moves only in one direction michael@0: * between calls to reset(), setOffset(), or setText(). That is, next() michael@0: * and previous() can not be inter-used. Whenever previous() is to be called after michael@0: * next() or vice versa, reset(), setOffset() or setText() has to be called first michael@0: * to reset the status, shifting pointers to either the end or the start of michael@0: * the string (reset() or setText()), or the specified position (setOffset()). michael@0: * Hence at the next call of next() or previous(), the first or last collation order, michael@0: * or collation order at the spefcifieid position will be returned. If a change of michael@0: * direction is done without one of these calls, the result is undefined. michael@0: *
michael@0: * The result of a forward iterate (next()) and reversed result of the backward michael@0: * iterate (previous()) on the same string are equivalent, if collation orders michael@0: * with the value UCOL_IGNORABLE are ignored. michael@0: * Character based on the comparison level of the collator. A collation order michael@0: * consists of primary order, secondary order and tertiary order. The data michael@0: * type of the collation order is t_int32. michael@0: * michael@0: * Note, CollationElementIterator should not be subclassed. michael@0: * @see Collator michael@0: * @see RuleBasedCollator michael@0: * @version 1.8 Jan 16 2001 michael@0: */ michael@0: class U_I18N_API CollationElementIterator : public UObject { michael@0: public: michael@0: michael@0: // CollationElementIterator public data member ------------------------------ michael@0: michael@0: enum { michael@0: /** michael@0: * NULLORDER indicates that an error has occured while processing michael@0: * @stable ICU 2.0 michael@0: */ michael@0: NULLORDER = (int32_t)0xffffffff michael@0: }; michael@0: michael@0: // CollationElementIterator public constructor/destructor ------------------- michael@0: michael@0: /** michael@0: * Copy constructor. michael@0: * michael@0: * @param other the object to be copied from michael@0: * @stable ICU 2.0 michael@0: */ michael@0: CollationElementIterator(const CollationElementIterator& other); michael@0: michael@0: /** michael@0: * Destructor michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual ~CollationElementIterator(); michael@0: michael@0: // CollationElementIterator public methods ---------------------------------- michael@0: michael@0: /** michael@0: * Returns true if "other" is the same as "this" michael@0: * michael@0: * @param other the object to be compared michael@0: * @return true if "other" is the same as "this" michael@0: * @stable ICU 2.0 michael@0: */ michael@0: UBool operator==(const CollationElementIterator& other) const; michael@0: michael@0: /** michael@0: * Returns true if "other" is not the same as "this". michael@0: * michael@0: * @param other the object to be compared michael@0: * @return true if "other" is not the same as "this" michael@0: * @stable ICU 2.0 michael@0: */ michael@0: UBool operator!=(const CollationElementIterator& other) const; michael@0: michael@0: /** michael@0: * Resets the cursor to the beginning of the string. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: void reset(void); michael@0: michael@0: /** michael@0: * Gets the ordering priority of the next character in the string. michael@0: * @param status the error code status. michael@0: * @return the next character's ordering. otherwise returns NULLORDER if an michael@0: * error has occured or if the end of string has been reached michael@0: * @stable ICU 2.0 michael@0: */ michael@0: int32_t next(UErrorCode& status); michael@0: michael@0: /** michael@0: * Get the ordering priority of the previous collation element in the string. michael@0: * @param status the error code status. michael@0: * @return the previous element's ordering. otherwise returns NULLORDER if an michael@0: * error has occured or if the start of string has been reached michael@0: * @stable ICU 2.0 michael@0: */ michael@0: int32_t previous(UErrorCode& status); michael@0: michael@0: /** michael@0: * Gets the primary order of a collation order. michael@0: * @param order the collation order michael@0: * @return the primary order of a collation order. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static inline int32_t primaryOrder(int32_t order); michael@0: michael@0: /** michael@0: * Gets the secondary order of a collation order. michael@0: * @param order the collation order michael@0: * @return the secondary order of a collation order. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static inline int32_t secondaryOrder(int32_t order); michael@0: michael@0: /** michael@0: * Gets the tertiary order of a collation order. michael@0: * @param order the collation order michael@0: * @return the tertiary order of a collation order. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static inline int32_t tertiaryOrder(int32_t order); michael@0: michael@0: /** michael@0: * Return the maximum length of any expansion sequences that end with the michael@0: * specified comparison order. michael@0: * @param order a collation order returned by previous or next. michael@0: * @return maximum size of the expansion sequences ending with the collation michael@0: * element or 1 if collation element does not occur at the end of any michael@0: * expansion sequence michael@0: * @stable ICU 2.0 michael@0: */ michael@0: int32_t getMaxExpansion(int32_t order) const; michael@0: michael@0: /** michael@0: * Gets the comparison order in the desired strength. Ignore the other michael@0: * differences. michael@0: * @param order The order value michael@0: * @stable ICU 2.0 michael@0: */ michael@0: int32_t strengthOrder(int32_t order) const; michael@0: michael@0: /** michael@0: * Sets the source string. michael@0: * @param str the source string. michael@0: * @param status the error code status. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: void setText(const UnicodeString& str, UErrorCode& status); michael@0: michael@0: /** michael@0: * Sets the source string. michael@0: * @param str the source character iterator. michael@0: * @param status the error code status. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: void setText(CharacterIterator& str, UErrorCode& status); michael@0: michael@0: /** michael@0: * Checks if a comparison order is ignorable. michael@0: * @param order the collation order. michael@0: * @return TRUE if a character is ignorable, FALSE otherwise. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static inline UBool isIgnorable(int32_t order); michael@0: michael@0: /** michael@0: * Gets the offset of the currently processed character in the source string. michael@0: * @return the offset of the character. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: int32_t getOffset(void) const; michael@0: michael@0: /** michael@0: * Sets the offset of the currently processed character in the source string. michael@0: * @param newOffset the new offset. michael@0: * @param status the error code status. michael@0: * @return the offset of the character. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: void setOffset(int32_t newOffset, UErrorCode& status); michael@0: michael@0: /** michael@0: * ICU "poor man's RTTI", returns a UClassID for the actual class. michael@0: * michael@0: * @stable ICU 2.2 michael@0: */ michael@0: virtual UClassID getDynamicClassID() const; michael@0: michael@0: /** michael@0: * ICU "poor man's RTTI", returns a UClassID for this class. michael@0: * michael@0: * @stable ICU 2.2 michael@0: */ michael@0: static UClassID U_EXPORT2 getStaticClassID(); michael@0: michael@0: private: michael@0: friend class RuleBasedCollator; michael@0: michael@0: /** michael@0: * CollationElementIterator constructor. This takes the source string and the michael@0: * collation object. The cursor will walk thru the source string based on the michael@0: * predefined collation rules. If the source string is empty, NULLORDER will michael@0: * be returned on the calls to next(). michael@0: * @param sourceText the source string. michael@0: * @param order the collation object. michael@0: * @param status the error code status. michael@0: */ michael@0: CollationElementIterator(const UnicodeString& sourceText, michael@0: const RuleBasedCollator* order, UErrorCode& status); michael@0: michael@0: /** michael@0: * CollationElementIterator constructor. This takes the source string and the michael@0: * collation object. The cursor will walk thru the source string based on the michael@0: * predefined collation rules. If the source string is empty, NULLORDER will michael@0: * be returned on the calls to next(). michael@0: * @param sourceText the source string. michael@0: * @param order the collation object. michael@0: * @param status the error code status. michael@0: */ michael@0: CollationElementIterator(const CharacterIterator& sourceText, michael@0: const RuleBasedCollator* order, UErrorCode& status); michael@0: michael@0: /** michael@0: * Assignment operator michael@0: * michael@0: * @param other the object to be copied michael@0: */ michael@0: const CollationElementIterator& michael@0: operator=(const CollationElementIterator& other); michael@0: michael@0: CollationElementIterator(); // default constructor not implemented michael@0: michael@0: // CollationElementIterator private data members ---------------------------- michael@0: michael@0: /** michael@0: * Data wrapper for collation elements michael@0: */ michael@0: UCollationElements *m_data_; michael@0: michael@0: /** michael@0: * Indicates if m_data_ belongs to this object. michael@0: */ michael@0: UBool isDataOwned_; michael@0: }; michael@0: michael@0: // CollationElementIterator inline method defination -------------------------- michael@0: michael@0: /** michael@0: * Get the primary order of a collation order. michael@0: * @param order the collation order michael@0: * @return the primary order of a collation order. michael@0: */ michael@0: inline int32_t CollationElementIterator::primaryOrder(int32_t order) michael@0: { michael@0: order &= RuleBasedCollator::PRIMARYORDERMASK; michael@0: return (order >> RuleBasedCollator::PRIMARYORDERSHIFT); michael@0: } michael@0: michael@0: /** michael@0: * Get the secondary order of a collation order. michael@0: * @param order the collation order michael@0: * @return the secondary order of a collation order. michael@0: */ michael@0: inline int32_t CollationElementIterator::secondaryOrder(int32_t order) michael@0: { michael@0: order = order & RuleBasedCollator::SECONDARYORDERMASK; michael@0: return (order >> RuleBasedCollator::SECONDARYORDERSHIFT); michael@0: } michael@0: michael@0: /** michael@0: * Get the tertiary order of a collation order. michael@0: * @param order the collation order michael@0: * @return the tertiary order of a collation order. michael@0: */ michael@0: inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) michael@0: { michael@0: return (order &= RuleBasedCollator::TERTIARYORDERMASK); michael@0: } michael@0: michael@0: inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const michael@0: { michael@0: return ucol_getMaxExpansion(m_data_, (uint32_t)order); michael@0: } michael@0: michael@0: inline UBool CollationElementIterator::isIgnorable(int32_t order) michael@0: { michael@0: return (primaryOrder(order) == RuleBasedCollator::PRIMIGNORABLE); michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */ michael@0: michael@0: #endif