michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (c) 2002-2008, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: */ michael@0: #ifndef USETITER_H michael@0: #define USETITER_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/uobject.h" michael@0: #include "unicode/unistr.h" michael@0: michael@0: /** michael@0: * \file michael@0: * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet. michael@0: */ michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class UnicodeSet; michael@0: class UnicodeString; michael@0: michael@0: /** michael@0: * michael@0: * UnicodeSetIterator iterates over the contents of a UnicodeSet. It michael@0: * iterates over either code points or code point ranges. After all michael@0: * code points or ranges have been returned, it returns the michael@0: * multicharacter strings of the UnicodeSet, if any. michael@0: * michael@0: * This class is not intended to be subclassed. Consider any fields michael@0: * or methods declared as "protected" to be private. The use of michael@0: * protected in this class is an artifact of history. michael@0: * michael@0: *
To iterate over code points and strings, use a loop like this: michael@0: *
michael@0: * UnicodeSetIterator it(set); michael@0: * while (it.next()) { michael@0: * processItem(it.getString()); michael@0: * } michael@0: *michael@0: *
Each item in the set is accessed as a string. Set elements michael@0: * consisting of single code points are returned as strings containing michael@0: * just the one code point. michael@0: * michael@0: *
To iterate over code point ranges, instead of individual code points, michael@0: * use a loop like this: michael@0: *
michael@0: * UnicodeSetIterator it(set); michael@0: * while (it.nextRange()) { michael@0: * if (it.isString()) { michael@0: * processString(it.getString()); michael@0: * } else { michael@0: * processCodepointRange(it.getCodepoint(), it.getCodepointEnd()); michael@0: * } michael@0: * } michael@0: *michael@0: * @author M. Davis michael@0: * @stable ICU 2.4 michael@0: */ michael@0: class U_COMMON_API UnicodeSetIterator : public UObject { michael@0: michael@0: protected: michael@0: michael@0: /** michael@0: * Value of codepoint if the iterator points to a string. michael@0: * If codepoint == IS_STRING, then examine michael@0: * string for the current iteration result. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: enum { IS_STRING = -1 }; michael@0: michael@0: /** michael@0: * Current code point, or the special value IS_STRING, if michael@0: * the iterator points to a string. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: UChar32 codepoint; michael@0: michael@0: /** michael@0: * When iterating over ranges using nextRange(), michael@0: * codepointEnd contains the inclusive end of the michael@0: * iteration range, if codepoint != IS_STRING. If michael@0: * iterating over code points using next(), or if michael@0: * codepoint == IS_STRING, then the value of michael@0: * codepointEnd is undefined. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: UChar32 codepointEnd; michael@0: michael@0: /** michael@0: * If codepoint == IS_STRING, then string points michael@0: * to the current string. If codepoint != IS_STRING, the michael@0: * value of string is undefined. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: const UnicodeString* string; michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * Create an iterator over the given set. The iterator is valid michael@0: * only so long as set is valid. michael@0: * @param set set to iterate over michael@0: * @stable ICU 2.4 michael@0: */ michael@0: UnicodeSetIterator(const UnicodeSet& set); michael@0: michael@0: /** michael@0: * Create an iterator over nothing. next() and michael@0: * nextRange() return false. This is a convenience michael@0: * constructor allowing the target to be set later. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: UnicodeSetIterator(); michael@0: michael@0: /** michael@0: * Destructor. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: virtual ~UnicodeSetIterator(); michael@0: michael@0: /** michael@0: * Returns true if the current element is a string. If so, the michael@0: * caller can retrieve it with getString(). If this michael@0: * method returns false, the current element is a code point or michael@0: * code point range, depending on whether next() or michael@0: * nextRange() was called. michael@0: * Elements of types string and codepoint can both be retrieved michael@0: * with the function getString(). michael@0: * Elements of type codepoint can also be retrieved with michael@0: * getCodepoint(). michael@0: * For ranges, getCodepoint() returns the starting codepoint michael@0: * of the range, and getCodepointEnd() returns the end michael@0: * of the range. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: inline UBool isString() const; michael@0: michael@0: /** michael@0: * Returns the current code point, if isString() returned michael@0: * false. Otherwise returns an undefined result. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: inline UChar32 getCodepoint() const; michael@0: michael@0: /** michael@0: * Returns the end of the current code point range, if michael@0: * isString() returned false and nextRange() was michael@0: * called. Otherwise returns an undefined result. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: inline UChar32 getCodepointEnd() const; michael@0: michael@0: /** michael@0: * Returns the current string, if isString() returned michael@0: * true. If the current iteration item is a code point, a UnicodeString michael@0: * containing that single code point is returned. michael@0: * michael@0: * Ownership of the returned string remains with the iterator. michael@0: * The string is guaranteed to remain valid only until the iterator is michael@0: * advanced to the next item, or until the iterator is deleted. michael@0: * michael@0: * @stable ICU 2.4 michael@0: */ michael@0: const UnicodeString& getString(); michael@0: michael@0: /** michael@0: * Advances the iteration position to the next element in the set, michael@0: * which can be either a single code point or a string. michael@0: * If there are no more elements in the set, return false. michael@0: * michael@0: *
michael@0: * If isString() == TRUE, the value is a michael@0: * string, otherwise the value is a michael@0: * single code point. Elements of either type can be retrieved michael@0: * with the function getString(), while elements of michael@0: * consisting of a single code point can be retrieved with michael@0: * getCodepoint() michael@0: * michael@0: *
The order of iteration is all code points in sorted order, michael@0: * followed by all strings sorted order. Do not mix michael@0: * calls to next() and nextRange() without michael@0: * calling reset() between them. The results of doing so michael@0: * are undefined. michael@0: * michael@0: * @return true if there was another element in the set. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: UBool next(); michael@0: michael@0: /** michael@0: * Returns the next element in the set, either a code point range michael@0: * or a string. If there are no more elements in the set, return michael@0: * false. If isString() == TRUE, the value is a michael@0: * string and can be accessed with getString(). Otherwise the value is a michael@0: * range of one or more code points from getCodepoint() to michael@0: * getCodepointeEnd() inclusive. michael@0: * michael@0: *
The order of iteration is all code points ranges in sorted michael@0: * order, followed by all strings sorted order. Ranges are michael@0: * disjoint and non-contiguous. The value returned from getString() michael@0: * is undefined unless isString() == TRUE. Do not mix calls to michael@0: * next() and nextRange() without calling michael@0: * reset() between them. The results of doing so are michael@0: * undefined. michael@0: * michael@0: * @return true if there was another element in the set. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: UBool nextRange(); michael@0: michael@0: /** michael@0: * Sets this iterator to visit the elements of the given set and michael@0: * resets it to the start of that set. The iterator is valid only michael@0: * so long as set is valid. michael@0: * @param set the set to iterate over. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: void reset(const UnicodeSet& set); michael@0: michael@0: /** michael@0: * Resets this iterator to the start of the set. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: void reset(); michael@0: michael@0: /** michael@0: * ICU "poor man's RTTI", returns a UClassID for this class. michael@0: * michael@0: * @stable ICU 2.4 michael@0: */ michael@0: static UClassID U_EXPORT2 getStaticClassID(); michael@0: michael@0: /** michael@0: * ICU "poor man's RTTI", returns a UClassID for the actual class. michael@0: * michael@0: * @stable ICU 2.4 michael@0: */ michael@0: virtual UClassID getDynamicClassID() const; michael@0: michael@0: // ======================= PRIVATES =========================== michael@0: michael@0: protected: michael@0: michael@0: // endElement and nextElements are really UChar32's, but we keep michael@0: // them as signed int32_t's so we can do comparisons with michael@0: // endElement set to -1. Leave them as int32_t's. michael@0: /** The set michael@0: * @stable ICU 2.4 michael@0: */ michael@0: const UnicodeSet* set; michael@0: /** End range michael@0: * @stable ICU 2.4 michael@0: */ michael@0: int32_t endRange; michael@0: /** Range michael@0: * @stable ICU 2.4 michael@0: */ michael@0: int32_t range; michael@0: /** End element michael@0: * @stable ICU 2.4 michael@0: */ michael@0: int32_t endElement; michael@0: /** Next element michael@0: * @stable ICU 2.4 michael@0: */ michael@0: int32_t nextElement; michael@0: //UBool abbreviated; michael@0: /** Next string michael@0: * @stable ICU 2.4 michael@0: */ michael@0: int32_t nextString; michael@0: /** String count michael@0: * @stable ICU 2.4 michael@0: */ michael@0: int32_t stringCount; michael@0: michael@0: /** michael@0: * Points to the string to use when the caller asks for a michael@0: * string and the current iteration item is a code point, not a string. michael@0: * @internal michael@0: */ michael@0: UnicodeString *cpString; michael@0: michael@0: /** Copy constructor. Disallowed. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: UnicodeSetIterator(const UnicodeSetIterator&); // disallow michael@0: michael@0: /** Assignment operator. Disallowed. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow michael@0: michael@0: /** Load range michael@0: * @stable ICU 2.4 michael@0: */ michael@0: virtual void loadRange(int32_t range); michael@0: michael@0: }; michael@0: michael@0: inline UBool UnicodeSetIterator::isString() const { michael@0: return codepoint == (UChar32)IS_STRING; michael@0: } michael@0: michael@0: inline UChar32 UnicodeSetIterator::getCodepoint() const { michael@0: return codepoint; michael@0: } michael@0: michael@0: inline UChar32 UnicodeSetIterator::getCodepointEnd() const { michael@0: return codepointEnd; michael@0: } michael@0: michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif