1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/usetiter.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,318 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (c) 2002-2008, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +*/ 1.10 +#ifndef USETITER_H 1.11 +#define USETITER_H 1.12 + 1.13 +#include "unicode/utypes.h" 1.14 +#include "unicode/uobject.h" 1.15 +#include "unicode/unistr.h" 1.16 + 1.17 +/** 1.18 + * \file 1.19 + * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet. 1.20 + */ 1.21 + 1.22 +U_NAMESPACE_BEGIN 1.23 + 1.24 +class UnicodeSet; 1.25 +class UnicodeString; 1.26 + 1.27 +/** 1.28 + * 1.29 + * UnicodeSetIterator iterates over the contents of a UnicodeSet. It 1.30 + * iterates over either code points or code point ranges. After all 1.31 + * code points or ranges have been returned, it returns the 1.32 + * multicharacter strings of the UnicodeSet, if any. 1.33 + * 1.34 + * This class is not intended to be subclassed. Consider any fields 1.35 + * or methods declared as "protected" to be private. The use of 1.36 + * protected in this class is an artifact of history. 1.37 + * 1.38 + * <p>To iterate over code points and strings, use a loop like this: 1.39 + * <pre> 1.40 + * UnicodeSetIterator it(set); 1.41 + * while (it.next()) { 1.42 + * processItem(it.getString()); 1.43 + * } 1.44 + * </pre> 1.45 + * <p>Each item in the set is accessed as a string. Set elements 1.46 + * consisting of single code points are returned as strings containing 1.47 + * just the one code point. 1.48 + * 1.49 + * <p>To iterate over code point ranges, instead of individual code points, 1.50 + * use a loop like this: 1.51 + * <pre> 1.52 + * UnicodeSetIterator it(set); 1.53 + * while (it.nextRange()) { 1.54 + * if (it.isString()) { 1.55 + * processString(it.getString()); 1.56 + * } else { 1.57 + * processCodepointRange(it.getCodepoint(), it.getCodepointEnd()); 1.58 + * } 1.59 + * } 1.60 + * </pre> 1.61 + * @author M. Davis 1.62 + * @stable ICU 2.4 1.63 + */ 1.64 +class U_COMMON_API UnicodeSetIterator : public UObject { 1.65 + 1.66 + protected: 1.67 + 1.68 + /** 1.69 + * Value of <tt>codepoint</tt> if the iterator points to a string. 1.70 + * If <tt>codepoint == IS_STRING</tt>, then examine 1.71 + * <tt>string</tt> for the current iteration result. 1.72 + * @stable ICU 2.4 1.73 + */ 1.74 + enum { IS_STRING = -1 }; 1.75 + 1.76 + /** 1.77 + * Current code point, or the special value <tt>IS_STRING</tt>, if 1.78 + * the iterator points to a string. 1.79 + * @stable ICU 2.4 1.80 + */ 1.81 + UChar32 codepoint; 1.82 + 1.83 + /** 1.84 + * When iterating over ranges using <tt>nextRange()</tt>, 1.85 + * <tt>codepointEnd</tt> contains the inclusive end of the 1.86 + * iteration range, if <tt>codepoint != IS_STRING</tt>. If 1.87 + * iterating over code points using <tt>next()</tt>, or if 1.88 + * <tt>codepoint == IS_STRING</tt>, then the value of 1.89 + * <tt>codepointEnd</tt> is undefined. 1.90 + * @stable ICU 2.4 1.91 + */ 1.92 + UChar32 codepointEnd; 1.93 + 1.94 + /** 1.95 + * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points 1.96 + * to the current string. If <tt>codepoint != IS_STRING</tt>, the 1.97 + * value of <tt>string</tt> is undefined. 1.98 + * @stable ICU 2.4 1.99 + */ 1.100 + const UnicodeString* string; 1.101 + 1.102 + public: 1.103 + 1.104 + /** 1.105 + * Create an iterator over the given set. The iterator is valid 1.106 + * only so long as <tt>set</tt> is valid. 1.107 + * @param set set to iterate over 1.108 + * @stable ICU 2.4 1.109 + */ 1.110 + UnicodeSetIterator(const UnicodeSet& set); 1.111 + 1.112 + /** 1.113 + * Create an iterator over nothing. <tt>next()</tt> and 1.114 + * <tt>nextRange()</tt> return false. This is a convenience 1.115 + * constructor allowing the target to be set later. 1.116 + * @stable ICU 2.4 1.117 + */ 1.118 + UnicodeSetIterator(); 1.119 + 1.120 + /** 1.121 + * Destructor. 1.122 + * @stable ICU 2.4 1.123 + */ 1.124 + virtual ~UnicodeSetIterator(); 1.125 + 1.126 + /** 1.127 + * Returns true if the current element is a string. If so, the 1.128 + * caller can retrieve it with <tt>getString()</tt>. If this 1.129 + * method returns false, the current element is a code point or 1.130 + * code point range, depending on whether <tt>next()</tt> or 1.131 + * <tt>nextRange()</tt> was called. 1.132 + * Elements of types string and codepoint can both be retrieved 1.133 + * with the function <tt>getString()</tt>. 1.134 + * Elements of type codepoint can also be retrieved with 1.135 + * <tt>getCodepoint()</tt>. 1.136 + * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint 1.137 + * of the range, and <tt>getCodepointEnd()</tt> returns the end 1.138 + * of the range. 1.139 + * @stable ICU 2.4 1.140 + */ 1.141 + inline UBool isString() const; 1.142 + 1.143 + /** 1.144 + * Returns the current code point, if <tt>isString()</tt> returned 1.145 + * false. Otherwise returns an undefined result. 1.146 + * @stable ICU 2.4 1.147 + */ 1.148 + inline UChar32 getCodepoint() const; 1.149 + 1.150 + /** 1.151 + * Returns the end of the current code point range, if 1.152 + * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was 1.153 + * called. Otherwise returns an undefined result. 1.154 + * @stable ICU 2.4 1.155 + */ 1.156 + inline UChar32 getCodepointEnd() const; 1.157 + 1.158 + /** 1.159 + * Returns the current string, if <tt>isString()</tt> returned 1.160 + * true. If the current iteration item is a code point, a UnicodeString 1.161 + * containing that single code point is returned. 1.162 + * 1.163 + * Ownership of the returned string remains with the iterator. 1.164 + * The string is guaranteed to remain valid only until the iterator is 1.165 + * advanced to the next item, or until the iterator is deleted. 1.166 + * 1.167 + * @stable ICU 2.4 1.168 + */ 1.169 + const UnicodeString& getString(); 1.170 + 1.171 + /** 1.172 + * Advances the iteration position to the next element in the set, 1.173 + * which can be either a single code point or a string. 1.174 + * If there are no more elements in the set, return false. 1.175 + * 1.176 + * <p> 1.177 + * If <tt>isString() == TRUE</tt>, the value is a 1.178 + * string, otherwise the value is a 1.179 + * single code point. Elements of either type can be retrieved 1.180 + * with the function <tt>getString()</tt>, while elements of 1.181 + * consisting of a single code point can be retrieved with 1.182 + * <tt>getCodepoint()</tt> 1.183 + * 1.184 + * <p>The order of iteration is all code points in sorted order, 1.185 + * followed by all strings sorted order. Do not mix 1.186 + * calls to <tt>next()</tt> and <tt>nextRange()</tt> without 1.187 + * calling <tt>reset()</tt> between them. The results of doing so 1.188 + * are undefined. 1.189 + * 1.190 + * @return true if there was another element in the set. 1.191 + * @stable ICU 2.4 1.192 + */ 1.193 + UBool next(); 1.194 + 1.195 + /** 1.196 + * Returns the next element in the set, either a code point range 1.197 + * or a string. If there are no more elements in the set, return 1.198 + * false. If <tt>isString() == TRUE</tt>, the value is a 1.199 + * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a 1.200 + * range of one or more code points from <tt>getCodepoint()</tt> to 1.201 + * <tt>getCodepointeEnd()</tt> inclusive. 1.202 + * 1.203 + * <p>The order of iteration is all code points ranges in sorted 1.204 + * order, followed by all strings sorted order. Ranges are 1.205 + * disjoint and non-contiguous. The value returned from <tt>getString()</tt> 1.206 + * is undefined unless <tt>isString() == TRUE</tt>. Do not mix calls to 1.207 + * <tt>next()</tt> and <tt>nextRange()</tt> without calling 1.208 + * <tt>reset()</tt> between them. The results of doing so are 1.209 + * undefined. 1.210 + * 1.211 + * @return true if there was another element in the set. 1.212 + * @stable ICU 2.4 1.213 + */ 1.214 + UBool nextRange(); 1.215 + 1.216 + /** 1.217 + * Sets this iterator to visit the elements of the given set and 1.218 + * resets it to the start of that set. The iterator is valid only 1.219 + * so long as <tt>set</tt> is valid. 1.220 + * @param set the set to iterate over. 1.221 + * @stable ICU 2.4 1.222 + */ 1.223 + void reset(const UnicodeSet& set); 1.224 + 1.225 + /** 1.226 + * Resets this iterator to the start of the set. 1.227 + * @stable ICU 2.4 1.228 + */ 1.229 + void reset(); 1.230 + 1.231 + /** 1.232 + * ICU "poor man's RTTI", returns a UClassID for this class. 1.233 + * 1.234 + * @stable ICU 2.4 1.235 + */ 1.236 + static UClassID U_EXPORT2 getStaticClassID(); 1.237 + 1.238 + /** 1.239 + * ICU "poor man's RTTI", returns a UClassID for the actual class. 1.240 + * 1.241 + * @stable ICU 2.4 1.242 + */ 1.243 + virtual UClassID getDynamicClassID() const; 1.244 + 1.245 + // ======================= PRIVATES =========================== 1.246 + 1.247 + protected: 1.248 + 1.249 + // endElement and nextElements are really UChar32's, but we keep 1.250 + // them as signed int32_t's so we can do comparisons with 1.251 + // endElement set to -1. Leave them as int32_t's. 1.252 + /** The set 1.253 + * @stable ICU 2.4 1.254 + */ 1.255 + const UnicodeSet* set; 1.256 + /** End range 1.257 + * @stable ICU 2.4 1.258 + */ 1.259 + int32_t endRange; 1.260 + /** Range 1.261 + * @stable ICU 2.4 1.262 + */ 1.263 + int32_t range; 1.264 + /** End element 1.265 + * @stable ICU 2.4 1.266 + */ 1.267 + int32_t endElement; 1.268 + /** Next element 1.269 + * @stable ICU 2.4 1.270 + */ 1.271 + int32_t nextElement; 1.272 + //UBool abbreviated; 1.273 + /** Next string 1.274 + * @stable ICU 2.4 1.275 + */ 1.276 + int32_t nextString; 1.277 + /** String count 1.278 + * @stable ICU 2.4 1.279 + */ 1.280 + int32_t stringCount; 1.281 + 1.282 + /** 1.283 + * Points to the string to use when the caller asks for a 1.284 + * string and the current iteration item is a code point, not a string. 1.285 + * @internal 1.286 + */ 1.287 + UnicodeString *cpString; 1.288 + 1.289 + /** Copy constructor. Disallowed. 1.290 + * @stable ICU 2.4 1.291 + */ 1.292 + UnicodeSetIterator(const UnicodeSetIterator&); // disallow 1.293 + 1.294 + /** Assignment operator. Disallowed. 1.295 + * @stable ICU 2.4 1.296 + */ 1.297 + UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow 1.298 + 1.299 + /** Load range 1.300 + * @stable ICU 2.4 1.301 + */ 1.302 + virtual void loadRange(int32_t range); 1.303 + 1.304 +}; 1.305 + 1.306 +inline UBool UnicodeSetIterator::isString() const { 1.307 + return codepoint == (UChar32)IS_STRING; 1.308 +} 1.309 + 1.310 +inline UChar32 UnicodeSetIterator::getCodepoint() const { 1.311 + return codepoint; 1.312 +} 1.313 + 1.314 +inline UChar32 UnicodeSetIterator::getCodepointEnd() const { 1.315 + return codepointEnd; 1.316 +} 1.317 + 1.318 + 1.319 +U_NAMESPACE_END 1.320 + 1.321 +#endif