intl/icu/source/common/unicode/usetiter.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unicode/usetiter.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,318 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +* Copyright (c) 2002-2008, International Business Machines
     1.7 +* Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*/
    1.10 +#ifndef USETITER_H
    1.11 +#define USETITER_H
    1.12 +
    1.13 +#include "unicode/utypes.h"
    1.14 +#include "unicode/uobject.h"
    1.15 +#include "unicode/unistr.h"
    1.16 +
    1.17 +/**
    1.18 + * \file 
    1.19 + * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
    1.20 + */
    1.21 +
    1.22 +U_NAMESPACE_BEGIN
    1.23 +
    1.24 +class UnicodeSet;
    1.25 +class UnicodeString;
    1.26 +
    1.27 +/**
    1.28 + *
    1.29 + * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
    1.30 + * iterates over either code points or code point ranges.  After all
    1.31 + * code points or ranges have been returned, it returns the
    1.32 + * multicharacter strings of the UnicodeSet, if any.
    1.33 + *
    1.34 + * This class is not intended to be subclassed.  Consider any fields
    1.35 + *  or methods declared as "protected" to be private.  The use of
    1.36 + *  protected in this class is an artifact of history.
    1.37 + *
    1.38 + * <p>To iterate over code points and strings, use a loop like this:
    1.39 + * <pre>
    1.40 + * UnicodeSetIterator it(set);
    1.41 + * while (it.next()) {
    1.42 + *     processItem(it.getString());
    1.43 + * }
    1.44 + * </pre>
    1.45 + * <p>Each item in the set is accessed as a string.  Set elements
    1.46 + *    consisting of single code points are returned as strings containing
    1.47 + *    just the one code point.
    1.48 + *
    1.49 + * <p>To iterate over code point ranges, instead of individual code points,
    1.50 + *    use a loop like this:
    1.51 + * <pre>
    1.52 + * UnicodeSetIterator it(set);
    1.53 + * while (it.nextRange()) {
    1.54 + *   if (it.isString()) {
    1.55 + *     processString(it.getString());
    1.56 + *   } else {
    1.57 + *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
    1.58 + *   }
    1.59 + * }
    1.60 + * </pre>
    1.61 + * @author M. Davis
    1.62 + * @stable ICU 2.4
    1.63 + */
    1.64 +class U_COMMON_API UnicodeSetIterator : public UObject {
    1.65 +
    1.66 + protected:
    1.67 +
    1.68 +    /**
    1.69 +     * Sentinel stored in <tt>codepoint</tt> when the iterator points to a string.
    1.70 +     * If <tt>codepoint == IS_STRING</tt>, then examine
    1.71 +     * <tt>string</tt> for the current iteration result.
    1.72 +     * @stable ICU 2.4
    1.73 +     */
    1.74 +    enum { IS_STRING = -1 };
    1.75 +
    1.76 +    /**
    1.77 +     * Current code point, or the special value <tt>IS_STRING</tt>, if
    1.78 +     * the iterator points to a string.
    1.79 +     * @stable ICU 2.4
    1.80 +     */
    1.81 +    UChar32 codepoint;
    1.82 +
    1.83 +    /**
    1.84 +     * When iterating over ranges using <tt>nextRange()</tt>,
    1.85 +     * <tt>codepointEnd</tt> contains the inclusive end of the
    1.86 +     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
    1.87 +     * iterating over code points using <tt>next()</tt>, or if
    1.88 +     * <tt>codepoint == IS_STRING</tt>, then the value of
    1.89 +     * <tt>codepointEnd</tt> is undefined.
    1.90 +     * @stable ICU 2.4
    1.91 +     */
    1.92 +    UChar32 codepointEnd;
    1.93 +
    1.94 +    /**
    1.95 +     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
    1.96 +     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
    1.97 +     * value of <tt>string</tt> is undefined.
    1.98 +     * @stable ICU 2.4
    1.99 +     */
   1.100 +    const UnicodeString* string;
   1.101 +
   1.102 + public:
   1.103 +
   1.104 +    /**
   1.105 +     * Create an iterator over the given set.  The iterator is valid
   1.106 +     * only so long as <tt>set</tt> is valid.
   1.107 +     * @param set set to iterate over
   1.108 +     * @stable ICU 2.4
   1.109 +     */
   1.110 +    UnicodeSetIterator(const UnicodeSet& set);
   1.111 +
   1.112 +    /**
   1.113 +     * Create an iterator over nothing.  <tt>next()</tt> and
   1.114 +     * <tt>nextRange()</tt> return false. This is a convenience
   1.115 +     * constructor allowing the target to be set later.
   1.116 +     * @stable ICU 2.4
   1.117 +     */
   1.118 +    UnicodeSetIterator();
   1.119 +
   1.120 +    /**
   1.121 +     * Destructor.
   1.122 +     * @stable ICU 2.4
   1.123 +     */
   1.124 +    virtual ~UnicodeSetIterator();
   1.125 +
   1.126 +    /**
   1.127 +     * Returns true if the current element is a string.  If so, the
   1.128 +     * caller can retrieve it with <tt>getString()</tt>.  If this
   1.129 +     * method returns false, the current element is a code point or
   1.130 +     * code point range, depending on whether <tt>next()</tt> or
   1.131 +     * <tt>nextRange()</tt> was called.
   1.132 +     * Elements of types string and codepoint can both be retrieved
   1.133 +     * with the function <tt>getString()</tt>.
   1.134 +     * Elements of type codepoint can also be retrieved with
   1.135 +     * <tt>getCodepoint()</tt>.
   1.136 +     * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
   1.137 +     * of the range, and <tt>getCodepointEnd()</tt> returns the end
   1.138 +     * of the range.
   1.139 +     * @stable ICU 2.4
   1.140 +     */
   1.141 +    inline UBool isString() const;
   1.142 +
   1.143 +    /**
   1.144 +     * Returns the current code point, if <tt>isString()</tt> returned
   1.145 +     * false.  Otherwise returns an undefined result.
   1.146 +     * @stable ICU 2.4
   1.147 +     */
   1.148 +    inline UChar32 getCodepoint() const;
   1.149 +
   1.150 +    /**
   1.151 +     * Returns the end of the current code point range, if
   1.152 +     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
   1.153 +     * called.  Otherwise returns an undefined result.
   1.154 +     * @stable ICU 2.4
   1.155 +     */
   1.156 +    inline UChar32 getCodepointEnd() const;
   1.157 +
   1.158 +    /**
   1.159 +     * Returns the current string, if <tt>isString()</tt> returned
   1.160 +     * true.  If the current iteration item is a code point, a UnicodeString
   1.161 +     * containing that single code point is returned.
   1.162 +     *
   1.163 +     * Ownership of the returned string remains with the iterator.
   1.164 +     * The string is guaranteed to remain valid only until the iterator is
   1.165 +     *   advanced to the next item, or until the iterator is deleted.
   1.166 +     *
   1.167 +     * @stable ICU 2.4
   1.168 +     */
   1.169 +    const UnicodeString& getString();
   1.170 +
   1.171 +    /**
   1.172 +     * Advances the iteration position to the next element in the set,
   1.173 +     * which can be either a single code point or a string.
   1.174 +     * If there are no more elements in the set, return false.
   1.175 +     *
   1.176 +     * <p>
   1.177 +     * If <tt>isString() == TRUE</tt>, the value is a
   1.178 +     * string, otherwise the value is a
   1.179 +     * single code point.  Elements of either type can be retrieved
   1.180 +     * with the function <tt>getString()</tt>, while elements
   1.181 +     * consisting of a single code point can also be retrieved with
   1.182 +     * <tt>getCodepoint()</tt>.
   1.183 +     *
   1.184 +     * <p>The order of iteration is all code points in sorted order,
   1.185 +     * followed by all strings in sorted order.  Do not mix
   1.186 +     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
   1.187 +     * calling <tt>reset()</tt> between them.  The results of doing so
   1.188 +     * are undefined.
   1.189 +     *
   1.190 +     * @return true if there was another element in the set.
   1.191 +     * @stable ICU 2.4
   1.192 +     */
   1.193 +    UBool next();
   1.194 +
   1.195 +    /**
   1.196 +     * Returns the next element in the set, either a code point range
   1.197 +     * or a string.  If there are no more elements in the set, return
   1.198 +     * false.  If <tt>isString() == TRUE</tt>, the value is a
   1.199 +     * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
   1.200 +     * range of one or more code points from <tt>getCodepoint()</tt> to
   1.201 +     * <tt>getCodepointEnd()</tt> inclusive.
   1.202 +     *
   1.203 +     * <p>The order of iteration is all code point ranges in sorted
   1.204 +     * order, followed by all strings in sorted order.  Ranges are
   1.205 +     * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
   1.206 +     * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
   1.207 +     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
   1.208 +     * <tt>reset()</tt> between them.  The results of doing so are
   1.209 +     * undefined.
   1.210 +     *
   1.211 +     * @return true if there was another element in the set.
   1.212 +     * @stable ICU 2.4
   1.213 +     */
   1.214 +    UBool nextRange();
   1.215 +
   1.216 +    /**
   1.217 +     * Sets this iterator to visit the elements of the given set and
   1.218 +     * resets it to the start of that set.  The iterator is valid only
   1.219 +     * so long as <tt>set</tt> is valid.
   1.220 +     * @param set the set to iterate over.
   1.221 +     * @stable ICU 2.4
   1.222 +     */
   1.223 +    void reset(const UnicodeSet& set);
   1.224 +
   1.225 +    /**
   1.226 +     * Resets this iterator to the start of the set.
   1.227 +     * @stable ICU 2.4
   1.228 +     */
   1.229 +    void reset();
   1.230 +
   1.231 +    /**
   1.232 +     * ICU "poor man's RTTI", returns a UClassID for this class.
   1.233 +     *
   1.234 +     * @stable ICU 2.4
   1.235 +     */
   1.236 +    static UClassID U_EXPORT2 getStaticClassID();
   1.237 +
   1.238 +    /**
   1.239 +     * ICU "poor man's RTTI", returns a UClassID for the actual class.
   1.240 +     *
   1.241 +     * @stable ICU 2.4
   1.242 +     */
   1.243 +    virtual UClassID getDynamicClassID() const;
   1.244 +
   1.245 +    // ======================= PRIVATES ===========================
   1.246 +
   1.247 + protected:
   1.248 +
   1.249 +    // endElement and nextElement are really UChar32's, but we keep
   1.250 +    // them as signed int32_t's so we can do comparisons with
   1.251 +    // endElement set to -1.  Leave them as int32_t's.
   1.252 +    /** The set being iterated over.
   1.253 +     * @stable ICU 2.4
   1.254 +     */
   1.255 +    const UnicodeSet* set;
   1.256 +    /** End range
   1.257 +     * @stable ICU 2.4
   1.258 +     */
   1.259 +    int32_t endRange;
   1.260 +    /** Range
   1.261 +     * @stable ICU 2.4
   1.262 +     */
   1.263 +    int32_t range;
   1.264 +    /** End element
   1.265 +     * @stable ICU 2.4
   1.266 +     */
   1.267 +    int32_t endElement;
   1.268 +    /** Next element
   1.269 +     * @stable ICU 2.4
   1.270 +     */
   1.271 +    int32_t nextElement;
   1.272 +    //UBool abbreviated;
   1.273 +    /** Next string
   1.274 +     * @stable ICU 2.4
   1.275 +     */
   1.276 +    int32_t nextString;
   1.277 +    /** String count
   1.278 +     * @stable ICU 2.4
   1.279 +     */
   1.280 +    int32_t stringCount;
   1.281 +
   1.282 +    /**
   1.283 +     *  Points to the string to use when the caller asks for a
   1.284 +     *  string and the current iteration item is a code point, not a string.
   1.285 +     *  @internal
   1.286 +     */
   1.287 +    UnicodeString *cpString;
   1.288 +
   1.289 +    /** Copy constructor. Disallowed.
   1.290 +     * @stable ICU 2.4
   1.291 +     */
   1.292 +    UnicodeSetIterator(const UnicodeSetIterator&); // disallow
   1.293 +
   1.294 +    /** Assignment operator. Disallowed.
   1.295 +     * @stable ICU 2.4
   1.296 +     */
   1.297 +    UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
   1.298 +
   1.299 +    /** Load range
   1.300 +     * @stable ICU 2.4
   1.301 +     */
   1.302 +    virtual void loadRange(int32_t range);
   1.303 +
   1.304 +};
   1.305 +
   1.306 +inline UBool UnicodeSetIterator::isString() const {  // true when the current element is a string
   1.307 +    return codepoint == (UChar32)IS_STRING;  // IS_STRING (-1) is the sentinel kept in codepoint for strings
   1.308 +}
   1.309 +
   1.310 +inline UChar32 UnicodeSetIterator::getCodepoint() const {  // current code point, or IS_STRING for a string element
   1.311 +    return codepoint;
   1.312 +}
   1.313 +
   1.314 +inline UChar32 UnicodeSetIterator::getCodepointEnd() const {  // inclusive range end; defined only after nextRange() on a non-string element
   1.315 +    return codepointEnd;
   1.316 +}
   1.317 +
   1.318 +
   1.319 +U_NAMESPACE_END
   1.320 +
   1.321 +#endif

mercurial