intl/icu/source/common/ruleiter.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/ruleiter.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,231 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +* Copyright (c) 2003-2011, International Business Machines
     1.7 +* Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +* Author: Alan Liu
    1.10 +* Created: September 24 2003
    1.11 +* Since: ICU 2.8
    1.12 +**********************************************************************
    1.13 +*/
    1.14 +#ifndef _RULEITER_H_
    1.15 +#define _RULEITER_H_
    1.16 +
    1.17 +#include "unicode/uobject.h"
    1.18 +
    1.19 +U_NAMESPACE_BEGIN
    1.20 +
    1.21 +class UnicodeString;
    1.22 +class ParsePosition;
    1.23 +class SymbolTable;
    1.24 +
    1.25 +/**
    1.26 + * An iterator that returns 32-bit code points.  This class is deliberately
    1.27 + * <em>not</em> related to any of the ICU character iterator classes
    1.28 + * in order to minimize complexity.
    1.29 + * @author Alan Liu
    1.30 + * @since ICU 2.8
    1.31 + */
    1.32 +class RuleCharacterIterator : public UMemory {
    1.33 +
    1.34 +    // TODO: Ideas for later.  (Do not implement if not needed, lest the
    1.35 +    // code coverage numbers go down due to unused methods.)
    1.36 +    // 1. Add a copy constructor, operator==() method.
    1.37 +    // 2. Rather than return DONE, throw an exception if the end
    1.38 +    // is reached -- this is an alternate usage model, probably not useful.
    1.39 +
    1.40 +private:
    1.41 +    /**
    1.42 +     * Text being iterated.
    1.43 +     */    
    1.44 +    const UnicodeString& text;
    1.45 +
    1.46 +    /**
    1.47 +     * Position of iterator.
    1.48 +     */
    1.49 +    ParsePosition& pos;
    1.50 +
    1.51 +    /**
    1.52 +     * Symbol table used to parse and dereference variables.  May be 0.
    1.53 +     */
    1.54 +    const SymbolTable* sym;
    1.55 +    
    1.56 +    /**
    1.57 +     * Current variable expansion, or 0 if none.
    1.58 +     */
    1.59 +    const UnicodeString* buf;
    1.60 +
    1.61 +    /**
    1.62 +     * Position within buf.  Meaningless if buf == 0.
    1.63 +     */
    1.64 +    int32_t bufPos;
    1.65 +
    1.66 +public:
    1.67 +    /**
    1.68 +     * Value returned when there are no more characters to iterate.
    1.69 +     */
    1.70 +    enum { DONE = -1 };
    1.71 +
    1.72 +    /**
    1.73 +     * Bitmask option to enable parsing of variable names.  If (options &
    1.74 +     * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to
    1.75 +     * its value.  Variables are parsed using the SymbolTable API.
    1.76 +     */
    1.77 +    enum { PARSE_VARIABLES = 1 };
    1.78 +
    1.79 +    /**
    1.80 +     * Bitmask option to enable parsing of escape sequences.  If (options &
    1.81 +     * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
    1.82 +     * to its value.  Escapes are parsed using Utility.unescapeAt().
    1.83 +     */
    1.84 +    enum { PARSE_ESCAPES   = 2 };
    1.85 +
    1.86 +    /**
    1.87 +     * Bitmask option to enable skipping of whitespace.  If (options &
    1.88 +     * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently
    1.89 +     * skipped, as if they were not present in the input.
    1.90 +     */
    1.91 +    enum { SKIP_WHITESPACE = 4 };
    1.92 +
    1.93 +    /**
    1.94 +     * Constructs an iterator over the given text, starting at the given
    1.95 +     * position.
    1.96 +     * @param text the text to be iterated
    1.97 +     * @param sym the symbol table, or null if there is none.  If sym is null,
    1.98 +     * then variables will not be dereferenced, even if the PARSE_VARIABLES
    1.99 +     * option is set.
   1.100 +     * @param pos upon input, the index of the next character to return.  If a
   1.101 +     * variable has been dereferenced, then pos will <em>not</em> increment as
   1.102 +     * characters of the variable value are iterated.
   1.103 +     */
   1.104 +    RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym,
   1.105 +                          ParsePosition& pos);
   1.106 +    
   1.107 +    /**
   1.108 +     * Returns true if this iterator has no more characters to return.
   1.109 +     */
   1.110 +    UBool atEnd() const;
   1.111 +
   1.112 +    /**
   1.113 +     * Returns the next character using the given options, or DONE if there
   1.114 +     * are no more characters, and advances the position to the next
   1.115 +     * character.
   1.116 +     * @param options one or more of the following options, bitwise-OR-ed
   1.117 +     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
   1.118 +     * @param isEscaped output parameter set to TRUE if the character
   1.119 +     * was escaped
   1.120 +     * @param ec input-output error code.  An error will only be set by
   1.121 +     * this routine if options includes PARSE_VARIABLES and an unknown
   1.122 +     * variable name is seen, or if options includes PARSE_ESCAPES and
   1.123 +     * an invalid escape sequence is seen.
   1.124 +     * @return the current 32-bit code point, or DONE
   1.125 +     */
   1.126 +    UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec);
   1.127 +
   1.128 +    /**
   1.129 +     * Returns true if this iterator is currently within a variable expansion.
   1.130 +     */
   1.131 +    inline UBool inVariable() const;
   1.132 +
   1.133 +    /**
   1.134 +     * An opaque object representing the position of a RuleCharacterIterator.
   1.135 +     */
   1.136 +    struct Pos : public UMemory {
   1.137 +    private:
   1.138 +        const UnicodeString* buf;
   1.139 +        int32_t pos;
   1.140 +        int32_t bufPos;
   1.141 +        friend class RuleCharacterIterator;
   1.142 +    };
   1.143 +
   1.144 +    /**
   1.145 +     * Sets an object which, when later passed to setPos(), will
   1.146 +     * restore this iterator's position.  Usage idiom:
   1.147 +     *
   1.148 +     * RuleCharacterIterator iterator = ...;
   1.149 +     * RuleCharacterIterator::Pos pos;
   1.150 +     * iterator.getPos(pos);
   1.151 +     * for (;;) {
   1.152 +     *   iterator.getPos(pos);
   1.153 +     *   int c = iterator.next(...);
   1.154 +     *   ...
   1.155 +     * }
   1.156 +     * iterator.setPos(pos);
   1.157 +     *
   1.158 +     * @param p a position object to be set to this iterator's
   1.159 +     * current position.
   1.160 +     */
   1.161 +    void getPos(Pos& p) const;
   1.162 +
   1.163 +    /**
   1.164 +     * Restores this iterator to the position it had when getPos()
   1.165 +     * set the given object.
   1.166 +     * @param p a position object previously set by getPos()
   1.167 +     */
   1.168 +    void setPos(const Pos& p);
   1.169 +
   1.170 +    /**
   1.171 +     * Skips ahead past any ignored characters, as indicated by the given
   1.172 +     * options.  This is useful in conjunction with the lookahead() method.
   1.173 +     *
   1.174 +     * Currently, this only has an effect for SKIP_WHITESPACE.
   1.175 +     * @param options one or more of the following options, bitwise-OR-ed
   1.176 +     * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
   1.177 +     */
   1.178 +    void skipIgnored(int32_t options);
   1.179 +
   1.180 +    /**
   1.181 +     * Returns a string containing the remainder of the characters to be
   1.182 +     * returned by this iterator, without any option processing.  If the
   1.183 +     * iterator is currently within a variable expansion, this will only
   1.184 +     * extend to the end of the variable expansion.  This method is provided
   1.185 +     * so that iterators may interoperate with string-based APIs.  The typical
   1.186 +     * sequence of calls is to call skipIgnored(), then call lookahead(), then
   1.187 +     * parse the string returned by lookahead(), then call jumpahead() to
   1.188 +     * resynchronize the iterator.
   1.189 +     * @param result a string to receive the characters to be returned
   1.190 +     * by future calls to next()
   1.191 +     * @param maxLookAhead The maximum to copy into the result.
   1.192 +     * @return a reference to result
   1.193 +     */
   1.194 +    UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const;
   1.195 +
   1.196 +    /**
   1.197 +     * Advances the position by the given number of 16-bit code units.
   1.198 +     * This is useful in conjunction with the lookahead() method.
   1.199 +     * @param count the number of 16-bit code units to jump over
   1.200 +     */
   1.201 +    void jumpahead(int32_t count);
   1.202 +
   1.203 +    /**
   1.204 +     * Returns a string representation of this object, consisting of the
   1.205 +     * characters being iterated, with a '|' marking the current position.
   1.206 +     * Position within an expanded variable is <em>not</em> indicated.
   1.207 +     * @param result output parameter to receive a string
   1.208 +     * representation of this object
   1.209 +     */
   1.210 +//    UnicodeString& toString(UnicodeString& result) const;
   1.211 +    
   1.212 +private:
   1.213 +    /**
   1.214 +     * Returns the current 32-bit code point without parsing escapes, parsing
   1.215 +     * variables, or skipping whitespace.
   1.216 +     * @return the current 32-bit code point
   1.217 +     */
   1.218 +    UChar32 _current() const;
   1.219 +    
   1.220 +    /**
   1.221 +     * Advances the position by the given amount.
   1.222 +     * @param count the number of 16-bit code units to advance past
   1.223 +     */
   1.224 +    void _advance(int32_t count);
   1.225 +};
   1.226 +
   1.227 +inline UBool RuleCharacterIterator::inVariable() const {
   1.228 +    return buf != 0;
   1.229 +}
   1.230 +
   1.231 +U_NAMESPACE_END
   1.232 +
   1.233 +#endif // _RULEITER_H_
   1.234 +//eof

mercurial