Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ********************************************************************** |
michael@0 | 3 | * Copyright (c) 2003-2011, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ********************************************************************** |
michael@0 | 6 | * Author: Alan Liu |
michael@0 | 7 | * Created: September 24 2003 |
michael@0 | 8 | * Since: ICU 2.8 |
michael@0 | 9 | ********************************************************************** |
michael@0 | 10 | */ |
michael@0 | 11 | #ifndef _RULEITER_H_ |
michael@0 | 12 | #define _RULEITER_H_ |
michael@0 | 13 | |
michael@0 | 14 | #include "unicode/uobject.h" |
michael@0 | 15 | |
michael@0 | 16 | U_NAMESPACE_BEGIN |
michael@0 | 17 | |
michael@0 | 18 | class UnicodeString; |
michael@0 | 19 | class ParsePosition; |
michael@0 | 20 | class SymbolTable; |
michael@0 | 21 | |
michael@0 | 22 | /** |
michael@0 | 23 | * An iterator that returns 32-bit code points. This class is deliberately |
michael@0 | 24 | * <em>not</em> related to any of the ICU character iterator classes |
michael@0 | 25 | * in order to minimize complexity. |
michael@0 | 26 | * @author Alan Liu |
michael@0 | 27 | * @since ICU 2.8 |
michael@0 | 28 | */ |
michael@0 | 29 | class RuleCharacterIterator : public UMemory { |
michael@0 | 30 | |
michael@0 | 31 | // TODO: Ideas for later. (Do not implement if not needed, lest the |
michael@0 | 32 | // code coverage numbers go down due to unused methods.) |
michael@0 | 33 | // 1. Add a copy constructor, operator==() method. |
michael@0 | 34 | // 2. Rather than return DONE, throw an exception if the end |
michael@0 | 35 | // is reached -- this is an alternate usage model, probably not useful. |
michael@0 | 36 | |
michael@0 | 37 | private: |
michael@0 | 38 | /** |
michael@0 | 39 | * Text being iterated (held by reference, not copied). |
michael@0 | 40 | */ |
michael@0 | 41 | const UnicodeString& text; |
michael@0 | 42 | |
michael@0 | 43 | /** |
michael@0 | 44 | * Position of iterator (held by reference). |
michael@0 | 45 | */ |
michael@0 | 46 | ParsePosition& pos; |
michael@0 | 47 | |
michael@0 | 48 | /** |
michael@0 | 49 | * Symbol table used to parse and dereference variables. May be 0. |
michael@0 | 50 | */ |
michael@0 | 51 | const SymbolTable* sym; |
michael@0 | 52 | |
michael@0 | 53 | /** |
michael@0 | 54 | * Current variable expansion, or 0 if none. |
michael@0 | 55 | */ |
michael@0 | 56 | const UnicodeString* buf; |
michael@0 | 57 | |
michael@0 | 58 | /** |
michael@0 | 59 | * Position within buf. Meaningless if buf == 0. |
michael@0 | 60 | */ |
michael@0 | 61 | int32_t bufPos; |
michael@0 | 62 | |
michael@0 | 63 | public: |
michael@0 | 64 | /** |
michael@0 | 65 | * Value returned when there are no more characters to iterate. |
michael@0 | 66 | */ |
michael@0 | 67 | enum { DONE = -1 }; |
michael@0 | 68 | |
michael@0 | 69 | /** |
michael@0 | 70 | * Bitmask option to enable parsing of variable names. If (options & |
michael@0 | 71 | * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to |
michael@0 | 72 | * its value. Variables are parsed using the SymbolTable API. |
michael@0 | 73 | */ |
michael@0 | 74 | enum { PARSE_VARIABLES = 1 }; |
michael@0 | 75 | |
michael@0 | 76 | /** |
michael@0 | 77 | * Bitmask option to enable parsing of escape sequences. If (options & |
michael@0 | 78 | * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded |
michael@0 | 79 | * to its value. Escapes are parsed using Utility.unescapeAt(). |
michael@0 | 80 | */ |
michael@0 | 81 | enum { PARSE_ESCAPES = 2 }; |
michael@0 | 82 | |
michael@0 | 83 | /** |
michael@0 | 84 | * Bitmask option to enable skipping of whitespace. If (options & |
michael@0 | 85 | * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently |
michael@0 | 86 | * skipped, as if they were not present in the input. |
michael@0 | 87 | */ |
michael@0 | 88 | enum { SKIP_WHITESPACE = 4 }; |
michael@0 | 89 | |
michael@0 | 90 | /** |
michael@0 | 91 | * Constructs an iterator over the given text, starting at the given |
michael@0 | 92 | * position. |
michael@0 | 93 | * @param text the text to be iterated |
michael@0 | 94 | * @param sym the symbol table, or null if there is none. If sym is null, |
michael@0 | 95 | * then variables will not be dereferenced, even if the PARSE_VARIABLES |
michael@0 | 96 | * option is set. |
michael@0 | 97 | * @param pos upon input, the index of the next character to return. If a |
michael@0 | 98 | * variable has been dereferenced, then pos will <em>not</em> increment as |
michael@0 | 99 | * characters of the variable value are iterated. |
michael@0 | 100 | */ |
michael@0 | 101 | RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym, |
michael@0 | 102 | ParsePosition& pos); |
michael@0 | 103 | |
michael@0 | 104 | /** |
michael@0 | 105 | * Returns true if this iterator has no more characters to return. |
michael@0 | 106 | */ |
michael@0 | 107 | UBool atEnd() const; |
michael@0 | 108 | |
michael@0 | 109 | /** |
michael@0 | 110 | * Returns the next character using the given options, or DONE if there |
michael@0 | 111 | * are no more characters, and advance the position to the next |
michael@0 | 112 | * character. |
michael@0 | 113 | * @param options one or more of the following options, bitwise-OR-ed |
michael@0 | 114 | * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE. |
michael@0 | 115 | * @param isEscaped output parameter set to TRUE if the character |
michael@0 | 116 | * was escaped |
michael@0 | 117 | * @param ec input-output error code. An error will only be set by |
michael@0 | 118 | * this routine if options includes PARSE_VARIABLES and an unknown |
michael@0 | 119 | * variable name is seen, or if options includes PARSE_ESCAPES and |
michael@0 | 120 | * an invalid escape sequence is seen. |
michael@0 | 121 | * @return the current 32-bit code point, or DONE |
michael@0 | 122 | */ |
michael@0 | 123 | UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec); |
michael@0 | 124 | |
michael@0 | 125 | /** |
michael@0 | 126 | * Returns true if this iterator is currently within a variable expansion. |
michael@0 | 127 | */ |
michael@0 | 128 | inline UBool inVariable() const; |
michael@0 | 129 | |
michael@0 | 130 | /** |
michael@0 | 131 | * An opaque object representing the position of a RuleCharacterIterator. Its fields are private; only the friend class RuleCharacterIterator can access them. |
michael@0 | 132 | */ |
michael@0 | 133 | struct Pos : public UMemory { |
michael@0 | 134 | private: |
michael@0 | 135 | const UnicodeString* buf; |
michael@0 | 136 | int32_t pos; |
michael@0 | 137 | int32_t bufPos; |
michael@0 | 138 | friend class RuleCharacterIterator; |
michael@0 | 139 | }; |
michael@0 | 140 | |
michael@0 | 141 | /** |
michael@0 | 142 | * Sets an object which, when later passed to setPos(), will |
michael@0 | 143 | * restore this iterator's position. Usage idiom: |
michael@0 | 144 | * |
michael@0 | 145 | * RuleCharacterIterator iterator = ...; |
michael@0 | 146 | * RuleCharacterIterator::Pos pos; |
michael@0 | 147 | * iterator.getPos(pos); |
michael@0 | 148 | * for (;;) { |
michael@0 | 149 | * iterator.getPos(pos); |
michael@0 | 150 | * UChar32 c = iterator.next(...); |
michael@0 | 151 | * ... |
michael@0 | 152 | * } |
michael@0 | 153 | * iterator.setPos(pos); |
michael@0 | 154 | * |
michael@0 | 155 | * @param p a position object to be set to this iterator's |
michael@0 | 156 | * current position. |
michael@0 | 157 | */ |
michael@0 | 158 | void getPos(Pos& p) const; |
michael@0 | 159 | |
michael@0 | 160 | /** |
michael@0 | 161 | * Restores this iterator to the position it had when getPos() |
michael@0 | 162 | * set the given object. |
michael@0 | 163 | * @param p a position object previously set by getPos() |
michael@0 | 164 | */ |
michael@0 | 165 | void setPos(const Pos& p); |
michael@0 | 166 | |
michael@0 | 167 | /** |
michael@0 | 168 | * Skips ahead past any ignored characters, as indicated by the given |
michael@0 | 169 | * options. This is useful in conjunction with the lookahead() method. |
michael@0 | 170 | * |
michael@0 | 171 | * Currently, this only has an effect for SKIP_WHITESPACE. |
michael@0 | 172 | * @param options one or more of the following options, bitwise-OR-ed |
michael@0 | 173 | * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE. |
michael@0 | 174 | */ |
michael@0 | 175 | void skipIgnored(int32_t options); |
michael@0 | 176 | |
michael@0 | 177 | /** |
michael@0 | 178 | * Returns a string containing the remainder of the characters to be |
michael@0 | 179 | * returned by this iterator, without any option processing. If the |
michael@0 | 180 | * iterator is currently within a variable expansion, this will only |
michael@0 | 181 | * extend to the end of the variable expansion. This method is provided |
michael@0 | 182 | * so that iterators may interoperate with string-based APIs. The typical |
michael@0 | 183 | * sequence of calls is to call skipIgnored(), then call lookahead(), then |
michael@0 | 184 | * parse the string returned by lookahead(), then call jumpahead() to |
michael@0 | 185 | * resynchronize the iterator. |
michael@0 | 186 | * @param result a string to receive the characters to be returned |
michael@0 | 187 | * by future calls to next() |
michael@0 | 188 | * @param maxLookAhead The maximum to copy into the result. Defaults to -1 (NOTE(review): presumably meaning "no limit", counted in 16-bit code units -- confirm against the implementation). |
michael@0 | 189 | * @return a reference to result |
michael@0 | 190 | */ |
michael@0 | 191 | UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const; |
michael@0 | 192 | |
michael@0 | 193 | /** |
michael@0 | 194 | * Advances the position by the given number of 16-bit code units. |
michael@0 | 195 | * This is useful in conjunction with the lookahead() method. |
michael@0 | 196 | * @param count the number of 16-bit code units to jump over |
michael@0 | 197 | */ |
michael@0 | 198 | void jumpahead(int32_t count); |
michael@0 | 199 | |
michael@0 | 200 | /** |
michael@0 | 201 | * Returns a string representation of this object, consisting of the |
michael@0 | 202 | * characters being iterated, with a '|' marking the current position. |
michael@0 | 203 | * Position within an expanded variable is <em>not</em> indicated. |
michael@0 | 204 | * @param result output parameter to receive a string |
michael@0 | 205 | * representation of this object. NOTE: the declaration below is currently commented out. |
michael@0 | 206 | */ |
michael@0 | 207 | // UnicodeString& toString(UnicodeString& result) const; |
michael@0 | 208 | |
michael@0 | 209 | private: |
michael@0 | 210 | /** |
michael@0 | 211 | * Returns the current 32-bit code point without parsing escapes, parsing |
michael@0 | 212 | * variables, or skipping whitespace. |
michael@0 | 213 | * @return the current 32-bit code point |
michael@0 | 214 | */ |
michael@0 | 215 | UChar32 _current() const; |
michael@0 | 216 | |
michael@0 | 217 | /** |
michael@0 | 218 | * Advances the position by the given amount. |
michael@0 | 219 | * @param count the number of 16-bit code units to advance past |
michael@0 | 220 | */ |
michael@0 | 221 | void _advance(int32_t count); |
michael@0 | 222 | }; |
michael@0 | 223 | |
michael@0 | 224 | inline UBool RuleCharacterIterator::inVariable() const { |
michael@0 | 225 | /* A non-null buf marks an active variable expansion. */ const UBool expanding = (0 != buf); return expanding; |
michael@0 | 226 | } |
michael@0 | 227 | |
michael@0 | 228 | U_NAMESPACE_END |
michael@0 | 229 | |
michael@0 | 230 | #endif // _RULEITER_H_ |
michael@0 | 231 | //eof |