Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* |
michael@0 | 2 | ********************************************************************** |
michael@0 | 3 | * Copyright (c) 2003-2011, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ********************************************************************** |
michael@0 | 6 | * Author: Alan Liu |
michael@0 | 7 | * Created: September 24 2003 |
michael@0 | 8 | * Since: ICU 2.8 |
michael@0 | 9 | ********************************************************************** |
michael@0 | 10 | */ |
michael@0 | 11 | #include "ruleiter.h" |
michael@0 | 12 | #include "unicode/parsepos.h" |
michael@0 | 13 | #include "unicode/symtable.h" |
michael@0 | 14 | #include "unicode/unistr.h" |
michael@0 | 15 | #include "unicode/utf16.h" |
michael@0 | 16 | #include "patternprops.h" |
michael@0 | 17 | |
/*
 * Upper bound, in UTF-16 code units, on the text handed to the unescaper
 * when next() meets a backslash. The longest notations considered are
 * \U87654321 and a surrogate pair spelled as \ud800\udc00.
 */
#define MAX_U_NOTATION_LEN 12
michael@0 | 20 | |
michael@0 | 21 | U_NAMESPACE_BEGIN |
michael@0 | 22 | |
michael@0 | 23 | RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, |
michael@0 | 24 | ParsePosition& thePos) : |
michael@0 | 25 | text(theText), |
michael@0 | 26 | pos(thePos), |
michael@0 | 27 | sym(theSym), |
michael@0 | 28 | buf(0), |
michael@0 | 29 | bufPos(0) |
michael@0 | 30 | {} |
michael@0 | 31 | |
michael@0 | 32 | UBool RuleCharacterIterator::atEnd() const { |
michael@0 | 33 | return buf == 0 && pos.getIndex() == text.length(); |
michael@0 | 34 | } |
michael@0 | 35 | |
michael@0 | 36 | UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { |
michael@0 | 37 | if (U_FAILURE(ec)) return DONE; |
michael@0 | 38 | |
michael@0 | 39 | UChar32 c = DONE; |
michael@0 | 40 | isEscaped = FALSE; |
michael@0 | 41 | |
michael@0 | 42 | for (;;) { |
michael@0 | 43 | c = _current(); |
michael@0 | 44 | _advance(U16_LENGTH(c)); |
michael@0 | 45 | |
michael@0 | 46 | if (c == SymbolTable::SYMBOL_REF && buf == 0 && |
michael@0 | 47 | (options & PARSE_VARIABLES) != 0 && sym != 0) { |
michael@0 | 48 | UnicodeString name = sym->parseReference(text, pos, text.length()); |
michael@0 | 49 | // If name is empty there was an isolated SYMBOL_REF; |
michael@0 | 50 | // return it. Caller must be prepared for this. |
michael@0 | 51 | if (name.length() == 0) { |
michael@0 | 52 | break; |
michael@0 | 53 | } |
michael@0 | 54 | bufPos = 0; |
michael@0 | 55 | buf = sym->lookup(name); |
michael@0 | 56 | if (buf == 0) { |
michael@0 | 57 | ec = U_UNDEFINED_VARIABLE; |
michael@0 | 58 | return DONE; |
michael@0 | 59 | } |
michael@0 | 60 | // Handle empty variable value |
michael@0 | 61 | if (buf->length() == 0) { |
michael@0 | 62 | buf = 0; |
michael@0 | 63 | } |
michael@0 | 64 | continue; |
michael@0 | 65 | } |
michael@0 | 66 | |
michael@0 | 67 | if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { |
michael@0 | 68 | continue; |
michael@0 | 69 | } |
michael@0 | 70 | |
michael@0 | 71 | if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { |
michael@0 | 72 | UnicodeString tempEscape; |
michael@0 | 73 | int32_t offset = 0; |
michael@0 | 74 | c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); |
michael@0 | 75 | jumpahead(offset); |
michael@0 | 76 | isEscaped = TRUE; |
michael@0 | 77 | if (c < 0) { |
michael@0 | 78 | ec = U_MALFORMED_UNICODE_ESCAPE; |
michael@0 | 79 | return DONE; |
michael@0 | 80 | } |
michael@0 | 81 | } |
michael@0 | 82 | |
michael@0 | 83 | break; |
michael@0 | 84 | } |
michael@0 | 85 | |
michael@0 | 86 | return c; |
michael@0 | 87 | } |
michael@0 | 88 | |
michael@0 | 89 | void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { |
michael@0 | 90 | p.buf = buf; |
michael@0 | 91 | p.pos = pos.getIndex(); |
michael@0 | 92 | p.bufPos = bufPos; |
michael@0 | 93 | } |
michael@0 | 94 | |
michael@0 | 95 | void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { |
michael@0 | 96 | buf = p.buf; |
michael@0 | 97 | pos.setIndex(p.pos); |
michael@0 | 98 | bufPos = p.bufPos; |
michael@0 | 99 | } |
michael@0 | 100 | |
michael@0 | 101 | void RuleCharacterIterator::skipIgnored(int32_t options) { |
michael@0 | 102 | if ((options & SKIP_WHITESPACE) != 0) { |
michael@0 | 103 | for (;;) { |
michael@0 | 104 | UChar32 a = _current(); |
michael@0 | 105 | if (!PatternProps::isWhiteSpace(a)) break; |
michael@0 | 106 | _advance(U16_LENGTH(a)); |
michael@0 | 107 | } |
michael@0 | 108 | } |
michael@0 | 109 | } |
michael@0 | 110 | |
michael@0 | 111 | UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { |
michael@0 | 112 | if (maxLookAhead < 0) { |
michael@0 | 113 | maxLookAhead = 0x7FFFFFFF; |
michael@0 | 114 | } |
michael@0 | 115 | if (buf != 0) { |
michael@0 | 116 | buf->extract(bufPos, maxLookAhead, result); |
michael@0 | 117 | } else { |
michael@0 | 118 | text.extract(pos.getIndex(), maxLookAhead, result); |
michael@0 | 119 | } |
michael@0 | 120 | return result; |
michael@0 | 121 | } |
michael@0 | 122 | |
michael@0 | 123 | void RuleCharacterIterator::jumpahead(int32_t count) { |
michael@0 | 124 | _advance(count); |
michael@0 | 125 | } |
michael@0 | 126 | |
michael@0 | 127 | /* |
michael@0 | 128 | UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { |
michael@0 | 129 | int32_t b = pos.getIndex(); |
michael@0 | 130 | text.extract(0, b, result); |
michael@0 | 131 | return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index |
michael@0 | 132 | } |
michael@0 | 133 | */ |
michael@0 | 134 | |
michael@0 | 135 | UChar32 RuleCharacterIterator::_current() const { |
michael@0 | 136 | if (buf != 0) { |
michael@0 | 137 | return buf->char32At(bufPos); |
michael@0 | 138 | } else { |
michael@0 | 139 | int i = pos.getIndex(); |
michael@0 | 140 | return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; |
michael@0 | 141 | } |
michael@0 | 142 | } |
michael@0 | 143 | |
michael@0 | 144 | void RuleCharacterIterator::_advance(int32_t count) { |
michael@0 | 145 | if (buf != 0) { |
michael@0 | 146 | bufPos += count; |
michael@0 | 147 | if (bufPos == buf->length()) { |
michael@0 | 148 | buf = 0; |
michael@0 | 149 | } |
michael@0 | 150 | } else { |
michael@0 | 151 | pos.setIndex(pos.getIndex() + count); |
michael@0 | 152 | if (pos.getIndex() > text.length()) { |
michael@0 | 153 | pos.setIndex(text.length()); |
michael@0 | 154 | } |
michael@0 | 155 | } |
michael@0 | 156 | } |
michael@0 | 157 | |
michael@0 | 158 | U_NAMESPACE_END |
michael@0 | 159 | |
michael@0 | 160 | //eof |