intl/icu/source/common/ruleiter.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (c) 2003-2011, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * Author: Alan Liu
michael@0 7 * Created: September 24 2003
michael@0 8 * Since: ICU 2.8
michael@0 9 **********************************************************************
michael@0 10 */
michael@0 11 #include "ruleiter.h"
michael@0 12 #include "unicode/parsepos.h"
michael@0 13 #include "unicode/symtable.h"
michael@0 14 #include "unicode/unistr.h"
michael@0 15 #include "unicode/utf16.h"
michael@0 16 #include "patternprops.h"
michael@0 17
michael@0 18 /* Maximum number of UTF-16 code units of lookahead that next() hands to unescapeAt() after a backslash; sized for long escape forms, e.g. \U87654321 or \ud800\udc00 */
michael@0 19 #define MAX_U_NOTATION_LEN 12
michael@0 20
michael@0 21 U_NAMESPACE_BEGIN
michael@0 22
michael@0 23 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
michael@0 24 ParsePosition& thePos) :
michael@0 25 text(theText),
michael@0 26 pos(thePos),
michael@0 27 sym(theSym),
michael@0 28 buf(0),
michael@0 29 bufPos(0)
michael@0 30 {}
michael@0 31
michael@0 32 UBool RuleCharacterIterator::atEnd() const {
michael@0 33 return buf == 0 && pos.getIndex() == text.length();
michael@0 34 }
michael@0 35
michael@0 36 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
michael@0 37 if (U_FAILURE(ec)) return DONE;
michael@0 38
michael@0 39 UChar32 c = DONE;
michael@0 40 isEscaped = FALSE;
michael@0 41
michael@0 42 for (;;) {
michael@0 43 c = _current();
michael@0 44 _advance(U16_LENGTH(c));
michael@0 45
michael@0 46 if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
michael@0 47 (options & PARSE_VARIABLES) != 0 && sym != 0) {
michael@0 48 UnicodeString name = sym->parseReference(text, pos, text.length());
michael@0 49 // If name is empty there was an isolated SYMBOL_REF;
michael@0 50 // return it. Caller must be prepared for this.
michael@0 51 if (name.length() == 0) {
michael@0 52 break;
michael@0 53 }
michael@0 54 bufPos = 0;
michael@0 55 buf = sym->lookup(name);
michael@0 56 if (buf == 0) {
michael@0 57 ec = U_UNDEFINED_VARIABLE;
michael@0 58 return DONE;
michael@0 59 }
michael@0 60 // Handle empty variable value
michael@0 61 if (buf->length() == 0) {
michael@0 62 buf = 0;
michael@0 63 }
michael@0 64 continue;
michael@0 65 }
michael@0 66
michael@0 67 if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
michael@0 68 continue;
michael@0 69 }
michael@0 70
michael@0 71 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
michael@0 72 UnicodeString tempEscape;
michael@0 73 int32_t offset = 0;
michael@0 74 c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
michael@0 75 jumpahead(offset);
michael@0 76 isEscaped = TRUE;
michael@0 77 if (c < 0) {
michael@0 78 ec = U_MALFORMED_UNICODE_ESCAPE;
michael@0 79 return DONE;
michael@0 80 }
michael@0 81 }
michael@0 82
michael@0 83 break;
michael@0 84 }
michael@0 85
michael@0 86 return c;
michael@0 87 }
michael@0 88
michael@0 89 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
michael@0 90 p.buf = buf;
michael@0 91 p.pos = pos.getIndex();
michael@0 92 p.bufPos = bufPos;
michael@0 93 }
michael@0 94
michael@0 95 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
michael@0 96 buf = p.buf;
michael@0 97 pos.setIndex(p.pos);
michael@0 98 bufPos = p.bufPos;
michael@0 99 }
michael@0 100
michael@0 101 void RuleCharacterIterator::skipIgnored(int32_t options) {
michael@0 102 if ((options & SKIP_WHITESPACE) != 0) {
michael@0 103 for (;;) {
michael@0 104 UChar32 a = _current();
michael@0 105 if (!PatternProps::isWhiteSpace(a)) break;
michael@0 106 _advance(U16_LENGTH(a));
michael@0 107 }
michael@0 108 }
michael@0 109 }
michael@0 110
michael@0 111 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
michael@0 112 if (maxLookAhead < 0) {
michael@0 113 maxLookAhead = 0x7FFFFFFF;
michael@0 114 }
michael@0 115 if (buf != 0) {
michael@0 116 buf->extract(bufPos, maxLookAhead, result);
michael@0 117 } else {
michael@0 118 text.extract(pos.getIndex(), maxLookAhead, result);
michael@0 119 }
michael@0 120 return result;
michael@0 121 }
michael@0 122
michael@0 123 void RuleCharacterIterator::jumpahead(int32_t count) {
michael@0 124 _advance(count);
michael@0 125 }
michael@0 126
michael@0 127 /*
michael@0 128 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
michael@0 129 int32_t b = pos.getIndex();
michael@0 130 text.extract(0, b, result);
michael@0 131 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
michael@0 132 }
michael@0 133 */
michael@0 134
michael@0 135 UChar32 RuleCharacterIterator::_current() const {
michael@0 136 if (buf != 0) {
michael@0 137 return buf->char32At(bufPos);
michael@0 138 } else {
michael@0 139 int i = pos.getIndex();
michael@0 140 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
michael@0 141 }
michael@0 142 }
michael@0 143
michael@0 144 void RuleCharacterIterator::_advance(int32_t count) {
michael@0 145 if (buf != 0) {
michael@0 146 bufPos += count;
michael@0 147 if (bufPos == buf->length()) {
michael@0 148 buf = 0;
michael@0 149 }
michael@0 150 } else {
michael@0 151 pos.setIndex(pos.getIndex() + count);
michael@0 152 if (pos.getIndex() > text.length()) {
michael@0 153 pos.setIndex(text.length());
michael@0 154 }
michael@0 155 }
michael@0 156 }
michael@0 157
michael@0 158 U_NAMESPACE_END
michael@0 159
michael@0 160 //eof

mercurial