1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ruleiter.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,160 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (c) 2003-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* Author: Alan Liu 1.10 +* Created: September 24 2003 1.11 +* Since: ICU 2.8 1.12 +********************************************************************** 1.13 +*/ 1.14 +#include "ruleiter.h" 1.15 +#include "unicode/parsepos.h" 1.16 +#include "unicode/symtable.h" 1.17 +#include "unicode/unistr.h" 1.18 +#include "unicode/utf16.h" 1.19 +#include "patternprops.h" 1.20 + 1.21 +/* \U87654321 or \ud800\udc00 */ 1.22 +#define MAX_U_NOTATION_LEN 12 1.23 + 1.24 +U_NAMESPACE_BEGIN 1.25 + 1.26 +RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, 1.27 + ParsePosition& thePos) : 1.28 + text(theText), 1.29 + pos(thePos), 1.30 + sym(theSym), 1.31 + buf(0), 1.32 + bufPos(0) 1.33 +{} 1.34 + 1.35 +UBool RuleCharacterIterator::atEnd() const { 1.36 + return buf == 0 && pos.getIndex() == text.length(); 1.37 +} 1.38 + 1.39 +UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { 1.40 + if (U_FAILURE(ec)) return DONE; 1.41 + 1.42 + UChar32 c = DONE; 1.43 + isEscaped = FALSE; 1.44 + 1.45 + for (;;) { 1.46 + c = _current(); 1.47 + _advance(U16_LENGTH(c)); 1.48 + 1.49 + if (c == SymbolTable::SYMBOL_REF && buf == 0 && 1.50 + (options & PARSE_VARIABLES) != 0 && sym != 0) { 1.51 + UnicodeString name = sym->parseReference(text, pos, text.length()); 1.52 + // If name is empty there was an isolated SYMBOL_REF; 1.53 + // return it. Caller must be prepared for this. 1.54 + if (name.length() == 0) { 1.55 + break; 1.56 + } 1.57 + bufPos = 0; 1.58 + buf = sym->lookup(name); 1.59 + if (buf == 0) { 1.60 + ec = U_UNDEFINED_VARIABLE; 1.61 + return DONE; 1.62 + } 1.63 + // Handle empty variable value 1.64 + if (buf->length() == 0) { 1.65 + buf = 0; 1.66 + } 1.67 + continue; 1.68 + } 1.69 + 1.70 + if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { 1.71 + continue; 1.72 + } 1.73 + 1.74 + if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { 1.75 + UnicodeString tempEscape; 1.76 + int32_t offset = 0; 1.77 + c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); 1.78 + jumpahead(offset); 1.79 + isEscaped = TRUE; 1.80 + if (c < 0) { 1.81 + ec = U_MALFORMED_UNICODE_ESCAPE; 1.82 + return DONE; 1.83 + } 1.84 + } 1.85 + 1.86 + break; 1.87 + } 1.88 + 1.89 + return c; 1.90 +} 1.91 + 1.92 +void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { 1.93 + p.buf = buf; 1.94 + p.pos = pos.getIndex(); 1.95 + p.bufPos = bufPos; 1.96 +} 1.97 + 1.98 +void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { 1.99 + buf = p.buf; 1.100 + pos.setIndex(p.pos); 1.101 + bufPos = p.bufPos; 1.102 +} 1.103 + 1.104 +void RuleCharacterIterator::skipIgnored(int32_t options) { 1.105 + if ((options & SKIP_WHITESPACE) != 0) { 1.106 + for (;;) { 1.107 + UChar32 a = _current(); 1.108 + if (!PatternProps::isWhiteSpace(a)) break; 1.109 + _advance(U16_LENGTH(a)); 1.110 + } 1.111 + } 1.112 +} 1.113 + 1.114 +UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { 1.115 + if (maxLookAhead < 0) { 1.116 + maxLookAhead = 0x7FFFFFFF; 1.117 + } 1.118 + if (buf != 0) { 1.119 + buf->extract(bufPos, maxLookAhead, result); 1.120 + } else { 1.121 + text.extract(pos.getIndex(), maxLookAhead, result); 1.122 + } 1.123 + return result; 1.124 +} 1.125 + 1.126 +void RuleCharacterIterator::jumpahead(int32_t count) { 1.127 + _advance(count); 1.128 +} 1.129 + 1.130 +/* 1.131 +UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { 1.132 + int32_t b = pos.getIndex(); 1.133 + text.extract(0, b, result); 1.134 + return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index 1.135 +} 1.136 +*/ 1.137 + 1.138 +UChar32 RuleCharacterIterator::_current() const { 1.139 + if (buf != 0) { 1.140 + return buf->char32At(bufPos); 1.141 + } else { 1.142 + int i = pos.getIndex(); 1.143 + return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; 1.144 + } 1.145 +} 1.146 + 1.147 +void RuleCharacterIterator::_advance(int32_t count) { 1.148 + if (buf != 0) { 1.149 + bufPos += count; 1.150 + if (bufPos == buf->length()) { 1.151 + buf = 0; 1.152 + } 1.153 + } else { 1.154 + pos.setIndex(pos.getIndex() + count); 1.155 + if (pos.getIndex() > text.length()) { 1.156 + pos.setIndex(text.length()); 1.157 + } 1.158 + } 1.159 +} 1.160 + 1.161 +U_NAMESPACE_END 1.162 + 1.163 +//eof