michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (c) 2003-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * Author: Alan Liu michael@0: * Created: September 24 2003 michael@0: * Since: ICU 2.8 michael@0: ********************************************************************** michael@0: */ michael@0: #include "ruleiter.h" michael@0: #include "unicode/parsepos.h" michael@0: #include "unicode/symtable.h" michael@0: #include "unicode/unistr.h" michael@0: #include "unicode/utf16.h" michael@0: #include "patternprops.h" michael@0: michael@0: /* \U87654321 or \ud800\udc00 */ michael@0: #define MAX_U_NOTATION_LEN 12 michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, michael@0: ParsePosition& thePos) : michael@0: text(theText), michael@0: pos(thePos), michael@0: sym(theSym), michael@0: buf(0), michael@0: bufPos(0) michael@0: {} michael@0: michael@0: UBool RuleCharacterIterator::atEnd() const { michael@0: return buf == 0 && pos.getIndex() == text.length(); michael@0: } michael@0: michael@0: UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { michael@0: if (U_FAILURE(ec)) return DONE; michael@0: michael@0: UChar32 c = DONE; michael@0: isEscaped = FALSE; michael@0: michael@0: for (;;) { michael@0: c = _current(); michael@0: _advance(U16_LENGTH(c)); michael@0: michael@0: if (c == SymbolTable::SYMBOL_REF && buf == 0 && michael@0: (options & PARSE_VARIABLES) != 0 && sym != 0) { michael@0: UnicodeString name = sym->parseReference(text, pos, text.length()); michael@0: // If name is empty there was an isolated SYMBOL_REF; michael@0: // return it. Caller must be prepared for this. michael@0: if (name.length() == 0) { michael@0: break; michael@0: } michael@0: bufPos = 0; michael@0: buf = sym->lookup(name); michael@0: if (buf == 0) { michael@0: ec = U_UNDEFINED_VARIABLE; michael@0: return DONE; michael@0: } michael@0: // Handle empty variable value michael@0: if (buf->length() == 0) { michael@0: buf = 0; michael@0: } michael@0: continue; michael@0: } michael@0: michael@0: if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { michael@0: continue; michael@0: } michael@0: michael@0: if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { michael@0: UnicodeString tempEscape; michael@0: int32_t offset = 0; michael@0: c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); michael@0: jumpahead(offset); michael@0: isEscaped = TRUE; michael@0: if (c < 0) { michael@0: ec = U_MALFORMED_UNICODE_ESCAPE; michael@0: return DONE; michael@0: } michael@0: } michael@0: michael@0: break; michael@0: } michael@0: michael@0: return c; michael@0: } michael@0: michael@0: void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { michael@0: p.buf = buf; michael@0: p.pos = pos.getIndex(); michael@0: p.bufPos = bufPos; michael@0: } michael@0: michael@0: void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { michael@0: buf = p.buf; michael@0: pos.setIndex(p.pos); michael@0: bufPos = p.bufPos; michael@0: } michael@0: michael@0: void RuleCharacterIterator::skipIgnored(int32_t options) { michael@0: if ((options & SKIP_WHITESPACE) != 0) { michael@0: for (;;) { michael@0: UChar32 a = _current(); michael@0: if (!PatternProps::isWhiteSpace(a)) break; michael@0: _advance(U16_LENGTH(a)); michael@0: } michael@0: } michael@0: } michael@0: michael@0: UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { michael@0: if (maxLookAhead < 0) { michael@0: maxLookAhead = 0x7FFFFFFF; michael@0: } michael@0: if (buf != 0) { michael@0: buf->extract(bufPos, maxLookAhead, result); michael@0: } else { michael@0: text.extract(pos.getIndex(), maxLookAhead, result); michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: void RuleCharacterIterator::jumpahead(int32_t count) { michael@0: _advance(count); michael@0: } michael@0: michael@0: /* michael@0: UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { michael@0: int32_t b = pos.getIndex(); michael@0: text.extract(0, b, result); michael@0: return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index michael@0: } michael@0: */ michael@0: michael@0: UChar32 RuleCharacterIterator::_current() const { michael@0: if (buf != 0) { michael@0: return buf->char32At(bufPos); michael@0: } else { michael@0: int i = pos.getIndex(); michael@0: return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; michael@0: } michael@0: } michael@0: michael@0: void RuleCharacterIterator::_advance(int32_t count) { michael@0: if (buf != 0) { michael@0: bufPos += count; michael@0: if (bufPos == buf->length()) { michael@0: buf = 0; michael@0: } michael@0: } else { michael@0: pos.setIndex(pos.getIndex() + count); michael@0: if (pos.getIndex() > text.length()) { michael@0: pos.setIndex(text.length()); michael@0: } michael@0: } michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: //eof