intl/icu/source/common/ruleiter.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/ruleiter.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,160 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +* Copyright (c) 2003-2011, International Business Machines
     1.7 +* Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +* Author: Alan Liu
    1.10 +* Created: September 24 2003
    1.11 +* Since: ICU 2.8
    1.12 +**********************************************************************
    1.13 +*/
    1.14 +#include "ruleiter.h"
    1.15 +#include "unicode/parsepos.h"
    1.16 +#include "unicode/symtable.h"
    1.17 +#include "unicode/unistr.h"
    1.18 +#include "unicode/utf16.h"
    1.19 +#include "patternprops.h"
    1.20 +
    1.21 +/* \U87654321 or \ud800\udc00 */
    1.22 +#define MAX_U_NOTATION_LEN 12
    1.23 +
    1.24 +U_NAMESPACE_BEGIN
    1.25 +
    1.26 +RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
    1.27 +                      ParsePosition& thePos) :
    1.28 +    text(theText),
    1.29 +    pos(thePos),
    1.30 +    sym(theSym),
    1.31 +    buf(0),
    1.32 +    bufPos(0)
    1.33 +{}
    1.34 +
    1.35 +UBool RuleCharacterIterator::atEnd() const {
    1.36 +    return buf == 0 && pos.getIndex() == text.length();
    1.37 +}
    1.38 +
    1.39 +UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
    1.40 +    if (U_FAILURE(ec)) return DONE;
    1.41 +
    1.42 +    UChar32 c = DONE;
    1.43 +    isEscaped = FALSE;
    1.44 +
    1.45 +    for (;;) {
    1.46 +        c = _current();
    1.47 +        _advance(U16_LENGTH(c));
    1.48 +
    1.49 +        if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
    1.50 +            (options & PARSE_VARIABLES) != 0 && sym != 0) {
    1.51 +            UnicodeString name = sym->parseReference(text, pos, text.length());
    1.52 +            // If name is empty there was an isolated SYMBOL_REF;
    1.53 +            // return it.  Caller must be prepared for this.
    1.54 +            if (name.length() == 0) {
    1.55 +                break;
    1.56 +            }
    1.57 +            bufPos = 0;
    1.58 +            buf = sym->lookup(name);
    1.59 +            if (buf == 0) {
    1.60 +                ec = U_UNDEFINED_VARIABLE;
    1.61 +                return DONE;
    1.62 +            }
    1.63 +            // Handle empty variable value
    1.64 +            if (buf->length() == 0) {
    1.65 +                buf = 0;
    1.66 +            }
    1.67 +            continue;
    1.68 +        }
    1.69 +
    1.70 +        if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
    1.71 +            continue;
    1.72 +        }
    1.73 +
    1.74 +        if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
    1.75 +            UnicodeString tempEscape;
    1.76 +            int32_t offset = 0;
    1.77 +            c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
    1.78 +            jumpahead(offset);
    1.79 +            isEscaped = TRUE;
    1.80 +            if (c < 0) {
    1.81 +                ec = U_MALFORMED_UNICODE_ESCAPE;
    1.82 +                return DONE;
    1.83 +            }
    1.84 +        }
    1.85 +
    1.86 +        break;
    1.87 +    }
    1.88 +
    1.89 +    return c;
    1.90 +}
    1.91 +
    1.92 +void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
    1.93 +    p.buf = buf;
    1.94 +    p.pos = pos.getIndex();
    1.95 +    p.bufPos = bufPos;
    1.96 +}
    1.97 +
    1.98 +void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
    1.99 +    buf = p.buf;
   1.100 +    pos.setIndex(p.pos);
   1.101 +    bufPos = p.bufPos;
   1.102 +}
   1.103 +
   1.104 +void RuleCharacterIterator::skipIgnored(int32_t options) {
   1.105 +    if ((options & SKIP_WHITESPACE) != 0) {
   1.106 +        for (;;) {
   1.107 +            UChar32 a = _current();
   1.108 +            if (!PatternProps::isWhiteSpace(a)) break;
   1.109 +            _advance(U16_LENGTH(a));
   1.110 +        }
   1.111 +    }
   1.112 +}
   1.113 +
   1.114 +UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
   1.115 +    if (maxLookAhead < 0) {
   1.116 +        maxLookAhead = 0x7FFFFFFF;
   1.117 +    }
   1.118 +    if (buf != 0) {
   1.119 +        buf->extract(bufPos, maxLookAhead, result);
   1.120 +    } else {
   1.121 +        text.extract(pos.getIndex(), maxLookAhead, result);
   1.122 +    }
   1.123 +    return result;
   1.124 +}
   1.125 +
   1.126 +void RuleCharacterIterator::jumpahead(int32_t count) {
   1.127 +    _advance(count);
   1.128 +}
   1.129 +
   1.130 +/*
   1.131 +UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
   1.132 +    int32_t b = pos.getIndex();
   1.133 +    text.extract(0, b, result);
   1.134 +    return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
   1.135 +}
   1.136 +*/
   1.137 +
   1.138 +UChar32 RuleCharacterIterator::_current() const {
   1.139 +    if (buf != 0) {
   1.140 +        return buf->char32At(bufPos);
   1.141 +    } else {
   1.142 +        int i = pos.getIndex();
   1.143 +        return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
   1.144 +    }
   1.145 +}
   1.146 +
   1.147 +void RuleCharacterIterator::_advance(int32_t count) {
   1.148 +    if (buf != 0) {
   1.149 +        bufPos += count;
   1.150 +        if (bufPos == buf->length()) {
   1.151 +            buf = 0;
   1.152 +        }
   1.153 +    } else {
   1.154 +        pos.setIndex(pos.getIndex() + count);
   1.155 +        if (pos.getIndex() > text.length()) {
   1.156 +            pos.setIndex(text.length());
   1.157 +        }
   1.158 +    }
   1.159 +}
   1.160 +
   1.161 +U_NAMESPACE_END
   1.162 +
   1.163 +//eof

mercurial