|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (c) 2003-2011, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * Author: Alan Liu |
|
7 * Created: September 24 2003 |
|
8 * Since: ICU 2.8 |
|
9 ********************************************************************** |
|
10 */ |
|
11 #include "ruleiter.h" |
|
12 #include "unicode/parsepos.h" |
|
13 #include "unicode/symtable.h" |
|
14 #include "unicode/unistr.h" |
|
15 #include "unicode/utf16.h" |
|
16 #include "patternprops.h" |
|
17 |
|
18 /* Lookahead cap passed to lookahead() before unescapeAt() in next(); long enough for \U87654321 or \ud800\udc00 */ |
|
19 #define MAX_U_NOTATION_LEN 12 |
|
20 |
|
21 U_NAMESPACE_BEGIN |
|
22 |
|
23 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, |
|
24 ParsePosition& thePos) : |
|
25 text(theText), |
|
26 pos(thePos), |
|
27 sym(theSym), |
|
28 buf(0), |
|
29 bufPos(0) |
|
30 {} |
|
31 |
|
32 UBool RuleCharacterIterator::atEnd() const { |
|
33 return buf == 0 && pos.getIndex() == text.length(); |
|
34 } |
|
35 |
|
36 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { |
|
37 if (U_FAILURE(ec)) return DONE; |
|
38 |
|
39 UChar32 c = DONE; |
|
40 isEscaped = FALSE; |
|
41 |
|
42 for (;;) { |
|
43 c = _current(); |
|
44 _advance(U16_LENGTH(c)); |
|
45 |
|
46 if (c == SymbolTable::SYMBOL_REF && buf == 0 && |
|
47 (options & PARSE_VARIABLES) != 0 && sym != 0) { |
|
48 UnicodeString name = sym->parseReference(text, pos, text.length()); |
|
49 // If name is empty there was an isolated SYMBOL_REF; |
|
50 // return it. Caller must be prepared for this. |
|
51 if (name.length() == 0) { |
|
52 break; |
|
53 } |
|
54 bufPos = 0; |
|
55 buf = sym->lookup(name); |
|
56 if (buf == 0) { |
|
57 ec = U_UNDEFINED_VARIABLE; |
|
58 return DONE; |
|
59 } |
|
60 // Handle empty variable value |
|
61 if (buf->length() == 0) { |
|
62 buf = 0; |
|
63 } |
|
64 continue; |
|
65 } |
|
66 |
|
67 if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { |
|
68 continue; |
|
69 } |
|
70 |
|
71 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { |
|
72 UnicodeString tempEscape; |
|
73 int32_t offset = 0; |
|
74 c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); |
|
75 jumpahead(offset); |
|
76 isEscaped = TRUE; |
|
77 if (c < 0) { |
|
78 ec = U_MALFORMED_UNICODE_ESCAPE; |
|
79 return DONE; |
|
80 } |
|
81 } |
|
82 |
|
83 break; |
|
84 } |
|
85 |
|
86 return c; |
|
87 } |
|
88 |
|
89 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { |
|
90 p.buf = buf; |
|
91 p.pos = pos.getIndex(); |
|
92 p.bufPos = bufPos; |
|
93 } |
|
94 |
|
95 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { |
|
96 buf = p.buf; |
|
97 pos.setIndex(p.pos); |
|
98 bufPos = p.bufPos; |
|
99 } |
|
100 |
|
101 void RuleCharacterIterator::skipIgnored(int32_t options) { |
|
102 if ((options & SKIP_WHITESPACE) != 0) { |
|
103 for (;;) { |
|
104 UChar32 a = _current(); |
|
105 if (!PatternProps::isWhiteSpace(a)) break; |
|
106 _advance(U16_LENGTH(a)); |
|
107 } |
|
108 } |
|
109 } |
|
110 |
|
111 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { |
|
112 if (maxLookAhead < 0) { |
|
113 maxLookAhead = 0x7FFFFFFF; |
|
114 } |
|
115 if (buf != 0) { |
|
116 buf->extract(bufPos, maxLookAhead, result); |
|
117 } else { |
|
118 text.extract(pos.getIndex(), maxLookAhead, result); |
|
119 } |
|
120 return result; |
|
121 } |
|
122 |
|
123 void RuleCharacterIterator::jumpahead(int32_t count) { |
|
124 _advance(count); |
|
125 } |
|
126 |
|
127 /* |
|
128 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { |
|
129 int32_t b = pos.getIndex(); |
|
130 text.extract(0, b, result); |
|
131 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index |
|
132 } |
|
133 */ |
|
134 |
|
135 UChar32 RuleCharacterIterator::_current() const { |
|
136 if (buf != 0) { |
|
137 return buf->char32At(bufPos); |
|
138 } else { |
|
139 int i = pos.getIndex(); |
|
140 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; |
|
141 } |
|
142 } |
|
143 |
|
144 void RuleCharacterIterator::_advance(int32_t count) { |
|
145 if (buf != 0) { |
|
146 bufPos += count; |
|
147 if (bufPos == buf->length()) { |
|
148 buf = 0; |
|
149 } |
|
150 } else { |
|
151 pos.setIndex(pos.getIndex() + count); |
|
152 if (pos.getIndex() > text.length()) { |
|
153 pos.setIndex(text.length()); |
|
154 } |
|
155 } |
|
156 } |
|
157 |
|
158 U_NAMESPACE_END |
|
159 |
|
160 //eof |