|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 2010-2011, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ******************************************************************************* |
|
6 * file name: ucharstrieiterator.h |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2010nov15 |
|
12 * created by: Markus W. Scherer |
|
13 */ |
|
14 |
|
15 #include "unicode/utypes.h" |
|
16 #include "unicode/ucharstrie.h" |
|
17 #include "unicode/unistr.h" |
|
18 #include "uvectr32.h" |
|
19 |
|
20 U_NAMESPACE_BEGIN |
|
21 |
|
22 UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength, |
|
23 UErrorCode &errorCode) |
|
24 : uchars_(trieUChars), |
|
25 pos_(uchars_), initialPos_(uchars_), |
|
26 remainingMatchLength_(-1), initialRemainingMatchLength_(-1), |
|
27 skipValue_(FALSE), |
|
28 maxLength_(maxStringLength), value_(0), stack_(NULL) { |
|
29 if(U_FAILURE(errorCode)) { |
|
30 return; |
|
31 } |
|
32 // stack_ is a pointer so that it's easy to turn ucharstrie.h into |
|
33 // a public API header for which we would want it to depend only on |
|
34 // other public headers. |
|
35 // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway |
|
36 // via the UnicodeString and UVector32 implementations, so this additional |
|
37 // cost is minimal. |
|
38 stack_=new UVector32(errorCode); |
|
39 if(stack_==NULL) { |
|
40 errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
41 } |
|
42 } |
|
43 |
|
44 UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength, |
|
45 UErrorCode &errorCode) |
|
46 : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), |
|
47 remainingMatchLength_(trie.remainingMatchLength_), |
|
48 initialRemainingMatchLength_(trie.remainingMatchLength_), |
|
49 skipValue_(FALSE), |
|
50 maxLength_(maxStringLength), value_(0), stack_(NULL) { |
|
51 if(U_FAILURE(errorCode)) { |
|
52 return; |
|
53 } |
|
54 stack_=new UVector32(errorCode); |
|
55 if(U_FAILURE(errorCode)) { |
|
56 return; |
|
57 } |
|
58 if(stack_==NULL) { |
|
59 errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
60 return; |
|
61 } |
|
62 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. |
|
63 if(length>=0) { |
|
64 // Pending linear-match node, append remaining UChars to str_. |
|
65 ++length; |
|
66 if(maxLength_>0 && length>maxLength_) { |
|
67 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. |
|
68 } |
|
69 str_.append(pos_, length); |
|
70 pos_+=length; |
|
71 remainingMatchLength_-=length; |
|
72 } |
|
73 } |
|
74 |
|
75 UCharsTrie::Iterator::~Iterator() { |
|
76 delete stack_; |
|
77 } |
|
78 |
|
79 UCharsTrie::Iterator & |
|
80 UCharsTrie::Iterator::reset() { |
|
81 pos_=initialPos_; |
|
82 remainingMatchLength_=initialRemainingMatchLength_; |
|
83 skipValue_=FALSE; |
|
84 int32_t length=remainingMatchLength_+1; // Remaining match length. |
|
85 if(maxLength_>0 && length>maxLength_) { |
|
86 length=maxLength_; |
|
87 } |
|
88 str_.truncate(length); |
|
89 pos_+=length; |
|
90 remainingMatchLength_-=length; |
|
91 stack_->setSize(0); |
|
92 return *this; |
|
93 } |
|
94 |
|
95 UBool |
|
96 UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } |
|
97 |
|
98 UBool |
|
99 UCharsTrie::Iterator::next(UErrorCode &errorCode) { |
|
100 if(U_FAILURE(errorCode)) { |
|
101 return FALSE; |
|
102 } |
|
103 const UChar *pos=pos_; |
|
104 if(pos==NULL) { |
|
105 if(stack_->isEmpty()) { |
|
106 return FALSE; |
|
107 } |
|
108 // Pop the state off the stack and continue with the next outbound edge of |
|
109 // the branch node. |
|
110 int32_t stackSize=stack_->size(); |
|
111 int32_t length=stack_->elementAti(stackSize-1); |
|
112 pos=uchars_+stack_->elementAti(stackSize-2); |
|
113 stack_->setSize(stackSize-2); |
|
114 str_.truncate(length&0xffff); |
|
115 length=(int32_t)((uint32_t)length>>16); |
|
116 if(length>1) { |
|
117 pos=branchNext(pos, length, errorCode); |
|
118 if(pos==NULL) { |
|
119 return TRUE; // Reached a final value. |
|
120 } |
|
121 } else { |
|
122 str_.append(*pos++); |
|
123 } |
|
124 } |
|
125 if(remainingMatchLength_>=0) { |
|
126 // We only get here if we started in a pending linear-match node |
|
127 // with more than maxLength remaining units. |
|
128 return truncateAndStop(); |
|
129 } |
|
130 for(;;) { |
|
131 int32_t node=*pos++; |
|
132 if(node>=kMinValueLead) { |
|
133 if(skipValue_) { |
|
134 pos=skipNodeValue(pos, node); |
|
135 node&=kNodeTypeMask; |
|
136 skipValue_=FALSE; |
|
137 } else { |
|
138 // Deliver value for the string so far. |
|
139 UBool isFinal=(UBool)(node>>15); |
|
140 if(isFinal) { |
|
141 value_=readValue(pos, node&0x7fff); |
|
142 } else { |
|
143 value_=readNodeValue(pos, node); |
|
144 } |
|
145 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { |
|
146 pos_=NULL; |
|
147 } else { |
|
148 // We cannot skip the value right here because it shares its |
|
149 // lead unit with a match node which we have to evaluate |
|
150 // next time. |
|
151 // Instead, keep pos_ on the node lead unit itself. |
|
152 pos_=pos-1; |
|
153 skipValue_=TRUE; |
|
154 } |
|
155 return TRUE; |
|
156 } |
|
157 } |
|
158 if(maxLength_>0 && str_.length()==maxLength_) { |
|
159 return truncateAndStop(); |
|
160 } |
|
161 if(node<kMinLinearMatch) { |
|
162 if(node==0) { |
|
163 node=*pos++; |
|
164 } |
|
165 pos=branchNext(pos, node+1, errorCode); |
|
166 if(pos==NULL) { |
|
167 return TRUE; // Reached a final value. |
|
168 } |
|
169 } else { |
|
170 // Linear-match node, append length units to str_. |
|
171 int32_t length=node-kMinLinearMatch+1; |
|
172 if(maxLength_>0 && str_.length()+length>maxLength_) { |
|
173 str_.append(pos, maxLength_-str_.length()); |
|
174 return truncateAndStop(); |
|
175 } |
|
176 str_.append(pos, length); |
|
177 pos+=length; |
|
178 } |
|
179 } |
|
180 } |
|
181 |
|
182 // Branch node, needs to take the first outbound edge and push state for the rest. |
|
183 const UChar * |
|
184 UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { |
|
185 while(length>kMaxBranchLinearSubNodeLength) { |
|
186 ++pos; // ignore the comparison unit |
|
187 // Push state for the greater-or-equal edge. |
|
188 stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode); |
|
189 stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode); |
|
190 // Follow the less-than edge. |
|
191 length>>=1; |
|
192 pos=jumpByDelta(pos); |
|
193 } |
|
194 // List of key-value pairs where values are either final values or jump deltas. |
|
195 // Read the first (key, value) pair. |
|
196 UChar trieUnit=*pos++; |
|
197 int32_t node=*pos++; |
|
198 UBool isFinal=(UBool)(node>>15); |
|
199 int32_t value=readValue(pos, node&=0x7fff); |
|
200 pos=skipValue(pos, node); |
|
201 stack_->addElement((int32_t)(pos-uchars_), errorCode); |
|
202 stack_->addElement(((length-1)<<16)|str_.length(), errorCode); |
|
203 str_.append(trieUnit); |
|
204 if(isFinal) { |
|
205 pos_=NULL; |
|
206 value_=value; |
|
207 return NULL; |
|
208 } else { |
|
209 return pos+value; |
|
210 } |
|
211 } |
|
212 |
|
213 U_NAMESPACE_END |