|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 2010-2012, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ******************************************************************************* |
|
6 * file name: bytestrieiterator.cpp |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2010nov03 |
|
12 * created by: Markus W. Scherer |
|
13 */ |
|
14 |
|
15 #include "unicode/utypes.h" |
|
16 #include "unicode/bytestrie.h" |
|
17 #include "unicode/stringpiece.h" |
|
18 #include "charstr.h" |
|
19 #include "uvectr32.h" |
|
20 |
|
21 U_NAMESPACE_BEGIN |
|
22 |
|
23 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, |
|
24 UErrorCode &errorCode) |
|
25 : bytes_(static_cast<const uint8_t *>(trieBytes)), |
|
26 pos_(bytes_), initialPos_(bytes_), |
|
27 remainingMatchLength_(-1), initialRemainingMatchLength_(-1), |
|
28 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { |
|
29 if(U_FAILURE(errorCode)) { |
|
30 return; |
|
31 } |
|
32 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into |
|
33 // a public API header for which we would want it to depend only on |
|
34 // other public headers. |
|
35 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway |
|
36 // via the CharString and UVector32 implementations, so this additional |
|
37 // cost is minimal. |
|
38 str_=new CharString(); |
|
39 stack_=new UVector32(errorCode); |
|
40 if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { |
|
41 errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
42 } |
|
43 } |
|
44 |
|
45 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, |
|
46 UErrorCode &errorCode) |
|
47 : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), |
|
48 remainingMatchLength_(trie.remainingMatchLength_), |
|
49 initialRemainingMatchLength_(trie.remainingMatchLength_), |
|
50 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { |
|
51 if(U_FAILURE(errorCode)) { |
|
52 return; |
|
53 } |
|
54 str_=new CharString(); |
|
55 stack_=new UVector32(errorCode); |
|
56 if(U_FAILURE(errorCode)) { |
|
57 return; |
|
58 } |
|
59 if(str_==NULL || stack_==NULL) { |
|
60 errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
61 return; |
|
62 } |
|
63 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. |
|
64 if(length>=0) { |
|
65 // Pending linear-match node, append remaining bytes to str_. |
|
66 ++length; |
|
67 if(maxLength_>0 && length>maxLength_) { |
|
68 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. |
|
69 } |
|
70 str_->append(reinterpret_cast<const char *>(pos_), length, errorCode); |
|
71 pos_+=length; |
|
72 remainingMatchLength_-=length; |
|
73 } |
|
74 } |
|
75 |
|
76 BytesTrie::Iterator::~Iterator() { |
|
77 delete str_; |
|
78 delete stack_; |
|
79 } |
|
80 |
|
81 BytesTrie::Iterator & |
|
82 BytesTrie::Iterator::reset() { |
|
83 pos_=initialPos_; |
|
84 remainingMatchLength_=initialRemainingMatchLength_; |
|
85 int32_t length=remainingMatchLength_+1; // Remaining match length. |
|
86 if(maxLength_>0 && length>maxLength_) { |
|
87 length=maxLength_; |
|
88 } |
|
89 str_->truncate(length); |
|
90 pos_+=length; |
|
91 remainingMatchLength_-=length; |
|
92 stack_->setSize(0); |
|
93 return *this; |
|
94 } |
|
95 |
|
96 UBool |
|
97 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } |
|
98 |
|
99 UBool |
|
100 BytesTrie::Iterator::next(UErrorCode &errorCode) { |
|
101 if(U_FAILURE(errorCode)) { |
|
102 return FALSE; |
|
103 } |
|
104 const uint8_t *pos=pos_; |
|
105 if(pos==NULL) { |
|
106 if(stack_->isEmpty()) { |
|
107 return FALSE; |
|
108 } |
|
109 // Pop the state off the stack and continue with the next outbound edge of |
|
110 // the branch node. |
|
111 int32_t stackSize=stack_->size(); |
|
112 int32_t length=stack_->elementAti(stackSize-1); |
|
113 pos=bytes_+stack_->elementAti(stackSize-2); |
|
114 stack_->setSize(stackSize-2); |
|
115 str_->truncate(length&0xffff); |
|
116 length=(int32_t)((uint32_t)length>>16); |
|
117 if(length>1) { |
|
118 pos=branchNext(pos, length, errorCode); |
|
119 if(pos==NULL) { |
|
120 return TRUE; // Reached a final value. |
|
121 } |
|
122 } else { |
|
123 str_->append((char)*pos++, errorCode); |
|
124 } |
|
125 } |
|
126 if(remainingMatchLength_>=0) { |
|
127 // We only get here if we started in a pending linear-match node |
|
128 // with more than maxLength remaining bytes. |
|
129 return truncateAndStop(); |
|
130 } |
|
131 for(;;) { |
|
132 int32_t node=*pos++; |
|
133 if(node>=kMinValueLead) { |
|
134 // Deliver value for the byte sequence so far. |
|
135 UBool isFinal=(UBool)(node&kValueIsFinal); |
|
136 value_=readValue(pos, node>>1); |
|
137 if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { |
|
138 pos_=NULL; |
|
139 } else { |
|
140 pos_=skipValue(pos, node); |
|
141 } |
|
142 sp_.set(str_->data(), str_->length()); |
|
143 return TRUE; |
|
144 } |
|
145 if(maxLength_>0 && str_->length()==maxLength_) { |
|
146 return truncateAndStop(); |
|
147 } |
|
148 if(node<kMinLinearMatch) { |
|
149 if(node==0) { |
|
150 node=*pos++; |
|
151 } |
|
152 pos=branchNext(pos, node+1, errorCode); |
|
153 if(pos==NULL) { |
|
154 return TRUE; // Reached a final value. |
|
155 } |
|
156 } else { |
|
157 // Linear-match node, append length bytes to str_. |
|
158 int32_t length=node-kMinLinearMatch+1; |
|
159 if(maxLength_>0 && str_->length()+length>maxLength_) { |
|
160 str_->append(reinterpret_cast<const char *>(pos), |
|
161 maxLength_-str_->length(), errorCode); |
|
162 return truncateAndStop(); |
|
163 } |
|
164 str_->append(reinterpret_cast<const char *>(pos), length, errorCode); |
|
165 pos+=length; |
|
166 } |
|
167 } |
|
168 } |
|
169 |
|
170 UBool |
|
171 BytesTrie::Iterator::truncateAndStop() { |
|
172 pos_=NULL; |
|
173 sp_.set(str_->data(), str_->length()); |
|
174 value_=-1; // no real value for str |
|
175 return TRUE; |
|
176 } |
|
177 |
|
178 // Branch node, needs to take the first outbound edge and push state for the rest. |
|
179 const uint8_t * |
|
180 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { |
|
181 while(length>kMaxBranchLinearSubNodeLength) { |
|
182 ++pos; // ignore the comparison byte |
|
183 // Push state for the greater-or-equal edge. |
|
184 stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); |
|
185 stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); |
|
186 // Follow the less-than edge. |
|
187 length>>=1; |
|
188 pos=jumpByDelta(pos); |
|
189 } |
|
190 // List of key-value pairs where values are either final values or jump deltas. |
|
191 // Read the first (key, value) pair. |
|
192 uint8_t trieByte=*pos++; |
|
193 int32_t node=*pos++; |
|
194 UBool isFinal=(UBool)(node&kValueIsFinal); |
|
195 int32_t value=readValue(pos, node>>1); |
|
196 pos=skipValue(pos, node); |
|
197 stack_->addElement((int32_t)(pos-bytes_), errorCode); |
|
198 stack_->addElement(((length-1)<<16)|str_->length(), errorCode); |
|
199 str_->append((char)trieByte, errorCode); |
|
200 if(isFinal) { |
|
201 pos_=NULL; |
|
202 sp_.set(str_->data(), str_->length()); |
|
203 value_=value; |
|
204 return NULL; |
|
205 } else { |
|
206 return pos+value; |
|
207 } |
|
208 } |
|
209 |
|
210 U_NAMESPACE_END |