|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 2010-2012, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ******************************************************************************* |
|
6 * file name: stringtriebuilder.cpp |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2010dec24 |
|
12 * created by: Markus W. Scherer |
|
13 */ |
|
14 |
|
15 #include "utypeinfo.h" // for 'typeid' to work |
|
16 #include "unicode/utypes.h" |
|
17 #include "unicode/stringtriebuilder.h" |
|
18 #include "uassert.h" |
|
19 #include "uhash.h" |
|
20 |
|
21 U_CDECL_BEGIN |
|
22 |
|
23 static int32_t U_CALLCONV |
|
24 hashStringTrieNode(const UHashTok key) { |
|
25 return icu::StringTrieBuilder::hashNode(key.pointer); |
|
26 } |
|
27 |
|
28 static UBool U_CALLCONV |
|
29 equalStringTrieNodes(const UHashTok key1, const UHashTok key2) { |
|
30 return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer); |
|
31 } |
|
32 |
|
33 U_CDECL_END |
|
34 |
|
35 U_NAMESPACE_BEGIN |
|
36 |
|
37 StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {} |
|
38 |
|
39 StringTrieBuilder::~StringTrieBuilder() { |
|
40 deleteCompactBuilder(); |
|
41 } |
|
42 |
|
43 void |
|
44 StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) { |
|
45 if(U_FAILURE(errorCode)) { |
|
46 return; |
|
47 } |
|
48 nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL, |
|
49 sizeGuess, &errorCode); |
|
50 if(U_SUCCESS(errorCode)) { |
|
51 if(nodes==NULL) { |
|
52 errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
53 } else { |
|
54 uhash_setKeyDeleter(nodes, uprv_deleteUObject); |
|
55 } |
|
56 } |
|
57 } |
|
58 |
|
59 void |
|
60 StringTrieBuilder::deleteCompactBuilder() { |
|
61 uhash_close(nodes); |
|
62 nodes=NULL; |
|
63 } |
|
64 |
|
65 void |
|
66 StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength, |
|
67 UErrorCode &errorCode) { |
|
68 if(buildOption==USTRINGTRIE_BUILD_FAST) { |
|
69 writeNode(0, elementsLength, 0); |
|
70 } else /* USTRINGTRIE_BUILD_SMALL */ { |
|
71 createCompactBuilder(2*elementsLength, errorCode); |
|
72 Node *root=makeNode(0, elementsLength, 0, errorCode); |
|
73 if(U_SUCCESS(errorCode)) { |
|
74 root->markRightEdgesFirst(-1); |
|
75 root->write(*this); |
|
76 } |
|
77 deleteCompactBuilder(); |
|
78 } |
|
79 } |
|
80 |
|
81 // Requires start<limit, |
|
82 // and all strings of the [start..limit[ elements must be sorted and |
|
83 // have a common prefix of length unitIndex. |
|
84 int32_t |
|
85 StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) { |
|
86 UBool hasValue=FALSE; |
|
87 int32_t value=0; |
|
88 int32_t type; |
|
89 if(unitIndex==getElementStringLength(start)) { |
|
90 // An intermediate or final value. |
|
91 value=getElementValue(start++); |
|
92 if(start==limit) { |
|
93 return writeValueAndFinal(value, TRUE); // final-value node |
|
94 } |
|
95 hasValue=TRUE; |
|
96 } |
|
97 // Now all [start..limit[ strings are longer than unitIndex. |
|
98 int32_t minUnit=getElementUnit(start, unitIndex); |
|
99 int32_t maxUnit=getElementUnit(limit-1, unitIndex); |
|
100 if(minUnit==maxUnit) { |
|
101 // Linear-match node: All strings have the same character at unitIndex. |
|
102 int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); |
|
103 writeNode(start, limit, lastUnitIndex); |
|
104 // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. |
|
105 int32_t length=lastUnitIndex-unitIndex; |
|
106 int32_t maxLinearMatchLength=getMaxLinearMatchLength(); |
|
107 while(length>maxLinearMatchLength) { |
|
108 lastUnitIndex-=maxLinearMatchLength; |
|
109 length-=maxLinearMatchLength; |
|
110 writeElementUnits(start, lastUnitIndex, maxLinearMatchLength); |
|
111 write(getMinLinearMatch()+maxLinearMatchLength-1); |
|
112 } |
|
113 writeElementUnits(start, unitIndex, length); |
|
114 type=getMinLinearMatch()+length-1; |
|
115 } else { |
|
116 // Branch node. |
|
117 int32_t length=countElementUnits(start, limit, unitIndex); |
|
118 // length>=2 because minUnit!=maxUnit. |
|
119 writeBranchSubNode(start, limit, unitIndex, length); |
|
120 if(--length<getMinLinearMatch()) { |
|
121 type=length; |
|
122 } else { |
|
123 write(length); |
|
124 type=0; |
|
125 } |
|
126 } |
|
127 return writeValueAndType(hasValue, value, type); |
|
128 } |
|
129 |
|
130 // start<limit && all strings longer than unitIndex && |
|
131 // length different units at unitIndex |
|
132 int32_t |
|
133 StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) { |
|
134 UChar middleUnits[kMaxSplitBranchLevels]; |
|
135 int32_t lessThan[kMaxSplitBranchLevels]; |
|
136 int32_t ltLength=0; |
|
137 while(length>getMaxBranchLinearSubNodeLength()) { |
|
138 // Branch on the middle unit. |
|
139 // First, find the middle unit. |
|
140 int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); |
|
141 // Encode the less-than branch first. |
|
142 middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit |
|
143 lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2); |
|
144 ++ltLength; |
|
145 // Continue for the greater-or-equal branch. |
|
146 start=i; |
|
147 length=length-length/2; |
|
148 } |
|
149 // For each unit, find its elements array start and whether it has a final value. |
|
150 int32_t starts[kMaxBranchLinearSubNodeLength]; |
|
151 UBool isFinal[kMaxBranchLinearSubNodeLength-1]; |
|
152 int32_t unitNumber=0; |
|
153 do { |
|
154 int32_t i=starts[unitNumber]=start; |
|
155 UChar unit=getElementUnit(i++, unitIndex); |
|
156 i=indexOfElementWithNextUnit(i, unitIndex, unit); |
|
157 isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start); |
|
158 start=i; |
|
159 } while(++unitNumber<length-1); |
|
160 // unitNumber==length-1, and the maxUnit elements range is [start..limit[ |
|
161 starts[unitNumber]=start; |
|
162 |
|
163 // Write the sub-nodes in reverse order: The jump lengths are deltas from |
|
164 // after their own positions, so if we wrote the minUnit sub-node first, |
|
165 // then its jump delta would be larger. |
|
166 // Instead we write the minUnit sub-node last, for a shorter delta. |
|
167 int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1]; |
|
168 do { |
|
169 --unitNumber; |
|
170 if(!isFinal[unitNumber]) { |
|
171 jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1); |
|
172 } |
|
173 } while(unitNumber>0); |
|
174 // The maxUnit sub-node is written as the very last one because we do |
|
175 // not jump for it at all. |
|
176 unitNumber=length-1; |
|
177 writeNode(start, limit, unitIndex+1); |
|
178 int32_t offset=write(getElementUnit(start, unitIndex)); |
|
179 // Write the rest of this node's unit-value pairs. |
|
180 while(--unitNumber>=0) { |
|
181 start=starts[unitNumber]; |
|
182 int32_t value; |
|
183 if(isFinal[unitNumber]) { |
|
184 // Write the final value for the one string ending with this unit. |
|
185 value=getElementValue(start); |
|
186 } else { |
|
187 // Write the delta to the start position of the sub-node. |
|
188 value=offset-jumpTargets[unitNumber]; |
|
189 } |
|
190 writeValueAndFinal(value, isFinal[unitNumber]); |
|
191 offset=write(getElementUnit(start, unitIndex)); |
|
192 } |
|
193 // Write the split-branch nodes. |
|
194 while(ltLength>0) { |
|
195 --ltLength; |
|
196 writeDeltaTo(lessThan[ltLength]); |
|
197 offset=write(middleUnits[ltLength]); |
|
198 } |
|
199 return offset; |
|
200 } |
|
201 |
|
202 // Requires start<limit, |
|
203 // and all strings of the [start..limit[ elements must be sorted and |
|
204 // have a common prefix of length unitIndex. |
|
205 StringTrieBuilder::Node * |
|
206 StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) { |
|
207 if(U_FAILURE(errorCode)) { |
|
208 return NULL; |
|
209 } |
|
210 UBool hasValue=FALSE; |
|
211 int32_t value=0; |
|
212 if(unitIndex==getElementStringLength(start)) { |
|
213 // An intermediate or final value. |
|
214 value=getElementValue(start++); |
|
215 if(start==limit) { |
|
216 return registerFinalValue(value, errorCode); |
|
217 } |
|
218 hasValue=TRUE; |
|
219 } |
|
220 Node *node; |
|
221 // Now all [start..limit[ strings are longer than unitIndex. |
|
222 int32_t minUnit=getElementUnit(start, unitIndex); |
|
223 int32_t maxUnit=getElementUnit(limit-1, unitIndex); |
|
224 if(minUnit==maxUnit) { |
|
225 // Linear-match node: All strings have the same character at unitIndex. |
|
226 int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); |
|
227 Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode); |
|
228 // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. |
|
229 int32_t length=lastUnitIndex-unitIndex; |
|
230 int32_t maxLinearMatchLength=getMaxLinearMatchLength(); |
|
231 while(length>maxLinearMatchLength) { |
|
232 lastUnitIndex-=maxLinearMatchLength; |
|
233 length-=maxLinearMatchLength; |
|
234 node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode); |
|
235 nextNode=registerNode(node, errorCode); |
|
236 } |
|
237 node=createLinearMatchNode(start, unitIndex, length, nextNode); |
|
238 } else { |
|
239 // Branch node. |
|
240 int32_t length=countElementUnits(start, limit, unitIndex); |
|
241 // length>=2 because minUnit!=maxUnit. |
|
242 Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode); |
|
243 node=new BranchHeadNode(length, subNode); |
|
244 } |
|
245 if(hasValue && node!=NULL) { |
|
246 if(matchNodesCanHaveValues()) { |
|
247 ((ValueNode *)node)->setValue(value); |
|
248 } else { |
|
249 node=new IntermediateValueNode(value, registerNode(node, errorCode)); |
|
250 } |
|
251 } |
|
252 return registerNode(node, errorCode); |
|
253 } |
|
254 |
|
255 // start<limit && all strings longer than unitIndex && |
|
256 // length different units at unitIndex |
|
257 StringTrieBuilder::Node * |
|
258 StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, |
|
259 int32_t length, UErrorCode &errorCode) { |
|
260 if(U_FAILURE(errorCode)) { |
|
261 return NULL; |
|
262 } |
|
263 UChar middleUnits[kMaxSplitBranchLevels]; |
|
264 Node *lessThan[kMaxSplitBranchLevels]; |
|
265 int32_t ltLength=0; |
|
266 while(length>getMaxBranchLinearSubNodeLength()) { |
|
267 // Branch on the middle unit. |
|
268 // First, find the middle unit. |
|
269 int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); |
|
270 // Create the less-than branch. |
|
271 middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit |
|
272 lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode); |
|
273 ++ltLength; |
|
274 // Continue for the greater-or-equal branch. |
|
275 start=i; |
|
276 length=length-length/2; |
|
277 } |
|
278 if(U_FAILURE(errorCode)) { |
|
279 return NULL; |
|
280 } |
|
281 ListBranchNode *listNode=new ListBranchNode(); |
|
282 if(listNode==NULL) { |
|
283 errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
284 return NULL; |
|
285 } |
|
286 // For each unit, find its elements array start and whether it has a final value. |
|
287 int32_t unitNumber=0; |
|
288 do { |
|
289 int32_t i=start; |
|
290 UChar unit=getElementUnit(i++, unitIndex); |
|
291 i=indexOfElementWithNextUnit(i, unitIndex, unit); |
|
292 if(start==i-1 && unitIndex+1==getElementStringLength(start)) { |
|
293 listNode->add(unit, getElementValue(start)); |
|
294 } else { |
|
295 listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode)); |
|
296 } |
|
297 start=i; |
|
298 } while(++unitNumber<length-1); |
|
299 // unitNumber==length-1, and the maxUnit elements range is [start..limit[ |
|
300 UChar unit=getElementUnit(start, unitIndex); |
|
301 if(start==limit-1 && unitIndex+1==getElementStringLength(start)) { |
|
302 listNode->add(unit, getElementValue(start)); |
|
303 } else { |
|
304 listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode)); |
|
305 } |
|
306 Node *node=registerNode(listNode, errorCode); |
|
307 // Create the split-branch nodes. |
|
308 while(ltLength>0) { |
|
309 --ltLength; |
|
310 node=registerNode( |
|
311 new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode); |
|
312 } |
|
313 return node; |
|
314 } |
|
315 |
|
316 StringTrieBuilder::Node * |
|
317 StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) { |
|
318 if(U_FAILURE(errorCode)) { |
|
319 delete newNode; |
|
320 return NULL; |
|
321 } |
|
322 if(newNode==NULL) { |
|
323 errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
324 return NULL; |
|
325 } |
|
326 const UHashElement *old=uhash_find(nodes, newNode); |
|
327 if(old!=NULL) { |
|
328 delete newNode; |
|
329 return (Node *)old->key.pointer; |
|
330 } |
|
331 // If uhash_puti() returns a non-zero value from an equivalent, previously |
|
332 // registered node, then uhash_find() failed to find that and we will leak newNode. |
|
333 #if U_DEBUG |
|
334 int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. |
|
335 #endif |
|
336 uhash_puti(nodes, newNode, 1, &errorCode); |
|
337 U_ASSERT(oldValue==0); |
|
338 if(U_FAILURE(errorCode)) { |
|
339 delete newNode; |
|
340 return NULL; |
|
341 } |
|
342 return newNode; |
|
343 } |
|
344 |
|
345 StringTrieBuilder::Node * |
|
346 StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) { |
|
347 if(U_FAILURE(errorCode)) { |
|
348 return NULL; |
|
349 } |
|
350 FinalValueNode key(value); |
|
351 const UHashElement *old=uhash_find(nodes, &key); |
|
352 if(old!=NULL) { |
|
353 return (Node *)old->key.pointer; |
|
354 } |
|
355 Node *newNode=new FinalValueNode(value); |
|
356 if(newNode==NULL) { |
|
357 errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
358 return NULL; |
|
359 } |
|
360 // If uhash_puti() returns a non-zero value from an equivalent, previously |
|
361 // registered node, then uhash_find() failed to find that and we will leak newNode. |
|
362 #if U_DEBUG |
|
363 int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. |
|
364 #endif |
|
365 uhash_puti(nodes, newNode, 1, &errorCode); |
|
366 U_ASSERT(oldValue==0); |
|
367 if(U_FAILURE(errorCode)) { |
|
368 delete newNode; |
|
369 return NULL; |
|
370 } |
|
371 return newNode; |
|
372 } |
|
373 |
|
374 UBool |
|
375 StringTrieBuilder::hashNode(const void *node) { |
|
376 return ((const Node *)node)->hashCode(); |
|
377 } |
|
378 |
|
379 UBool |
|
380 StringTrieBuilder::equalNodes(const void *left, const void *right) { |
|
381 return *(const Node *)left==*(const Node *)right; |
|
382 } |
|
383 |
|
384 UBool |
|
385 StringTrieBuilder::Node::operator==(const Node &other) const { |
|
386 return this==&other || (typeid(*this)==typeid(other) && hash==other.hash); |
|
387 } |
|
388 |
|
389 int32_t |
|
390 StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) { |
|
391 if(offset==0) { |
|
392 offset=edgeNumber; |
|
393 } |
|
394 return edgeNumber; |
|
395 } |
|
396 |
|
397 UBool |
|
398 StringTrieBuilder::FinalValueNode::operator==(const Node &other) const { |
|
399 if(this==&other) { |
|
400 return TRUE; |
|
401 } |
|
402 if(!Node::operator==(other)) { |
|
403 return FALSE; |
|
404 } |
|
405 const FinalValueNode &o=(const FinalValueNode &)other; |
|
406 return value==o.value; |
|
407 } |
|
408 |
|
409 void |
|
410 StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) { |
|
411 offset=builder.writeValueAndFinal(value, TRUE); |
|
412 } |
|
413 |
|
414 UBool |
|
415 StringTrieBuilder::ValueNode::operator==(const Node &other) const { |
|
416 if(this==&other) { |
|
417 return TRUE; |
|
418 } |
|
419 if(!Node::operator==(other)) { |
|
420 return FALSE; |
|
421 } |
|
422 const ValueNode &o=(const ValueNode &)other; |
|
423 return hasValue==o.hasValue && (!hasValue || value==o.value); |
|
424 } |
|
425 |
|
426 UBool |
|
427 StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const { |
|
428 if(this==&other) { |
|
429 return TRUE; |
|
430 } |
|
431 if(!ValueNode::operator==(other)) { |
|
432 return FALSE; |
|
433 } |
|
434 const IntermediateValueNode &o=(const IntermediateValueNode &)other; |
|
435 return next==o.next; |
|
436 } |
|
437 |
|
438 int32_t |
|
439 StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) { |
|
440 if(offset==0) { |
|
441 offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); |
|
442 } |
|
443 return edgeNumber; |
|
444 } |
|
445 |
|
446 void |
|
447 StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) { |
|
448 next->write(builder); |
|
449 offset=builder.writeValueAndFinal(value, FALSE); |
|
450 } |
|
451 |
|
452 UBool |
|
453 StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const { |
|
454 if(this==&other) { |
|
455 return TRUE; |
|
456 } |
|
457 if(!ValueNode::operator==(other)) { |
|
458 return FALSE; |
|
459 } |
|
460 const LinearMatchNode &o=(const LinearMatchNode &)other; |
|
461 return length==o.length && next==o.next; |
|
462 } |
|
463 |
|
464 int32_t |
|
465 StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) { |
|
466 if(offset==0) { |
|
467 offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); |
|
468 } |
|
469 return edgeNumber; |
|
470 } |
|
471 |
|
472 UBool |
|
473 StringTrieBuilder::ListBranchNode::operator==(const Node &other) const { |
|
474 if(this==&other) { |
|
475 return TRUE; |
|
476 } |
|
477 if(!Node::operator==(other)) { |
|
478 return FALSE; |
|
479 } |
|
480 const ListBranchNode &o=(const ListBranchNode &)other; |
|
481 for(int32_t i=0; i<length; ++i) { |
|
482 if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) { |
|
483 return FALSE; |
|
484 } |
|
485 } |
|
486 return TRUE; |
|
487 } |
|
488 |
|
489 int32_t |
|
490 StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) { |
|
491 if(offset==0) { |
|
492 firstEdgeNumber=edgeNumber; |
|
493 int32_t step=0; |
|
494 int32_t i=length; |
|
495 do { |
|
496 Node *edge=equal[--i]; |
|
497 if(edge!=NULL) { |
|
498 edgeNumber=edge->markRightEdgesFirst(edgeNumber-step); |
|
499 } |
|
500 // For all but the rightmost edge, decrement the edge number. |
|
501 step=1; |
|
502 } while(i>0); |
|
503 offset=edgeNumber; |
|
504 } |
|
505 return edgeNumber; |
|
506 } |
|
507 |
|
508 void |
|
509 StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) { |
|
510 // Write the sub-nodes in reverse order: The jump lengths are deltas from |
|
511 // after their own positions, so if we wrote the minUnit sub-node first, |
|
512 // then its jump delta would be larger. |
|
513 // Instead we write the minUnit sub-node last, for a shorter delta. |
|
514 int32_t unitNumber=length-1; |
|
515 Node *rightEdge=equal[unitNumber]; |
|
516 int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset(); |
|
517 do { |
|
518 --unitNumber; |
|
519 if(equal[unitNumber]!=NULL) { |
|
520 equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder); |
|
521 } |
|
522 } while(unitNumber>0); |
|
523 // The maxUnit sub-node is written as the very last one because we do |
|
524 // not jump for it at all. |
|
525 unitNumber=length-1; |
|
526 if(rightEdge==NULL) { |
|
527 builder.writeValueAndFinal(values[unitNumber], TRUE); |
|
528 } else { |
|
529 rightEdge->write(builder); |
|
530 } |
|
531 offset=builder.write(units[unitNumber]); |
|
532 // Write the rest of this node's unit-value pairs. |
|
533 while(--unitNumber>=0) { |
|
534 int32_t value; |
|
535 UBool isFinal; |
|
536 if(equal[unitNumber]==NULL) { |
|
537 // Write the final value for the one string ending with this unit. |
|
538 value=values[unitNumber]; |
|
539 isFinal=TRUE; |
|
540 } else { |
|
541 // Write the delta to the start position of the sub-node. |
|
542 U_ASSERT(equal[unitNumber]->getOffset()>0); |
|
543 value=offset-equal[unitNumber]->getOffset(); |
|
544 isFinal=FALSE; |
|
545 } |
|
546 builder.writeValueAndFinal(value, isFinal); |
|
547 offset=builder.write(units[unitNumber]); |
|
548 } |
|
549 } |
|
550 |
|
551 UBool |
|
552 StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const { |
|
553 if(this==&other) { |
|
554 return TRUE; |
|
555 } |
|
556 if(!Node::operator==(other)) { |
|
557 return FALSE; |
|
558 } |
|
559 const SplitBranchNode &o=(const SplitBranchNode &)other; |
|
560 return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual; |
|
561 } |
|
562 |
|
563 int32_t |
|
564 StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) { |
|
565 if(offset==0) { |
|
566 firstEdgeNumber=edgeNumber; |
|
567 edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber); |
|
568 offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1); |
|
569 } |
|
570 return edgeNumber; |
|
571 } |
|
572 |
|
573 void |
|
574 StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) { |
|
575 // Encode the less-than branch first. |
|
576 lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder); |
|
577 // Encode the greater-or-equal branch last because we do not jump for it at all. |
|
578 greaterOrEqual->write(builder); |
|
579 // Write this node. |
|
580 U_ASSERT(lessThan->getOffset()>0); |
|
581 builder.writeDeltaTo(lessThan->getOffset()); // less-than |
|
582 offset=builder.write(unit); |
|
583 } |
|
584 |
|
585 UBool |
|
586 StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const { |
|
587 if(this==&other) { |
|
588 return TRUE; |
|
589 } |
|
590 if(!ValueNode::operator==(other)) { |
|
591 return FALSE; |
|
592 } |
|
593 const BranchHeadNode &o=(const BranchHeadNode &)other; |
|
594 return length==o.length && next==o.next; |
|
595 } |
|
596 |
|
597 int32_t |
|
598 StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) { |
|
599 if(offset==0) { |
|
600 offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); |
|
601 } |
|
602 return edgeNumber; |
|
603 } |
|
604 |
|
605 void |
|
606 StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) { |
|
607 next->write(builder); |
|
608 if(length<=builder.getMinLinearMatch()) { |
|
609 offset=builder.writeValueAndType(hasValue, value, length-1); |
|
610 } else { |
|
611 builder.write(length-1); |
|
612 offset=builder.writeValueAndType(hasValue, value, 0); |
|
613 } |
|
614 } |
|
615 |
|
616 U_NAMESPACE_END |