intl/icu/source/common/stringtriebuilder.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2010-2012, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 * file name: stringtriebuilder.cpp
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2010dec24
michael@0 12 * created by: Markus W. Scherer
michael@0 13 */
michael@0 14
michael@0 15 #include "utypeinfo.h" // for 'typeid' to work
michael@0 16 #include "unicode/utypes.h"
michael@0 17 #include "unicode/stringtriebuilder.h"
michael@0 18 #include "uassert.h"
michael@0 19 #include "uhash.h"
michael@0 20
michael@0 21 U_CDECL_BEGIN
michael@0 22
michael@0 23 static int32_t U_CALLCONV
michael@0 24 hashStringTrieNode(const UHashTok key) {
michael@0 25 return icu::StringTrieBuilder::hashNode(key.pointer);
michael@0 26 }
michael@0 27
michael@0 28 static UBool U_CALLCONV
michael@0 29 equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
michael@0 30 return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer);
michael@0 31 }
michael@0 32
michael@0 33 U_CDECL_END
michael@0 34
michael@0 35 U_NAMESPACE_BEGIN
michael@0 36
michael@0 37 StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {}
michael@0 38
michael@0 39 StringTrieBuilder::~StringTrieBuilder() {
michael@0 40 deleteCompactBuilder();
michael@0 41 }
michael@0 42
michael@0 43 void
michael@0 44 StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
michael@0 45 if(U_FAILURE(errorCode)) {
michael@0 46 return;
michael@0 47 }
michael@0 48 nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL,
michael@0 49 sizeGuess, &errorCode);
michael@0 50 if(U_SUCCESS(errorCode)) {
michael@0 51 if(nodes==NULL) {
michael@0 52 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 53 } else {
michael@0 54 uhash_setKeyDeleter(nodes, uprv_deleteUObject);
michael@0 55 }
michael@0 56 }
michael@0 57 }
michael@0 58
michael@0 59 void
michael@0 60 StringTrieBuilder::deleteCompactBuilder() {
michael@0 61 uhash_close(nodes);
michael@0 62 nodes=NULL;
michael@0 63 }
michael@0 64
michael@0 65 void
michael@0 66 StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
michael@0 67 UErrorCode &errorCode) {
michael@0 68 if(buildOption==USTRINGTRIE_BUILD_FAST) {
michael@0 69 writeNode(0, elementsLength, 0);
michael@0 70 } else /* USTRINGTRIE_BUILD_SMALL */ {
michael@0 71 createCompactBuilder(2*elementsLength, errorCode);
michael@0 72 Node *root=makeNode(0, elementsLength, 0, errorCode);
michael@0 73 if(U_SUCCESS(errorCode)) {
michael@0 74 root->markRightEdgesFirst(-1);
michael@0 75 root->write(*this);
michael@0 76 }
michael@0 77 deleteCompactBuilder();
michael@0 78 }
michael@0 79 }
michael@0 80
michael@0 81 // Requires start<limit,
michael@0 82 // and all strings of the [start..limit[ elements must be sorted and
michael@0 83 // have a common prefix of length unitIndex.
michael@0 84 int32_t
michael@0 85 StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
michael@0 86 UBool hasValue=FALSE;
michael@0 87 int32_t value=0;
michael@0 88 int32_t type;
michael@0 89 if(unitIndex==getElementStringLength(start)) {
michael@0 90 // An intermediate or final value.
michael@0 91 value=getElementValue(start++);
michael@0 92 if(start==limit) {
michael@0 93 return writeValueAndFinal(value, TRUE); // final-value node
michael@0 94 }
michael@0 95 hasValue=TRUE;
michael@0 96 }
michael@0 97 // Now all [start..limit[ strings are longer than unitIndex.
michael@0 98 int32_t minUnit=getElementUnit(start, unitIndex);
michael@0 99 int32_t maxUnit=getElementUnit(limit-1, unitIndex);
michael@0 100 if(minUnit==maxUnit) {
michael@0 101 // Linear-match node: All strings have the same character at unitIndex.
michael@0 102 int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
michael@0 103 writeNode(start, limit, lastUnitIndex);
michael@0 104 // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
michael@0 105 int32_t length=lastUnitIndex-unitIndex;
michael@0 106 int32_t maxLinearMatchLength=getMaxLinearMatchLength();
michael@0 107 while(length>maxLinearMatchLength) {
michael@0 108 lastUnitIndex-=maxLinearMatchLength;
michael@0 109 length-=maxLinearMatchLength;
michael@0 110 writeElementUnits(start, lastUnitIndex, maxLinearMatchLength);
michael@0 111 write(getMinLinearMatch()+maxLinearMatchLength-1);
michael@0 112 }
michael@0 113 writeElementUnits(start, unitIndex, length);
michael@0 114 type=getMinLinearMatch()+length-1;
michael@0 115 } else {
michael@0 116 // Branch node.
michael@0 117 int32_t length=countElementUnits(start, limit, unitIndex);
michael@0 118 // length>=2 because minUnit!=maxUnit.
michael@0 119 writeBranchSubNode(start, limit, unitIndex, length);
michael@0 120 if(--length<getMinLinearMatch()) {
michael@0 121 type=length;
michael@0 122 } else {
michael@0 123 write(length);
michael@0 124 type=0;
michael@0 125 }
michael@0 126 }
michael@0 127 return writeValueAndType(hasValue, value, type);
michael@0 128 }
michael@0 129
michael@0 130 // start<limit && all strings longer than unitIndex &&
michael@0 131 // length different units at unitIndex
michael@0 132 int32_t
michael@0 133 StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
michael@0 134 UChar middleUnits[kMaxSplitBranchLevels];
michael@0 135 int32_t lessThan[kMaxSplitBranchLevels];
michael@0 136 int32_t ltLength=0;
michael@0 137 while(length>getMaxBranchLinearSubNodeLength()) {
michael@0 138 // Branch on the middle unit.
michael@0 139 // First, find the middle unit.
michael@0 140 int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
michael@0 141 // Encode the less-than branch first.
michael@0 142 middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
michael@0 143 lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2);
michael@0 144 ++ltLength;
michael@0 145 // Continue for the greater-or-equal branch.
michael@0 146 start=i;
michael@0 147 length=length-length/2;
michael@0 148 }
michael@0 149 // For each unit, find its elements array start and whether it has a final value.
michael@0 150 int32_t starts[kMaxBranchLinearSubNodeLength];
michael@0 151 UBool isFinal[kMaxBranchLinearSubNodeLength-1];
michael@0 152 int32_t unitNumber=0;
michael@0 153 do {
michael@0 154 int32_t i=starts[unitNumber]=start;
michael@0 155 UChar unit=getElementUnit(i++, unitIndex);
michael@0 156 i=indexOfElementWithNextUnit(i, unitIndex, unit);
michael@0 157 isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start);
michael@0 158 start=i;
michael@0 159 } while(++unitNumber<length-1);
michael@0 160 // unitNumber==length-1, and the maxUnit elements range is [start..limit[
michael@0 161 starts[unitNumber]=start;
michael@0 162
michael@0 163 // Write the sub-nodes in reverse order: The jump lengths are deltas from
michael@0 164 // after their own positions, so if we wrote the minUnit sub-node first,
michael@0 165 // then its jump delta would be larger.
michael@0 166 // Instead we write the minUnit sub-node last, for a shorter delta.
michael@0 167 int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1];
michael@0 168 do {
michael@0 169 --unitNumber;
michael@0 170 if(!isFinal[unitNumber]) {
michael@0 171 jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
michael@0 172 }
michael@0 173 } while(unitNumber>0);
michael@0 174 // The maxUnit sub-node is written as the very last one because we do
michael@0 175 // not jump for it at all.
michael@0 176 unitNumber=length-1;
michael@0 177 writeNode(start, limit, unitIndex+1);
michael@0 178 int32_t offset=write(getElementUnit(start, unitIndex));
michael@0 179 // Write the rest of this node's unit-value pairs.
michael@0 180 while(--unitNumber>=0) {
michael@0 181 start=starts[unitNumber];
michael@0 182 int32_t value;
michael@0 183 if(isFinal[unitNumber]) {
michael@0 184 // Write the final value for the one string ending with this unit.
michael@0 185 value=getElementValue(start);
michael@0 186 } else {
michael@0 187 // Write the delta to the start position of the sub-node.
michael@0 188 value=offset-jumpTargets[unitNumber];
michael@0 189 }
michael@0 190 writeValueAndFinal(value, isFinal[unitNumber]);
michael@0 191 offset=write(getElementUnit(start, unitIndex));
michael@0 192 }
michael@0 193 // Write the split-branch nodes.
michael@0 194 while(ltLength>0) {
michael@0 195 --ltLength;
michael@0 196 writeDeltaTo(lessThan[ltLength]);
michael@0 197 offset=write(middleUnits[ltLength]);
michael@0 198 }
michael@0 199 return offset;
michael@0 200 }
michael@0 201
michael@0 202 // Requires start<limit,
michael@0 203 // and all strings of the [start..limit[ elements must be sorted and
michael@0 204 // have a common prefix of length unitIndex.
michael@0 205 StringTrieBuilder::Node *
michael@0 206 StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
michael@0 207 if(U_FAILURE(errorCode)) {
michael@0 208 return NULL;
michael@0 209 }
michael@0 210 UBool hasValue=FALSE;
michael@0 211 int32_t value=0;
michael@0 212 if(unitIndex==getElementStringLength(start)) {
michael@0 213 // An intermediate or final value.
michael@0 214 value=getElementValue(start++);
michael@0 215 if(start==limit) {
michael@0 216 return registerFinalValue(value, errorCode);
michael@0 217 }
michael@0 218 hasValue=TRUE;
michael@0 219 }
michael@0 220 Node *node;
michael@0 221 // Now all [start..limit[ strings are longer than unitIndex.
michael@0 222 int32_t minUnit=getElementUnit(start, unitIndex);
michael@0 223 int32_t maxUnit=getElementUnit(limit-1, unitIndex);
michael@0 224 if(minUnit==maxUnit) {
michael@0 225 // Linear-match node: All strings have the same character at unitIndex.
michael@0 226 int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
michael@0 227 Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode);
michael@0 228 // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
michael@0 229 int32_t length=lastUnitIndex-unitIndex;
michael@0 230 int32_t maxLinearMatchLength=getMaxLinearMatchLength();
michael@0 231 while(length>maxLinearMatchLength) {
michael@0 232 lastUnitIndex-=maxLinearMatchLength;
michael@0 233 length-=maxLinearMatchLength;
michael@0 234 node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode);
michael@0 235 nextNode=registerNode(node, errorCode);
michael@0 236 }
michael@0 237 node=createLinearMatchNode(start, unitIndex, length, nextNode);
michael@0 238 } else {
michael@0 239 // Branch node.
michael@0 240 int32_t length=countElementUnits(start, limit, unitIndex);
michael@0 241 // length>=2 because minUnit!=maxUnit.
michael@0 242 Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
michael@0 243 node=new BranchHeadNode(length, subNode);
michael@0 244 }
michael@0 245 if(hasValue && node!=NULL) {
michael@0 246 if(matchNodesCanHaveValues()) {
michael@0 247 ((ValueNode *)node)->setValue(value);
michael@0 248 } else {
michael@0 249 node=new IntermediateValueNode(value, registerNode(node, errorCode));
michael@0 250 }
michael@0 251 }
michael@0 252 return registerNode(node, errorCode);
michael@0 253 }
michael@0 254
michael@0 255 // start<limit && all strings longer than unitIndex &&
michael@0 256 // length different units at unitIndex
michael@0 257 StringTrieBuilder::Node *
michael@0 258 StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
michael@0 259 int32_t length, UErrorCode &errorCode) {
michael@0 260 if(U_FAILURE(errorCode)) {
michael@0 261 return NULL;
michael@0 262 }
michael@0 263 UChar middleUnits[kMaxSplitBranchLevels];
michael@0 264 Node *lessThan[kMaxSplitBranchLevels];
michael@0 265 int32_t ltLength=0;
michael@0 266 while(length>getMaxBranchLinearSubNodeLength()) {
michael@0 267 // Branch on the middle unit.
michael@0 268 // First, find the middle unit.
michael@0 269 int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
michael@0 270 // Create the less-than branch.
michael@0 271 middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
michael@0 272 lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
michael@0 273 ++ltLength;
michael@0 274 // Continue for the greater-or-equal branch.
michael@0 275 start=i;
michael@0 276 length=length-length/2;
michael@0 277 }
michael@0 278 if(U_FAILURE(errorCode)) {
michael@0 279 return NULL;
michael@0 280 }
michael@0 281 ListBranchNode *listNode=new ListBranchNode();
michael@0 282 if(listNode==NULL) {
michael@0 283 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 284 return NULL;
michael@0 285 }
michael@0 286 // For each unit, find its elements array start and whether it has a final value.
michael@0 287 int32_t unitNumber=0;
michael@0 288 do {
michael@0 289 int32_t i=start;
michael@0 290 UChar unit=getElementUnit(i++, unitIndex);
michael@0 291 i=indexOfElementWithNextUnit(i, unitIndex, unit);
michael@0 292 if(start==i-1 && unitIndex+1==getElementStringLength(start)) {
michael@0 293 listNode->add(unit, getElementValue(start));
michael@0 294 } else {
michael@0 295 listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
michael@0 296 }
michael@0 297 start=i;
michael@0 298 } while(++unitNumber<length-1);
michael@0 299 // unitNumber==length-1, and the maxUnit elements range is [start..limit[
michael@0 300 UChar unit=getElementUnit(start, unitIndex);
michael@0 301 if(start==limit-1 && unitIndex+1==getElementStringLength(start)) {
michael@0 302 listNode->add(unit, getElementValue(start));
michael@0 303 } else {
michael@0 304 listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
michael@0 305 }
michael@0 306 Node *node=registerNode(listNode, errorCode);
michael@0 307 // Create the split-branch nodes.
michael@0 308 while(ltLength>0) {
michael@0 309 --ltLength;
michael@0 310 node=registerNode(
michael@0 311 new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
michael@0 312 }
michael@0 313 return node;
michael@0 314 }
michael@0 315
michael@0 316 StringTrieBuilder::Node *
michael@0 317 StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
michael@0 318 if(U_FAILURE(errorCode)) {
michael@0 319 delete newNode;
michael@0 320 return NULL;
michael@0 321 }
michael@0 322 if(newNode==NULL) {
michael@0 323 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 324 return NULL;
michael@0 325 }
michael@0 326 const UHashElement *old=uhash_find(nodes, newNode);
michael@0 327 if(old!=NULL) {
michael@0 328 delete newNode;
michael@0 329 return (Node *)old->key.pointer;
michael@0 330 }
michael@0 331 // If uhash_puti() returns a non-zero value from an equivalent, previously
michael@0 332 // registered node, then uhash_find() failed to find that and we will leak newNode.
michael@0 333 #if U_DEBUG
michael@0 334 int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
michael@0 335 #endif
michael@0 336 uhash_puti(nodes, newNode, 1, &errorCode);
michael@0 337 U_ASSERT(oldValue==0);
michael@0 338 if(U_FAILURE(errorCode)) {
michael@0 339 delete newNode;
michael@0 340 return NULL;
michael@0 341 }
michael@0 342 return newNode;
michael@0 343 }
michael@0 344
michael@0 345 StringTrieBuilder::Node *
michael@0 346 StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
michael@0 347 if(U_FAILURE(errorCode)) {
michael@0 348 return NULL;
michael@0 349 }
michael@0 350 FinalValueNode key(value);
michael@0 351 const UHashElement *old=uhash_find(nodes, &key);
michael@0 352 if(old!=NULL) {
michael@0 353 return (Node *)old->key.pointer;
michael@0 354 }
michael@0 355 Node *newNode=new FinalValueNode(value);
michael@0 356 if(newNode==NULL) {
michael@0 357 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 358 return NULL;
michael@0 359 }
michael@0 360 // If uhash_puti() returns a non-zero value from an equivalent, previously
michael@0 361 // registered node, then uhash_find() failed to find that and we will leak newNode.
michael@0 362 #if U_DEBUG
michael@0 363 int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
michael@0 364 #endif
michael@0 365 uhash_puti(nodes, newNode, 1, &errorCode);
michael@0 366 U_ASSERT(oldValue==0);
michael@0 367 if(U_FAILURE(errorCode)) {
michael@0 368 delete newNode;
michael@0 369 return NULL;
michael@0 370 }
michael@0 371 return newNode;
michael@0 372 }
michael@0 373
michael@0 374 UBool
michael@0 375 StringTrieBuilder::hashNode(const void *node) {
michael@0 376 return ((const Node *)node)->hashCode();
michael@0 377 }
michael@0 378
michael@0 379 UBool
michael@0 380 StringTrieBuilder::equalNodes(const void *left, const void *right) {
michael@0 381 return *(const Node *)left==*(const Node *)right;
michael@0 382 }
michael@0 383
michael@0 384 UBool
michael@0 385 StringTrieBuilder::Node::operator==(const Node &other) const {
michael@0 386 return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
michael@0 387 }
michael@0 388
michael@0 389 int32_t
michael@0 390 StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
michael@0 391 if(offset==0) {
michael@0 392 offset=edgeNumber;
michael@0 393 }
michael@0 394 return edgeNumber;
michael@0 395 }
michael@0 396
michael@0 397 UBool
michael@0 398 StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
michael@0 399 if(this==&other) {
michael@0 400 return TRUE;
michael@0 401 }
michael@0 402 if(!Node::operator==(other)) {
michael@0 403 return FALSE;
michael@0 404 }
michael@0 405 const FinalValueNode &o=(const FinalValueNode &)other;
michael@0 406 return value==o.value;
michael@0 407 }
michael@0 408
michael@0 409 void
michael@0 410 StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
michael@0 411 offset=builder.writeValueAndFinal(value, TRUE);
michael@0 412 }
michael@0 413
michael@0 414 UBool
michael@0 415 StringTrieBuilder::ValueNode::operator==(const Node &other) const {
michael@0 416 if(this==&other) {
michael@0 417 return TRUE;
michael@0 418 }
michael@0 419 if(!Node::operator==(other)) {
michael@0 420 return FALSE;
michael@0 421 }
michael@0 422 const ValueNode &o=(const ValueNode &)other;
michael@0 423 return hasValue==o.hasValue && (!hasValue || value==o.value);
michael@0 424 }
michael@0 425
michael@0 426 UBool
michael@0 427 StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
michael@0 428 if(this==&other) {
michael@0 429 return TRUE;
michael@0 430 }
michael@0 431 if(!ValueNode::operator==(other)) {
michael@0 432 return FALSE;
michael@0 433 }
michael@0 434 const IntermediateValueNode &o=(const IntermediateValueNode &)other;
michael@0 435 return next==o.next;
michael@0 436 }
michael@0 437
michael@0 438 int32_t
michael@0 439 StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
michael@0 440 if(offset==0) {
michael@0 441 offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
michael@0 442 }
michael@0 443 return edgeNumber;
michael@0 444 }
michael@0 445
michael@0 446 void
michael@0 447 StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
michael@0 448 next->write(builder);
michael@0 449 offset=builder.writeValueAndFinal(value, FALSE);
michael@0 450 }
michael@0 451
michael@0 452 UBool
michael@0 453 StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
michael@0 454 if(this==&other) {
michael@0 455 return TRUE;
michael@0 456 }
michael@0 457 if(!ValueNode::operator==(other)) {
michael@0 458 return FALSE;
michael@0 459 }
michael@0 460 const LinearMatchNode &o=(const LinearMatchNode &)other;
michael@0 461 return length==o.length && next==o.next;
michael@0 462 }
michael@0 463
michael@0 464 int32_t
michael@0 465 StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
michael@0 466 if(offset==0) {
michael@0 467 offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
michael@0 468 }
michael@0 469 return edgeNumber;
michael@0 470 }
michael@0 471
michael@0 472 UBool
michael@0 473 StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
michael@0 474 if(this==&other) {
michael@0 475 return TRUE;
michael@0 476 }
michael@0 477 if(!Node::operator==(other)) {
michael@0 478 return FALSE;
michael@0 479 }
michael@0 480 const ListBranchNode &o=(const ListBranchNode &)other;
michael@0 481 for(int32_t i=0; i<length; ++i) {
michael@0 482 if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
michael@0 483 return FALSE;
michael@0 484 }
michael@0 485 }
michael@0 486 return TRUE;
michael@0 487 }
michael@0 488
michael@0 489 int32_t
michael@0 490 StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
michael@0 491 if(offset==0) {
michael@0 492 firstEdgeNumber=edgeNumber;
michael@0 493 int32_t step=0;
michael@0 494 int32_t i=length;
michael@0 495 do {
michael@0 496 Node *edge=equal[--i];
michael@0 497 if(edge!=NULL) {
michael@0 498 edgeNumber=edge->markRightEdgesFirst(edgeNumber-step);
michael@0 499 }
michael@0 500 // For all but the rightmost edge, decrement the edge number.
michael@0 501 step=1;
michael@0 502 } while(i>0);
michael@0 503 offset=edgeNumber;
michael@0 504 }
michael@0 505 return edgeNumber;
michael@0 506 }
michael@0 507
michael@0 508 void
michael@0 509 StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
michael@0 510 // Write the sub-nodes in reverse order: The jump lengths are deltas from
michael@0 511 // after their own positions, so if we wrote the minUnit sub-node first,
michael@0 512 // then its jump delta would be larger.
michael@0 513 // Instead we write the minUnit sub-node last, for a shorter delta.
michael@0 514 int32_t unitNumber=length-1;
michael@0 515 Node *rightEdge=equal[unitNumber];
michael@0 516 int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset();
michael@0 517 do {
michael@0 518 --unitNumber;
michael@0 519 if(equal[unitNumber]!=NULL) {
michael@0 520 equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
michael@0 521 }
michael@0 522 } while(unitNumber>0);
michael@0 523 // The maxUnit sub-node is written as the very last one because we do
michael@0 524 // not jump for it at all.
michael@0 525 unitNumber=length-1;
michael@0 526 if(rightEdge==NULL) {
michael@0 527 builder.writeValueAndFinal(values[unitNumber], TRUE);
michael@0 528 } else {
michael@0 529 rightEdge->write(builder);
michael@0 530 }
michael@0 531 offset=builder.write(units[unitNumber]);
michael@0 532 // Write the rest of this node's unit-value pairs.
michael@0 533 while(--unitNumber>=0) {
michael@0 534 int32_t value;
michael@0 535 UBool isFinal;
michael@0 536 if(equal[unitNumber]==NULL) {
michael@0 537 // Write the final value for the one string ending with this unit.
michael@0 538 value=values[unitNumber];
michael@0 539 isFinal=TRUE;
michael@0 540 } else {
michael@0 541 // Write the delta to the start position of the sub-node.
michael@0 542 U_ASSERT(equal[unitNumber]->getOffset()>0);
michael@0 543 value=offset-equal[unitNumber]->getOffset();
michael@0 544 isFinal=FALSE;
michael@0 545 }
michael@0 546 builder.writeValueAndFinal(value, isFinal);
michael@0 547 offset=builder.write(units[unitNumber]);
michael@0 548 }
michael@0 549 }
michael@0 550
michael@0 551 UBool
michael@0 552 StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
michael@0 553 if(this==&other) {
michael@0 554 return TRUE;
michael@0 555 }
michael@0 556 if(!Node::operator==(other)) {
michael@0 557 return FALSE;
michael@0 558 }
michael@0 559 const SplitBranchNode &o=(const SplitBranchNode &)other;
michael@0 560 return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
michael@0 561 }
michael@0 562
michael@0 563 int32_t
michael@0 564 StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
michael@0 565 if(offset==0) {
michael@0 566 firstEdgeNumber=edgeNumber;
michael@0 567 edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber);
michael@0 568 offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1);
michael@0 569 }
michael@0 570 return edgeNumber;
michael@0 571 }
michael@0 572
michael@0 573 void
michael@0 574 StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
michael@0 575 // Encode the less-than branch first.
michael@0 576 lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
michael@0 577 // Encode the greater-or-equal branch last because we do not jump for it at all.
michael@0 578 greaterOrEqual->write(builder);
michael@0 579 // Write this node.
michael@0 580 U_ASSERT(lessThan->getOffset()>0);
michael@0 581 builder.writeDeltaTo(lessThan->getOffset()); // less-than
michael@0 582 offset=builder.write(unit);
michael@0 583 }
michael@0 584
michael@0 585 UBool
michael@0 586 StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
michael@0 587 if(this==&other) {
michael@0 588 return TRUE;
michael@0 589 }
michael@0 590 if(!ValueNode::operator==(other)) {
michael@0 591 return FALSE;
michael@0 592 }
michael@0 593 const BranchHeadNode &o=(const BranchHeadNode &)other;
michael@0 594 return length==o.length && next==o.next;
michael@0 595 }
michael@0 596
michael@0 597 int32_t
michael@0 598 StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
michael@0 599 if(offset==0) {
michael@0 600 offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
michael@0 601 }
michael@0 602 return edgeNumber;
michael@0 603 }
michael@0 604
michael@0 605 void
michael@0 606 StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
michael@0 607 next->write(builder);
michael@0 608 if(length<=builder.getMinLinearMatch()) {
michael@0 609 offset=builder.writeValueAndType(hasValue, value, length-1);
michael@0 610 } else {
michael@0 611 builder.write(length-1);
michael@0 612 offset=builder.writeValueAndType(hasValue, value, 0);
michael@0 613 }
michael@0 614 }
michael@0 615
michael@0 616 U_NAMESPACE_END

mercurial