Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * Copyright (C) 2010-2012, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ******************************************************************************* |
michael@0 | 6 | * file name: stringtriebuilder.cpp |
michael@0 | 7 | * encoding: US-ASCII |
michael@0 | 8 | * tab size: 8 (not used) |
michael@0 | 9 | * indentation:4 |
michael@0 | 10 | * |
michael@0 | 11 | * created on: 2010dec24 |
michael@0 | 12 | * created by: Markus W. Scherer |
michael@0 | 13 | */ |
michael@0 | 14 | |
michael@0 | 15 | #include "utypeinfo.h" // for 'typeid' to work |
michael@0 | 16 | #include "unicode/utypes.h" |
michael@0 | 17 | #include "unicode/stringtriebuilder.h" |
michael@0 | 18 | #include "uassert.h" |
michael@0 | 19 | #include "uhash.h" |
michael@0 | 20 | |
michael@0 | 21 | U_CDECL_BEGIN |
michael@0 | 22 | |
michael@0 | 23 | static int32_t U_CALLCONV |
michael@0 | 24 | hashStringTrieNode(const UHashTok key) { |
michael@0 | 25 | return icu::StringTrieBuilder::hashNode(key.pointer); |
michael@0 | 26 | } |
michael@0 | 27 | |
michael@0 | 28 | static UBool U_CALLCONV |
michael@0 | 29 | equalStringTrieNodes(const UHashTok key1, const UHashTok key2) { |
michael@0 | 30 | return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer); |
michael@0 | 31 | } |
michael@0 | 32 | |
michael@0 | 33 | U_CDECL_END |
michael@0 | 34 | |
michael@0 | 35 | U_NAMESPACE_BEGIN |
michael@0 | 36 | |
michael@0 | 37 | StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {} |
michael@0 | 38 | |
michael@0 | 39 | StringTrieBuilder::~StringTrieBuilder() { |
michael@0 | 40 | deleteCompactBuilder(); |
michael@0 | 41 | } |
michael@0 | 42 | |
michael@0 | 43 | void |
michael@0 | 44 | StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) { |
michael@0 | 45 | if(U_FAILURE(errorCode)) { |
michael@0 | 46 | return; |
michael@0 | 47 | } |
michael@0 | 48 | nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL, |
michael@0 | 49 | sizeGuess, &errorCode); |
michael@0 | 50 | if(U_SUCCESS(errorCode)) { |
michael@0 | 51 | if(nodes==NULL) { |
michael@0 | 52 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 53 | } else { |
michael@0 | 54 | uhash_setKeyDeleter(nodes, uprv_deleteUObject); |
michael@0 | 55 | } |
michael@0 | 56 | } |
michael@0 | 57 | } |
michael@0 | 58 | |
michael@0 | 59 | void |
michael@0 | 60 | StringTrieBuilder::deleteCompactBuilder() { |
michael@0 | 61 | uhash_close(nodes); |
michael@0 | 62 | nodes=NULL; |
michael@0 | 63 | } |
michael@0 | 64 | |
michael@0 | 65 | void |
michael@0 | 66 | StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength, |
michael@0 | 67 | UErrorCode &errorCode) { |
michael@0 | 68 | if(buildOption==USTRINGTRIE_BUILD_FAST) { |
michael@0 | 69 | writeNode(0, elementsLength, 0); |
michael@0 | 70 | } else /* USTRINGTRIE_BUILD_SMALL */ { |
michael@0 | 71 | createCompactBuilder(2*elementsLength, errorCode); |
michael@0 | 72 | Node *root=makeNode(0, elementsLength, 0, errorCode); |
michael@0 | 73 | if(U_SUCCESS(errorCode)) { |
michael@0 | 74 | root->markRightEdgesFirst(-1); |
michael@0 | 75 | root->write(*this); |
michael@0 | 76 | } |
michael@0 | 77 | deleteCompactBuilder(); |
michael@0 | 78 | } |
michael@0 | 79 | } |
michael@0 | 80 | |
michael@0 | 81 | // Requires start<limit, |
michael@0 | 82 | // and all strings of the [start..limit[ elements must be sorted and |
michael@0 | 83 | // have a common prefix of length unitIndex. |
michael@0 | 84 | int32_t |
michael@0 | 85 | StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) { |
michael@0 | 86 | UBool hasValue=FALSE; |
michael@0 | 87 | int32_t value=0; |
michael@0 | 88 | int32_t type; |
michael@0 | 89 | if(unitIndex==getElementStringLength(start)) { |
michael@0 | 90 | // An intermediate or final value. |
michael@0 | 91 | value=getElementValue(start++); |
michael@0 | 92 | if(start==limit) { |
michael@0 | 93 | return writeValueAndFinal(value, TRUE); // final-value node |
michael@0 | 94 | } |
michael@0 | 95 | hasValue=TRUE; |
michael@0 | 96 | } |
michael@0 | 97 | // Now all [start..limit[ strings are longer than unitIndex. |
michael@0 | 98 | int32_t minUnit=getElementUnit(start, unitIndex); |
michael@0 | 99 | int32_t maxUnit=getElementUnit(limit-1, unitIndex); |
michael@0 | 100 | if(minUnit==maxUnit) { |
michael@0 | 101 | // Linear-match node: All strings have the same character at unitIndex. |
michael@0 | 102 | int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); |
michael@0 | 103 | writeNode(start, limit, lastUnitIndex); |
michael@0 | 104 | // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. |
michael@0 | 105 | int32_t length=lastUnitIndex-unitIndex; |
michael@0 | 106 | int32_t maxLinearMatchLength=getMaxLinearMatchLength(); |
michael@0 | 107 | while(length>maxLinearMatchLength) { |
michael@0 | 108 | lastUnitIndex-=maxLinearMatchLength; |
michael@0 | 109 | length-=maxLinearMatchLength; |
michael@0 | 110 | writeElementUnits(start, lastUnitIndex, maxLinearMatchLength); |
michael@0 | 111 | write(getMinLinearMatch()+maxLinearMatchLength-1); |
michael@0 | 112 | } |
michael@0 | 113 | writeElementUnits(start, unitIndex, length); |
michael@0 | 114 | type=getMinLinearMatch()+length-1; |
michael@0 | 115 | } else { |
michael@0 | 116 | // Branch node. |
michael@0 | 117 | int32_t length=countElementUnits(start, limit, unitIndex); |
michael@0 | 118 | // length>=2 because minUnit!=maxUnit. |
michael@0 | 119 | writeBranchSubNode(start, limit, unitIndex, length); |
michael@0 | 120 | if(--length<getMinLinearMatch()) { |
michael@0 | 121 | type=length; |
michael@0 | 122 | } else { |
michael@0 | 123 | write(length); |
michael@0 | 124 | type=0; |
michael@0 | 125 | } |
michael@0 | 126 | } |
michael@0 | 127 | return writeValueAndType(hasValue, value, type); |
michael@0 | 128 | } |
michael@0 | 129 | |
michael@0 | 130 | // start<limit && all strings longer than unitIndex && |
michael@0 | 131 | // length different units at unitIndex |
michael@0 | 132 | int32_t |
michael@0 | 133 | StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) { |
michael@0 | 134 | UChar middleUnits[kMaxSplitBranchLevels]; |
michael@0 | 135 | int32_t lessThan[kMaxSplitBranchLevels]; |
michael@0 | 136 | int32_t ltLength=0; |
michael@0 | 137 | while(length>getMaxBranchLinearSubNodeLength()) { |
michael@0 | 138 | // Branch on the middle unit. |
michael@0 | 139 | // First, find the middle unit. |
michael@0 | 140 | int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); |
michael@0 | 141 | // Encode the less-than branch first. |
michael@0 | 142 | middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit |
michael@0 | 143 | lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2); |
michael@0 | 144 | ++ltLength; |
michael@0 | 145 | // Continue for the greater-or-equal branch. |
michael@0 | 146 | start=i; |
michael@0 | 147 | length=length-length/2; |
michael@0 | 148 | } |
michael@0 | 149 | // For each unit, find its elements array start and whether it has a final value. |
michael@0 | 150 | int32_t starts[kMaxBranchLinearSubNodeLength]; |
michael@0 | 151 | UBool isFinal[kMaxBranchLinearSubNodeLength-1]; |
michael@0 | 152 | int32_t unitNumber=0; |
michael@0 | 153 | do { |
michael@0 | 154 | int32_t i=starts[unitNumber]=start; |
michael@0 | 155 | UChar unit=getElementUnit(i++, unitIndex); |
michael@0 | 156 | i=indexOfElementWithNextUnit(i, unitIndex, unit); |
michael@0 | 157 | isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start); |
michael@0 | 158 | start=i; |
michael@0 | 159 | } while(++unitNumber<length-1); |
michael@0 | 160 | // unitNumber==length-1, and the maxUnit elements range is [start..limit[ |
michael@0 | 161 | starts[unitNumber]=start; |
michael@0 | 162 | |
michael@0 | 163 | // Write the sub-nodes in reverse order: The jump lengths are deltas from |
michael@0 | 164 | // after their own positions, so if we wrote the minUnit sub-node first, |
michael@0 | 165 | // then its jump delta would be larger. |
michael@0 | 166 | // Instead we write the minUnit sub-node last, for a shorter delta. |
michael@0 | 167 | int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1]; |
michael@0 | 168 | do { |
michael@0 | 169 | --unitNumber; |
michael@0 | 170 | if(!isFinal[unitNumber]) { |
michael@0 | 171 | jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1); |
michael@0 | 172 | } |
michael@0 | 173 | } while(unitNumber>0); |
michael@0 | 174 | // The maxUnit sub-node is written as the very last one because we do |
michael@0 | 175 | // not jump for it at all. |
michael@0 | 176 | unitNumber=length-1; |
michael@0 | 177 | writeNode(start, limit, unitIndex+1); |
michael@0 | 178 | int32_t offset=write(getElementUnit(start, unitIndex)); |
michael@0 | 179 | // Write the rest of this node's unit-value pairs. |
michael@0 | 180 | while(--unitNumber>=0) { |
michael@0 | 181 | start=starts[unitNumber]; |
michael@0 | 182 | int32_t value; |
michael@0 | 183 | if(isFinal[unitNumber]) { |
michael@0 | 184 | // Write the final value for the one string ending with this unit. |
michael@0 | 185 | value=getElementValue(start); |
michael@0 | 186 | } else { |
michael@0 | 187 | // Write the delta to the start position of the sub-node. |
michael@0 | 188 | value=offset-jumpTargets[unitNumber]; |
michael@0 | 189 | } |
michael@0 | 190 | writeValueAndFinal(value, isFinal[unitNumber]); |
michael@0 | 191 | offset=write(getElementUnit(start, unitIndex)); |
michael@0 | 192 | } |
michael@0 | 193 | // Write the split-branch nodes. |
michael@0 | 194 | while(ltLength>0) { |
michael@0 | 195 | --ltLength; |
michael@0 | 196 | writeDeltaTo(lessThan[ltLength]); |
michael@0 | 197 | offset=write(middleUnits[ltLength]); |
michael@0 | 198 | } |
michael@0 | 199 | return offset; |
michael@0 | 200 | } |
michael@0 | 201 | |
michael@0 | 202 | // Requires start<limit, |
michael@0 | 203 | // and all strings of the [start..limit[ elements must be sorted and |
michael@0 | 204 | // have a common prefix of length unitIndex. |
michael@0 | 205 | StringTrieBuilder::Node * |
michael@0 | 206 | StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) { |
michael@0 | 207 | if(U_FAILURE(errorCode)) { |
michael@0 | 208 | return NULL; |
michael@0 | 209 | } |
michael@0 | 210 | UBool hasValue=FALSE; |
michael@0 | 211 | int32_t value=0; |
michael@0 | 212 | if(unitIndex==getElementStringLength(start)) { |
michael@0 | 213 | // An intermediate or final value. |
michael@0 | 214 | value=getElementValue(start++); |
michael@0 | 215 | if(start==limit) { |
michael@0 | 216 | return registerFinalValue(value, errorCode); |
michael@0 | 217 | } |
michael@0 | 218 | hasValue=TRUE; |
michael@0 | 219 | } |
michael@0 | 220 | Node *node; |
michael@0 | 221 | // Now all [start..limit[ strings are longer than unitIndex. |
michael@0 | 222 | int32_t minUnit=getElementUnit(start, unitIndex); |
michael@0 | 223 | int32_t maxUnit=getElementUnit(limit-1, unitIndex); |
michael@0 | 224 | if(minUnit==maxUnit) { |
michael@0 | 225 | // Linear-match node: All strings have the same character at unitIndex. |
michael@0 | 226 | int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); |
michael@0 | 227 | Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode); |
michael@0 | 228 | // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. |
michael@0 | 229 | int32_t length=lastUnitIndex-unitIndex; |
michael@0 | 230 | int32_t maxLinearMatchLength=getMaxLinearMatchLength(); |
michael@0 | 231 | while(length>maxLinearMatchLength) { |
michael@0 | 232 | lastUnitIndex-=maxLinearMatchLength; |
michael@0 | 233 | length-=maxLinearMatchLength; |
michael@0 | 234 | node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode); |
michael@0 | 235 | nextNode=registerNode(node, errorCode); |
michael@0 | 236 | } |
michael@0 | 237 | node=createLinearMatchNode(start, unitIndex, length, nextNode); |
michael@0 | 238 | } else { |
michael@0 | 239 | // Branch node. |
michael@0 | 240 | int32_t length=countElementUnits(start, limit, unitIndex); |
michael@0 | 241 | // length>=2 because minUnit!=maxUnit. |
michael@0 | 242 | Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode); |
michael@0 | 243 | node=new BranchHeadNode(length, subNode); |
michael@0 | 244 | } |
michael@0 | 245 | if(hasValue && node!=NULL) { |
michael@0 | 246 | if(matchNodesCanHaveValues()) { |
michael@0 | 247 | ((ValueNode *)node)->setValue(value); |
michael@0 | 248 | } else { |
michael@0 | 249 | node=new IntermediateValueNode(value, registerNode(node, errorCode)); |
michael@0 | 250 | } |
michael@0 | 251 | } |
michael@0 | 252 | return registerNode(node, errorCode); |
michael@0 | 253 | } |
michael@0 | 254 | |
michael@0 | 255 | // start<limit && all strings longer than unitIndex && |
michael@0 | 256 | // length different units at unitIndex |
michael@0 | 257 | StringTrieBuilder::Node * |
michael@0 | 258 | StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, |
michael@0 | 259 | int32_t length, UErrorCode &errorCode) { |
michael@0 | 260 | if(U_FAILURE(errorCode)) { |
michael@0 | 261 | return NULL; |
michael@0 | 262 | } |
michael@0 | 263 | UChar middleUnits[kMaxSplitBranchLevels]; |
michael@0 | 264 | Node *lessThan[kMaxSplitBranchLevels]; |
michael@0 | 265 | int32_t ltLength=0; |
michael@0 | 266 | while(length>getMaxBranchLinearSubNodeLength()) { |
michael@0 | 267 | // Branch on the middle unit. |
michael@0 | 268 | // First, find the middle unit. |
michael@0 | 269 | int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); |
michael@0 | 270 | // Create the less-than branch. |
michael@0 | 271 | middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit |
michael@0 | 272 | lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode); |
michael@0 | 273 | ++ltLength; |
michael@0 | 274 | // Continue for the greater-or-equal branch. |
michael@0 | 275 | start=i; |
michael@0 | 276 | length=length-length/2; |
michael@0 | 277 | } |
michael@0 | 278 | if(U_FAILURE(errorCode)) { |
michael@0 | 279 | return NULL; |
michael@0 | 280 | } |
michael@0 | 281 | ListBranchNode *listNode=new ListBranchNode(); |
michael@0 | 282 | if(listNode==NULL) { |
michael@0 | 283 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 284 | return NULL; |
michael@0 | 285 | } |
michael@0 | 286 | // For each unit, find its elements array start and whether it has a final value. |
michael@0 | 287 | int32_t unitNumber=0; |
michael@0 | 288 | do { |
michael@0 | 289 | int32_t i=start; |
michael@0 | 290 | UChar unit=getElementUnit(i++, unitIndex); |
michael@0 | 291 | i=indexOfElementWithNextUnit(i, unitIndex, unit); |
michael@0 | 292 | if(start==i-1 && unitIndex+1==getElementStringLength(start)) { |
michael@0 | 293 | listNode->add(unit, getElementValue(start)); |
michael@0 | 294 | } else { |
michael@0 | 295 | listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode)); |
michael@0 | 296 | } |
michael@0 | 297 | start=i; |
michael@0 | 298 | } while(++unitNumber<length-1); |
michael@0 | 299 | // unitNumber==length-1, and the maxUnit elements range is [start..limit[ |
michael@0 | 300 | UChar unit=getElementUnit(start, unitIndex); |
michael@0 | 301 | if(start==limit-1 && unitIndex+1==getElementStringLength(start)) { |
michael@0 | 302 | listNode->add(unit, getElementValue(start)); |
michael@0 | 303 | } else { |
michael@0 | 304 | listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode)); |
michael@0 | 305 | } |
michael@0 | 306 | Node *node=registerNode(listNode, errorCode); |
michael@0 | 307 | // Create the split-branch nodes. |
michael@0 | 308 | while(ltLength>0) { |
michael@0 | 309 | --ltLength; |
michael@0 | 310 | node=registerNode( |
michael@0 | 311 | new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode); |
michael@0 | 312 | } |
michael@0 | 313 | return node; |
michael@0 | 314 | } |
michael@0 | 315 | |
michael@0 | 316 | StringTrieBuilder::Node * |
michael@0 | 317 | StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) { |
michael@0 | 318 | if(U_FAILURE(errorCode)) { |
michael@0 | 319 | delete newNode; |
michael@0 | 320 | return NULL; |
michael@0 | 321 | } |
michael@0 | 322 | if(newNode==NULL) { |
michael@0 | 323 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 324 | return NULL; |
michael@0 | 325 | } |
michael@0 | 326 | const UHashElement *old=uhash_find(nodes, newNode); |
michael@0 | 327 | if(old!=NULL) { |
michael@0 | 328 | delete newNode; |
michael@0 | 329 | return (Node *)old->key.pointer; |
michael@0 | 330 | } |
michael@0 | 331 | // If uhash_puti() returns a non-zero value from an equivalent, previously |
michael@0 | 332 | // registered node, then uhash_find() failed to find that and we will leak newNode. |
michael@0 | 333 | #if U_DEBUG |
michael@0 | 334 | int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. |
michael@0 | 335 | #endif |
michael@0 | 336 | uhash_puti(nodes, newNode, 1, &errorCode); |
michael@0 | 337 | U_ASSERT(oldValue==0); |
michael@0 | 338 | if(U_FAILURE(errorCode)) { |
michael@0 | 339 | delete newNode; |
michael@0 | 340 | return NULL; |
michael@0 | 341 | } |
michael@0 | 342 | return newNode; |
michael@0 | 343 | } |
michael@0 | 344 | |
michael@0 | 345 | StringTrieBuilder::Node * |
michael@0 | 346 | StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) { |
michael@0 | 347 | if(U_FAILURE(errorCode)) { |
michael@0 | 348 | return NULL; |
michael@0 | 349 | } |
michael@0 | 350 | FinalValueNode key(value); |
michael@0 | 351 | const UHashElement *old=uhash_find(nodes, &key); |
michael@0 | 352 | if(old!=NULL) { |
michael@0 | 353 | return (Node *)old->key.pointer; |
michael@0 | 354 | } |
michael@0 | 355 | Node *newNode=new FinalValueNode(value); |
michael@0 | 356 | if(newNode==NULL) { |
michael@0 | 357 | errorCode=U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 358 | return NULL; |
michael@0 | 359 | } |
michael@0 | 360 | // If uhash_puti() returns a non-zero value from an equivalent, previously |
michael@0 | 361 | // registered node, then uhash_find() failed to find that and we will leak newNode. |
michael@0 | 362 | #if U_DEBUG |
michael@0 | 363 | int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. |
michael@0 | 364 | #endif |
michael@0 | 365 | uhash_puti(nodes, newNode, 1, &errorCode); |
michael@0 | 366 | U_ASSERT(oldValue==0); |
michael@0 | 367 | if(U_FAILURE(errorCode)) { |
michael@0 | 368 | delete newNode; |
michael@0 | 369 | return NULL; |
michael@0 | 370 | } |
michael@0 | 371 | return newNode; |
michael@0 | 372 | } |
michael@0 | 373 | |
michael@0 | 374 | UBool |
michael@0 | 375 | StringTrieBuilder::hashNode(const void *node) { |
michael@0 | 376 | return ((const Node *)node)->hashCode(); |
michael@0 | 377 | } |
michael@0 | 378 | |
michael@0 | 379 | UBool |
michael@0 | 380 | StringTrieBuilder::equalNodes(const void *left, const void *right) { |
michael@0 | 381 | return *(const Node *)left==*(const Node *)right; |
michael@0 | 382 | } |
michael@0 | 383 | |
michael@0 | 384 | UBool |
michael@0 | 385 | StringTrieBuilder::Node::operator==(const Node &other) const { |
michael@0 | 386 | return this==&other || (typeid(*this)==typeid(other) && hash==other.hash); |
michael@0 | 387 | } |
michael@0 | 388 | |
michael@0 | 389 | int32_t |
michael@0 | 390 | StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) { |
michael@0 | 391 | if(offset==0) { |
michael@0 | 392 | offset=edgeNumber; |
michael@0 | 393 | } |
michael@0 | 394 | return edgeNumber; |
michael@0 | 395 | } |
michael@0 | 396 | |
michael@0 | 397 | UBool |
michael@0 | 398 | StringTrieBuilder::FinalValueNode::operator==(const Node &other) const { |
michael@0 | 399 | if(this==&other) { |
michael@0 | 400 | return TRUE; |
michael@0 | 401 | } |
michael@0 | 402 | if(!Node::operator==(other)) { |
michael@0 | 403 | return FALSE; |
michael@0 | 404 | } |
michael@0 | 405 | const FinalValueNode &o=(const FinalValueNode &)other; |
michael@0 | 406 | return value==o.value; |
michael@0 | 407 | } |
michael@0 | 408 | |
michael@0 | 409 | void |
michael@0 | 410 | StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) { |
michael@0 | 411 | offset=builder.writeValueAndFinal(value, TRUE); |
michael@0 | 412 | } |
michael@0 | 413 | |
michael@0 | 414 | UBool |
michael@0 | 415 | StringTrieBuilder::ValueNode::operator==(const Node &other) const { |
michael@0 | 416 | if(this==&other) { |
michael@0 | 417 | return TRUE; |
michael@0 | 418 | } |
michael@0 | 419 | if(!Node::operator==(other)) { |
michael@0 | 420 | return FALSE; |
michael@0 | 421 | } |
michael@0 | 422 | const ValueNode &o=(const ValueNode &)other; |
michael@0 | 423 | return hasValue==o.hasValue && (!hasValue || value==o.value); |
michael@0 | 424 | } |
michael@0 | 425 | |
michael@0 | 426 | UBool |
michael@0 | 427 | StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const { |
michael@0 | 428 | if(this==&other) { |
michael@0 | 429 | return TRUE; |
michael@0 | 430 | } |
michael@0 | 431 | if(!ValueNode::operator==(other)) { |
michael@0 | 432 | return FALSE; |
michael@0 | 433 | } |
michael@0 | 434 | const IntermediateValueNode &o=(const IntermediateValueNode &)other; |
michael@0 | 435 | return next==o.next; |
michael@0 | 436 | } |
michael@0 | 437 | |
michael@0 | 438 | int32_t |
michael@0 | 439 | StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) { |
michael@0 | 440 | if(offset==0) { |
michael@0 | 441 | offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); |
michael@0 | 442 | } |
michael@0 | 443 | return edgeNumber; |
michael@0 | 444 | } |
michael@0 | 445 | |
michael@0 | 446 | void |
michael@0 | 447 | StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) { |
michael@0 | 448 | next->write(builder); |
michael@0 | 449 | offset=builder.writeValueAndFinal(value, FALSE); |
michael@0 | 450 | } |
michael@0 | 451 | |
michael@0 | 452 | UBool |
michael@0 | 453 | StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const { |
michael@0 | 454 | if(this==&other) { |
michael@0 | 455 | return TRUE; |
michael@0 | 456 | } |
michael@0 | 457 | if(!ValueNode::operator==(other)) { |
michael@0 | 458 | return FALSE; |
michael@0 | 459 | } |
michael@0 | 460 | const LinearMatchNode &o=(const LinearMatchNode &)other; |
michael@0 | 461 | return length==o.length && next==o.next; |
michael@0 | 462 | } |
michael@0 | 463 | |
michael@0 | 464 | int32_t |
michael@0 | 465 | StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) { |
michael@0 | 466 | if(offset==0) { |
michael@0 | 467 | offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); |
michael@0 | 468 | } |
michael@0 | 469 | return edgeNumber; |
michael@0 | 470 | } |
michael@0 | 471 | |
michael@0 | 472 | UBool |
michael@0 | 473 | StringTrieBuilder::ListBranchNode::operator==(const Node &other) const { |
michael@0 | 474 | if(this==&other) { |
michael@0 | 475 | return TRUE; |
michael@0 | 476 | } |
michael@0 | 477 | if(!Node::operator==(other)) { |
michael@0 | 478 | return FALSE; |
michael@0 | 479 | } |
michael@0 | 480 | const ListBranchNode &o=(const ListBranchNode &)other; |
michael@0 | 481 | for(int32_t i=0; i<length; ++i) { |
michael@0 | 482 | if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) { |
michael@0 | 483 | return FALSE; |
michael@0 | 484 | } |
michael@0 | 485 | } |
michael@0 | 486 | return TRUE; |
michael@0 | 487 | } |
michael@0 | 488 | |
michael@0 | 489 | int32_t |
michael@0 | 490 | StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) { |
michael@0 | 491 | if(offset==0) { |
michael@0 | 492 | firstEdgeNumber=edgeNumber; |
michael@0 | 493 | int32_t step=0; |
michael@0 | 494 | int32_t i=length; |
michael@0 | 495 | do { |
michael@0 | 496 | Node *edge=equal[--i]; |
michael@0 | 497 | if(edge!=NULL) { |
michael@0 | 498 | edgeNumber=edge->markRightEdgesFirst(edgeNumber-step); |
michael@0 | 499 | } |
michael@0 | 500 | // For all but the rightmost edge, decrement the edge number. |
michael@0 | 501 | step=1; |
michael@0 | 502 | } while(i>0); |
michael@0 | 503 | offset=edgeNumber; |
michael@0 | 504 | } |
michael@0 | 505 | return edgeNumber; |
michael@0 | 506 | } |
michael@0 | 507 | |
michael@0 | 508 | void |
michael@0 | 509 | StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) { |
michael@0 | 510 | // Write the sub-nodes in reverse order: The jump lengths are deltas from |
michael@0 | 511 | // after their own positions, so if we wrote the minUnit sub-node first, |
michael@0 | 512 | // then its jump delta would be larger. |
michael@0 | 513 | // Instead we write the minUnit sub-node last, for a shorter delta. |
michael@0 | 514 | int32_t unitNumber=length-1; |
michael@0 | 515 | Node *rightEdge=equal[unitNumber]; |
michael@0 | 516 | int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset(); |
michael@0 | 517 | do { |
michael@0 | 518 | --unitNumber; |
michael@0 | 519 | if(equal[unitNumber]!=NULL) { |
michael@0 | 520 | equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder); |
michael@0 | 521 | } |
michael@0 | 522 | } while(unitNumber>0); |
michael@0 | 523 | // The maxUnit sub-node is written as the very last one because we do |
michael@0 | 524 | // not jump for it at all. |
michael@0 | 525 | unitNumber=length-1; |
michael@0 | 526 | if(rightEdge==NULL) { |
michael@0 | 527 | builder.writeValueAndFinal(values[unitNumber], TRUE); |
michael@0 | 528 | } else { |
michael@0 | 529 | rightEdge->write(builder); |
michael@0 | 530 | } |
michael@0 | 531 | offset=builder.write(units[unitNumber]); |
michael@0 | 532 | // Write the rest of this node's unit-value pairs. |
michael@0 | 533 | while(--unitNumber>=0) { |
michael@0 | 534 | int32_t value; |
michael@0 | 535 | UBool isFinal; |
michael@0 | 536 | if(equal[unitNumber]==NULL) { |
michael@0 | 537 | // Write the final value for the one string ending with this unit. |
michael@0 | 538 | value=values[unitNumber]; |
michael@0 | 539 | isFinal=TRUE; |
michael@0 | 540 | } else { |
michael@0 | 541 | // Write the delta to the start position of the sub-node. |
michael@0 | 542 | U_ASSERT(equal[unitNumber]->getOffset()>0); |
michael@0 | 543 | value=offset-equal[unitNumber]->getOffset(); |
michael@0 | 544 | isFinal=FALSE; |
michael@0 | 545 | } |
michael@0 | 546 | builder.writeValueAndFinal(value, isFinal); |
michael@0 | 547 | offset=builder.write(units[unitNumber]); |
michael@0 | 548 | } |
michael@0 | 549 | } |
michael@0 | 550 | |
michael@0 | 551 | UBool |
michael@0 | 552 | StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const { |
michael@0 | 553 | if(this==&other) { |
michael@0 | 554 | return TRUE; |
michael@0 | 555 | } |
michael@0 | 556 | if(!Node::operator==(other)) { |
michael@0 | 557 | return FALSE; |
michael@0 | 558 | } |
michael@0 | 559 | const SplitBranchNode &o=(const SplitBranchNode &)other; |
michael@0 | 560 | return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual; |
michael@0 | 561 | } |
michael@0 | 562 | |
michael@0 | 563 | int32_t |
michael@0 | 564 | StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) { |
michael@0 | 565 | if(offset==0) { |
michael@0 | 566 | firstEdgeNumber=edgeNumber; |
michael@0 | 567 | edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber); |
michael@0 | 568 | offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1); |
michael@0 | 569 | } |
michael@0 | 570 | return edgeNumber; |
michael@0 | 571 | } |
michael@0 | 572 | |
michael@0 | 573 | void |
michael@0 | 574 | StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) { |
michael@0 | 575 | // Encode the less-than branch first. |
michael@0 | 576 | lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder); |
michael@0 | 577 | // Encode the greater-or-equal branch last because we do not jump for it at all. |
michael@0 | 578 | greaterOrEqual->write(builder); |
michael@0 | 579 | // Write this node. |
michael@0 | 580 | U_ASSERT(lessThan->getOffset()>0); |
michael@0 | 581 | builder.writeDeltaTo(lessThan->getOffset()); // less-than |
michael@0 | 582 | offset=builder.write(unit); |
michael@0 | 583 | } |
michael@0 | 584 | |
michael@0 | 585 | UBool |
michael@0 | 586 | StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const { |
michael@0 | 587 | if(this==&other) { |
michael@0 | 588 | return TRUE; |
michael@0 | 589 | } |
michael@0 | 590 | if(!ValueNode::operator==(other)) { |
michael@0 | 591 | return FALSE; |
michael@0 | 592 | } |
michael@0 | 593 | const BranchHeadNode &o=(const BranchHeadNode &)other; |
michael@0 | 594 | return length==o.length && next==o.next; |
michael@0 | 595 | } |
michael@0 | 596 | |
michael@0 | 597 | int32_t |
michael@0 | 598 | StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) { |
michael@0 | 599 | if(offset==0) { |
michael@0 | 600 | offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); |
michael@0 | 601 | } |
michael@0 | 602 | return edgeNumber; |
michael@0 | 603 | } |
michael@0 | 604 | |
michael@0 | 605 | void |
michael@0 | 606 | StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) { |
michael@0 | 607 | next->write(builder); |
michael@0 | 608 | if(length<=builder.getMinLinearMatch()) { |
michael@0 | 609 | offset=builder.writeValueAndType(hasValue, value, length-1); |
michael@0 | 610 | } else { |
michael@0 | 611 | builder.write(length-1); |
michael@0 | 612 | offset=builder.writeValueAndType(hasValue, value, 0); |
michael@0 | 613 | } |
michael@0 | 614 | } |
michael@0 | 615 | |
michael@0 | 616 | U_NAMESPACE_END |