intl/icu/source/common/stringtriebuilder.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/stringtriebuilder.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,616 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*   Copyright (C) 2010-2012, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +*******************************************************************************
     1.9 +*   file name:  stringtriebuilder.cpp
    1.10 +*   encoding:   US-ASCII
    1.11 +*   tab size:   8 (not used)
    1.12 +*   indentation:4
    1.13 +*
    1.14 +*   created on: 2010dec24
    1.15 +*   created by: Markus W. Scherer
    1.16 +*/
    1.17 +
    1.18 +#include "utypeinfo.h"  // for 'typeid' to work
    1.19 +#include "unicode/utypes.h"
    1.20 +#include "unicode/stringtriebuilder.h"
    1.21 +#include "uassert.h"
    1.22 +#include "uhash.h"
    1.23 +
    1.24 +U_CDECL_BEGIN
    1.25 +
    1.26 +static int32_t U_CALLCONV
    1.27 +hashStringTrieNode(const UHashTok key) {
    1.28 +    return icu::StringTrieBuilder::hashNode(key.pointer);
    1.29 +}
    1.30 +
    1.31 +static UBool U_CALLCONV
    1.32 +equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
    1.33 +    return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer);
    1.34 +}
    1.35 +
    1.36 +U_CDECL_END
    1.37 +
    1.38 +U_NAMESPACE_BEGIN
    1.39 +
    1.40 +StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {}
    1.41 +
    1.42 +StringTrieBuilder::~StringTrieBuilder() {
    1.43 +    deleteCompactBuilder();
    1.44 +}
    1.45 +
    1.46 +void
    1.47 +StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
    1.48 +    if(U_FAILURE(errorCode)) {
    1.49 +        return;
    1.50 +    }
    1.51 +    nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL,
    1.52 +                         sizeGuess, &errorCode);
    1.53 +    if(U_SUCCESS(errorCode)) {
    1.54 +        if(nodes==NULL) {
    1.55 +          errorCode=U_MEMORY_ALLOCATION_ERROR;
    1.56 +        } else {
    1.57 +          uhash_setKeyDeleter(nodes, uprv_deleteUObject);
    1.58 +        }
    1.59 +    }
    1.60 +}
    1.61 +
    1.62 +void
    1.63 +StringTrieBuilder::deleteCompactBuilder() {
    1.64 +    uhash_close(nodes);
    1.65 +    nodes=NULL;
    1.66 +}
    1.67 +
    1.68 +void
    1.69 +StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
    1.70 +                       UErrorCode &errorCode) {
    1.71 +    if(buildOption==USTRINGTRIE_BUILD_FAST) {
    1.72 +        writeNode(0, elementsLength, 0);
    1.73 +    } else /* USTRINGTRIE_BUILD_SMALL */ {
    1.74 +        createCompactBuilder(2*elementsLength, errorCode);
    1.75 +        Node *root=makeNode(0, elementsLength, 0, errorCode);
    1.76 +        if(U_SUCCESS(errorCode)) {
    1.77 +            root->markRightEdgesFirst(-1);
    1.78 +            root->write(*this);
    1.79 +        }
    1.80 +        deleteCompactBuilder();
    1.81 +    }
    1.82 +}
    1.83 +
    1.84 +// Requires start<limit,
    1.85 +// and all strings of the [start..limit[ elements must be sorted and
    1.86 +// have a common prefix of length unitIndex.
    1.87 +int32_t
    1.88 +StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
    1.89 +    UBool hasValue=FALSE;
    1.90 +    int32_t value=0;
    1.91 +    int32_t type;
    1.92 +    if(unitIndex==getElementStringLength(start)) {
    1.93 +        // An intermediate or final value.
    1.94 +        value=getElementValue(start++);
    1.95 +        if(start==limit) {
    1.96 +            return writeValueAndFinal(value, TRUE);  // final-value node
    1.97 +        }
    1.98 +        hasValue=TRUE;
    1.99 +    }
   1.100 +    // Now all [start..limit[ strings are longer than unitIndex.
   1.101 +    int32_t minUnit=getElementUnit(start, unitIndex);
   1.102 +    int32_t maxUnit=getElementUnit(limit-1, unitIndex);
   1.103 +    if(minUnit==maxUnit) {
   1.104 +        // Linear-match node: All strings have the same character at unitIndex.
   1.105 +        int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
   1.106 +        writeNode(start, limit, lastUnitIndex);
   1.107 +        // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
   1.108 +        int32_t length=lastUnitIndex-unitIndex;
   1.109 +        int32_t maxLinearMatchLength=getMaxLinearMatchLength();
   1.110 +        while(length>maxLinearMatchLength) {
   1.111 +            lastUnitIndex-=maxLinearMatchLength;
   1.112 +            length-=maxLinearMatchLength;
   1.113 +            writeElementUnits(start, lastUnitIndex, maxLinearMatchLength);
   1.114 +            write(getMinLinearMatch()+maxLinearMatchLength-1);
   1.115 +        }
   1.116 +        writeElementUnits(start, unitIndex, length);
   1.117 +        type=getMinLinearMatch()+length-1;
   1.118 +    } else {
   1.119 +        // Branch node.
   1.120 +        int32_t length=countElementUnits(start, limit, unitIndex);
   1.121 +        // length>=2 because minUnit!=maxUnit.
   1.122 +        writeBranchSubNode(start, limit, unitIndex, length);
   1.123 +        if(--length<getMinLinearMatch()) {
   1.124 +            type=length;
   1.125 +        } else {
   1.126 +            write(length);
   1.127 +            type=0;
   1.128 +        }
   1.129 +    }
   1.130 +    return writeValueAndType(hasValue, value, type);
   1.131 +}
   1.132 +
   1.133 +// start<limit && all strings longer than unitIndex &&
   1.134 +// length different units at unitIndex
   1.135 +int32_t
   1.136 +StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
   1.137 +    UChar middleUnits[kMaxSplitBranchLevels];
   1.138 +    int32_t lessThan[kMaxSplitBranchLevels];
   1.139 +    int32_t ltLength=0;
   1.140 +    while(length>getMaxBranchLinearSubNodeLength()) {
   1.141 +        // Branch on the middle unit.
   1.142 +        // First, find the middle unit.
   1.143 +        int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
   1.144 +        // Encode the less-than branch first.
   1.145 +        middleUnits[ltLength]=getElementUnit(i, unitIndex);  // middle unit
   1.146 +        lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2);
   1.147 +        ++ltLength;
   1.148 +        // Continue for the greater-or-equal branch.
   1.149 +        start=i;
   1.150 +        length=length-length/2;
   1.151 +    }
   1.152 +    // For each unit, find its elements array start and whether it has a final value.
   1.153 +    int32_t starts[kMaxBranchLinearSubNodeLength];
   1.154 +    UBool isFinal[kMaxBranchLinearSubNodeLength-1];
   1.155 +    int32_t unitNumber=0;
   1.156 +    do {
   1.157 +        int32_t i=starts[unitNumber]=start;
   1.158 +        UChar unit=getElementUnit(i++, unitIndex);
   1.159 +        i=indexOfElementWithNextUnit(i, unitIndex, unit);
   1.160 +        isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start);
   1.161 +        start=i;
   1.162 +    } while(++unitNumber<length-1);
   1.163 +    // unitNumber==length-1, and the maxUnit elements range is [start..limit[
   1.164 +    starts[unitNumber]=start;
   1.165 +
   1.166 +    // Write the sub-nodes in reverse order: The jump lengths are deltas from
   1.167 +    // after their own positions, so if we wrote the minUnit sub-node first,
   1.168 +    // then its jump delta would be larger.
   1.169 +    // Instead we write the minUnit sub-node last, for a shorter delta.
   1.170 +    int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1];
   1.171 +    do {
   1.172 +        --unitNumber;
   1.173 +        if(!isFinal[unitNumber]) {
   1.174 +            jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
   1.175 +        }
   1.176 +    } while(unitNumber>0);
   1.177 +    // The maxUnit sub-node is written as the very last one because we do
   1.178 +    // not jump for it at all.
   1.179 +    unitNumber=length-1;
   1.180 +    writeNode(start, limit, unitIndex+1);
   1.181 +    int32_t offset=write(getElementUnit(start, unitIndex));
   1.182 +    // Write the rest of this node's unit-value pairs.
   1.183 +    while(--unitNumber>=0) {
   1.184 +        start=starts[unitNumber];
   1.185 +        int32_t value;
   1.186 +        if(isFinal[unitNumber]) {
   1.187 +            // Write the final value for the one string ending with this unit.
   1.188 +            value=getElementValue(start);
   1.189 +        } else {
   1.190 +            // Write the delta to the start position of the sub-node.
   1.191 +            value=offset-jumpTargets[unitNumber];
   1.192 +        }
   1.193 +        writeValueAndFinal(value, isFinal[unitNumber]);
   1.194 +        offset=write(getElementUnit(start, unitIndex));
   1.195 +    }
   1.196 +    // Write the split-branch nodes.
   1.197 +    while(ltLength>0) {
   1.198 +        --ltLength;
   1.199 +        writeDeltaTo(lessThan[ltLength]);
   1.200 +        offset=write(middleUnits[ltLength]);
   1.201 +    }
   1.202 +    return offset;
   1.203 +}
   1.204 +
   1.205 +// Requires start<limit,
   1.206 +// and all strings of the [start..limit[ elements must be sorted and
   1.207 +// have a common prefix of length unitIndex.
   1.208 +StringTrieBuilder::Node *
   1.209 +StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
   1.210 +    if(U_FAILURE(errorCode)) {
   1.211 +        return NULL;
   1.212 +    }
   1.213 +    UBool hasValue=FALSE;
   1.214 +    int32_t value=0;
   1.215 +    if(unitIndex==getElementStringLength(start)) {
   1.216 +        // An intermediate or final value.
   1.217 +        value=getElementValue(start++);
   1.218 +        if(start==limit) {
   1.219 +            return registerFinalValue(value, errorCode);
   1.220 +        }
   1.221 +        hasValue=TRUE;
   1.222 +    }
   1.223 +    Node *node;
   1.224 +    // Now all [start..limit[ strings are longer than unitIndex.
   1.225 +    int32_t minUnit=getElementUnit(start, unitIndex);
   1.226 +    int32_t maxUnit=getElementUnit(limit-1, unitIndex);
   1.227 +    if(minUnit==maxUnit) {
   1.228 +        // Linear-match node: All strings have the same character at unitIndex.
   1.229 +        int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
   1.230 +        Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode);
   1.231 +        // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
   1.232 +        int32_t length=lastUnitIndex-unitIndex;
   1.233 +        int32_t maxLinearMatchLength=getMaxLinearMatchLength();
   1.234 +        while(length>maxLinearMatchLength) {
   1.235 +            lastUnitIndex-=maxLinearMatchLength;
   1.236 +            length-=maxLinearMatchLength;
   1.237 +            node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode);
   1.238 +            nextNode=registerNode(node, errorCode);
   1.239 +        }
   1.240 +        node=createLinearMatchNode(start, unitIndex, length, nextNode);
   1.241 +    } else {
   1.242 +        // Branch node.
   1.243 +        int32_t length=countElementUnits(start, limit, unitIndex);
   1.244 +        // length>=2 because minUnit!=maxUnit.
   1.245 +        Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
   1.246 +        node=new BranchHeadNode(length, subNode);
   1.247 +    }
   1.248 +    if(hasValue && node!=NULL) {
   1.249 +        if(matchNodesCanHaveValues()) {
   1.250 +            ((ValueNode *)node)->setValue(value);
   1.251 +        } else {
   1.252 +            node=new IntermediateValueNode(value, registerNode(node, errorCode));
   1.253 +        }
   1.254 +    }
   1.255 +    return registerNode(node, errorCode);
   1.256 +}
   1.257 +
   1.258 +// start<limit && all strings longer than unitIndex &&
   1.259 +// length different units at unitIndex
   1.260 +StringTrieBuilder::Node *
   1.261 +StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
   1.262 +                                   int32_t length, UErrorCode &errorCode) {
   1.263 +    if(U_FAILURE(errorCode)) {
   1.264 +        return NULL;
   1.265 +    }
   1.266 +    UChar middleUnits[kMaxSplitBranchLevels];
   1.267 +    Node *lessThan[kMaxSplitBranchLevels];
   1.268 +    int32_t ltLength=0;
   1.269 +    while(length>getMaxBranchLinearSubNodeLength()) {
   1.270 +        // Branch on the middle unit.
   1.271 +        // First, find the middle unit.
   1.272 +        int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
   1.273 +        // Create the less-than branch.
   1.274 +        middleUnits[ltLength]=getElementUnit(i, unitIndex);  // middle unit
   1.275 +        lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
   1.276 +        ++ltLength;
   1.277 +        // Continue for the greater-or-equal branch.
   1.278 +        start=i;
   1.279 +        length=length-length/2;
   1.280 +    }
   1.281 +    if(U_FAILURE(errorCode)) {
   1.282 +        return NULL;
   1.283 +    }
   1.284 +    ListBranchNode *listNode=new ListBranchNode();
   1.285 +    if(listNode==NULL) {
   1.286 +        errorCode=U_MEMORY_ALLOCATION_ERROR;
   1.287 +        return NULL;
   1.288 +    }
   1.289 +    // For each unit, find its elements array start and whether it has a final value.
   1.290 +    int32_t unitNumber=0;
   1.291 +    do {
   1.292 +        int32_t i=start;
   1.293 +        UChar unit=getElementUnit(i++, unitIndex);
   1.294 +        i=indexOfElementWithNextUnit(i, unitIndex, unit);
   1.295 +        if(start==i-1 && unitIndex+1==getElementStringLength(start)) {
   1.296 +            listNode->add(unit, getElementValue(start));
   1.297 +        } else {
   1.298 +            listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
   1.299 +        }
   1.300 +        start=i;
   1.301 +    } while(++unitNumber<length-1);
   1.302 +    // unitNumber==length-1, and the maxUnit elements range is [start..limit[
   1.303 +    UChar unit=getElementUnit(start, unitIndex);
   1.304 +    if(start==limit-1 && unitIndex+1==getElementStringLength(start)) {
   1.305 +        listNode->add(unit, getElementValue(start));
   1.306 +    } else {
   1.307 +        listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
   1.308 +    }
   1.309 +    Node *node=registerNode(listNode, errorCode);
   1.310 +    // Create the split-branch nodes.
   1.311 +    while(ltLength>0) {
   1.312 +        --ltLength;
   1.313 +        node=registerNode(
   1.314 +            new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
   1.315 +    }
   1.316 +    return node;
   1.317 +}
   1.318 +
   1.319 +StringTrieBuilder::Node *
   1.320 +StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
   1.321 +    if(U_FAILURE(errorCode)) {
   1.322 +        delete newNode;
   1.323 +        return NULL;
   1.324 +    }
   1.325 +    if(newNode==NULL) {
   1.326 +        errorCode=U_MEMORY_ALLOCATION_ERROR;
   1.327 +        return NULL;
   1.328 +    }
   1.329 +    const UHashElement *old=uhash_find(nodes, newNode);
   1.330 +    if(old!=NULL) {
   1.331 +        delete newNode;
   1.332 +        return (Node *)old->key.pointer;
   1.333 +    }
   1.334 +    // If uhash_puti() returns a non-zero value from an equivalent, previously
   1.335 +    // registered node, then uhash_find() failed to find that and we will leak newNode.
   1.336 +#if U_DEBUG
   1.337 +    int32_t oldValue=  // Only in debug mode to avoid a compiler warning about unused oldValue.
   1.338 +#endif
   1.339 +    uhash_puti(nodes, newNode, 1, &errorCode);
   1.340 +    U_ASSERT(oldValue==0);
   1.341 +    if(U_FAILURE(errorCode)) {
   1.342 +        delete newNode;
   1.343 +        return NULL;
   1.344 +    }
   1.345 +    return newNode;
   1.346 +}
   1.347 +
   1.348 +StringTrieBuilder::Node *
   1.349 +StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
   1.350 +    if(U_FAILURE(errorCode)) {
   1.351 +        return NULL;
   1.352 +    }
   1.353 +    FinalValueNode key(value);
   1.354 +    const UHashElement *old=uhash_find(nodes, &key);
   1.355 +    if(old!=NULL) {
   1.356 +        return (Node *)old->key.pointer;
   1.357 +    }
   1.358 +    Node *newNode=new FinalValueNode(value);
   1.359 +    if(newNode==NULL) {
   1.360 +        errorCode=U_MEMORY_ALLOCATION_ERROR;
   1.361 +        return NULL;
   1.362 +    }
   1.363 +    // If uhash_puti() returns a non-zero value from an equivalent, previously
   1.364 +    // registered node, then uhash_find() failed to find that and we will leak newNode.
   1.365 +#if U_DEBUG
   1.366 +    int32_t oldValue=  // Only in debug mode to avoid a compiler warning about unused oldValue.
   1.367 +#endif
   1.368 +    uhash_puti(nodes, newNode, 1, &errorCode);
   1.369 +    U_ASSERT(oldValue==0);
   1.370 +    if(U_FAILURE(errorCode)) {
   1.371 +        delete newNode;
   1.372 +        return NULL;
   1.373 +    }
   1.374 +    return newNode;
   1.375 +}
   1.376 +
   1.377 +UBool
   1.378 +StringTrieBuilder::hashNode(const void *node) {
   1.379 +    return ((const Node *)node)->hashCode();
   1.380 +}
   1.381 +
   1.382 +UBool
   1.383 +StringTrieBuilder::equalNodes(const void *left, const void *right) {
   1.384 +    return *(const Node *)left==*(const Node *)right;
   1.385 +}
   1.386 +
   1.387 +UBool
   1.388 +StringTrieBuilder::Node::operator==(const Node &other) const {
   1.389 +    return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
   1.390 +}
   1.391 +
   1.392 +int32_t
   1.393 +StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
   1.394 +    if(offset==0) {
   1.395 +        offset=edgeNumber;
   1.396 +    }
   1.397 +    return edgeNumber;
   1.398 +}
   1.399 +
   1.400 +UBool
   1.401 +StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
   1.402 +    if(this==&other) {
   1.403 +        return TRUE;
   1.404 +    }
   1.405 +    if(!Node::operator==(other)) {
   1.406 +        return FALSE;
   1.407 +    }
   1.408 +    const FinalValueNode &o=(const FinalValueNode &)other;
   1.409 +    return value==o.value;
   1.410 +}
   1.411 +
   1.412 +void
   1.413 +StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
   1.414 +    offset=builder.writeValueAndFinal(value, TRUE);
   1.415 +}
   1.416 +
   1.417 +UBool
   1.418 +StringTrieBuilder::ValueNode::operator==(const Node &other) const {
   1.419 +    if(this==&other) {
   1.420 +        return TRUE;
   1.421 +    }
   1.422 +    if(!Node::operator==(other)) {
   1.423 +        return FALSE;
   1.424 +    }
   1.425 +    const ValueNode &o=(const ValueNode &)other;
   1.426 +    return hasValue==o.hasValue && (!hasValue || value==o.value);
   1.427 +}
   1.428 +
   1.429 +UBool
   1.430 +StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
   1.431 +    if(this==&other) {
   1.432 +        return TRUE;
   1.433 +    }
   1.434 +    if(!ValueNode::operator==(other)) {
   1.435 +        return FALSE;
   1.436 +    }
   1.437 +    const IntermediateValueNode &o=(const IntermediateValueNode &)other;
   1.438 +    return next==o.next;
   1.439 +}
   1.440 +
   1.441 +int32_t
   1.442 +StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
   1.443 +    if(offset==0) {
   1.444 +        offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
   1.445 +    }
   1.446 +    return edgeNumber;
   1.447 +}
   1.448 +
   1.449 +void
   1.450 +StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
   1.451 +    next->write(builder);
   1.452 +    offset=builder.writeValueAndFinal(value, FALSE);
   1.453 +}
   1.454 +
   1.455 +UBool
   1.456 +StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
   1.457 +    if(this==&other) {
   1.458 +        return TRUE;
   1.459 +    }
   1.460 +    if(!ValueNode::operator==(other)) {
   1.461 +        return FALSE;
   1.462 +    }
   1.463 +    const LinearMatchNode &o=(const LinearMatchNode &)other;
   1.464 +    return length==o.length && next==o.next;
   1.465 +}
   1.466 +
   1.467 +int32_t
   1.468 +StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
   1.469 +    if(offset==0) {
   1.470 +        offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
   1.471 +    }
   1.472 +    return edgeNumber;
   1.473 +}
   1.474 +
   1.475 +UBool
   1.476 +StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
   1.477 +    if(this==&other) {
   1.478 +        return TRUE;
   1.479 +    }
   1.480 +    if(!Node::operator==(other)) {
   1.481 +        return FALSE;
   1.482 +    }
   1.483 +    const ListBranchNode &o=(const ListBranchNode &)other;
   1.484 +    for(int32_t i=0; i<length; ++i) {
   1.485 +        if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
   1.486 +            return FALSE;
   1.487 +        }
   1.488 +    }
   1.489 +    return TRUE;
   1.490 +}
   1.491 +
   1.492 +int32_t
   1.493 +StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
   1.494 +    if(offset==0) {
   1.495 +        firstEdgeNumber=edgeNumber;
   1.496 +        int32_t step=0;
   1.497 +        int32_t i=length;
   1.498 +        do {
   1.499 +            Node *edge=equal[--i];
   1.500 +            if(edge!=NULL) {
   1.501 +                edgeNumber=edge->markRightEdgesFirst(edgeNumber-step);
   1.502 +            }
   1.503 +            // For all but the rightmost edge, decrement the edge number.
   1.504 +            step=1;
   1.505 +        } while(i>0);
   1.506 +        offset=edgeNumber;
   1.507 +    }
   1.508 +    return edgeNumber;
   1.509 +}
   1.510 +
   1.511 +void
   1.512 +StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
   1.513 +    // Write the sub-nodes in reverse order: The jump lengths are deltas from
   1.514 +    // after their own positions, so if we wrote the minUnit sub-node first,
   1.515 +    // then its jump delta would be larger.
   1.516 +    // Instead we write the minUnit sub-node last, for a shorter delta.
   1.517 +    int32_t unitNumber=length-1;
   1.518 +    Node *rightEdge=equal[unitNumber];
   1.519 +    int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset();
   1.520 +    do {
   1.521 +        --unitNumber;
   1.522 +        if(equal[unitNumber]!=NULL) {
   1.523 +            equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
   1.524 +        }
   1.525 +    } while(unitNumber>0);
   1.526 +    // The maxUnit sub-node is written as the very last one because we do
   1.527 +    // not jump for it at all.
   1.528 +    unitNumber=length-1;
   1.529 +    if(rightEdge==NULL) {
   1.530 +        builder.writeValueAndFinal(values[unitNumber], TRUE);
   1.531 +    } else {
   1.532 +        rightEdge->write(builder);
   1.533 +    }
   1.534 +    offset=builder.write(units[unitNumber]);
   1.535 +    // Write the rest of this node's unit-value pairs.
   1.536 +    while(--unitNumber>=0) {
   1.537 +        int32_t value;
   1.538 +        UBool isFinal;
   1.539 +        if(equal[unitNumber]==NULL) {
   1.540 +            // Write the final value for the one string ending with this unit.
   1.541 +            value=values[unitNumber];
   1.542 +            isFinal=TRUE;
   1.543 +        } else {
   1.544 +            // Write the delta to the start position of the sub-node.
   1.545 +            U_ASSERT(equal[unitNumber]->getOffset()>0);
   1.546 +            value=offset-equal[unitNumber]->getOffset();
   1.547 +            isFinal=FALSE;
   1.548 +        }
   1.549 +        builder.writeValueAndFinal(value, isFinal);
   1.550 +        offset=builder.write(units[unitNumber]);
   1.551 +    }
   1.552 +}
   1.553 +
   1.554 +UBool
   1.555 +StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
   1.556 +    if(this==&other) {
   1.557 +        return TRUE;
   1.558 +    }
   1.559 +    if(!Node::operator==(other)) {
   1.560 +        return FALSE;
   1.561 +    }
   1.562 +    const SplitBranchNode &o=(const SplitBranchNode &)other;
   1.563 +    return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
   1.564 +}
   1.565 +
   1.566 +int32_t
   1.567 +StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
   1.568 +    if(offset==0) {
   1.569 +        firstEdgeNumber=edgeNumber;
   1.570 +        edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber);
   1.571 +        offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1);
   1.572 +    }
   1.573 +    return edgeNumber;
   1.574 +}
   1.575 +
   1.576 +void
   1.577 +StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
   1.578 +    // Encode the less-than branch first.
   1.579 +    lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
   1.580 +    // Encode the greater-or-equal branch last because we do not jump for it at all.
   1.581 +    greaterOrEqual->write(builder);
   1.582 +    // Write this node.
   1.583 +    U_ASSERT(lessThan->getOffset()>0);
   1.584 +    builder.writeDeltaTo(lessThan->getOffset());  // less-than
   1.585 +    offset=builder.write(unit);
   1.586 +}
   1.587 +
   1.588 +UBool
   1.589 +StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
   1.590 +    if(this==&other) {
   1.591 +        return TRUE;
   1.592 +    }
   1.593 +    if(!ValueNode::operator==(other)) {
   1.594 +        return FALSE;
   1.595 +    }
   1.596 +    const BranchHeadNode &o=(const BranchHeadNode &)other;
   1.597 +    return length==o.length && next==o.next;
   1.598 +}
   1.599 +
   1.600 +int32_t
   1.601 +StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
   1.602 +    if(offset==0) {
   1.603 +        offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
   1.604 +    }
   1.605 +    return edgeNumber;
   1.606 +}
   1.607 +
   1.608 +void
   1.609 +StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
   1.610 +    next->write(builder);
   1.611 +    if(length<=builder.getMinLinearMatch()) {
   1.612 +        offset=builder.writeValueAndType(hasValue, value, length-1);
   1.613 +    } else {
   1.614 +        builder.write(length-1);
   1.615 +        offset=builder.writeValueAndType(hasValue, value, 0);
   1.616 +    }
   1.617 +}
   1.618 +
   1.619 +U_NAMESPACE_END

mercurial