intl/icu/source/common/bytestrieiterator.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2010-2012, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 * file name: bytestrieiterator.cpp
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2010nov03
michael@0 12 * created by: Markus W. Scherer
michael@0 13 */
michael@0 14
michael@0 15 #include "unicode/utypes.h"
michael@0 16 #include "unicode/bytestrie.h"
michael@0 17 #include "unicode/stringpiece.h"
michael@0 18 #include "charstr.h"
michael@0 19 #include "uvectr32.h"
michael@0 20
michael@0 21 U_NAMESPACE_BEGIN
michael@0 22
michael@0 23 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
michael@0 24 UErrorCode &errorCode)
michael@0 25 : bytes_(static_cast<const uint8_t *>(trieBytes)),
michael@0 26 pos_(bytes_), initialPos_(bytes_),
michael@0 27 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
michael@0 28 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
michael@0 29 if(U_FAILURE(errorCode)) {
michael@0 30 return;
michael@0 31 }
michael@0 32 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
michael@0 33 // a public API header for which we would want it to depend only on
michael@0 34 // other public headers.
michael@0 35 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
michael@0 36 // via the CharString and UVector32 implementations, so this additional
michael@0 37 // cost is minimal.
michael@0 38 str_=new CharString();
michael@0 39 stack_=new UVector32(errorCode);
michael@0 40 if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
michael@0 41 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 42 }
michael@0 43 }
michael@0 44
michael@0 45 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
michael@0 46 UErrorCode &errorCode)
michael@0 47 : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
michael@0 48 remainingMatchLength_(trie.remainingMatchLength_),
michael@0 49 initialRemainingMatchLength_(trie.remainingMatchLength_),
michael@0 50 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
michael@0 51 if(U_FAILURE(errorCode)) {
michael@0 52 return;
michael@0 53 }
michael@0 54 str_=new CharString();
michael@0 55 stack_=new UVector32(errorCode);
michael@0 56 if(U_FAILURE(errorCode)) {
michael@0 57 return;
michael@0 58 }
michael@0 59 if(str_==NULL || stack_==NULL) {
michael@0 60 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 61 return;
michael@0 62 }
michael@0 63 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
michael@0 64 if(length>=0) {
michael@0 65 // Pending linear-match node, append remaining bytes to str_.
michael@0 66 ++length;
michael@0 67 if(maxLength_>0 && length>maxLength_) {
michael@0 68 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
michael@0 69 }
michael@0 70 str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
michael@0 71 pos_+=length;
michael@0 72 remainingMatchLength_-=length;
michael@0 73 }
michael@0 74 }
michael@0 75
michael@0 76 BytesTrie::Iterator::~Iterator() {
michael@0 77 delete str_;
michael@0 78 delete stack_;
michael@0 79 }
michael@0 80
michael@0 81 BytesTrie::Iterator &
michael@0 82 BytesTrie::Iterator::reset() {
michael@0 83 pos_=initialPos_;
michael@0 84 remainingMatchLength_=initialRemainingMatchLength_;
michael@0 85 int32_t length=remainingMatchLength_+1; // Remaining match length.
michael@0 86 if(maxLength_>0 && length>maxLength_) {
michael@0 87 length=maxLength_;
michael@0 88 }
michael@0 89 str_->truncate(length);
michael@0 90 pos_+=length;
michael@0 91 remainingMatchLength_-=length;
michael@0 92 stack_->setSize(0);
michael@0 93 return *this;
michael@0 94 }
michael@0 95
michael@0 96 UBool
michael@0 97 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
michael@0 98
michael@0 99 UBool
michael@0 100 BytesTrie::Iterator::next(UErrorCode &errorCode) {
michael@0 101 if(U_FAILURE(errorCode)) {
michael@0 102 return FALSE;
michael@0 103 }
michael@0 104 const uint8_t *pos=pos_;
michael@0 105 if(pos==NULL) {
michael@0 106 if(stack_->isEmpty()) {
michael@0 107 return FALSE;
michael@0 108 }
michael@0 109 // Pop the state off the stack and continue with the next outbound edge of
michael@0 110 // the branch node.
michael@0 111 int32_t stackSize=stack_->size();
michael@0 112 int32_t length=stack_->elementAti(stackSize-1);
michael@0 113 pos=bytes_+stack_->elementAti(stackSize-2);
michael@0 114 stack_->setSize(stackSize-2);
michael@0 115 str_->truncate(length&0xffff);
michael@0 116 length=(int32_t)((uint32_t)length>>16);
michael@0 117 if(length>1) {
michael@0 118 pos=branchNext(pos, length, errorCode);
michael@0 119 if(pos==NULL) {
michael@0 120 return TRUE; // Reached a final value.
michael@0 121 }
michael@0 122 } else {
michael@0 123 str_->append((char)*pos++, errorCode);
michael@0 124 }
michael@0 125 }
michael@0 126 if(remainingMatchLength_>=0) {
michael@0 127 // We only get here if we started in a pending linear-match node
michael@0 128 // with more than maxLength remaining bytes.
michael@0 129 return truncateAndStop();
michael@0 130 }
michael@0 131 for(;;) {
michael@0 132 int32_t node=*pos++;
michael@0 133 if(node>=kMinValueLead) {
michael@0 134 // Deliver value for the byte sequence so far.
michael@0 135 UBool isFinal=(UBool)(node&kValueIsFinal);
michael@0 136 value_=readValue(pos, node>>1);
michael@0 137 if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
michael@0 138 pos_=NULL;
michael@0 139 } else {
michael@0 140 pos_=skipValue(pos, node);
michael@0 141 }
michael@0 142 sp_.set(str_->data(), str_->length());
michael@0 143 return TRUE;
michael@0 144 }
michael@0 145 if(maxLength_>0 && str_->length()==maxLength_) {
michael@0 146 return truncateAndStop();
michael@0 147 }
michael@0 148 if(node<kMinLinearMatch) {
michael@0 149 if(node==0) {
michael@0 150 node=*pos++;
michael@0 151 }
michael@0 152 pos=branchNext(pos, node+1, errorCode);
michael@0 153 if(pos==NULL) {
michael@0 154 return TRUE; // Reached a final value.
michael@0 155 }
michael@0 156 } else {
michael@0 157 // Linear-match node, append length bytes to str_.
michael@0 158 int32_t length=node-kMinLinearMatch+1;
michael@0 159 if(maxLength_>0 && str_->length()+length>maxLength_) {
michael@0 160 str_->append(reinterpret_cast<const char *>(pos),
michael@0 161 maxLength_-str_->length(), errorCode);
michael@0 162 return truncateAndStop();
michael@0 163 }
michael@0 164 str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
michael@0 165 pos+=length;
michael@0 166 }
michael@0 167 }
michael@0 168 }
michael@0 169
michael@0 170 UBool
michael@0 171 BytesTrie::Iterator::truncateAndStop() {
michael@0 172 pos_=NULL;
michael@0 173 sp_.set(str_->data(), str_->length());
michael@0 174 value_=-1; // no real value for str
michael@0 175 return TRUE;
michael@0 176 }
michael@0 177
michael@0 178 // Branch node, needs to take the first outbound edge and push state for the rest.
michael@0 179 const uint8_t *
michael@0 180 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
michael@0 181 while(length>kMaxBranchLinearSubNodeLength) {
michael@0 182 ++pos; // ignore the comparison byte
michael@0 183 // Push state for the greater-or-equal edge.
michael@0 184 stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
michael@0 185 stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
michael@0 186 // Follow the less-than edge.
michael@0 187 length>>=1;
michael@0 188 pos=jumpByDelta(pos);
michael@0 189 }
michael@0 190 // List of key-value pairs where values are either final values or jump deltas.
michael@0 191 // Read the first (key, value) pair.
michael@0 192 uint8_t trieByte=*pos++;
michael@0 193 int32_t node=*pos++;
michael@0 194 UBool isFinal=(UBool)(node&kValueIsFinal);
michael@0 195 int32_t value=readValue(pos, node>>1);
michael@0 196 pos=skipValue(pos, node);
michael@0 197 stack_->addElement((int32_t)(pos-bytes_), errorCode);
michael@0 198 stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
michael@0 199 str_->append((char)trieByte, errorCode);
michael@0 200 if(isFinal) {
michael@0 201 pos_=NULL;
michael@0 202 sp_.set(str_->data(), str_->length());
michael@0 203 value_=value;
michael@0 204 return NULL;
michael@0 205 } else {
michael@0 206 return pos+value;
michael@0 207 }
michael@0 208 }
michael@0 209
michael@0 210 U_NAMESPACE_END

mercurial