intl/icu/source/common/ucharstrie.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2010-2011, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 * file name: ucharstrie.h
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2010nov14
michael@0 12 * created by: Markus W. Scherer
michael@0 13 */
michael@0 14
michael@0 15 #include "unicode/utypes.h"
michael@0 16 #include "unicode/appendable.h"
michael@0 17 #include "unicode/ucharstrie.h"
michael@0 18 #include "unicode/uobject.h"
michael@0 19 #include "unicode/utf16.h"
michael@0 20 #include "cmemory.h"
michael@0 21 #include "uassert.h"
michael@0 22
michael@0 23 U_NAMESPACE_BEGIN
michael@0 24
michael@0 25 UCharsTrie::~UCharsTrie() {
michael@0 26 uprv_free(ownedArray_);
michael@0 27 }
michael@0 28
michael@0 29 UStringTrieResult
michael@0 30 UCharsTrie::current() const {
michael@0 31 const UChar *pos=pos_;
michael@0 32 if(pos==NULL) {
michael@0 33 return USTRINGTRIE_NO_MATCH;
michael@0 34 } else {
michael@0 35 int32_t node;
michael@0 36 return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
michael@0 37 valueResult(node) : USTRINGTRIE_NO_VALUE;
michael@0 38 }
michael@0 39 }
michael@0 40
michael@0 41 UStringTrieResult
michael@0 42 UCharsTrie::firstForCodePoint(UChar32 cp) {
michael@0 43 return cp<=0xffff ?
michael@0 44 first(cp) :
michael@0 45 (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
michael@0 46 next(U16_TRAIL(cp)) :
michael@0 47 USTRINGTRIE_NO_MATCH);
michael@0 48 }
michael@0 49
michael@0 50 UStringTrieResult
michael@0 51 UCharsTrie::nextForCodePoint(UChar32 cp) {
michael@0 52 return cp<=0xffff ?
michael@0 53 next(cp) :
michael@0 54 (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
michael@0 55 next(U16_TRAIL(cp)) :
michael@0 56 USTRINGTRIE_NO_MATCH);
michael@0 57 }
michael@0 58
michael@0 59 UStringTrieResult
michael@0 60 UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
michael@0 61 // Branch according to the current unit.
michael@0 62 if(length==0) {
michael@0 63 length=*pos++;
michael@0 64 }
michael@0 65 ++length;
michael@0 66 // The length of the branch is the number of units to select from.
michael@0 67 // The data structure encodes a binary search.
michael@0 68 while(length>kMaxBranchLinearSubNodeLength) {
michael@0 69 if(uchar<*pos++) {
michael@0 70 length>>=1;
michael@0 71 pos=jumpByDelta(pos);
michael@0 72 } else {
michael@0 73 length=length-(length>>1);
michael@0 74 pos=skipDelta(pos);
michael@0 75 }
michael@0 76 }
michael@0 77 // Drop down to linear search for the last few units.
michael@0 78 // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
michael@0 79 // and divides length by 2.
michael@0 80 do {
michael@0 81 if(uchar==*pos++) {
michael@0 82 UStringTrieResult result;
michael@0 83 int32_t node=*pos;
michael@0 84 if(node&kValueIsFinal) {
michael@0 85 // Leave the final value for getValue() to read.
michael@0 86 result=USTRINGTRIE_FINAL_VALUE;
michael@0 87 } else {
michael@0 88 // Use the non-final value as the jump delta.
michael@0 89 ++pos;
michael@0 90 // int32_t delta=readValue(pos, node);
michael@0 91 int32_t delta;
michael@0 92 if(node<kMinTwoUnitValueLead) {
michael@0 93 delta=node;
michael@0 94 } else if(node<kThreeUnitValueLead) {
michael@0 95 delta=((node-kMinTwoUnitValueLead)<<16)|*pos++;
michael@0 96 } else {
michael@0 97 delta=(pos[0]<<16)|pos[1];
michael@0 98 pos+=2;
michael@0 99 }
michael@0 100 // end readValue()
michael@0 101 pos+=delta;
michael@0 102 node=*pos;
michael@0 103 result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
michael@0 104 }
michael@0 105 pos_=pos;
michael@0 106 return result;
michael@0 107 }
michael@0 108 --length;
michael@0 109 pos=skipValue(pos);
michael@0 110 } while(length>1);
michael@0 111 if(uchar==*pos++) {
michael@0 112 pos_=pos;
michael@0 113 int32_t node=*pos;
michael@0 114 return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
michael@0 115 } else {
michael@0 116 stop();
michael@0 117 return USTRINGTRIE_NO_MATCH;
michael@0 118 }
michael@0 119 }
michael@0 120
michael@0 121 UStringTrieResult
michael@0 122 UCharsTrie::nextImpl(const UChar *pos, int32_t uchar) {
michael@0 123 int32_t node=*pos++;
michael@0 124 for(;;) {
michael@0 125 if(node<kMinLinearMatch) {
michael@0 126 return branchNext(pos, node, uchar);
michael@0 127 } else if(node<kMinValueLead) {
michael@0 128 // Match the first of length+1 units.
michael@0 129 int32_t length=node-kMinLinearMatch; // Actual match length minus 1.
michael@0 130 if(uchar==*pos++) {
michael@0 131 remainingMatchLength_=--length;
michael@0 132 pos_=pos;
michael@0 133 return (length<0 && (node=*pos)>=kMinValueLead) ?
michael@0 134 valueResult(node) : USTRINGTRIE_NO_VALUE;
michael@0 135 } else {
michael@0 136 // No match.
michael@0 137 break;
michael@0 138 }
michael@0 139 } else if(node&kValueIsFinal) {
michael@0 140 // No further matching units.
michael@0 141 break;
michael@0 142 } else {
michael@0 143 // Skip intermediate value.
michael@0 144 pos=skipNodeValue(pos, node);
michael@0 145 node&=kNodeTypeMask;
michael@0 146 }
michael@0 147 }
michael@0 148 stop();
michael@0 149 return USTRINGTRIE_NO_MATCH;
michael@0 150 }
michael@0 151
michael@0 152 UStringTrieResult
michael@0 153 UCharsTrie::next(int32_t uchar) {
michael@0 154 const UChar *pos=pos_;
michael@0 155 if(pos==NULL) {
michael@0 156 return USTRINGTRIE_NO_MATCH;
michael@0 157 }
michael@0 158 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
michael@0 159 if(length>=0) {
michael@0 160 // Remaining part of a linear-match node.
michael@0 161 if(uchar==*pos++) {
michael@0 162 remainingMatchLength_=--length;
michael@0 163 pos_=pos;
michael@0 164 int32_t node;
michael@0 165 return (length<0 && (node=*pos)>=kMinValueLead) ?
michael@0 166 valueResult(node) : USTRINGTRIE_NO_VALUE;
michael@0 167 } else {
michael@0 168 stop();
michael@0 169 return USTRINGTRIE_NO_MATCH;
michael@0 170 }
michael@0 171 }
michael@0 172 return nextImpl(pos, uchar);
michael@0 173 }
michael@0 174
michael@0 175 UStringTrieResult
michael@0 176 UCharsTrie::next(const UChar *s, int32_t sLength) {
michael@0 177 if(sLength<0 ? *s==0 : sLength==0) {
michael@0 178 // Empty input.
michael@0 179 return current();
michael@0 180 }
michael@0 181 const UChar *pos=pos_;
michael@0 182 if(pos==NULL) {
michael@0 183 return USTRINGTRIE_NO_MATCH;
michael@0 184 }
michael@0 185 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
michael@0 186 for(;;) {
michael@0 187 // Fetch the next input unit, if there is one.
michael@0 188 // Continue a linear-match node without rechecking sLength<0.
michael@0 189 int32_t uchar;
michael@0 190 if(sLength<0) {
michael@0 191 for(;;) {
michael@0 192 if((uchar=*s++)==0) {
michael@0 193 remainingMatchLength_=length;
michael@0 194 pos_=pos;
michael@0 195 int32_t node;
michael@0 196 return (length<0 && (node=*pos)>=kMinValueLead) ?
michael@0 197 valueResult(node) : USTRINGTRIE_NO_VALUE;
michael@0 198 }
michael@0 199 if(length<0) {
michael@0 200 remainingMatchLength_=length;
michael@0 201 break;
michael@0 202 }
michael@0 203 if(uchar!=*pos) {
michael@0 204 stop();
michael@0 205 return USTRINGTRIE_NO_MATCH;
michael@0 206 }
michael@0 207 ++pos;
michael@0 208 --length;
michael@0 209 }
michael@0 210 } else {
michael@0 211 for(;;) {
michael@0 212 if(sLength==0) {
michael@0 213 remainingMatchLength_=length;
michael@0 214 pos_=pos;
michael@0 215 int32_t node;
michael@0 216 return (length<0 && (node=*pos)>=kMinValueLead) ?
michael@0 217 valueResult(node) : USTRINGTRIE_NO_VALUE;
michael@0 218 }
michael@0 219 uchar=*s++;
michael@0 220 --sLength;
michael@0 221 if(length<0) {
michael@0 222 remainingMatchLength_=length;
michael@0 223 break;
michael@0 224 }
michael@0 225 if(uchar!=*pos) {
michael@0 226 stop();
michael@0 227 return USTRINGTRIE_NO_MATCH;
michael@0 228 }
michael@0 229 ++pos;
michael@0 230 --length;
michael@0 231 }
michael@0 232 }
michael@0 233 int32_t node=*pos++;
michael@0 234 for(;;) {
michael@0 235 if(node<kMinLinearMatch) {
michael@0 236 UStringTrieResult result=branchNext(pos, node, uchar);
michael@0 237 if(result==USTRINGTRIE_NO_MATCH) {
michael@0 238 return USTRINGTRIE_NO_MATCH;
michael@0 239 }
michael@0 240 // Fetch the next input unit, if there is one.
michael@0 241 if(sLength<0) {
michael@0 242 if((uchar=*s++)==0) {
michael@0 243 return result;
michael@0 244 }
michael@0 245 } else {
michael@0 246 if(sLength==0) {
michael@0 247 return result;
michael@0 248 }
michael@0 249 uchar=*s++;
michael@0 250 --sLength;
michael@0 251 }
michael@0 252 if(result==USTRINGTRIE_FINAL_VALUE) {
michael@0 253 // No further matching units.
michael@0 254 stop();
michael@0 255 return USTRINGTRIE_NO_MATCH;
michael@0 256 }
michael@0 257 pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
michael@0 258 node=*pos++;
michael@0 259 } else if(node<kMinValueLead) {
michael@0 260 // Match length+1 units.
michael@0 261 length=node-kMinLinearMatch; // Actual match length minus 1.
michael@0 262 if(uchar!=*pos) {
michael@0 263 stop();
michael@0 264 return USTRINGTRIE_NO_MATCH;
michael@0 265 }
michael@0 266 ++pos;
michael@0 267 --length;
michael@0 268 break;
michael@0 269 } else if(node&kValueIsFinal) {
michael@0 270 // No further matching units.
michael@0 271 stop();
michael@0 272 return USTRINGTRIE_NO_MATCH;
michael@0 273 } else {
michael@0 274 // Skip intermediate value.
michael@0 275 pos=skipNodeValue(pos, node);
michael@0 276 node&=kNodeTypeMask;
michael@0 277 }
michael@0 278 }
michael@0 279 }
michael@0 280 }
michael@0 281
michael@0 282 const UChar *
michael@0 283 UCharsTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
michael@0 284 UBool haveUniqueValue, int32_t &uniqueValue) {
michael@0 285 while(length>kMaxBranchLinearSubNodeLength) {
michael@0 286 ++pos; // ignore the comparison unit
michael@0 287 if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
michael@0 288 return NULL;
michael@0 289 }
michael@0 290 length=length-(length>>1);
michael@0 291 pos=skipDelta(pos);
michael@0 292 }
michael@0 293 do {
michael@0 294 ++pos; // ignore a comparison unit
michael@0 295 // handle its value
michael@0 296 int32_t node=*pos++;
michael@0 297 UBool isFinal=(UBool)(node>>15);
michael@0 298 node&=0x7fff;
michael@0 299 int32_t value=readValue(pos, node);
michael@0 300 pos=skipValue(pos, node);
michael@0 301 if(isFinal) {
michael@0 302 if(haveUniqueValue) {
michael@0 303 if(value!=uniqueValue) {
michael@0 304 return NULL;
michael@0 305 }
michael@0 306 } else {
michael@0 307 uniqueValue=value;
michael@0 308 haveUniqueValue=TRUE;
michael@0 309 }
michael@0 310 } else {
michael@0 311 if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
michael@0 312 return NULL;
michael@0 313 }
michael@0 314 haveUniqueValue=TRUE;
michael@0 315 }
michael@0 316 } while(--length>1);
michael@0 317 return pos+1; // ignore the last comparison unit
michael@0 318 }
michael@0 319
michael@0 320 UBool
michael@0 321 UCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
michael@0 322 int32_t node=*pos++;
michael@0 323 for(;;) {
michael@0 324 if(node<kMinLinearMatch) {
michael@0 325 if(node==0) {
michael@0 326 node=*pos++;
michael@0 327 }
michael@0 328 pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
michael@0 329 if(pos==NULL) {
michael@0 330 return FALSE;
michael@0 331 }
michael@0 332 haveUniqueValue=TRUE;
michael@0 333 node=*pos++;
michael@0 334 } else if(node<kMinValueLead) {
michael@0 335 // linear-match node
michael@0 336 pos+=node-kMinLinearMatch+1; // Ignore the match units.
michael@0 337 node=*pos++;
michael@0 338 } else {
michael@0 339 UBool isFinal=(UBool)(node>>15);
michael@0 340 int32_t value;
michael@0 341 if(isFinal) {
michael@0 342 value=readValue(pos, node&0x7fff);
michael@0 343 } else {
michael@0 344 value=readNodeValue(pos, node);
michael@0 345 }
michael@0 346 if(haveUniqueValue) {
michael@0 347 if(value!=uniqueValue) {
michael@0 348 return FALSE;
michael@0 349 }
michael@0 350 } else {
michael@0 351 uniqueValue=value;
michael@0 352 haveUniqueValue=TRUE;
michael@0 353 }
michael@0 354 if(isFinal) {
michael@0 355 return TRUE;
michael@0 356 }
michael@0 357 pos=skipNodeValue(pos, node);
michael@0 358 node&=kNodeTypeMask;
michael@0 359 }
michael@0 360 }
michael@0 361 }
michael@0 362
michael@0 363 int32_t
michael@0 364 UCharsTrie::getNextUChars(Appendable &out) const {
michael@0 365 const UChar *pos=pos_;
michael@0 366 if(pos==NULL) {
michael@0 367 return 0;
michael@0 368 }
michael@0 369 if(remainingMatchLength_>=0) {
michael@0 370 out.appendCodeUnit(*pos); // Next unit of a pending linear-match node.
michael@0 371 return 1;
michael@0 372 }
michael@0 373 int32_t node=*pos++;
michael@0 374 if(node>=kMinValueLead) {
michael@0 375 if(node&kValueIsFinal) {
michael@0 376 return 0;
michael@0 377 } else {
michael@0 378 pos=skipNodeValue(pos, node);
michael@0 379 node&=kNodeTypeMask;
michael@0 380 }
michael@0 381 }
michael@0 382 if(node<kMinLinearMatch) {
michael@0 383 if(node==0) {
michael@0 384 node=*pos++;
michael@0 385 }
michael@0 386 out.reserveAppendCapacity(++node);
michael@0 387 getNextBranchUChars(pos, node, out);
michael@0 388 return node;
michael@0 389 } else {
michael@0 390 // First unit of the linear-match node.
michael@0 391 out.appendCodeUnit(*pos);
michael@0 392 return 1;
michael@0 393 }
michael@0 394 }
michael@0 395
michael@0 396 void
michael@0 397 UCharsTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
michael@0 398 while(length>kMaxBranchLinearSubNodeLength) {
michael@0 399 ++pos; // ignore the comparison unit
michael@0 400 getNextBranchUChars(jumpByDelta(pos), length>>1, out);
michael@0 401 length=length-(length>>1);
michael@0 402 pos=skipDelta(pos);
michael@0 403 }
michael@0 404 do {
michael@0 405 out.appendCodeUnit(*pos++);
michael@0 406 pos=skipValue(pos);
michael@0 407 } while(--length>1);
michael@0 408 out.appendCodeUnit(*pos);
michael@0 409 }
michael@0 410
michael@0 411 U_NAMESPACE_END

mercurial