intl/icu/source/common/uset.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2002-2011, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: uset.cpp
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2002mar07
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * There are functions to efficiently serialize a USet into an array of uint16_t
michael@0 17 * and functions to use such a serialized form efficiently without
michael@0 18 * instantiating a new USet.
michael@0 19 */
michael@0 20
michael@0 21 #include "unicode/utypes.h"
michael@0 22 #include "unicode/uobject.h"
michael@0 23 #include "unicode/uset.h"
michael@0 24 #include "unicode/uniset.h"
michael@0 25 #include "cmemory.h"
michael@0 26 #include "unicode/ustring.h"
michael@0 27 #include "unicode/parsepos.h"
michael@0 28
michael@0 29 U_NAMESPACE_USE
michael@0 30
michael@0 31 U_CAPI USet* U_EXPORT2
michael@0 32 uset_openEmpty() {
michael@0 33 return (USet*) new UnicodeSet();
michael@0 34 }
michael@0 35
michael@0 36 U_CAPI USet* U_EXPORT2
michael@0 37 uset_open(UChar32 start, UChar32 end) {
michael@0 38 return (USet*) new UnicodeSet(start, end);
michael@0 39 }
michael@0 40
michael@0 41 U_CAPI void U_EXPORT2
michael@0 42 uset_close(USet* set) {
michael@0 43 delete (UnicodeSet*) set;
michael@0 44 }
michael@0 45
michael@0 46 U_CAPI USet * U_EXPORT2
michael@0 47 uset_clone(const USet *set) {
michael@0 48 return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone());
michael@0 49 }
michael@0 50
michael@0 51 U_CAPI UBool U_EXPORT2
michael@0 52 uset_isFrozen(const USet *set) {
michael@0 53 return ((UnicodeSet*) set)->UnicodeSet::isFrozen();
michael@0 54 }
michael@0 55
michael@0 56 U_CAPI void U_EXPORT2
michael@0 57 uset_freeze(USet *set) {
michael@0 58 ((UnicodeSet*) set)->UnicodeSet::freeze();
michael@0 59 }
michael@0 60
michael@0 61 U_CAPI USet * U_EXPORT2
michael@0 62 uset_cloneAsThawed(const USet *set) {
michael@0 63 return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed());
michael@0 64 }
michael@0 65
michael@0 66 U_CAPI void U_EXPORT2
michael@0 67 uset_set(USet* set,
michael@0 68 UChar32 start, UChar32 end) {
michael@0 69 ((UnicodeSet*) set)->UnicodeSet::set(start, end);
michael@0 70 }
michael@0 71
michael@0 72 U_CAPI void U_EXPORT2
michael@0 73 uset_addAll(USet* set, const USet *additionalSet) {
michael@0 74 ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet));
michael@0 75 }
michael@0 76
michael@0 77 U_CAPI void U_EXPORT2
michael@0 78 uset_add(USet* set, UChar32 c) {
michael@0 79 ((UnicodeSet*) set)->UnicodeSet::add(c);
michael@0 80 }
michael@0 81
michael@0 82 U_CAPI void U_EXPORT2
michael@0 83 uset_addRange(USet* set, UChar32 start, UChar32 end) {
michael@0 84 ((UnicodeSet*) set)->UnicodeSet::add(start, end);
michael@0 85 }
michael@0 86
michael@0 87 U_CAPI void U_EXPORT2
michael@0 88 uset_addString(USet* set, const UChar* str, int32_t strLen) {
michael@0 89 // UnicodeString handles -1 for strLen
michael@0 90 UnicodeString s(strLen<0, str, strLen);
michael@0 91 ((UnicodeSet*) set)->UnicodeSet::add(s);
michael@0 92 }
michael@0 93
michael@0 94 U_CAPI void U_EXPORT2
michael@0 95 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) {
michael@0 96 // UnicodeString handles -1 for strLen
michael@0 97 UnicodeString s(str, strLen);
michael@0 98 ((UnicodeSet*) set)->UnicodeSet::addAll(s);
michael@0 99 }
michael@0 100
michael@0 101 U_CAPI void U_EXPORT2
michael@0 102 uset_remove(USet* set, UChar32 c) {
michael@0 103 ((UnicodeSet*) set)->UnicodeSet::remove(c);
michael@0 104 }
michael@0 105
michael@0 106 U_CAPI void U_EXPORT2
michael@0 107 uset_removeRange(USet* set, UChar32 start, UChar32 end) {
michael@0 108 ((UnicodeSet*) set)->UnicodeSet::remove(start, end);
michael@0 109 }
michael@0 110
michael@0 111 U_CAPI void U_EXPORT2
michael@0 112 uset_removeString(USet* set, const UChar* str, int32_t strLen) {
michael@0 113 UnicodeString s(strLen==-1, str, strLen);
michael@0 114 ((UnicodeSet*) set)->UnicodeSet::remove(s);
michael@0 115 }
michael@0 116
michael@0 117 U_CAPI void U_EXPORT2
michael@0 118 uset_removeAll(USet* set, const USet* remove) {
michael@0 119 ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
michael@0 120 }
michael@0 121
michael@0 122 U_CAPI void U_EXPORT2
michael@0 123 uset_retain(USet* set, UChar32 start, UChar32 end) {
michael@0 124 ((UnicodeSet*) set)->UnicodeSet::retain(start, end);
michael@0 125 }
michael@0 126
michael@0 127 U_CAPI void U_EXPORT2
michael@0 128 uset_retainAll(USet* set, const USet* retain) {
michael@0 129 ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
michael@0 130 }
michael@0 131
michael@0 132 U_CAPI void U_EXPORT2
michael@0 133 uset_compact(USet* set) {
michael@0 134 ((UnicodeSet*) set)->UnicodeSet::compact();
michael@0 135 }
michael@0 136
michael@0 137 U_CAPI void U_EXPORT2
michael@0 138 uset_complement(USet* set) {
michael@0 139 ((UnicodeSet*) set)->UnicodeSet::complement();
michael@0 140 }
michael@0 141
michael@0 142 U_CAPI void U_EXPORT2
michael@0 143 uset_complementAll(USet* set, const USet* complement) {
michael@0 144 ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);
michael@0 145 }
michael@0 146
michael@0 147 U_CAPI void U_EXPORT2
michael@0 148 uset_clear(USet* set) {
michael@0 149 ((UnicodeSet*) set)->UnicodeSet::clear();
michael@0 150 }
michael@0 151
michael@0 152 U_CAPI void U_EXPORT2
michael@0 153 uset_removeAllStrings(USet* set) {
michael@0 154 ((UnicodeSet*) set)->UnicodeSet::removeAllStrings();
michael@0 155 }
michael@0 156
michael@0 157 U_CAPI UBool U_EXPORT2
michael@0 158 uset_isEmpty(const USet* set) {
michael@0 159 return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
michael@0 160 }
michael@0 161
michael@0 162 U_CAPI UBool U_EXPORT2
michael@0 163 uset_contains(const USet* set, UChar32 c) {
michael@0 164 return ((const UnicodeSet*) set)->UnicodeSet::contains(c);
michael@0 165 }
michael@0 166
michael@0 167 U_CAPI UBool U_EXPORT2
michael@0 168 uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
michael@0 169 return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end);
michael@0 170 }
michael@0 171
michael@0 172 U_CAPI UBool U_EXPORT2
michael@0 173 uset_containsString(const USet* set, const UChar* str, int32_t strLen) {
michael@0 174 UnicodeString s(strLen==-1, str, strLen);
michael@0 175 return ((const UnicodeSet*) set)->UnicodeSet::contains(s);
michael@0 176 }
michael@0 177
michael@0 178 U_CAPI UBool U_EXPORT2
michael@0 179 uset_containsAll(const USet* set1, const USet* set2) {
michael@0 180 return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2);
michael@0 181 }
michael@0 182
michael@0 183 U_CAPI UBool U_EXPORT2
michael@0 184 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) {
michael@0 185 // Create a string alias, since nothing is being added to the set.
michael@0 186 UnicodeString s(strLen==-1, str, strLen);
michael@0 187 return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s);
michael@0 188 }
michael@0 189
michael@0 190 U_CAPI UBool U_EXPORT2
michael@0 191 uset_containsNone(const USet* set1, const USet* set2) {
michael@0 192 return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2);
michael@0 193 }
michael@0 194
michael@0 195 U_CAPI UBool U_EXPORT2
michael@0 196 uset_containsSome(const USet* set1, const USet* set2) {
michael@0 197 return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);
michael@0 198 }
michael@0 199
michael@0 200 U_CAPI int32_t U_EXPORT2
michael@0 201 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
michael@0 202 return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);
michael@0 203 }
michael@0 204
michael@0 205 U_CAPI int32_t U_EXPORT2
michael@0 206 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
michael@0 207 return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);
michael@0 208 }
michael@0 209
michael@0 210 U_CAPI int32_t U_EXPORT2
michael@0 211 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
michael@0 212 return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);
michael@0 213 }
michael@0 214
michael@0 215 U_CAPI int32_t U_EXPORT2
michael@0 216 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
michael@0 217 return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);
michael@0 218 }
michael@0 219
michael@0 220 U_CAPI UBool U_EXPORT2
michael@0 221 uset_equals(const USet* set1, const USet* set2) {
michael@0 222 return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2;
michael@0 223 }
michael@0 224
michael@0 225 U_CAPI int32_t U_EXPORT2
michael@0 226 uset_indexOf(const USet* set, UChar32 c) {
michael@0 227 return ((UnicodeSet*) set)->UnicodeSet::indexOf(c);
michael@0 228 }
michael@0 229
michael@0 230 U_CAPI UChar32 U_EXPORT2
michael@0 231 uset_charAt(const USet* set, int32_t index) {
michael@0 232 return ((UnicodeSet*) set)->UnicodeSet::charAt(index);
michael@0 233 }
michael@0 234
michael@0 235 U_CAPI int32_t U_EXPORT2
michael@0 236 uset_size(const USet* set) {
michael@0 237 return ((const UnicodeSet*) set)->UnicodeSet::size();
michael@0 238 }
michael@0 239
michael@0 240 U_NAMESPACE_BEGIN
michael@0 241 /**
michael@0 242 * This class only exists to provide access to the UnicodeSet private
michael@0 243 * USet support API. Declaring a class a friend is more portable than
michael@0 244 * trying to declare extern "C" functions as friends.
michael@0 245 */
michael@0 246 class USetAccess /* not : public UObject because all methods are static */ {
michael@0 247 public:
michael@0 248 /* Try to have the compiler inline these*/
michael@0 249 inline static int32_t getStringCount(const UnicodeSet& set) {
michael@0 250 return set.getStringCount();
michael@0 251 }
michael@0 252 inline static const UnicodeString* getString(const UnicodeSet& set,
michael@0 253 int32_t i) {
michael@0 254 return set.getString(i);
michael@0 255 }
michael@0 256 private:
michael@0 257 /* do not instantiate*/
michael@0 258 USetAccess();
michael@0 259 };
michael@0 260 U_NAMESPACE_END
michael@0 261
michael@0 262 U_CAPI int32_t U_EXPORT2
michael@0 263 uset_getItemCount(const USet* uset) {
michael@0 264 const UnicodeSet& set = *(const UnicodeSet*)uset;
michael@0 265 return set.getRangeCount() + USetAccess::getStringCount(set);
michael@0 266 }
michael@0 267
michael@0 268 U_CAPI int32_t U_EXPORT2
michael@0 269 uset_getItem(const USet* uset, int32_t itemIndex,
michael@0 270 UChar32* start, UChar32* end,
michael@0 271 UChar* str, int32_t strCapacity,
michael@0 272 UErrorCode* ec) {
michael@0 273 if (U_FAILURE(*ec)) return 0;
michael@0 274 const UnicodeSet& set = *(const UnicodeSet*)uset;
michael@0 275 int32_t rangeCount;
michael@0 276
michael@0 277 if (itemIndex < 0) {
michael@0 278 *ec = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 279 return -1;
michael@0 280 } else if (itemIndex < (rangeCount = set.getRangeCount())) {
michael@0 281 *start = set.getRangeStart(itemIndex);
michael@0 282 *end = set.getRangeEnd(itemIndex);
michael@0 283 return 0;
michael@0 284 } else {
michael@0 285 itemIndex -= rangeCount;
michael@0 286 if (itemIndex < USetAccess::getStringCount(set)) {
michael@0 287 const UnicodeString* s = USetAccess::getString(set, itemIndex);
michael@0 288 return s->extract(str, strCapacity, *ec);
michael@0 289 } else {
michael@0 290 *ec = U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 291 return -1;
michael@0 292 }
michael@0 293 }
michael@0 294 }
michael@0 295
michael@0 296 //U_CAPI int32_t U_EXPORT2
michael@0 297 //uset_getRangeCount(const USet* set) {
michael@0 298 // return ((const UnicodeSet*) set)->getRangeCount();
michael@0 299 //}
michael@0 300 //
michael@0 301 //U_CAPI UBool U_EXPORT2
michael@0 302 //uset_getRange(const USet* set, int32_t rangeIndex,
michael@0 303 // UChar32* pStart, UChar32* pEnd) {
michael@0 304 // if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) {
michael@0 305 // return FALSE;
michael@0 306 // }
michael@0 307 // const UnicodeSet* us = (const UnicodeSet*) set;
michael@0 308 // *pStart = us->getRangeStart(rangeIndex);
michael@0 309 // *pEnd = us->getRangeEnd(rangeIndex);
michael@0 310 // return TRUE;
michael@0 311 //}
michael@0 312
michael@0 313 /*
michael@0 314 * Serialize a USet into 16-bit units.
michael@0 315 * Store BMP code points as themselves with one 16-bit unit each.
michael@0 316 *
michael@0 317 * Important: the code points in the array are in ascending order,
michael@0 318 * therefore all BMP code points precede all supplementary code points.
michael@0 319 *
michael@0 320 * Store each supplementary code point in 2 16-bit units,
michael@0 321 * simply with higher-then-lower 16-bit halves.
michael@0 322 *
michael@0 323 * Precede the entire list with the length.
michael@0 324 * If there are supplementary code points, then set bit 15 in the length
michael@0 325 * and add the bmpLength between it and the array.
michael@0 326 *
michael@0 327 * In other words:
michael@0 328 * - all BMP: (length=bmpLength) BMP, .., BMP
michael@0 329 * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
michael@0 330 */
michael@0 331 U_CAPI int32_t U_EXPORT2
michael@0 332 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) {
michael@0 333 if (ec==NULL || U_FAILURE(*ec)) {
michael@0 334 return 0;
michael@0 335 }
michael@0 336
michael@0 337 return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec);
michael@0 338 }
michael@0 339
michael@0 340 U_CAPI UBool U_EXPORT2
michael@0 341 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) {
michael@0 342 int32_t length;
michael@0 343
michael@0 344 if(fillSet==NULL) {
michael@0 345 return FALSE;
michael@0 346 }
michael@0 347 if(src==NULL || srcLength<=0) {
michael@0 348 fillSet->length=fillSet->bmpLength=0;
michael@0 349 return FALSE;
michael@0 350 }
michael@0 351
michael@0 352 length=*src++;
michael@0 353 if(length&0x8000) {
michael@0 354 /* there are supplementary values */
michael@0 355 length&=0x7fff;
michael@0 356 if(srcLength<(2+length)) {
michael@0 357 fillSet->length=fillSet->bmpLength=0;
michael@0 358 return FALSE;
michael@0 359 }
michael@0 360 fillSet->bmpLength=*src++;
michael@0 361 } else {
michael@0 362 /* only BMP values */
michael@0 363 if(srcLength<(1+length)) {
michael@0 364 fillSet->length=fillSet->bmpLength=0;
michael@0 365 return FALSE;
michael@0 366 }
michael@0 367 fillSet->bmpLength=length;
michael@0 368 }
michael@0 369 fillSet->array=src;
michael@0 370 fillSet->length=length;
michael@0 371 return TRUE;
michael@0 372 }
michael@0 373
michael@0 374 U_CAPI void U_EXPORT2
michael@0 375 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) {
michael@0 376 if(fillSet==NULL || (uint32_t)c>0x10ffff) {
michael@0 377 return;
michael@0 378 }
michael@0 379
michael@0 380 fillSet->array=fillSet->staticArray;
michael@0 381 if(c<0xffff) {
michael@0 382 fillSet->bmpLength=fillSet->length=2;
michael@0 383 fillSet->staticArray[0]=(uint16_t)c;
michael@0 384 fillSet->staticArray[1]=(uint16_t)c+1;
michael@0 385 } else if(c==0xffff) {
michael@0 386 fillSet->bmpLength=1;
michael@0 387 fillSet->length=3;
michael@0 388 fillSet->staticArray[0]=0xffff;
michael@0 389 fillSet->staticArray[1]=1;
michael@0 390 fillSet->staticArray[2]=0;
michael@0 391 } else if(c<0x10ffff) {
michael@0 392 fillSet->bmpLength=0;
michael@0 393 fillSet->length=4;
michael@0 394 fillSet->staticArray[0]=(uint16_t)(c>>16);
michael@0 395 fillSet->staticArray[1]=(uint16_t)c;
michael@0 396 ++c;
michael@0 397 fillSet->staticArray[2]=(uint16_t)(c>>16);
michael@0 398 fillSet->staticArray[3]=(uint16_t)c;
michael@0 399 } else /* c==0x10ffff */ {
michael@0 400 fillSet->bmpLength=0;
michael@0 401 fillSet->length=2;
michael@0 402 fillSet->staticArray[0]=0x10;
michael@0 403 fillSet->staticArray[1]=0xffff;
michael@0 404 }
michael@0 405 }
michael@0 406
michael@0 407 U_CAPI UBool U_EXPORT2
michael@0 408 uset_serializedContains(const USerializedSet* set, UChar32 c) {
michael@0 409 const uint16_t* array;
michael@0 410
michael@0 411 if(set==NULL || (uint32_t)c>0x10ffff) {
michael@0 412 return FALSE;
michael@0 413 }
michael@0 414
michael@0 415 array=set->array;
michael@0 416 if(c<=0xffff) {
michael@0 417 /* find c in the BMP part */
michael@0 418 int32_t lo = 0;
michael@0 419 int32_t hi = set->bmpLength-1;
michael@0 420 if (c < array[0]) {
michael@0 421 hi = 0;
michael@0 422 } else if (c < array[hi]) {
michael@0 423 for(;;) {
michael@0 424 int32_t i = (lo + hi) >> 1;
michael@0 425 if (i == lo) {
michael@0 426 break; // Done!
michael@0 427 } else if (c < array[i]) {
michael@0 428 hi = i;
michael@0 429 } else {
michael@0 430 lo = i;
michael@0 431 }
michael@0 432 }
michael@0 433 } else {
michael@0 434 hi += 1;
michael@0 435 }
michael@0 436 return (UBool)(hi&1);
michael@0 437 } else {
michael@0 438 /* find c in the supplementary part */
michael@0 439 uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
michael@0 440 int32_t base = set->bmpLength;
michael@0 441 int32_t lo = 0;
michael@0 442 int32_t hi = set->length - 2 - base;
michael@0 443 if (high < array[base] || (high==array[base] && low<array[base+1])) {
michael@0 444 hi = 0;
michael@0 445 } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) {
michael@0 446 for (;;) {
michael@0 447 int32_t i = ((lo + hi) >> 1) & ~1; // Guarantee even result
michael@0 448 int32_t iabs = i + base;
michael@0 449 if (i == lo) {
michael@0 450 break; // Done!
michael@0 451 } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) {
michael@0 452 hi = i;
michael@0 453 } else {
michael@0 454 lo = i;
michael@0 455 }
michael@0 456 }
michael@0 457 } else {
michael@0 458 hi += 2;
michael@0 459 }
michael@0 460 /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
michael@0 461 return (UBool)(((hi+(base<<1))&2)!=0);
michael@0 462 }
michael@0 463 }
michael@0 464
michael@0 465 U_CAPI int32_t U_EXPORT2
michael@0 466 uset_getSerializedRangeCount(const USerializedSet* set) {
michael@0 467 if(set==NULL) {
michael@0 468 return 0;
michael@0 469 }
michael@0 470
michael@0 471 return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2;
michael@0 472 }
michael@0 473
michael@0 474 U_CAPI UBool U_EXPORT2
michael@0 475 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
michael@0 476 UChar32* pStart, UChar32* pEnd) {
michael@0 477 const uint16_t* array;
michael@0 478 int32_t bmpLength, length;
michael@0 479
michael@0 480 if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) {
michael@0 481 return FALSE;
michael@0 482 }
michael@0 483
michael@0 484 array=set->array;
michael@0 485 length=set->length;
michael@0 486 bmpLength=set->bmpLength;
michael@0 487
michael@0 488 rangeIndex*=2; /* address start/limit pairs */
michael@0 489 if(rangeIndex<bmpLength) {
michael@0 490 *pStart=array[rangeIndex++];
michael@0 491 if(rangeIndex<bmpLength) {
michael@0 492 *pEnd=array[rangeIndex]-1;
michael@0 493 } else if(rangeIndex<length) {
michael@0 494 *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
michael@0 495 } else {
michael@0 496 *pEnd=0x10ffff;
michael@0 497 }
michael@0 498 return TRUE;
michael@0 499 } else {
michael@0 500 rangeIndex-=bmpLength;
michael@0 501 rangeIndex*=2; /* address pairs of pairs of units */
michael@0 502 length-=bmpLength;
michael@0 503 if(rangeIndex<length) {
michael@0 504 array+=bmpLength;
michael@0 505 *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
michael@0 506 rangeIndex+=2;
michael@0 507 if(rangeIndex<length) {
michael@0 508 *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
michael@0 509 } else {
michael@0 510 *pEnd=0x10ffff;
michael@0 511 }
michael@0 512 return TRUE;
michael@0 513 } else {
michael@0 514 return FALSE;
michael@0 515 }
michael@0 516 }
michael@0 517 }
michael@0 518
michael@0 519 // TODO The old, internal uset.c had an efficient uset_containsOne function.
michael@0 520 // Returned the one and only code point, or else -1 or something.
michael@0 521 // Consider adding such a function to both C and C++ UnicodeSet/uset.
michael@0 522 // See tools/gennorm/store.c for usage, now usetContainsOne there.
michael@0 523
michael@0 524 // TODO Investigate incorporating this code into UnicodeSet to improve
michael@0 525 // efficiency.
michael@0 526 // ---
michael@0 527 // #define USET_GROW_DELTA 20
michael@0 528 //
michael@0 529 // static int32_t
michael@0 530 // findChar(const UChar32* array, int32_t length, UChar32 c) {
michael@0 531 // int32_t i;
michael@0 532 //
michael@0 533 // /* check the last range limit first for more efficient appending */
michael@0 534 // if(length>0) {
michael@0 535 // if(c>=array[length-1]) {
michael@0 536 // return length;
michael@0 537 // }
michael@0 538 //
michael@0 539 // /* do not check the last range limit again in the loop below */
michael@0 540 // --length;
michael@0 541 // }
michael@0 542 //
michael@0 543 // for(i=0; i<length && c>=array[i]; ++i) {}
michael@0 544 // return i;
michael@0 545 // }
michael@0 546 //
michael@0 547 // static UBool
michael@0 548 // addRemove(USet* set, UChar32 c, int32_t doRemove) {
michael@0 549 // int32_t i, length, more;
michael@0 550 //
michael@0 551 // if(set==NULL || (uint32_t)c>0x10ffff) {
michael@0 552 // return FALSE;
michael@0 553 // }
michael@0 554 //
michael@0 555 // length=set->length;
michael@0 556 // i=findChar(set->array, length, c);
michael@0 557 // if((i&1)^doRemove) {
michael@0 558 // /* c is already in the set */
michael@0 559 // return TRUE;
michael@0 560 // }
michael@0 561 //
michael@0 562 // /* how many more array items do we need? */
michael@0 563 // if(i<length && (c+1)==set->array[i]) {
michael@0 564 // /* c is just before the following range, extend that in-place by one */
michael@0 565 // set->array[i]=c;
michael@0 566 // if(i>0) {
michael@0 567 // --i;
michael@0 568 // if(c==set->array[i]) {
michael@0 569 // /* the previous range collapsed, remove it */
michael@0 570 // set->length=length-=2;
michael@0 571 // if(i<length) {
michael@0 572 // uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
michael@0 573 // }
michael@0 574 // }
michael@0 575 // }
michael@0 576 // return TRUE;
michael@0 577 // } else if(i>0 && c==set->array[i-1]) {
michael@0 578 // /* c is just after the previous range, extend that in-place by one */
michael@0 579 // if(++c<=0x10ffff) {
michael@0 580 // set->array[i-1]=c;
michael@0 581 // if(i<length && c==set->array[i]) {
michael@0 582 // /* the following range collapsed, remove it */
michael@0 583 // --i;
michael@0 584 // set->length=length-=2;
michael@0 585 // if(i<length) {
michael@0 586 // uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
michael@0 587 // }
michael@0 588 // }
michael@0 589 // } else {
michael@0 590 // /* extend the previous range (had limit 0x10ffff) to the end of Unicode */
michael@0 591 // set->length=i-1;
michael@0 592 // }
michael@0 593 // return TRUE;
michael@0 594 // } else if(i==length && c==0x10ffff) {
michael@0 595 // /* insert one range limit c */
michael@0 596 // more=1;
michael@0 597 // } else {
michael@0 598 // /* insert two range limits c, c+1 */
michael@0 599 // more=2;
michael@0 600 // }
michael@0 601 //
michael@0 602 // /* insert <more> range limits */
michael@0 603 // if(length+more>set->capacity) {
michael@0 604 // /* reallocate */
michael@0 605 // int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
michael@0 606 // UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4);
michael@0 607 // if(newArray==NULL) {
michael@0 608 // return FALSE;
michael@0 609 // }
michael@0 610 // set->capacity=newCapacity;
michael@0 611 // uprv_memcpy(newArray, set->array, length*4);
michael@0 612 //
michael@0 613 // if(set->array!=set->staticBuffer) {
michael@0 614 // uprv_free(set->array);
michael@0 615 // }
michael@0 616 // set->array=newArray;
michael@0 617 // }
michael@0 618 //
michael@0 619 // if(i<length) {
michael@0 620 // uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
michael@0 621 // }
michael@0 622 // set->array[i]=c;
michael@0 623 // if(more==2) {
michael@0 624 // set->array[i+1]=c+1;
michael@0 625 // }
michael@0 626 // set->length+=more;
michael@0 627 //
michael@0 628 // return TRUE;
michael@0 629 // }
michael@0 630 //
michael@0 631 // U_CAPI UBool U_EXPORT2
michael@0 632 // uset_add(USet* set, UChar32 c) {
michael@0 633 // return addRemove(set, c, 0);
michael@0 634 // }
michael@0 635 //
michael@0 636 // U_CAPI void U_EXPORT2
michael@0 637 // uset_remove(USet* set, UChar32 c) {
michael@0 638 // addRemove(set, c, 1);
michael@0 639 // }

mercurial