intl/icu/source/common/uset.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/uset.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,639 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2002-2011, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  uset.cpp
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2002mar07
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   There are functions to efficiently serialize a USet into an array of uint16_t
    1.20 +*   and functions to use such a serialized form efficiently without
    1.21 +*   instantiating a new USet.
    1.22 +*/
    1.23 +
    1.24 +#include "unicode/utypes.h"
    1.25 +#include "unicode/uobject.h"
    1.26 +#include "unicode/uset.h"
    1.27 +#include "unicode/uniset.h"
    1.28 +#include "cmemory.h"
    1.29 +#include "unicode/ustring.h"
    1.30 +#include "unicode/parsepos.h"
    1.31 +
    1.32 +U_NAMESPACE_USE
    1.33 +
    1.34 +U_CAPI USet* U_EXPORT2
    1.35 +uset_openEmpty() {
    1.36 +    return (USet*) new UnicodeSet();
    1.37 +}
    1.38 +
    1.39 +U_CAPI USet* U_EXPORT2
    1.40 +uset_open(UChar32 start, UChar32 end) {
    1.41 +    return (USet*) new UnicodeSet(start, end);
    1.42 +}
    1.43 +
    1.44 +U_CAPI void U_EXPORT2
    1.45 +uset_close(USet* set) {
    1.46 +    delete (UnicodeSet*) set;
    1.47 +}
    1.48 +
    1.49 +U_CAPI USet * U_EXPORT2
    1.50 +uset_clone(const USet *set) {
    1.51 +    return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone());
    1.52 +}
    1.53 +
    1.54 +U_CAPI UBool U_EXPORT2
    1.55 +uset_isFrozen(const USet *set) {
    1.56 +    return ((UnicodeSet*) set)->UnicodeSet::isFrozen();
    1.57 +}
    1.58 +
    1.59 +U_CAPI void U_EXPORT2
    1.60 +uset_freeze(USet *set) {
    1.61 +    ((UnicodeSet*) set)->UnicodeSet::freeze();
    1.62 +}
    1.63 +
    1.64 +U_CAPI USet * U_EXPORT2
    1.65 +uset_cloneAsThawed(const USet *set) {
    1.66 +    return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed());
    1.67 +}
    1.68 +
    1.69 +U_CAPI void U_EXPORT2
    1.70 +uset_set(USet* set,
    1.71 +     UChar32 start, UChar32 end) {
    1.72 +    ((UnicodeSet*) set)->UnicodeSet::set(start, end);
    1.73 +}
    1.74 +
    1.75 +U_CAPI void U_EXPORT2
    1.76 +uset_addAll(USet* set, const USet *additionalSet) {
    1.77 +    ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet));
    1.78 +}
    1.79 +
    1.80 +U_CAPI void U_EXPORT2
    1.81 +uset_add(USet* set, UChar32 c) {
    1.82 +    ((UnicodeSet*) set)->UnicodeSet::add(c);
    1.83 +}
    1.84 +
    1.85 +U_CAPI void U_EXPORT2
    1.86 +uset_addRange(USet* set, UChar32 start, UChar32 end) {
    1.87 +    ((UnicodeSet*) set)->UnicodeSet::add(start, end);    
    1.88 +}
    1.89 +
    1.90 +U_CAPI void U_EXPORT2
    1.91 +uset_addString(USet* set, const UChar* str, int32_t strLen) {
    1.92 +    // UnicodeString handles -1 for strLen
    1.93 +    UnicodeString s(strLen<0, str, strLen);
    1.94 +    ((UnicodeSet*) set)->UnicodeSet::add(s);
    1.95 +}
    1.96 +
    1.97 +U_CAPI void U_EXPORT2
    1.98 +uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) {
    1.99 +    // UnicodeString handles -1 for strLen
   1.100 +    UnicodeString s(str, strLen);
   1.101 +    ((UnicodeSet*) set)->UnicodeSet::addAll(s);
   1.102 +}
   1.103 +
   1.104 +U_CAPI void U_EXPORT2
   1.105 +uset_remove(USet* set, UChar32 c) {
   1.106 +    ((UnicodeSet*) set)->UnicodeSet::remove(c);
   1.107 +}
   1.108 +
   1.109 +U_CAPI void U_EXPORT2
   1.110 +uset_removeRange(USet* set, UChar32 start, UChar32 end) {
   1.111 +    ((UnicodeSet*) set)->UnicodeSet::remove(start, end);
   1.112 +}
   1.113 +
   1.114 +U_CAPI void U_EXPORT2
   1.115 +uset_removeString(USet* set, const UChar* str, int32_t strLen) {
   1.116 +    UnicodeString s(strLen==-1, str, strLen);
   1.117 +    ((UnicodeSet*) set)->UnicodeSet::remove(s);
   1.118 +}
   1.119 +
   1.120 +U_CAPI void U_EXPORT2
   1.121 +uset_removeAll(USet* set, const USet* remove) {
   1.122 +    ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
   1.123 +}
   1.124 +
   1.125 +U_CAPI void U_EXPORT2
   1.126 +uset_retain(USet* set, UChar32 start, UChar32 end) {
   1.127 +    ((UnicodeSet*) set)->UnicodeSet::retain(start, end);
   1.128 +}
   1.129 +
   1.130 +U_CAPI void U_EXPORT2
   1.131 +uset_retainAll(USet* set, const USet* retain) {
   1.132 +    ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
   1.133 +}
   1.134 +
   1.135 +U_CAPI void U_EXPORT2
   1.136 +uset_compact(USet* set) {
   1.137 +    ((UnicodeSet*) set)->UnicodeSet::compact();
   1.138 +}
   1.139 +
   1.140 +U_CAPI void U_EXPORT2
   1.141 +uset_complement(USet* set) {
   1.142 +    ((UnicodeSet*) set)->UnicodeSet::complement();
   1.143 +}
   1.144 +
   1.145 +U_CAPI void U_EXPORT2
   1.146 +uset_complementAll(USet* set, const USet* complement) {
   1.147 +    ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);
   1.148 +}
   1.149 +
   1.150 +U_CAPI void U_EXPORT2
   1.151 +uset_clear(USet* set) {
   1.152 +    ((UnicodeSet*) set)->UnicodeSet::clear();
   1.153 +}
   1.154 +
   1.155 +U_CAPI void U_EXPORT2
   1.156 +uset_removeAllStrings(USet* set) {
   1.157 +    ((UnicodeSet*) set)->UnicodeSet::removeAllStrings();
   1.158 +}
   1.159 +
   1.160 +U_CAPI UBool U_EXPORT2
   1.161 +uset_isEmpty(const USet* set) {
   1.162 +    return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
   1.163 +}
   1.164 +
   1.165 +U_CAPI UBool U_EXPORT2
   1.166 +uset_contains(const USet* set, UChar32 c) {
   1.167 +    return ((const UnicodeSet*) set)->UnicodeSet::contains(c);
   1.168 +}
   1.169 +
   1.170 +U_CAPI UBool U_EXPORT2
   1.171 +uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
   1.172 +    return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end);
   1.173 +}
   1.174 +
   1.175 +U_CAPI UBool U_EXPORT2
   1.176 +uset_containsString(const USet* set, const UChar* str, int32_t strLen) {
   1.177 +    UnicodeString s(strLen==-1, str, strLen);
   1.178 +    return ((const UnicodeSet*) set)->UnicodeSet::contains(s);
   1.179 +}
   1.180 +
   1.181 +U_CAPI UBool U_EXPORT2
   1.182 +uset_containsAll(const USet* set1, const USet* set2) {
   1.183 +    return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2);
   1.184 +}
   1.185 +
   1.186 +U_CAPI UBool U_EXPORT2
   1.187 +uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) {
   1.188 +    // Create a string alias, since nothing is being added to the set.
   1.189 +    UnicodeString s(strLen==-1, str, strLen);
   1.190 +    return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s);
   1.191 +}
   1.192 +
   1.193 +U_CAPI UBool U_EXPORT2
   1.194 +uset_containsNone(const USet* set1, const USet* set2) {
   1.195 +    return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2);
   1.196 +}
   1.197 +
   1.198 +U_CAPI UBool U_EXPORT2
   1.199 +uset_containsSome(const USet* set1, const USet* set2) {
   1.200 +    return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2);
   1.201 +}
   1.202 +
   1.203 +U_CAPI int32_t U_EXPORT2
   1.204 +uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
   1.205 +    return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition);
   1.206 +}
   1.207 +
   1.208 +U_CAPI int32_t U_EXPORT2
   1.209 +uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) {
   1.210 +    return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition);
   1.211 +}
   1.212 +
   1.213 +U_CAPI int32_t U_EXPORT2
   1.214 +uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
   1.215 +    return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition);
   1.216 +}
   1.217 +
   1.218 +U_CAPI int32_t U_EXPORT2
   1.219 +uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) {
   1.220 +    return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition);
   1.221 +}
   1.222 +
   1.223 +U_CAPI UBool U_EXPORT2
   1.224 +uset_equals(const USet* set1, const USet* set2) {
   1.225 +    return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2;
   1.226 +}
   1.227 +
   1.228 +U_CAPI int32_t U_EXPORT2
   1.229 +uset_indexOf(const USet* set, UChar32 c) {
   1.230 +    return ((UnicodeSet*) set)->UnicodeSet::indexOf(c);
   1.231 +}
   1.232 +
   1.233 +U_CAPI UChar32 U_EXPORT2
   1.234 +uset_charAt(const USet* set, int32_t index) {
   1.235 +    return ((UnicodeSet*) set)->UnicodeSet::charAt(index);
   1.236 +}
   1.237 +
   1.238 +U_CAPI int32_t U_EXPORT2
   1.239 +uset_size(const USet* set) {
   1.240 +    return ((const UnicodeSet*) set)->UnicodeSet::size();
   1.241 +}
   1.242 +
   1.243 +U_NAMESPACE_BEGIN
   1.244 +/**
   1.245 + * This class only exists to provide access to the UnicodeSet private
   1.246 + * USet support API.  Declaring a class a friend is more portable than
   1.247 + * trying to declare extern "C" functions as friends.
   1.248 + */
   1.249 +class USetAccess /* not : public UObject because all methods are static */ {
   1.250 +public:
   1.251 +    /* Try to have the compiler inline these*/
   1.252 +    inline static int32_t getStringCount(const UnicodeSet& set) {
   1.253 +        return set.getStringCount();
   1.254 +    }
   1.255 +    inline static const UnicodeString* getString(const UnicodeSet& set,
   1.256 +                                                 int32_t i) {
   1.257 +        return set.getString(i);
   1.258 +    }
   1.259 +private:
   1.260 +    /* do not instantiate*/
   1.261 +    USetAccess();
   1.262 +};
   1.263 +U_NAMESPACE_END
   1.264 +
   1.265 +U_CAPI int32_t U_EXPORT2
   1.266 +uset_getItemCount(const USet* uset) {
   1.267 +    const UnicodeSet& set = *(const UnicodeSet*)uset;
   1.268 +    return set.getRangeCount() + USetAccess::getStringCount(set);
   1.269 +}
   1.270 +
   1.271 +U_CAPI int32_t U_EXPORT2
   1.272 +uset_getItem(const USet* uset, int32_t itemIndex,
   1.273 +             UChar32* start, UChar32* end,
   1.274 +             UChar* str, int32_t strCapacity,
   1.275 +             UErrorCode* ec) {
   1.276 +    if (U_FAILURE(*ec)) return 0;
   1.277 +    const UnicodeSet& set = *(const UnicodeSet*)uset;
   1.278 +    int32_t rangeCount;
   1.279 +
   1.280 +    if (itemIndex < 0) {
   1.281 +        *ec = U_ILLEGAL_ARGUMENT_ERROR;
   1.282 +        return -1;
   1.283 +    } else if (itemIndex < (rangeCount = set.getRangeCount())) {
   1.284 +        *start = set.getRangeStart(itemIndex);
   1.285 +        *end = set.getRangeEnd(itemIndex);
   1.286 +        return 0;
   1.287 +    } else {
   1.288 +        itemIndex -= rangeCount;
   1.289 +        if (itemIndex < USetAccess::getStringCount(set)) {
   1.290 +            const UnicodeString* s = USetAccess::getString(set, itemIndex);
   1.291 +            return s->extract(str, strCapacity, *ec);
   1.292 +        } else {
   1.293 +            *ec = U_INDEX_OUTOFBOUNDS_ERROR;
   1.294 +            return -1;
   1.295 +        }
   1.296 +    }
   1.297 +}
   1.298 +
   1.299 +//U_CAPI int32_t U_EXPORT2
   1.300 +//uset_getRangeCount(const USet* set) {
   1.301 +//    return ((const UnicodeSet*) set)->getRangeCount();
   1.302 +//}
   1.303 +//
   1.304 +//U_CAPI UBool U_EXPORT2
   1.305 +//uset_getRange(const USet* set, int32_t rangeIndex,
   1.306 +//              UChar32* pStart, UChar32* pEnd) {
   1.307 +//    if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) {
   1.308 +//        return FALSE;
   1.309 +//    }
   1.310 +//    const UnicodeSet* us = (const UnicodeSet*) set;
   1.311 +//    *pStart = us->getRangeStart(rangeIndex);
   1.312 +//    *pEnd = us->getRangeEnd(rangeIndex);
   1.313 +//    return TRUE;
   1.314 +//}
   1.315 +
   1.316 +/*
   1.317 + * Serialize a USet into 16-bit units.
   1.318 + * Store BMP code points as themselves with one 16-bit unit each.
   1.319 + *
   1.320 + * Important: the code points in the array are in ascending order,
   1.321 + * therefore all BMP code points precede all supplementary code points.
   1.322 + *
   1.323 + * Store each supplementary code point in 2 16-bit units,
   1.324 + * simply with higher-then-lower 16-bit halfs.
   1.325 + *
   1.326 + * Precede the entire list with the length.
   1.327 + * If there are supplementary code points, then set bit 15 in the length
   1.328 + * and add the bmpLength between it and the array.
   1.329 + *
   1.330 + * In other words:
   1.331 + * - all BMP:            (length=bmpLength) BMP, .., BMP
   1.332 + * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
   1.333 + */
   1.334 +U_CAPI int32_t U_EXPORT2
   1.335 +uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) {
   1.336 +    if (ec==NULL || U_FAILURE(*ec)) {
   1.337 +        return 0;
   1.338 +    }
   1.339 +
   1.340 +    return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec);
   1.341 +}
   1.342 +
   1.343 +U_CAPI UBool U_EXPORT2
   1.344 +uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) {
   1.345 +    int32_t length;
   1.346 +
   1.347 +    if(fillSet==NULL) {
   1.348 +        return FALSE;
   1.349 +    }
   1.350 +    if(src==NULL || srcLength<=0) {
   1.351 +        fillSet->length=fillSet->bmpLength=0;
   1.352 +        return FALSE;
   1.353 +    }
   1.354 +
   1.355 +    length=*src++;
   1.356 +    if(length&0x8000) {
   1.357 +        /* there are supplementary values */
   1.358 +        length&=0x7fff;
   1.359 +        if(srcLength<(2+length)) {
   1.360 +            fillSet->length=fillSet->bmpLength=0;
   1.361 +            return FALSE;
   1.362 +        }
   1.363 +        fillSet->bmpLength=*src++;
   1.364 +    } else {
   1.365 +        /* only BMP values */
   1.366 +        if(srcLength<(1+length)) {
   1.367 +            fillSet->length=fillSet->bmpLength=0;
   1.368 +            return FALSE;
   1.369 +        }
   1.370 +        fillSet->bmpLength=length;
   1.371 +    }
   1.372 +    fillSet->array=src;
   1.373 +    fillSet->length=length;
   1.374 +    return TRUE;
   1.375 +}
   1.376 +
   1.377 +U_CAPI void U_EXPORT2
   1.378 +uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) {
   1.379 +    if(fillSet==NULL || (uint32_t)c>0x10ffff) {
   1.380 +        return;
   1.381 +    }
   1.382 +
   1.383 +    fillSet->array=fillSet->staticArray;
   1.384 +    if(c<0xffff) {
   1.385 +        fillSet->bmpLength=fillSet->length=2;
   1.386 +        fillSet->staticArray[0]=(uint16_t)c;
   1.387 +        fillSet->staticArray[1]=(uint16_t)c+1;
   1.388 +    } else if(c==0xffff) {
   1.389 +        fillSet->bmpLength=1;
   1.390 +        fillSet->length=3;
   1.391 +        fillSet->staticArray[0]=0xffff;
   1.392 +        fillSet->staticArray[1]=1;
   1.393 +        fillSet->staticArray[2]=0;
   1.394 +    } else if(c<0x10ffff) {
   1.395 +        fillSet->bmpLength=0;
   1.396 +        fillSet->length=4;
   1.397 +        fillSet->staticArray[0]=(uint16_t)(c>>16);
   1.398 +        fillSet->staticArray[1]=(uint16_t)c;
   1.399 +        ++c;
   1.400 +        fillSet->staticArray[2]=(uint16_t)(c>>16);
   1.401 +        fillSet->staticArray[3]=(uint16_t)c;
   1.402 +    } else /* c==0x10ffff */ {
   1.403 +        fillSet->bmpLength=0;
   1.404 +        fillSet->length=2;
   1.405 +        fillSet->staticArray[0]=0x10;
   1.406 +        fillSet->staticArray[1]=0xffff;
   1.407 +    }
   1.408 +}
   1.409 +
   1.410 +U_CAPI UBool U_EXPORT2
   1.411 +uset_serializedContains(const USerializedSet* set, UChar32 c) {
   1.412 +    const uint16_t* array;
   1.413 +
   1.414 +    if(set==NULL || (uint32_t)c>0x10ffff) {
   1.415 +        return FALSE;
   1.416 +    }
   1.417 +
   1.418 +    array=set->array;
   1.419 +    if(c<=0xffff) {
   1.420 +        /* find c in the BMP part */
   1.421 +        int32_t lo = 0;
   1.422 +        int32_t hi = set->bmpLength-1;
   1.423 +        if (c < array[0]) {
   1.424 +            hi = 0;
   1.425 +        } else if (c < array[hi]) {
   1.426 +            for(;;) {
   1.427 +                int32_t i = (lo + hi) >> 1;
   1.428 +                if (i == lo) {
   1.429 +                    break;  // Done!
   1.430 +                } else if (c < array[i]) {
   1.431 +                    hi = i;
   1.432 +                } else {
   1.433 +                    lo = i;
   1.434 +                }
   1.435 +            }
   1.436 +        } else {
   1.437 +            hi += 1;
   1.438 +        }
   1.439 +        return (UBool)(hi&1);
   1.440 +    } else {
   1.441 +        /* find c in the supplementary part */
   1.442 +        uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
   1.443 +        int32_t base = set->bmpLength;
   1.444 +        int32_t lo = 0;
   1.445 +        int32_t hi = set->length - 2 - base;
   1.446 +        if (high < array[base] || (high==array[base] && low<array[base+1])) {
   1.447 +            hi = 0;
   1.448 +        } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) {
   1.449 +            for (;;) {
   1.450 +                int32_t i = ((lo + hi) >> 1) & ~1;  // Guarantee even result
   1.451 +                int32_t iabs = i + base;
   1.452 +                if (i == lo) {
   1.453 +                    break;  // Done!
   1.454 +                } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) {
   1.455 +                    hi = i;
   1.456 +                } else {
   1.457 +                    lo = i;
   1.458 +                }
   1.459 +            }
   1.460 +        } else {
   1.461 +            hi += 2;
   1.462 +        }
   1.463 +        /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
   1.464 +        return (UBool)(((hi+(base<<1))&2)!=0);
   1.465 +    }
   1.466 +}
   1.467 +
   1.468 +U_CAPI int32_t U_EXPORT2
   1.469 +uset_getSerializedRangeCount(const USerializedSet* set) {
   1.470 +    if(set==NULL) {
   1.471 +        return 0;
   1.472 +    }
   1.473 +
   1.474 +    return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2;
   1.475 +}
   1.476 +
   1.477 +U_CAPI UBool U_EXPORT2
   1.478 +uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
   1.479 +                        UChar32* pStart, UChar32* pEnd) {
   1.480 +    const uint16_t* array;
   1.481 +    int32_t bmpLength, length;
   1.482 +
   1.483 +    if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) {
   1.484 +        return FALSE;
   1.485 +    }
   1.486 +
   1.487 +    array=set->array;
   1.488 +    length=set->length;
   1.489 +    bmpLength=set->bmpLength;
   1.490 +
   1.491 +    rangeIndex*=2; /* address start/limit pairs */
   1.492 +    if(rangeIndex<bmpLength) {
   1.493 +        *pStart=array[rangeIndex++];
   1.494 +        if(rangeIndex<bmpLength) {
   1.495 +            *pEnd=array[rangeIndex]-1;
   1.496 +        } else if(rangeIndex<length) {
   1.497 +            *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
   1.498 +        } else {
   1.499 +            *pEnd=0x10ffff;
   1.500 +        }
   1.501 +        return TRUE;
   1.502 +    } else {
   1.503 +        rangeIndex-=bmpLength;
   1.504 +        rangeIndex*=2; /* address pairs of pairs of units */
   1.505 +        length-=bmpLength;
   1.506 +        if(rangeIndex<length) {
   1.507 +            array+=bmpLength;
   1.508 +            *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1];
   1.509 +            rangeIndex+=2;
   1.510 +            if(rangeIndex<length) {
   1.511 +                *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1;
   1.512 +            } else {
   1.513 +                *pEnd=0x10ffff;
   1.514 +            }
   1.515 +            return TRUE;
   1.516 +        } else {
   1.517 +            return FALSE;
   1.518 +        }
   1.519 +    }
   1.520 +}
   1.521 +
   1.522 +// TODO The old, internal uset.c had an efficient uset_containsOne function.
   1.523 +// Returned the one and only code point, or else -1 or something.
   1.524 +// Consider adding such a function to both C and C++ UnicodeSet/uset.
   1.525 +// See tools/gennorm/store.c for usage, now usetContainsOne there.
   1.526 +
   1.527 +// TODO Investigate incorporating this code into UnicodeSet to improve
   1.528 +// efficiency.
   1.529 +// ---
   1.530 +// #define USET_GROW_DELTA 20
   1.531 +// 
   1.532 +// static int32_t
   1.533 +// findChar(const UChar32* array, int32_t length, UChar32 c) {
   1.534 +//     int32_t i;
   1.535 +// 
   1.536 +//     /* check the last range limit first for more efficient appending */
   1.537 +//     if(length>0) {
   1.538 +//         if(c>=array[length-1]) {
   1.539 +//             return length;
   1.540 +//         }
   1.541 +// 
   1.542 +//         /* do not check the last range limit again in the loop below */
   1.543 +//         --length;
   1.544 +//     }
   1.545 +// 
   1.546 +//     for(i=0; i<length && c>=array[i]; ++i) {}
   1.547 +//     return i;
   1.548 +// }
   1.549 +// 
   1.550 +// static UBool
   1.551 +// addRemove(USet* set, UChar32 c, int32_t doRemove) {
   1.552 +//     int32_t i, length, more;
   1.553 +// 
   1.554 +//     if(set==NULL || (uint32_t)c>0x10ffff) {
   1.555 +//         return FALSE;
   1.556 +//     }
   1.557 +// 
   1.558 +//     length=set->length;
   1.559 +//     i=findChar(set->array, length, c);
   1.560 +//     if((i&1)^doRemove) {
   1.561 +//         /* c is already in the set */
   1.562 +//         return TRUE;
   1.563 +//     }
   1.564 +// 
   1.565 +//     /* how many more array items do we need? */
   1.566 +//     if(i<length && (c+1)==set->array[i]) {
   1.567 +//         /* c is just before the following range, extend that in-place by one */
   1.568 +//         set->array[i]=c;
   1.569 +//         if(i>0) {
   1.570 +//             --i;
   1.571 +//             if(c==set->array[i]) {
   1.572 +//                 /* the previous range collapsed, remove it */
   1.573 +//                 set->length=length-=2;
   1.574 +//                 if(i<length) {
   1.575 +//                     uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
   1.576 +//                 }
   1.577 +//             }
   1.578 +//         }
   1.579 +//         return TRUE;
   1.580 +//     } else if(i>0 && c==set->array[i-1]) {
   1.581 +//         /* c is just after the previous range, extend that in-place by one */
   1.582 +//         if(++c<=0x10ffff) {
   1.583 +//             set->array[i-1]=c;
   1.584 +//             if(i<length && c==set->array[i]) {
   1.585 +//                 /* the following range collapsed, remove it */
   1.586 +//                 --i;
   1.587 +//                 set->length=length-=2;
   1.588 +//                 if(i<length) {
   1.589 +//                     uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
   1.590 +//                 }
   1.591 +//             }
   1.592 +//         } else {
   1.593 +//             /* extend the previous range (had limit 0x10ffff) to the end of Unicode */
   1.594 +//             set->length=i-1;
   1.595 +//         }
   1.596 +//         return TRUE;
   1.597 +//     } else if(i==length && c==0x10ffff) {
   1.598 +//         /* insert one range limit c */
   1.599 +//         more=1;
   1.600 +//     } else {
   1.601 +//         /* insert two range limits c, c+1 */
   1.602 +//         more=2;
   1.603 +//     }
   1.604 +// 
   1.605 +//     /* insert <more> range limits */
   1.606 +//     if(length+more>set->capacity) {
   1.607 +//         /* reallocate */
   1.608 +//         int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
   1.609 +//         UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4);
   1.610 +//         if(newArray==NULL) {
   1.611 +//             return FALSE;
   1.612 +//         }
   1.613 +//         set->capacity=newCapacity;
   1.614 +//         uprv_memcpy(newArray, set->array, length*4);
   1.615 +// 
   1.616 +//         if(set->array!=set->staticBuffer) {
   1.617 +//             uprv_free(set->array);
   1.618 +//         }
   1.619 +//         set->array=newArray;
   1.620 +//     }
   1.621 +// 
   1.622 +//     if(i<length) {
   1.623 +//         uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
   1.624 +//     }
   1.625 +//     set->array[i]=c;
   1.626 +//     if(more==2) {
   1.627 +//         set->array[i+1]=c+1;
   1.628 +//     }
   1.629 +//     set->length+=more;
   1.630 +// 
   1.631 +//     return TRUE;
   1.632 +// }
   1.633 +// 
   1.634 +// U_CAPI UBool U_EXPORT2
   1.635 +// uset_add(USet* set, UChar32 c) {
   1.636 +//     return addRemove(set, c, 0);
   1.637 +// }
   1.638 +// 
   1.639 +// U_CAPI void U_EXPORT2
   1.640 +// uset_remove(USet* set, UChar32 c) {
   1.641 +//     addRemove(set, c, 1);
   1.642 +// }

mercurial