Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 1999-2011, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: ucol_wgt.cpp |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2001mar08 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | * |
michael@0 | 16 | * This file contains code for allocating n collation element weights |
michael@0 | 17 | * between two exclusive limits. |
michael@0 | 18 | * It is used only internally by ucol_bld. |
michael@0 | 19 | */ |
michael@0 | 20 | |
michael@0 | 21 | #include "unicode/utypes.h" |
michael@0 | 22 | |
michael@0 | 23 | #if !UCONFIG_NO_COLLATION |
michael@0 | 24 | |
michael@0 | 25 | #include "ucol_imp.h" |
michael@0 | 26 | #include "ucol_wgt.h" |
michael@0 | 27 | #include "cmemory.h" |
michael@0 | 28 | #include "uarrsort.h" |
michael@0 | 29 | |
michael@0 | 30 | #ifdef UCOL_DEBUG |
michael@0 | 31 | # include <stdio.h> |
michael@0 | 32 | #endif |
michael@0 | 33 | |
michael@0 | 34 | /* collation element weight allocation -------------------------------------- */ |
michael@0 | 35 | |
michael@0 | 36 | /* helper functions for CE weights */ |
michael@0 | 37 | |
michael@0 | 38 | static inline int32_t |
michael@0 | 39 | lengthOfWeight(uint32_t weight) { |
michael@0 | 40 | if((weight&0xffffff)==0) { |
michael@0 | 41 | return 1; |
michael@0 | 42 | } else if((weight&0xffff)==0) { |
michael@0 | 43 | return 2; |
michael@0 | 44 | } else if((weight&0xff)==0) { |
michael@0 | 45 | return 3; |
michael@0 | 46 | } else { |
michael@0 | 47 | return 4; |
michael@0 | 48 | } |
michael@0 | 49 | } |
michael@0 | 50 | |
michael@0 | 51 | static inline uint32_t |
michael@0 | 52 | getWeightTrail(uint32_t weight, int32_t length) { |
michael@0 | 53 | return (uint32_t)(weight>>(8*(4-length)))&0xff; |
michael@0 | 54 | } |
michael@0 | 55 | |
michael@0 | 56 | static inline uint32_t |
michael@0 | 57 | setWeightTrail(uint32_t weight, int32_t length, uint32_t trail) { |
michael@0 | 58 | length=8*(4-length); |
michael@0 | 59 | return (uint32_t)((weight&(0xffffff00<<length))|(trail<<length)); |
michael@0 | 60 | } |
michael@0 | 61 | |
michael@0 | 62 | static inline uint32_t |
michael@0 | 63 | getWeightByte(uint32_t weight, int32_t idx) { |
michael@0 | 64 | return getWeightTrail(weight, idx); /* same calculation */ |
michael@0 | 65 | } |
michael@0 | 66 | |
michael@0 | 67 | static inline uint32_t |
michael@0 | 68 | setWeightByte(uint32_t weight, int32_t idx, uint32_t byte) { |
michael@0 | 69 | uint32_t mask; /* 0xffffffff except a 00 "hole" for the index-th byte */ |
michael@0 | 70 | |
michael@0 | 71 | idx*=8; |
michael@0 | 72 | if(idx<32) { |
michael@0 | 73 | mask=((uint32_t)0xffffffff)>>idx; |
michael@0 | 74 | } else { |
michael@0 | 75 | // Do not use uint32_t>>32 because on some platforms that does not shift at all |
michael@0 | 76 | // while we need it to become 0. |
michael@0 | 77 | // PowerPC: 0xffffffff>>32 = 0 (wanted) |
michael@0 | 78 | // x86: 0xffffffff>>32 = 0xffffffff (not wanted) |
michael@0 | 79 | // |
michael@0 | 80 | // ANSI C99 6.5.7 Bitwise shift operators: |
michael@0 | 81 | // "If the value of the right operand is negative |
michael@0 | 82 | // or is greater than or equal to the width of the promoted left operand, |
michael@0 | 83 | // the behavior is undefined." |
michael@0 | 84 | mask=0; |
michael@0 | 85 | } |
michael@0 | 86 | idx=32-idx; |
michael@0 | 87 | mask|=0xffffff00<<idx; |
michael@0 | 88 | return (uint32_t)((weight&mask)|(byte<<idx)); |
michael@0 | 89 | } |
michael@0 | 90 | |
michael@0 | 91 | static inline uint32_t |
michael@0 | 92 | truncateWeight(uint32_t weight, int32_t length) { |
michael@0 | 93 | return (uint32_t)(weight&(0xffffffff<<(8*(4-length)))); |
michael@0 | 94 | } |
michael@0 | 95 | |
michael@0 | 96 | static inline uint32_t |
michael@0 | 97 | incWeightTrail(uint32_t weight, int32_t length) { |
michael@0 | 98 | return (uint32_t)(weight+(1UL<<(8*(4-length)))); |
michael@0 | 99 | } |
michael@0 | 100 | |
michael@0 | 101 | static inline uint32_t |
michael@0 | 102 | decWeightTrail(uint32_t weight, int32_t length) { |
michael@0 | 103 | return (uint32_t)(weight-(1UL<<(8*(4-length)))); |
michael@0 | 104 | } |
michael@0 | 105 | |
michael@0 | 106 | static inline uint32_t |
michael@0 | 107 | incWeight(uint32_t weight, int32_t length, uint32_t maxByte) { |
michael@0 | 108 | uint32_t byte; |
michael@0 | 109 | |
michael@0 | 110 | for(;;) { |
michael@0 | 111 | byte=getWeightByte(weight, length); |
michael@0 | 112 | if(byte<maxByte) { |
michael@0 | 113 | return setWeightByte(weight, length, byte+1); |
michael@0 | 114 | } else { |
michael@0 | 115 | /* roll over, set this byte to UCOL_BYTE_FIRST_TAILORED and increment the previous one */ |
michael@0 | 116 | weight=setWeightByte(weight, length, UCOL_BYTE_FIRST_TAILORED); |
michael@0 | 117 | --length; |
michael@0 | 118 | } |
michael@0 | 119 | } |
michael@0 | 120 | } |
michael@0 | 121 | |
michael@0 | 122 | static inline int32_t |
michael@0 | 123 | lengthenRange(WeightRange *range, uint32_t maxByte, uint32_t countBytes) { |
michael@0 | 124 | int32_t length; |
michael@0 | 125 | |
michael@0 | 126 | length=range->length2+1; |
michael@0 | 127 | range->start=setWeightTrail(range->start, length, UCOL_BYTE_FIRST_TAILORED); |
michael@0 | 128 | range->end=setWeightTrail(range->end, length, maxByte); |
michael@0 | 129 | range->count2*=countBytes; |
michael@0 | 130 | range->length2=length; |
michael@0 | 131 | return length; |
michael@0 | 132 | } |
michael@0 | 133 | |
michael@0 | 134 | /* for uprv_sortArray: sort ranges in weight order */ |
michael@0 | 135 | static int32_t U_CALLCONV |
michael@0 | 136 | compareRanges(const void * /*context*/, const void *left, const void *right) { |
michael@0 | 137 | uint32_t l, r; |
michael@0 | 138 | |
michael@0 | 139 | l=((const WeightRange *)left)->start; |
michael@0 | 140 | r=((const WeightRange *)right)->start; |
michael@0 | 141 | if(l<r) { |
michael@0 | 142 | return -1; |
michael@0 | 143 | } else if(l>r) { |
michael@0 | 144 | return 1; |
michael@0 | 145 | } else { |
michael@0 | 146 | return 0; |
michael@0 | 147 | } |
michael@0 | 148 | } |
michael@0 | 149 | |
michael@0 | 150 | /* |
michael@0 | 151 | * take two CE weights and calculate the |
michael@0 | 152 | * possible ranges of weights between the two limits, excluding them |
michael@0 | 153 | * for weights with up to 4 bytes there are up to 2*4-1=7 ranges |
michael@0 | 154 | */ |
michael@0 | 155 | static inline int32_t |
michael@0 | 156 | getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit, |
michael@0 | 157 | uint32_t maxByte, uint32_t countBytes, |
michael@0 | 158 | WeightRange ranges[7]) { |
michael@0 | 159 | WeightRange lower[5], middle, upper[5]; /* [0] and [1] are not used - this simplifies indexing */ |
michael@0 | 160 | uint32_t weight, trail; |
michael@0 | 161 | int32_t length, lowerLength, upperLength, rangeCount; |
michael@0 | 162 | |
michael@0 | 163 | /* assume that both lowerLimit & upperLimit are not 0 */ |
michael@0 | 164 | |
michael@0 | 165 | /* get the lengths of the limits */ |
michael@0 | 166 | lowerLength=lengthOfWeight(lowerLimit); |
michael@0 | 167 | upperLength=lengthOfWeight(upperLimit); |
michael@0 | 168 | |
michael@0 | 169 | #ifdef UCOL_DEBUG |
michael@0 | 170 | printf("length of lower limit 0x%08lx is %ld\n", lowerLimit, lowerLength); |
michael@0 | 171 | printf("length of upper limit 0x%08lx is %ld\n", upperLimit, upperLength); |
michael@0 | 172 | #endif |
michael@0 | 173 | |
michael@0 | 174 | if(lowerLimit>=upperLimit) { |
michael@0 | 175 | #ifdef UCOL_DEBUG |
michael@0 | 176 | printf("error: no space between lower & upper limits\n"); |
michael@0 | 177 | #endif |
michael@0 | 178 | return 0; |
michael@0 | 179 | } |
michael@0 | 180 | |
michael@0 | 181 | /* check that neither is a prefix of the other */ |
michael@0 | 182 | if(lowerLength<upperLength) { |
michael@0 | 183 | if(lowerLimit==truncateWeight(upperLimit, lowerLength)) { |
michael@0 | 184 | #ifdef UCOL_DEBUG |
michael@0 | 185 | printf("error: lower limit 0x%08lx is a prefix of upper limit 0x%08lx\n", lowerLimit, upperLimit); |
michael@0 | 186 | #endif |
michael@0 | 187 | return 0; |
michael@0 | 188 | } |
michael@0 | 189 | } |
michael@0 | 190 | /* if the upper limit is a prefix of the lower limit then the earlier test lowerLimit>=upperLimit has caught it */ |
michael@0 | 191 | |
michael@0 | 192 | /* reset local variables */ |
michael@0 | 193 | uprv_memset(lower, 0, sizeof(lower)); |
michael@0 | 194 | uprv_memset(&middle, 0, sizeof(middle)); |
michael@0 | 195 | uprv_memset(upper, 0, sizeof(upper)); |
michael@0 | 196 | |
michael@0 | 197 | /* |
michael@0 | 198 | * With the limit lengths of 1..4, there are up to 7 ranges for allocation: |
michael@0 | 199 | * range minimum length |
michael@0 | 200 | * lower[4] 4 |
michael@0 | 201 | * lower[3] 3 |
michael@0 | 202 | * lower[2] 2 |
michael@0 | 203 | * middle 1 |
michael@0 | 204 | * upper[2] 2 |
michael@0 | 205 | * upper[3] 3 |
michael@0 | 206 | * upper[4] 4 |
michael@0 | 207 | * |
michael@0 | 208 | * We are now going to calculate up to 7 ranges. |
michael@0 | 209 | * Some of them will typically overlap, so we will then have to merge and eliminate ranges. |
michael@0 | 210 | */ |
michael@0 | 211 | weight=lowerLimit; |
michael@0 | 212 | for(length=lowerLength; length>=2; --length) { |
michael@0 | 213 | trail=getWeightTrail(weight, length); |
michael@0 | 214 | if(trail<maxByte) { |
michael@0 | 215 | lower[length].start=incWeightTrail(weight, length); |
michael@0 | 216 | lower[length].end=setWeightTrail(weight, length, maxByte); |
michael@0 | 217 | lower[length].length=length; |
michael@0 | 218 | lower[length].count=maxByte-trail; |
michael@0 | 219 | } |
michael@0 | 220 | weight=truncateWeight(weight, length-1); |
michael@0 | 221 | } |
michael@0 | 222 | middle.start=incWeightTrail(weight, 1); |
michael@0 | 223 | |
michael@0 | 224 | weight=upperLimit; |
michael@0 | 225 | for(length=upperLength; length>=2; --length) { |
michael@0 | 226 | trail=getWeightTrail(weight, length); |
michael@0 | 227 | if(trail>UCOL_BYTE_FIRST_TAILORED) { |
michael@0 | 228 | upper[length].start=setWeightTrail(weight, length, UCOL_BYTE_FIRST_TAILORED); |
michael@0 | 229 | upper[length].end=decWeightTrail(weight, length); |
michael@0 | 230 | upper[length].length=length; |
michael@0 | 231 | upper[length].count=trail-UCOL_BYTE_FIRST_TAILORED; |
michael@0 | 232 | } |
michael@0 | 233 | weight=truncateWeight(weight, length-1); |
michael@0 | 234 | } |
michael@0 | 235 | middle.end=decWeightTrail(weight, 1); |
michael@0 | 236 | |
michael@0 | 237 | /* set the middle range */ |
michael@0 | 238 | middle.length=1; |
michael@0 | 239 | if(middle.end>=middle.start) { |
michael@0 | 240 | middle.count=(int32_t)((middle.end-middle.start)>>24)+1; |
michael@0 | 241 | } else { |
michael@0 | 242 | /* eliminate overlaps */ |
michael@0 | 243 | uint32_t start, end; |
michael@0 | 244 | |
michael@0 | 245 | /* remove the middle range */ |
michael@0 | 246 | middle.count=0; |
michael@0 | 247 | |
michael@0 | 248 | /* reduce or remove the lower ranges that go beyond upperLimit */ |
michael@0 | 249 | for(length=4; length>=2; --length) { |
michael@0 | 250 | if(lower[length].count>0 && upper[length].count>0) { |
michael@0 | 251 | start=upper[length].start; |
michael@0 | 252 | end=lower[length].end; |
michael@0 | 253 | |
michael@0 | 254 | if(end>=start || incWeight(end, length, maxByte)==start) { |
michael@0 | 255 | /* lower and upper ranges collide or are directly adjacent: merge these two and remove all shorter ranges */ |
michael@0 | 256 | start=lower[length].start; |
michael@0 | 257 | end=lower[length].end=upper[length].end; |
michael@0 | 258 | /* |
michael@0 | 259 | * merging directly adjacent ranges needs to subtract the 0/1 gaps in between; |
michael@0 | 260 | * it may result in a range with count>countBytes |
michael@0 | 261 | */ |
michael@0 | 262 | lower[length].count= |
michael@0 | 263 | (int32_t)(getWeightTrail(end, length)-getWeightTrail(start, length)+1+ |
michael@0 | 264 | countBytes*(getWeightByte(end, length-1)-getWeightByte(start, length-1))); |
michael@0 | 265 | upper[length].count=0; |
michael@0 | 266 | while(--length>=2) { |
michael@0 | 267 | lower[length].count=upper[length].count=0; |
michael@0 | 268 | } |
michael@0 | 269 | break; |
michael@0 | 270 | } |
michael@0 | 271 | } |
michael@0 | 272 | } |
michael@0 | 273 | } |
michael@0 | 274 | |
michael@0 | 275 | #ifdef UCOL_DEBUG |
michael@0 | 276 | /* print ranges */ |
michael@0 | 277 | for(length=4; length>=2; --length) { |
michael@0 | 278 | if(lower[length].count>0) { |
michael@0 | 279 | printf("lower[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, lower[length].start, lower[length].end, lower[length].count); |
michael@0 | 280 | } |
michael@0 | 281 | } |
michael@0 | 282 | if(middle.count>0) { |
michael@0 | 283 | printf("middle .start=0x%08lx .end=0x%08lx .count=%ld\n", middle.start, middle.end, middle.count); |
michael@0 | 284 | } |
michael@0 | 285 | for(length=2; length<=4; ++length) { |
michael@0 | 286 | if(upper[length].count>0) { |
michael@0 | 287 | printf("upper[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, upper[length].start, upper[length].end, upper[length].count); |
michael@0 | 288 | } |
michael@0 | 289 | } |
michael@0 | 290 | #endif |
michael@0 | 291 | |
michael@0 | 292 | /* copy the ranges, shortest first, into the result array */ |
michael@0 | 293 | rangeCount=0; |
michael@0 | 294 | if(middle.count>0) { |
michael@0 | 295 | uprv_memcpy(ranges, &middle, sizeof(WeightRange)); |
michael@0 | 296 | rangeCount=1; |
michael@0 | 297 | } |
michael@0 | 298 | for(length=2; length<=4; ++length) { |
michael@0 | 299 | /* copy upper first so that later the middle range is more likely the first one to use */ |
michael@0 | 300 | if(upper[length].count>0) { |
michael@0 | 301 | uprv_memcpy(ranges+rangeCount, upper+length, sizeof(WeightRange)); |
michael@0 | 302 | ++rangeCount; |
michael@0 | 303 | } |
michael@0 | 304 | if(lower[length].count>0) { |
michael@0 | 305 | uprv_memcpy(ranges+rangeCount, lower+length, sizeof(WeightRange)); |
michael@0 | 306 | ++rangeCount; |
michael@0 | 307 | } |
michael@0 | 308 | } |
michael@0 | 309 | return rangeCount; |
michael@0 | 310 | } |
michael@0 | 311 | |
michael@0 | 312 | /* |
michael@0 | 313 | * call getWeightRanges and then determine heuristically |
michael@0 | 314 | * which ranges to use for a given number of weights between (excluding) |
michael@0 | 315 | * two limits |
michael@0 | 316 | */ |
michael@0 | 317 | U_CFUNC int32_t |
michael@0 | 318 | ucol_allocWeights(uint32_t lowerLimit, uint32_t upperLimit, |
michael@0 | 319 | uint32_t n, |
michael@0 | 320 | uint32_t maxByte, |
michael@0 | 321 | WeightRange ranges[7]) { |
michael@0 | 322 | /* number of usable byte values 3..maxByte */ |
michael@0 | 323 | uint32_t countBytes=maxByte-UCOL_BYTE_FIRST_TAILORED+1; |
michael@0 | 324 | |
michael@0 | 325 | uint32_t lengthCounts[6]; /* [0] unused, [5] to make index checks unnecessary */ |
michael@0 | 326 | uint32_t maxCount; |
michael@0 | 327 | int32_t i, rangeCount, minLength/*, maxLength*/; |
michael@0 | 328 | |
michael@0 | 329 | /* countBytes to the power of index */ |
michael@0 | 330 | uint32_t powers[5]; |
michael@0 | 331 | /* gcc requires explicit initialization */ |
michael@0 | 332 | powers[0] = 1; |
michael@0 | 333 | powers[1] = countBytes; |
michael@0 | 334 | powers[2] = countBytes*countBytes; |
michael@0 | 335 | powers[3] = countBytes*countBytes*countBytes; |
michael@0 | 336 | powers[4] = countBytes*countBytes*countBytes*countBytes; |
michael@0 | 337 | |
michael@0 | 338 | #ifdef UCOL_DEBUG |
michael@0 | 339 | puts(""); |
michael@0 | 340 | #endif |
michael@0 | 341 | |
michael@0 | 342 | rangeCount=getWeightRanges(lowerLimit, upperLimit, maxByte, countBytes, ranges); |
michael@0 | 343 | if(rangeCount<=0) { |
michael@0 | 344 | #ifdef UCOL_DEBUG |
michael@0 | 345 | printf("error: unable to get Weight ranges\n"); |
michael@0 | 346 | #endif |
michael@0 | 347 | return 0; |
michael@0 | 348 | } |
michael@0 | 349 | |
michael@0 | 350 | /* what is the maximum number of weights with these ranges? */ |
michael@0 | 351 | maxCount=0; |
michael@0 | 352 | for(i=0; i<rangeCount; ++i) { |
michael@0 | 353 | maxCount+=(uint32_t)ranges[i].count*powers[4-ranges[i].length]; |
michael@0 | 354 | } |
michael@0 | 355 | if(maxCount>=n) { |
michael@0 | 356 | #ifdef UCOL_DEBUG |
michael@0 | 357 | printf("the maximum number of %lu weights is sufficient for n=%lu\n", maxCount, n); |
michael@0 | 358 | #endif |
michael@0 | 359 | } else { |
michael@0 | 360 | #ifdef UCOL_DEBUG |
michael@0 | 361 | printf("error: the maximum number of %lu weights is insufficient for n=%lu\n", maxCount, n); |
michael@0 | 362 | #endif |
michael@0 | 363 | return 0; |
michael@0 | 364 | } |
michael@0 | 365 | |
michael@0 | 366 | /* set the length2 and count2 fields */ |
michael@0 | 367 | for(i=0; i<rangeCount; ++i) { |
michael@0 | 368 | ranges[i].length2=ranges[i].length; |
michael@0 | 369 | ranges[i].count2=(uint32_t)ranges[i].count; |
michael@0 | 370 | } |
michael@0 | 371 | |
michael@0 | 372 | /* try until we find suitably large ranges */ |
michael@0 | 373 | for(;;) { |
michael@0 | 374 | /* get the smallest number of bytes in a range */ |
michael@0 | 375 | minLength=ranges[0].length2; |
michael@0 | 376 | |
michael@0 | 377 | /* sum up the number of elements that fit into ranges of each byte length */ |
michael@0 | 378 | uprv_memset(lengthCounts, 0, sizeof(lengthCounts)); |
michael@0 | 379 | for(i=0; i<rangeCount; ++i) { |
michael@0 | 380 | lengthCounts[ranges[i].length2]+=ranges[i].count2; |
michael@0 | 381 | } |
michael@0 | 382 | |
michael@0 | 383 | /* now try to allocate n elements in the available short ranges */ |
michael@0 | 384 | if(n<=(lengthCounts[minLength]+lengthCounts[minLength+1])) { |
michael@0 | 385 | /* trivial cases, use the first few ranges */ |
michael@0 | 386 | maxCount=0; |
michael@0 | 387 | rangeCount=0; |
michael@0 | 388 | do { |
michael@0 | 389 | maxCount+=ranges[rangeCount].count2; |
michael@0 | 390 | ++rangeCount; |
michael@0 | 391 | } while(n>maxCount); |
michael@0 | 392 | #ifdef UCOL_DEBUG |
michael@0 | 393 | printf("take first %ld ranges\n", rangeCount); |
michael@0 | 394 | #endif |
michael@0 | 395 | break; |
michael@0 | 396 | } else if(n<=ranges[0].count2*countBytes) { |
michael@0 | 397 | /* easy case, just make this one range large enough by lengthening it once more, possibly split it */ |
michael@0 | 398 | uint32_t count1, count2, power_1, power; |
michael@0 | 399 | |
michael@0 | 400 | /*maxLength=minLength+1;*/ |
michael@0 | 401 | |
michael@0 | 402 | /* calculate how to split the range between maxLength-1 (count1) and maxLength (count2) */ |
michael@0 | 403 | power_1=powers[minLength-ranges[0].length]; |
michael@0 | 404 | power=power_1*countBytes; |
michael@0 | 405 | count2=(n+power-1)/power; |
michael@0 | 406 | count1=ranges[0].count-count2; |
michael@0 | 407 | |
michael@0 | 408 | /* split the range */ |
michael@0 | 409 | #ifdef UCOL_DEBUG |
michael@0 | 410 | printf("split the first range %ld:%ld\n", count1, count2); |
michael@0 | 411 | #endif |
michael@0 | 412 | if(count1<1) { |
michael@0 | 413 | rangeCount=1; |
michael@0 | 414 | |
michael@0 | 415 | /* lengthen the entire range to maxLength */ |
michael@0 | 416 | lengthenRange(ranges, maxByte, countBytes); |
michael@0 | 417 | } else { |
michael@0 | 418 | /* really split the range */ |
michael@0 | 419 | uint32_t byte; |
michael@0 | 420 | |
michael@0 | 421 | /* create a new range with the end and initial and current length of the old one */ |
michael@0 | 422 | rangeCount=2; |
michael@0 | 423 | ranges[1].end=ranges[0].end; |
michael@0 | 424 | ranges[1].length=ranges[0].length; |
michael@0 | 425 | ranges[1].length2=minLength; |
michael@0 | 426 | |
michael@0 | 427 | /* set the end of the first range according to count1 */ |
michael@0 | 428 | i=ranges[0].length; |
michael@0 | 429 | byte=getWeightByte(ranges[0].start, i)+count1-1; |
michael@0 | 430 | |
michael@0 | 431 | /* |
michael@0 | 432 | * ranges[0].count and count1 may be >countBytes |
michael@0 | 433 | * from merging adjacent ranges; |
michael@0 | 434 | * byte>maxByte is possible |
michael@0 | 435 | */ |
michael@0 | 436 | if(byte<=maxByte) { |
michael@0 | 437 | ranges[0].end=setWeightByte(ranges[0].start, i, byte); |
michael@0 | 438 | } else /* byte>maxByte */ { |
michael@0 | 439 | ranges[0].end=setWeightByte(incWeight(ranges[0].start, i-1, maxByte), i, byte-countBytes); |
michael@0 | 440 | } |
michael@0 | 441 | |
michael@0 | 442 | /* set the bytes in the end weight at length+1..length2 to maxByte */ |
michael@0 | 443 | byte=(maxByte<<24)|(maxByte<<16)|(maxByte<<8)|maxByte; /* this used to be 0xffffffff */ |
michael@0 | 444 | ranges[0].end=truncateWeight(ranges[0].end, i)| |
michael@0 | 445 | ((byte>>(8*i))&(byte<<(8*(4-minLength)))); |
michael@0 | 446 | |
michael@0 | 447 | /* set the start of the second range to immediately follow the end of the first one */ |
michael@0 | 448 | ranges[1].start=incWeight(ranges[0].end, minLength, maxByte); |
michael@0 | 449 | |
michael@0 | 450 | /* set the count values (informational) */ |
michael@0 | 451 | ranges[0].count=count1; |
michael@0 | 452 | ranges[1].count=count2; |
michael@0 | 453 | |
michael@0 | 454 | ranges[0].count2=count1*power_1; |
michael@0 | 455 | ranges[1].count2=count2*power_1; /* will be *countBytes when lengthened */ |
michael@0 | 456 | |
michael@0 | 457 | /* lengthen the second range to maxLength */ |
michael@0 | 458 | lengthenRange(ranges+1, maxByte, countBytes); |
michael@0 | 459 | } |
michael@0 | 460 | break; |
michael@0 | 461 | } |
michael@0 | 462 | |
michael@0 | 463 | /* no good match, lengthen all minLength ranges and iterate */ |
michael@0 | 464 | #ifdef UCOL_DEBUG |
michael@0 | 465 | printf("lengthen the short ranges from %ld bytes to %ld and iterate\n", minLength, minLength+1); |
michael@0 | 466 | #endif |
michael@0 | 467 | for(i=0; ranges[i].length2==minLength; ++i) { |
michael@0 | 468 | lengthenRange(ranges+i, maxByte, countBytes); |
michael@0 | 469 | } |
michael@0 | 470 | } |
michael@0 | 471 | |
michael@0 | 472 | if(rangeCount>1) { |
michael@0 | 473 | /* sort the ranges by weight values */ |
michael@0 | 474 | UErrorCode errorCode=U_ZERO_ERROR; |
michael@0 | 475 | uprv_sortArray(ranges, rangeCount, sizeof(WeightRange), compareRanges, NULL, FALSE, &errorCode); |
michael@0 | 476 | /* ignore error code: we know that the internal sort function will not fail here */ |
michael@0 | 477 | } |
michael@0 | 478 | |
michael@0 | 479 | #ifdef UCOL_DEBUG |
michael@0 | 480 | puts("final ranges:"); |
michael@0 | 481 | for(i=0; i<rangeCount; ++i) { |
michael@0 | 482 | printf("ranges[%ld] .start=0x%08lx .end=0x%08lx .length=%ld .length2=%ld .count=%ld .count2=%lu\n", |
michael@0 | 483 | i, ranges[i].start, ranges[i].end, ranges[i].length, ranges[i].length2, ranges[i].count, ranges[i].count2); |
michael@0 | 484 | } |
michael@0 | 485 | #endif |
michael@0 | 486 | |
michael@0 | 487 | /* set maxByte in ranges[0] for ucol_nextWeight() */ |
michael@0 | 488 | ranges[0].count=maxByte; |
michael@0 | 489 | |
michael@0 | 490 | return rangeCount; |
michael@0 | 491 | } |
michael@0 | 492 | |
michael@0 | 493 | /* |
michael@0 | 494 | * given a set of ranges calculated by ucol_allocWeights(), |
michael@0 | 495 | * iterate through the weights |
michael@0 | 496 | */ |
michael@0 | 497 | U_CFUNC uint32_t |
michael@0 | 498 | ucol_nextWeight(WeightRange ranges[], int32_t *pRangeCount) { |
michael@0 | 499 | if(*pRangeCount<=0) { |
michael@0 | 500 | return 0xffffffff; |
michael@0 | 501 | } else { |
michael@0 | 502 | uint32_t weight, maxByte; |
michael@0 | 503 | |
michael@0 | 504 | /* get maxByte from the .count field */ |
michael@0 | 505 | maxByte=ranges[0].count; |
michael@0 | 506 | |
michael@0 | 507 | /* get the next weight */ |
michael@0 | 508 | weight=ranges[0].start; |
michael@0 | 509 | if(weight==ranges[0].end) { |
michael@0 | 510 | /* this range is finished, remove it and move the following ones up */ |
michael@0 | 511 | if(--*pRangeCount>0) { |
michael@0 | 512 | uprv_memmove(ranges, ranges+1, *pRangeCount*sizeof(WeightRange)); |
michael@0 | 513 | ranges[0].count=maxByte; /* keep maxByte in ranges[0] */ |
michael@0 | 514 | } |
michael@0 | 515 | } else { |
michael@0 | 516 | /* increment the weight for the next value */ |
michael@0 | 517 | ranges[0].start=incWeight(weight, ranges[0].length2, maxByte); |
michael@0 | 518 | } |
michael@0 | 519 | |
michael@0 | 520 | return weight; |
michael@0 | 521 | } |
michael@0 | 522 | } |
michael@0 | 523 | |
michael@0 | 524 | #if 0 // #ifdef UCOL_DEBUG |
michael@0 | 525 | |
michael@0 | 526 | static void |
michael@0 | 527 | testAlloc(uint32_t lowerLimit, uint32_t upperLimit, uint32_t n, UBool enumerate) { |
michael@0 | 528 | WeightRange ranges[8]; |
michael@0 | 529 | int32_t rangeCount; |
michael@0 | 530 | |
michael@0 | 531 | rangeCount=ucol_allocWeights(lowerLimit, upperLimit, n, ranges); |
michael@0 | 532 | if(enumerate) { |
michael@0 | 533 | uint32_t weight; |
michael@0 | 534 | |
michael@0 | 535 | while(n>0) { |
michael@0 | 536 | weight=ucol_nextWeight(ranges, &rangeCount); |
michael@0 | 537 | if(weight==0xffffffff) { |
michael@0 | 538 | printf("error: 0xffffffff with %lu more weights to go\n", n); |
michael@0 | 539 | break; |
michael@0 | 540 | } |
michael@0 | 541 | printf(" 0x%08lx\n", weight); |
michael@0 | 542 | --n; |
michael@0 | 543 | } |
michael@0 | 544 | } |
michael@0 | 545 | } |
michael@0 | 546 | |
michael@0 | 547 | extern int |
michael@0 | 548 | main(int argc, const char *argv[]) { |
michael@0 | 549 | #if 0 |
michael@0 | 550 | #endif |
michael@0 | 551 | testAlloc(0x364214fc, 0x44b87d23, 5, FALSE); |
michael@0 | 552 | testAlloc(0x36421500, 0x44b87d23, 5, FALSE); |
michael@0 | 553 | testAlloc(0x36421500, 0x44b87d23, 20, FALSE); |
michael@0 | 554 | testAlloc(0x36421500, 0x44b87d23, 13700, FALSE); |
michael@0 | 555 | testAlloc(0x36421500, 0x38b87d23, 1, FALSE); |
michael@0 | 556 | testAlloc(0x36421500, 0x38b87d23, 20, FALSE); |
michael@0 | 557 | testAlloc(0x36421500, 0x38b87d23, 200, TRUE); |
michael@0 | 558 | testAlloc(0x36421500, 0x38b87d23, 13700, FALSE); |
michael@0 | 559 | testAlloc(0x36421500, 0x37b87d23, 13700, FALSE); |
michael@0 | 560 | testAlloc(0x36ef1500, 0x37b87d23, 13700, FALSE); |
michael@0 | 561 | testAlloc(0x36421500, 0x36b87d23, 13700, FALSE); |
michael@0 | 562 | testAlloc(0x36b87122, 0x36b87d23, 13700, FALSE); |
michael@0 | 563 | testAlloc(0x49000000, 0x4a600000, 13700, FALSE); |
michael@0 | 564 | testAlloc(0x9fffffff, 0xd0000000, 13700, FALSE); |
michael@0 | 565 | testAlloc(0x9fffffff, 0xd0000000, 67400, FALSE); |
michael@0 | 566 | testAlloc(0x9fffffff, 0xa0030000, 67400, FALSE); |
michael@0 | 567 | testAlloc(0x9fffffff, 0xa0030000, 40000, FALSE); |
michael@0 | 568 | testAlloc(0xa0000000, 0xa0030000, 40000, FALSE); |
michael@0 | 569 | testAlloc(0xa0031100, 0xa0030000, 40000, FALSE); |
michael@0 | 570 | #if 0 |
michael@0 | 571 | #endif |
michael@0 | 572 | return 0; |
michael@0 | 573 | } |
michael@0 | 574 | |
michael@0 | 575 | #endif |
michael@0 | 576 | |
michael@0 | 577 | #endif /* #if !UCONFIG_NO_COLLATION */ |