michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 1999-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: ucol_wgt.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2001mar08 michael@0: * created by: Markus W. Scherer michael@0: * michael@0: * This file contains code for allocating n collation element weights michael@0: * between two exclusive limits. michael@0: * It is used only internally by ucol_bld. michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: #include "ucol_imp.h" michael@0: #include "ucol_wgt.h" michael@0: #include "cmemory.h" michael@0: #include "uarrsort.h" michael@0: michael@0: #ifdef UCOL_DEBUG michael@0: # include michael@0: #endif michael@0: michael@0: /* collation element weight allocation -------------------------------------- */ michael@0: michael@0: /* helper functions for CE weights */ michael@0: michael@0: static inline int32_t michael@0: lengthOfWeight(uint32_t weight) { michael@0: if((weight&0xffffff)==0) { michael@0: return 1; michael@0: } else if((weight&0xffff)==0) { michael@0: return 2; michael@0: } else if((weight&0xff)==0) { michael@0: return 3; michael@0: } else { michael@0: return 4; michael@0: } michael@0: } michael@0: michael@0: static inline uint32_t michael@0: getWeightTrail(uint32_t weight, int32_t length) { michael@0: return (uint32_t)(weight>>(8*(4-length)))&0xff; michael@0: } michael@0: michael@0: static inline uint32_t michael@0: setWeightTrail(uint32_t weight, int32_t length, uint32_t trail) { michael@0: length=8*(4-length); michael@0: return (uint32_t)((weight&(0xffffff00<>idx; michael@0: } else { michael@0: // Do not use uint32_t>>32 because on some platforms that does not shift at all michael@0: // while we need it to become 0. michael@0: // PowerPC: 0xffffffff>>32 = 0 (wanted) michael@0: // x86: 0xffffffff>>32 = 0xffffffff (not wanted) michael@0: // michael@0: // ANSI C99 6.5.7 Bitwise shift operators: michael@0: // "If the value of the right operand is negative michael@0: // or is greater than or equal to the width of the promoted left operand, michael@0: // the behavior is undefined." michael@0: mask=0; michael@0: } michael@0: idx=32-idx; michael@0: mask|=0xffffff00<length2+1; michael@0: range->start=setWeightTrail(range->start, length, UCOL_BYTE_FIRST_TAILORED); michael@0: range->end=setWeightTrail(range->end, length, maxByte); michael@0: range->count2*=countBytes; michael@0: range->length2=length; michael@0: return length; michael@0: } michael@0: michael@0: /* for uprv_sortArray: sort ranges in weight order */ michael@0: static int32_t U_CALLCONV michael@0: compareRanges(const void * /*context*/, const void *left, const void *right) { michael@0: uint32_t l, r; michael@0: michael@0: l=((const WeightRange *)left)->start; michael@0: r=((const WeightRange *)right)->start; michael@0: if(lr) { michael@0: return 1; michael@0: } else { michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * take two CE weights and calculate the michael@0: * possible ranges of weights between the two limits, excluding them michael@0: * for weights with up to 4 bytes there are up to 2*4-1=7 ranges michael@0: */ michael@0: static inline int32_t michael@0: getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit, michael@0: uint32_t maxByte, uint32_t countBytes, michael@0: WeightRange ranges[7]) { michael@0: WeightRange lower[5], middle, upper[5]; /* [0] and [1] are not used - this simplifies indexing */ michael@0: uint32_t weight, trail; michael@0: int32_t length, lowerLength, upperLength, rangeCount; michael@0: michael@0: /* assume that both lowerLimit & upperLimit are not 0 */ michael@0: michael@0: /* get the lengths of the limits */ michael@0: lowerLength=lengthOfWeight(lowerLimit); michael@0: upperLength=lengthOfWeight(upperLimit); michael@0: michael@0: #ifdef UCOL_DEBUG michael@0: printf("length of lower limit 0x%08lx is %ld\n", lowerLimit, lowerLength); michael@0: printf("length of upper limit 0x%08lx is %ld\n", upperLimit, upperLength); michael@0: #endif michael@0: michael@0: if(lowerLimit>=upperLimit) { michael@0: #ifdef UCOL_DEBUG michael@0: printf("error: no space between lower & upper limits\n"); michael@0: #endif michael@0: return 0; michael@0: } michael@0: michael@0: /* check that neither is a prefix of the other */ michael@0: if(lowerLength=upperLimit has caught it */ michael@0: michael@0: /* reset local variables */ michael@0: uprv_memset(lower, 0, sizeof(lower)); michael@0: uprv_memset(&middle, 0, sizeof(middle)); michael@0: uprv_memset(upper, 0, sizeof(upper)); michael@0: michael@0: /* michael@0: * With the limit lengths of 1..4, there are up to 7 ranges for allocation: michael@0: * range minimum length michael@0: * lower[4] 4 michael@0: * lower[3] 3 michael@0: * lower[2] 2 michael@0: * middle 1 michael@0: * upper[2] 2 michael@0: * upper[3] 3 michael@0: * upper[4] 4 michael@0: * michael@0: * We are now going to calculate up to 7 ranges. michael@0: * Some of them will typically overlap, so we will then have to merge and eliminate ranges. michael@0: */ michael@0: weight=lowerLimit; michael@0: for(length=lowerLength; length>=2; --length) { michael@0: trail=getWeightTrail(weight, length); michael@0: if(trail=2; --length) { michael@0: trail=getWeightTrail(weight, length); michael@0: if(trail>UCOL_BYTE_FIRST_TAILORED) { michael@0: upper[length].start=setWeightTrail(weight, length, UCOL_BYTE_FIRST_TAILORED); michael@0: upper[length].end=decWeightTrail(weight, length); michael@0: upper[length].length=length; michael@0: upper[length].count=trail-UCOL_BYTE_FIRST_TAILORED; michael@0: } michael@0: weight=truncateWeight(weight, length-1); michael@0: } michael@0: middle.end=decWeightTrail(weight, 1); michael@0: michael@0: /* set the middle range */ michael@0: middle.length=1; michael@0: if(middle.end>=middle.start) { michael@0: middle.count=(int32_t)((middle.end-middle.start)>>24)+1; michael@0: } else { michael@0: /* eliminate overlaps */ michael@0: uint32_t start, end; michael@0: michael@0: /* remove the middle range */ michael@0: middle.count=0; michael@0: michael@0: /* reduce or remove the lower ranges that go beyond upperLimit */ michael@0: for(length=4; length>=2; --length) { michael@0: if(lower[length].count>0 && upper[length].count>0) { michael@0: start=upper[length].start; michael@0: end=lower[length].end; michael@0: michael@0: if(end>=start || incWeight(end, length, maxByte)==start) { michael@0: /* lower and upper ranges collide or are directly adjacent: merge these two and remove all shorter ranges */ michael@0: start=lower[length].start; michael@0: end=lower[length].end=upper[length].end; michael@0: /* michael@0: * merging directly adjacent ranges needs to subtract the 0/1 gaps in between; michael@0: * it may result in a range with count>countBytes michael@0: */ michael@0: lower[length].count= michael@0: (int32_t)(getWeightTrail(end, length)-getWeightTrail(start, length)+1+ michael@0: countBytes*(getWeightByte(end, length-1)-getWeightByte(start, length-1))); michael@0: upper[length].count=0; michael@0: while(--length>=2) { michael@0: lower[length].count=upper[length].count=0; michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: #ifdef UCOL_DEBUG michael@0: /* print ranges */ michael@0: for(length=4; length>=2; --length) { michael@0: if(lower[length].count>0) { michael@0: printf("lower[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, lower[length].start, lower[length].end, lower[length].count); michael@0: } michael@0: } michael@0: if(middle.count>0) { michael@0: printf("middle .start=0x%08lx .end=0x%08lx .count=%ld\n", middle.start, middle.end, middle.count); michael@0: } michael@0: for(length=2; length<=4; ++length) { michael@0: if(upper[length].count>0) { michael@0: printf("upper[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, upper[length].start, upper[length].end, upper[length].count); michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: /* copy the ranges, shortest first, into the result array */ michael@0: rangeCount=0; michael@0: if(middle.count>0) { michael@0: uprv_memcpy(ranges, &middle, sizeof(WeightRange)); michael@0: rangeCount=1; michael@0: } michael@0: for(length=2; length<=4; ++length) { michael@0: /* copy upper first so that later the middle range is more likely the first one to use */ michael@0: if(upper[length].count>0) { michael@0: uprv_memcpy(ranges+rangeCount, upper+length, sizeof(WeightRange)); michael@0: ++rangeCount; michael@0: } michael@0: if(lower[length].count>0) { michael@0: uprv_memcpy(ranges+rangeCount, lower+length, sizeof(WeightRange)); michael@0: ++rangeCount; michael@0: } michael@0: } michael@0: return rangeCount; michael@0: } michael@0: michael@0: /* michael@0: * call getWeightRanges and then determine heuristically michael@0: * which ranges to use for a given number of weights between (excluding) michael@0: * two limits michael@0: */ michael@0: U_CFUNC int32_t michael@0: ucol_allocWeights(uint32_t lowerLimit, uint32_t upperLimit, michael@0: uint32_t n, michael@0: uint32_t maxByte, michael@0: WeightRange ranges[7]) { michael@0: /* number of usable byte values 3..maxByte */ michael@0: uint32_t countBytes=maxByte-UCOL_BYTE_FIRST_TAILORED+1; michael@0: michael@0: uint32_t lengthCounts[6]; /* [0] unused, [5] to make index checks unnecessary */ michael@0: uint32_t maxCount; michael@0: int32_t i, rangeCount, minLength/*, maxLength*/; michael@0: michael@0: /* countBytes to the power of index */ michael@0: uint32_t powers[5]; michael@0: /* gcc requires explicit initialization */ michael@0: powers[0] = 1; michael@0: powers[1] = countBytes; michael@0: powers[2] = countBytes*countBytes; michael@0: powers[3] = countBytes*countBytes*countBytes; michael@0: powers[4] = countBytes*countBytes*countBytes*countBytes; michael@0: michael@0: #ifdef UCOL_DEBUG michael@0: puts(""); michael@0: #endif michael@0: michael@0: rangeCount=getWeightRanges(lowerLimit, upperLimit, maxByte, countBytes, ranges); michael@0: if(rangeCount<=0) { michael@0: #ifdef UCOL_DEBUG michael@0: printf("error: unable to get Weight ranges\n"); michael@0: #endif michael@0: return 0; michael@0: } michael@0: michael@0: /* what is the maximum number of weights with these ranges? */ michael@0: maxCount=0; michael@0: for(i=0; i=n) { michael@0: #ifdef UCOL_DEBUG michael@0: printf("the maximum number of %lu weights is sufficient for n=%lu\n", maxCount, n); michael@0: #endif michael@0: } else { michael@0: #ifdef UCOL_DEBUG michael@0: printf("error: the maximum number of %lu weights is insufficient for n=%lu\n", maxCount, n); michael@0: #endif michael@0: return 0; michael@0: } michael@0: michael@0: /* set the length2 and count2 fields */ michael@0: for(i=0; imaxCount); michael@0: #ifdef UCOL_DEBUG michael@0: printf("take first %ld ranges\n", rangeCount); michael@0: #endif michael@0: break; michael@0: } else if(n<=ranges[0].count2*countBytes) { michael@0: /* easy case, just make this one range large enough by lengthening it once more, possibly split it */ michael@0: uint32_t count1, count2, power_1, power; michael@0: michael@0: /*maxLength=minLength+1;*/ michael@0: michael@0: /* calculate how to split the range between maxLength-1 (count1) and maxLength (count2) */ michael@0: power_1=powers[minLength-ranges[0].length]; michael@0: power=power_1*countBytes; michael@0: count2=(n+power-1)/power; michael@0: count1=ranges[0].count-count2; michael@0: michael@0: /* split the range */ michael@0: #ifdef UCOL_DEBUG michael@0: printf("split the first range %ld:%ld\n", count1, count2); michael@0: #endif michael@0: if(count1<1) { michael@0: rangeCount=1; michael@0: michael@0: /* lengthen the entire range to maxLength */ michael@0: lengthenRange(ranges, maxByte, countBytes); michael@0: } else { michael@0: /* really split the range */ michael@0: uint32_t byte; michael@0: michael@0: /* create a new range with the end and initial and current length of the old one */ michael@0: rangeCount=2; michael@0: ranges[1].end=ranges[0].end; michael@0: ranges[1].length=ranges[0].length; michael@0: ranges[1].length2=minLength; michael@0: michael@0: /* set the end of the first range according to count1 */ michael@0: i=ranges[0].length; michael@0: byte=getWeightByte(ranges[0].start, i)+count1-1; michael@0: michael@0: /* michael@0: * ranges[0].count and count1 may be >countBytes michael@0: * from merging adjacent ranges; michael@0: * byte>maxByte is possible michael@0: */ michael@0: if(byte<=maxByte) { michael@0: ranges[0].end=setWeightByte(ranges[0].start, i, byte); michael@0: } else /* byte>maxByte */ { michael@0: ranges[0].end=setWeightByte(incWeight(ranges[0].start, i-1, maxByte), i, byte-countBytes); michael@0: } michael@0: michael@0: /* set the bytes in the end weight at length+1..length2 to maxByte */ michael@0: byte=(maxByte<<24)|(maxByte<<16)|(maxByte<<8)|maxByte; /* this used to be 0xffffffff */ michael@0: ranges[0].end=truncateWeight(ranges[0].end, i)| michael@0: ((byte>>(8*i))&(byte<<(8*(4-minLength)))); michael@0: michael@0: /* set the start of the second range to immediately follow the end of the first one */ michael@0: ranges[1].start=incWeight(ranges[0].end, minLength, maxByte); michael@0: michael@0: /* set the count values (informational) */ michael@0: ranges[0].count=count1; michael@0: ranges[1].count=count2; michael@0: michael@0: ranges[0].count2=count1*power_1; michael@0: ranges[1].count2=count2*power_1; /* will be *countBytes when lengthened */ michael@0: michael@0: /* lengthen the second range to maxLength */ michael@0: lengthenRange(ranges+1, maxByte, countBytes); michael@0: } michael@0: break; michael@0: } michael@0: michael@0: /* no good match, lengthen all minLength ranges and iterate */ michael@0: #ifdef UCOL_DEBUG michael@0: printf("lengthen the short ranges from %ld bytes to %ld and iterate\n", minLength, minLength+1); michael@0: #endif michael@0: for(i=0; ranges[i].length2==minLength; ++i) { michael@0: lengthenRange(ranges+i, maxByte, countBytes); michael@0: } michael@0: } michael@0: michael@0: if(rangeCount>1) { michael@0: /* sort the ranges by weight values */ michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: uprv_sortArray(ranges, rangeCount, sizeof(WeightRange), compareRanges, NULL, FALSE, &errorCode); michael@0: /* ignore error code: we know that the internal sort function will not fail here */ michael@0: } michael@0: michael@0: #ifdef UCOL_DEBUG michael@0: puts("final ranges:"); michael@0: for(i=0; i0) { michael@0: uprv_memmove(ranges, ranges+1, *pRangeCount*sizeof(WeightRange)); michael@0: ranges[0].count=maxByte; /* keep maxByte in ranges[0] */ michael@0: } michael@0: } else { michael@0: /* increment the weight for the next value */ michael@0: ranges[0].start=incWeight(weight, ranges[0].length2, maxByte); michael@0: } michael@0: michael@0: return weight; michael@0: } michael@0: } michael@0: michael@0: #if 0 // #ifdef UCOL_DEBUG michael@0: michael@0: static void michael@0: testAlloc(uint32_t lowerLimit, uint32_t upperLimit, uint32_t n, UBool enumerate) { michael@0: WeightRange ranges[8]; michael@0: int32_t rangeCount; michael@0: michael@0: rangeCount=ucol_allocWeights(lowerLimit, upperLimit, n, ranges); michael@0: if(enumerate) { michael@0: uint32_t weight; michael@0: michael@0: while(n>0) { michael@0: weight=ucol_nextWeight(ranges, &rangeCount); michael@0: if(weight==0xffffffff) { michael@0: printf("error: 0xffffffff with %lu more weights to go\n", n); michael@0: break; michael@0: } michael@0: printf(" 0x%08lx\n", weight); michael@0: --n; michael@0: } michael@0: } michael@0: } michael@0: michael@0: extern int michael@0: main(int argc, const char *argv[]) { michael@0: #if 0 michael@0: #endif michael@0: testAlloc(0x364214fc, 0x44b87d23, 5, FALSE); michael@0: testAlloc(0x36421500, 0x44b87d23, 5, FALSE); michael@0: testAlloc(0x36421500, 0x44b87d23, 20, FALSE); michael@0: testAlloc(0x36421500, 0x44b87d23, 13700, FALSE); michael@0: testAlloc(0x36421500, 0x38b87d23, 1, FALSE); michael@0: testAlloc(0x36421500, 0x38b87d23, 20, FALSE); michael@0: testAlloc(0x36421500, 0x38b87d23, 200, TRUE); michael@0: testAlloc(0x36421500, 0x38b87d23, 13700, FALSE); michael@0: testAlloc(0x36421500, 0x37b87d23, 13700, FALSE); michael@0: testAlloc(0x36ef1500, 0x37b87d23, 13700, FALSE); michael@0: testAlloc(0x36421500, 0x36b87d23, 13700, FALSE); michael@0: testAlloc(0x36b87122, 0x36b87d23, 13700, FALSE); michael@0: testAlloc(0x49000000, 0x4a600000, 13700, FALSE); michael@0: testAlloc(0x9fffffff, 0xd0000000, 13700, FALSE); michael@0: testAlloc(0x9fffffff, 0xd0000000, 67400, FALSE); michael@0: testAlloc(0x9fffffff, 0xa0030000, 67400, FALSE); michael@0: testAlloc(0x9fffffff, 0xa0030000, 40000, FALSE); michael@0: testAlloc(0xa0000000, 0xa0030000, 40000, FALSE); michael@0: testAlloc(0xa0031100, 0xa0030000, 40000, FALSE); michael@0: #if 0 michael@0: #endif michael@0: return 0; michael@0: } michael@0: michael@0: #endif michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */