1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/tools/toolutil/denseranges.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,158 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 2010, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* file name: denseranges.cpp 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2010sep25 1.15 +* created by: Markus W. Scherer 1.16 +* 1.17 +* Helper code for finding a small number of dense ranges. 1.18 +*/ 1.19 + 1.20 +#include "unicode/utypes.h" 1.21 +#include "denseranges.h" 1.22 + 1.23 +// Definitions in the anonymous namespace are invisible outside this file. 1.24 +namespace { 1.25 + 1.26 +/** 1.27 + * Collect up to 15 range gaps and sort them by ascending gap size. 1.28 + */ 1.29 +class LargestGaps { 1.30 +public: 1.31 + LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {} 1.32 + 1.33 + void add(int32_t gapStart, int64_t gapLength) { 1.34 + int32_t i=length; 1.35 + while(i>0 && gapLength>gapLengths[i-1]) { 1.36 + --i; 1.37 + } 1.38 + if(i<maxLength) { 1.39 + // The new gap is now one of the maxLength largest. 1.40 + // Insert the new gap, moving up smaller ones of the previous 1.41 + // length largest. 1.42 + int32_t j= length<maxLength ? length++ : maxLength-1; 1.43 + while(j>i) { 1.44 + gapStarts[j]=gapStarts[j-1]; 1.45 + gapLengths[j]=gapLengths[j-1]; 1.46 + --j; 1.47 + } 1.48 + gapStarts[i]=gapStart; 1.49 + gapLengths[i]=gapLength; 1.50 + } 1.51 + } 1.52 + 1.53 + void truncate(int32_t newLength) { 1.54 + if(newLength<length) { 1.55 + length=newLength; 1.56 + } 1.57 + } 1.58 + 1.59 + int32_t count() const { return length; } 1.60 + int32_t gapStart(int32_t i) const { return gapStarts[i]; } 1.61 + int64_t gapLength(int32_t i) const { return gapLengths[i]; } 1.62 + 1.63 + int32_t firstAfter(int32_t value) const { 1.64 + if(length==0) { 1.65 + return -1; 1.66 + } 1.67 + int32_t minValue=0; 1.68 + int32_t minIndex=-1; 1.69 + for(int32_t i=0; i<length; ++i) { 1.70 + if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) { 1.71 + minValue=gapStarts[i]; 1.72 + minIndex=i; 1.73 + } 1.74 + } 1.75 + return minIndex; 1.76 + } 1.77 + 1.78 +private: 1.79 + static const int32_t kCapacity=15; 1.80 + 1.81 + int32_t maxLength; 1.82 + int32_t length; 1.83 + int32_t gapStarts[kCapacity]; 1.84 + int64_t gapLengths[kCapacity]; 1.85 +}; 1.86 + 1.87 +} // namespace 1.88 + 1.89 +/** 1.90 + * Does it make sense to write 1..capacity ranges? 1.91 + * Returns 0 if not, otherwise the number of ranges. 1.92 + * @param values Sorted array of signed-integer values. 1.93 + * @param length Number of values. 1.94 + * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.) 1.95 + * Should be 0x80..0x100, must be 1..0x100. 1.96 + * @param ranges Output ranges array. 1.97 + * @param capacity Maximum number of ranges. 1.98 + * @return Minimum number of ranges (at most capacity) that have the desired density, 1.99 + * or 0 if that density cannot be achieved. 1.100 + */ 1.101 +U_CAPI int32_t U_EXPORT2 1.102 +uprv_makeDenseRanges(const int32_t values[], int32_t length, 1.103 + int32_t density, 1.104 + int32_t ranges[][2], int32_t capacity) { 1.105 + if(length<=2) { 1.106 + return 0; 1.107 + } 1.108 + int32_t minValue=values[0]; 1.109 + int32_t maxValue=values[length-1]; // Assume minValue<=maxValue. 1.110 + // Use int64_t variables for intermediate-value precision and to avoid 1.111 + // signed-int32_t overflow of maxValue-minValue. 1.112 + int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1; 1.113 + if(length>=(density*maxLength)/0x100) { 1.114 + // Use one range. 1.115 + ranges[0][0]=minValue; 1.116 + ranges[0][1]=maxValue; 1.117 + return 1; 1.118 + } 1.119 + if(length<=4) { 1.120 + return 0; 1.121 + } 1.122 + // See if we can split [minValue, maxValue] into 2..capacity ranges, 1.123 + // divided by the 1..(capacity-1) largest gaps. 1.124 + LargestGaps gaps(capacity-1); 1.125 + int32_t i; 1.126 + int32_t expectedValue=minValue; 1.127 + for(i=1; i<length; ++i) { 1.128 + ++expectedValue; 1.129 + int32_t actualValue=values[i]; 1.130 + if(expectedValue!=actualValue) { 1.131 + gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue); 1.132 + expectedValue=actualValue; 1.133 + } 1.134 + } 1.135 + // We know gaps.count()>=1 because we have fewer values (length) than 1.136 + // the length of the [minValue..maxValue] range (maxLength). 1.137 + // (Otherwise we would have returned with the one range above.) 1.138 + int32_t num; 1.139 + for(i=0, num=2;; ++i, ++num) { 1.140 + if(i>=gaps.count()) { 1.141 + // The values are too sparse for capacity or fewer ranges 1.142 + // of the requested density. 1.143 + return 0; 1.144 + } 1.145 + maxLength-=gaps.gapLength(i); 1.146 + if(length>num*2 && length>=(density*maxLength)/0x100) { 1.147 + break; 1.148 + } 1.149 + } 1.150 + // Use the num ranges with the num-1 largest gaps. 1.151 + gaps.truncate(num-1); 1.152 + ranges[0][0]=minValue; 1.153 + for(i=0; i<=num-2; ++i) { 1.154 + int32_t gapIndex=gaps.firstAfter(minValue); 1.155 + int32_t gapStart=gaps.gapStart(gapIndex); 1.156 + ranges[i][1]=gapStart-1; 1.157 + ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex)); 1.158 + } 1.159 + ranges[num-1][1]=maxValue; 1.160 + return num; 1.161 +}