|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 2010, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ******************************************************************************* |
|
6 * file name: denseranges.cpp |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2010sep25 |
|
12 * created by: Markus W. Scherer |
|
13 * |
|
14 * Helper code for finding a small number of dense ranges. |
|
15 */ |
|
16 |
|
17 #include "unicode/utypes.h" |
|
18 #include "denseranges.h" |
|
19 |
|
20 // Definitions in the anonymous namespace are invisible outside this file. |
|
21 namespace { |
|
22 |
|
/**
 * Collects the largest gaps seen so far, up to a fixed limit.
 *
 * At most min(max, kCapacity) gaps are retained (kCapacity is 15).
 * The retained gaps are kept sorted by descending gap length:
 * index 0 holds the largest gap. Among gaps of equal length,
 * the one added earlier comes first.
 */
class LargestGaps {
public:
    /**
     * @param max Maximum number of gaps to retain.
     *            Values below 0 are treated as 0, values above kCapacity as kCapacity.
     */
    explicit LargestGaps(int32_t max) : maxLength(0), length(0) {
        if(max>kCapacity) {
            maxLength=kCapacity;
        } else if(max>0) {
            maxLength=max;
        }
    }

    /**
     * Offers a gap to the collection.
     * The gap is stored only if it is among the maxLength largest seen so far;
     * when the collection is already full, the smallest retained gap is dropped.
     * @param gapStart First value inside the gap.
     * @param gapLength Number of values in the gap.
     */
    void add(int32_t gapStart, int64_t gapLength) {
        // Find the insertion point: skip backwards over all strictly smaller gaps.
        int32_t i=length;
        while(i>0 && gapLength>gapLengths[i-1]) {
            --i;
        }
        if(i<maxLength) {
            // The new gap is now one of the maxLength largest.
            // Insert the new gap, moving up smaller ones of the previous
            // length largest.
            // If there is room, grow by one; otherwise the last (smallest) entry
            // is overwritten by the shift.
            int32_t j= length<maxLength ? length++ : maxLength-1;
            while(j>i) {
                gapStarts[j]=gapStarts[j-1];
                gapLengths[j]=gapLengths[j-1];
                --j;
            }
            gapStarts[i]=gapStart;
            gapLengths[i]=gapLength;
        }
    }

    /**
     * Keeps only the newLength largest gaps.
     * Does nothing if newLength>=count(); a negative newLength is treated as 0.
     */
    void truncate(int32_t newLength) {
        if(newLength<0) {
            newLength=0;
        }
        if(newLength<length) {
            length=newLength;
        }
    }

    /** @return Number of gaps currently retained. */
    int32_t count() const { return length; }
    /** @return Start of the i-th largest gap; i must be in 0..count()-1 (not checked). */
    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
    /** @return Length of the i-th largest gap; i must be in 0..count()-1 (not checked). */
    int64_t gapLength(int32_t i) const { return gapLengths[i]; }

    /**
     * Finds the retained gap with the smallest start that is greater than value.
     * @return Index of that gap, or -1 if no retained gap starts after value.
     */
    int32_t firstAfter(int32_t value) const {
        if(length==0) {
            return -1;
        }
        int32_t minValue=0;
        int32_t minIndex=-1;
        for(int32_t i=0; i<length; ++i) {
            // minValue is meaningful only once minIndex>=0.
            if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
                minValue=gapStarts[i];
                minIndex=i;
            }
        }
        return minIndex;
    }

private:
    static const int32_t kCapacity=15;

    int32_t maxLength;  // Retention limit, 0..kCapacity.
    int32_t length;     // Number of valid entries in the arrays below.
    int32_t gapStarts[kCapacity];
    int64_t gapLengths[kCapacity];
};
|
83 |
|
84 } // namespace |
|
85 |
|
86 /** |
|
87 * Does it make sense to write 1..capacity ranges? |
|
88 * Returns 0 if not, otherwise the number of ranges. |
|
89 * @param values Sorted array of signed-integer values. |
|
90 * @param length Number of values. |
|
91 * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.) |
|
92 * Should be 0x80..0x100, must be 1..0x100. |
|
93 * @param ranges Output ranges array. |
|
94 * @param capacity Maximum number of ranges. |
|
95 * @return Minimum number of ranges (at most capacity) that have the desired density, |
|
96 * or 0 if that density cannot be achieved. |
|
97 */ |
|
98 U_CAPI int32_t U_EXPORT2 |
|
99 uprv_makeDenseRanges(const int32_t values[], int32_t length, |
|
100 int32_t density, |
|
101 int32_t ranges[][2], int32_t capacity) { |
|
102 if(length<=2) { |
|
103 return 0; |
|
104 } |
|
105 int32_t minValue=values[0]; |
|
106 int32_t maxValue=values[length-1]; // Assume minValue<=maxValue. |
|
107 // Use int64_t variables for intermediate-value precision and to avoid |
|
108 // signed-int32_t overflow of maxValue-minValue. |
|
109 int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1; |
|
110 if(length>=(density*maxLength)/0x100) { |
|
111 // Use one range. |
|
112 ranges[0][0]=minValue; |
|
113 ranges[0][1]=maxValue; |
|
114 return 1; |
|
115 } |
|
116 if(length<=4) { |
|
117 return 0; |
|
118 } |
|
119 // See if we can split [minValue, maxValue] into 2..capacity ranges, |
|
120 // divided by the 1..(capacity-1) largest gaps. |
|
121 LargestGaps gaps(capacity-1); |
|
122 int32_t i; |
|
123 int32_t expectedValue=minValue; |
|
124 for(i=1; i<length; ++i) { |
|
125 ++expectedValue; |
|
126 int32_t actualValue=values[i]; |
|
127 if(expectedValue!=actualValue) { |
|
128 gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue); |
|
129 expectedValue=actualValue; |
|
130 } |
|
131 } |
|
132 // We know gaps.count()>=1 because we have fewer values (length) than |
|
133 // the length of the [minValue..maxValue] range (maxLength). |
|
134 // (Otherwise we would have returned with the one range above.) |
|
135 int32_t num; |
|
136 for(i=0, num=2;; ++i, ++num) { |
|
137 if(i>=gaps.count()) { |
|
138 // The values are too sparse for capacity or fewer ranges |
|
139 // of the requested density. |
|
140 return 0; |
|
141 } |
|
142 maxLength-=gaps.gapLength(i); |
|
143 if(length>num*2 && length>=(density*maxLength)/0x100) { |
|
144 break; |
|
145 } |
|
146 } |
|
147 // Use the num ranges with the num-1 largest gaps. |
|
148 gaps.truncate(num-1); |
|
149 ranges[0][0]=minValue; |
|
150 for(i=0; i<=num-2; ++i) { |
|
151 int32_t gapIndex=gaps.firstAfter(minValue); |
|
152 int32_t gapStart=gaps.gapStart(gapIndex); |
|
153 ranges[i][1]=gapStart-1; |
|
154 ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex)); |
|
155 } |
|
156 ranges[num-1][1]=maxValue; |
|
157 return num; |
|
158 } |