intl/icu/source/tools/toolutil/denseranges.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/toolutil/denseranges.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,158 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*   Copyright (C) 2010, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +*******************************************************************************
     1.9 +*   file name:  denseranges.cpp
    1.10 +*   encoding:   US-ASCII
    1.11 +*   tab size:   8 (not used)
    1.12 +*   indentation:4
    1.13 +*
    1.14 +*   created on: 2010sep25
    1.15 +*   created by: Markus W. Scherer
    1.16 +*
    1.17 +* Helper code for finding a small number of dense ranges.
    1.18 +*/
    1.19 +
    1.20 +#include "unicode/utypes.h"
    1.21 +#include "denseranges.h"
    1.22 +
    1.23 +// Definitions in the anonymous namespace are invisible outside this file.
    1.24 +namespace {
    1.25 +
    1.26 +/**
    1.27 + * Collect up to 15 range gaps and sort them by ascending gap size.
    1.28 + */
    1.29 +class LargestGaps {
    1.30 +public:
    1.31 +    LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {}
    1.32 +
    1.33 +    void add(int32_t gapStart, int64_t gapLength) {
    1.34 +        int32_t i=length;
    1.35 +        while(i>0 && gapLength>gapLengths[i-1]) {
    1.36 +            --i;
    1.37 +        }
    1.38 +        if(i<maxLength) {
    1.39 +            // The new gap is now one of the maxLength largest.
    1.40 +            // Insert the new gap, moving up smaller ones of the previous
    1.41 +            // length largest.
    1.42 +            int32_t j= length<maxLength ? length++ : maxLength-1;
    1.43 +            while(j>i) {
    1.44 +                gapStarts[j]=gapStarts[j-1];
    1.45 +                gapLengths[j]=gapLengths[j-1];
    1.46 +                --j;
    1.47 +            }
    1.48 +            gapStarts[i]=gapStart;
    1.49 +            gapLengths[i]=gapLength;
    1.50 +        }
    1.51 +    }
    1.52 +
    1.53 +    void truncate(int32_t newLength) {
    1.54 +        if(newLength<length) {
    1.55 +            length=newLength;
    1.56 +        }
    1.57 +    }
    1.58 +
    1.59 +    int32_t count() const { return length; }
    1.60 +    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
    1.61 +    int64_t gapLength(int32_t i) const { return gapLengths[i]; }
    1.62 +
    1.63 +    int32_t firstAfter(int32_t value) const {
    1.64 +        if(length==0) {
    1.65 +            return -1;
    1.66 +        }
    1.67 +        int32_t minValue=0;
    1.68 +        int32_t minIndex=-1;
    1.69 +        for(int32_t i=0; i<length; ++i) {
    1.70 +            if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
    1.71 +                minValue=gapStarts[i];
    1.72 +                minIndex=i;
    1.73 +            }
    1.74 +        }
    1.75 +        return minIndex;
    1.76 +    }
    1.77 +
    1.78 +private:
    1.79 +    static const int32_t kCapacity=15;
    1.80 +
    1.81 +    int32_t maxLength;
    1.82 +    int32_t length;
    1.83 +    int32_t gapStarts[kCapacity];
    1.84 +    int64_t gapLengths[kCapacity];
    1.85 +};
    1.86 +
    1.87 +}  // namespace
    1.88 +
    1.89 +/**
    1.90 + * Does it make sense to write 1..capacity ranges?
    1.91 + * Returns 0 if not, otherwise the number of ranges.
    1.92 + * @param values Sorted array of signed-integer values.
    1.93 + * @param length Number of values.
    1.94 + * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
    1.95 + *                Should be 0x80..0x100, must be 1..0x100.
    1.96 + * @param ranges Output ranges array.
    1.97 + * @param capacity Maximum number of ranges.
    1.98 + * @return Minimum number of ranges (at most capacity) that have the desired density,
    1.99 + *         or 0 if that density cannot be achieved.
   1.100 + */
   1.101 +U_CAPI int32_t U_EXPORT2
   1.102 +uprv_makeDenseRanges(const int32_t values[], int32_t length,
   1.103 +                     int32_t density,
   1.104 +                     int32_t ranges[][2], int32_t capacity) {
   1.105 +    if(length<=2) {
   1.106 +        return 0;
   1.107 +    }
   1.108 +    int32_t minValue=values[0];
   1.109 +    int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
   1.110 +    // Use int64_t variables for intermediate-value precision and to avoid
   1.111 +    // signed-int32_t overflow of maxValue-minValue.
   1.112 +    int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
   1.113 +    if(length>=(density*maxLength)/0x100) {
   1.114 +        // Use one range.
   1.115 +        ranges[0][0]=minValue;
   1.116 +        ranges[0][1]=maxValue;
   1.117 +        return 1;
   1.118 +    }
   1.119 +    if(length<=4) {
   1.120 +        return 0;
   1.121 +    }
   1.122 +    // See if we can split [minValue, maxValue] into 2..capacity ranges,
   1.123 +    // divided by the 1..(capacity-1) largest gaps.
   1.124 +    LargestGaps gaps(capacity-1);
   1.125 +    int32_t i;
   1.126 +    int32_t expectedValue=minValue;
   1.127 +    for(i=1; i<length; ++i) {
   1.128 +        ++expectedValue;
   1.129 +        int32_t actualValue=values[i];
   1.130 +        if(expectedValue!=actualValue) {
   1.131 +            gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
   1.132 +            expectedValue=actualValue;
   1.133 +        }
   1.134 +    }
   1.135 +    // We know gaps.count()>=1 because we have fewer values (length) than
   1.136 +    // the length of the [minValue..maxValue] range (maxLength).
   1.137 +    // (Otherwise we would have returned with the one range above.)
   1.138 +    int32_t num;
   1.139 +    for(i=0, num=2;; ++i, ++num) {
   1.140 +        if(i>=gaps.count()) {
   1.141 +            // The values are too sparse for capacity or fewer ranges
   1.142 +            // of the requested density.
   1.143 +            return 0;
   1.144 +        }
   1.145 +        maxLength-=gaps.gapLength(i);
   1.146 +        if(length>num*2 && length>=(density*maxLength)/0x100) {
   1.147 +            break;
   1.148 +        }
   1.149 +    }
   1.150 +    // Use the num ranges with the num-1 largest gaps.
   1.151 +    gaps.truncate(num-1);
   1.152 +    ranges[0][0]=minValue;
   1.153 +    for(i=0; i<=num-2; ++i) {
   1.154 +        int32_t gapIndex=gaps.firstAfter(minValue);
   1.155 +        int32_t gapStart=gaps.gapStart(gapIndex);
   1.156 +        ranges[i][1]=gapStart-1;
   1.157 +        ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
   1.158 +    }
   1.159 +    ranges[num-1][1]=maxValue;
   1.160 +    return num;
   1.161 +}

mercurial