intl/icu/source/i18n/nfrs.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/nfrs.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,971 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*   Copyright (C) 1997-2012, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +******************************************************************************
     1.9 +*   file name:  nfrs.cpp
    1.10 +*   encoding:   US-ASCII
    1.11 +*   tab size:   8 (not used)
    1.12 +*   indentation:4
    1.13 +*
    1.14 +* Modification history
    1.15 +* Date        Name      Comments
    1.16 +* 10/11/2001  Doug      Ported from ICU4J
    1.17 +*/
    1.18 +
    1.19 +#include "nfrs.h"
    1.20 +
    1.21 +#if U_HAVE_RBNF
    1.22 +
    1.23 +#include "unicode/uchar.h"
    1.24 +#include "nfrule.h"
    1.25 +#include "nfrlist.h"
    1.26 +#include "patternprops.h"
    1.27 +
    1.28 +#ifdef RBNF_DEBUG
    1.29 +#include "cmemory.h"
    1.30 +#endif
    1.31 +
    1.32 +U_NAMESPACE_BEGIN
    1.33 +
    1.34 +#if 0
    1.35 +// euclid's algorithm works with doubles
    1.36 +// note, doubles only get us up to one quadrillion or so, which
    1.37 +// isn't as much range as we get with longs.  We probably still
    1.38 +// want either 64-bit math, or BigInteger.
    1.39 +
    1.40 +static int64_t
    1.41 +util_lcm(int64_t x, int64_t y)
    1.42 +{
    1.43 +    x.abs();
    1.44 +    y.abs();
    1.45 +
    1.46 +    if (x == 0 || y == 0) {
    1.47 +        return 0;
    1.48 +    } else {
    1.49 +        do {
    1.50 +            if (x < y) {
    1.51 +                int64_t t = x; x = y; y = t;
    1.52 +            }
    1.53 +            x -= y * (x/y);
    1.54 +        } while (x != 0);
    1.55 +
    1.56 +        return y;
    1.57 +    }
    1.58 +}
    1.59 +
    1.60 +#else
    1.61 +/**
    1.62 + * Calculates the least common multiple of x and y.
    1.63 + */
    1.64 +static int64_t
    1.65 +util_lcm(int64_t x, int64_t y)
    1.66 +{
    1.67 +    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
    1.68 +    // vol. 2, 1st ed., pp. 298-299
    1.69 +    int64_t x1 = x;
    1.70 +    int64_t y1 = y;
    1.71 +
    1.72 +    int p2 = 0;
    1.73 +    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
    1.74 +        ++p2;
    1.75 +        x1 >>= 1;
    1.76 +        y1 >>= 1;
    1.77 +    }
    1.78 +
    1.79 +    int64_t t;
    1.80 +    if ((x1 & 1) == 1) {
    1.81 +        t = -y1;
    1.82 +    } else {
    1.83 +        t = x1;
    1.84 +    }
    1.85 +
    1.86 +    while (t != 0) {
    1.87 +        while ((t & 1) == 0) {
    1.88 +            t = t >> 1;
    1.89 +        }
    1.90 +        if (t > 0) {
    1.91 +            x1 = t;
    1.92 +        } else {
    1.93 +            y1 = -t;
    1.94 +        }
    1.95 +        t = x1 - y1;
    1.96 +    }
    1.97 +
    1.98 +    int64_t gcd = x1 << p2;
    1.99 +
   1.100 +    // x * y == gcd(x, y) * lcm(x, y)
   1.101 +    return x / gcd * y;
   1.102 +}
   1.103 +#endif
   1.104 +
   1.105 +static const UChar gPercent = 0x0025;
   1.106 +static const UChar gColon = 0x003a;
   1.107 +static const UChar gSemicolon = 0x003b;
   1.108 +static const UChar gLineFeed = 0x000a;
   1.109 +
   1.110 +static const UChar gFourSpaces[] =
   1.111 +{
   1.112 +    0x20, 0x20, 0x20, 0x20, 0
   1.113 +}; /* "    " */
   1.114 +static const UChar gPercentPercent[] =
   1.115 +{
   1.116 +    0x25, 0x25, 0
   1.117 +}; /* "%%" */
   1.118 +
   1.119 +static const UChar gNoparse[] =
   1.120 +{
   1.121 +    0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
   1.122 +}; /* "@noparse" */
   1.123 +
   1.124 +NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
   1.125 +  : name()
   1.126 +  , rules(0)
   1.127 +  , negativeNumberRule(NULL)
   1.128 +  , fIsFractionRuleSet(FALSE)
   1.129 +  , fIsPublic(FALSE)
   1.130 +  , fIsParseable(TRUE)
   1.131 +  , fRecursionCount(0)
   1.132 +{
   1.133 +    for (int i = 0; i < 3; ++i) {
   1.134 +        fractionRules[i] = NULL;
   1.135 +    }
   1.136 +
   1.137 +    if (U_FAILURE(status)) {
   1.138 +        return;
   1.139 +    }
   1.140 +
   1.141 +    UnicodeString& description = descriptions[index]; // !!! make sure index is valid
   1.142 +
   1.143 +    if (description.length() == 0) {
   1.144 +        // throw new IllegalArgumentException("Empty rule set description");
   1.145 +        status = U_PARSE_ERROR;
   1.146 +        return;
   1.147 +    }
   1.148 +
   1.149 +    // if the description begins with a rule set name (the rule set
   1.150 +    // name can be omitted in formatter descriptions that consist
   1.151 +    // of only one rule set), copy it out into our "name" member
   1.152 +    // and delete it from the description
   1.153 +    if (description.charAt(0) == gPercent) {
   1.154 +        int32_t pos = description.indexOf(gColon);
   1.155 +        if (pos == -1) {
   1.156 +            // throw new IllegalArgumentException("Rule set name doesn't end in colon");
   1.157 +            status = U_PARSE_ERROR;
   1.158 +        } else {
   1.159 +            name.setTo(description, 0, pos);
   1.160 +            while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
   1.161 +            }
   1.162 +            description.remove(0, pos);
   1.163 +        }
   1.164 +    } else {
   1.165 +        name.setTo(UNICODE_STRING_SIMPLE("%default"));
   1.166 +    }
   1.167 +
   1.168 +    if (description.length() == 0) {
   1.169 +        // throw new IllegalArgumentException("Empty rule set description");
   1.170 +        status = U_PARSE_ERROR;
   1.171 +    }
   1.172 +
   1.173 +    fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
   1.174 +
   1.175 +    if ( name.endsWith(gNoparse,8) ) {
   1.176 +        fIsParseable = FALSE;
   1.177 +        name.truncate(name.length()-8); // remove the @noparse from the name
   1.178 +    }
   1.179 +
   1.180 +    // all of the other members of NFRuleSet are initialized
   1.181 +    // by parseRules()
   1.182 +}
   1.183 +
   1.184 +void
   1.185 +NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
   1.186 +{
   1.187 +    // start by creating a Vector whose elements are Strings containing
   1.188 +    // the descriptions of the rules (one rule per element).  The rules
   1.189 +    // are separated by semicolons (there's no escape facility: ALL
   1.190 +    // semicolons are rule delimiters)
   1.191 +
   1.192 +    if (U_FAILURE(status)) {
   1.193 +        return;
   1.194 +    }
   1.195 +
   1.196 +    // ensure we are starting with an empty rule list
   1.197 +    rules.deleteAll();
   1.198 +
   1.199 +    // dlf - the original code kept a separate description array for no reason,
   1.200 +    // so I got rid of it.  The loop was too complex so I simplified it.
   1.201 +
   1.202 +    UnicodeString currentDescription;
   1.203 +    int32_t oldP = 0;
   1.204 +    while (oldP < description.length()) {
   1.205 +        int32_t p = description.indexOf(gSemicolon, oldP);
   1.206 +        if (p == -1) {
   1.207 +            p = description.length();
   1.208 +        }
   1.209 +        currentDescription.setTo(description, oldP, p - oldP);
   1.210 +        NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
   1.211 +        oldP = p + 1;
   1.212 +    }
   1.213 +
   1.214 +    // for rules that didn't specify a base value, their base values
   1.215 +    // were initialized to 0.  Make another pass through the list and
   1.216 +    // set all those rules' base values.  We also remove any special
   1.217 +    // rules from the list and put them into their own member variables
   1.218 +    int64_t defaultBaseValue = 0;
   1.219 +
   1.220 +    // (this isn't a for loop because we might be deleting items from
   1.221 +    // the vector-- we want to make sure we only increment i when
   1.222 +    // we _didn't_ delete aything from the vector)
   1.223 +    uint32_t i = 0;
   1.224 +    while (i < rules.size()) {
   1.225 +        NFRule* rule = rules[i];
   1.226 +
   1.227 +        switch (rule->getType()) {
   1.228 +            // if the rule's base value is 0, fill in a default
   1.229 +            // base value (this will be 1 plus the preceding
   1.230 +            // rule's base value for regular rule sets, and the
   1.231 +            // same as the preceding rule's base value in fraction
   1.232 +            // rule sets)
   1.233 +        case NFRule::kNoBase:
   1.234 +            rule->setBaseValue(defaultBaseValue, status);
   1.235 +            if (!isFractionRuleSet()) {
   1.236 +                ++defaultBaseValue;
   1.237 +            }
   1.238 +            ++i;
   1.239 +            break;
   1.240 +
   1.241 +            // if it's the negative-number rule, copy it into its own
   1.242 +            // data member and delete it from the list
   1.243 +        case NFRule::kNegativeNumberRule:
   1.244 +            if (negativeNumberRule) {
   1.245 +                delete negativeNumberRule;
   1.246 +            }
   1.247 +            negativeNumberRule = rules.remove(i);
   1.248 +            break;
   1.249 +
   1.250 +            // if it's the improper fraction rule, copy it into the
   1.251 +            // correct element of fractionRules
   1.252 +        case NFRule::kImproperFractionRule:
   1.253 +            if (fractionRules[0]) {
   1.254 +                delete fractionRules[0];
   1.255 +            }
   1.256 +            fractionRules[0] = rules.remove(i);
   1.257 +            break;
   1.258 +
   1.259 +            // if it's the proper fraction rule, copy it into the
   1.260 +            // correct element of fractionRules
   1.261 +        case NFRule::kProperFractionRule:
   1.262 +            if (fractionRules[1]) {
   1.263 +                delete fractionRules[1];
   1.264 +            }
   1.265 +            fractionRules[1] = rules.remove(i);
   1.266 +            break;
   1.267 +
   1.268 +            // if it's the master rule, copy it into the
   1.269 +            // correct element of fractionRules
   1.270 +        case NFRule::kMasterRule:
   1.271 +            if (fractionRules[2]) {
   1.272 +                delete fractionRules[2];
   1.273 +            }
   1.274 +            fractionRules[2] = rules.remove(i);
   1.275 +            break;
   1.276 +
   1.277 +            // if it's a regular rule that already knows its base value,
   1.278 +            // check to make sure the rules are in order, and update
   1.279 +            // the default base value for the next rule
   1.280 +        default:
   1.281 +            if (rule->getBaseValue() < defaultBaseValue) {
   1.282 +                // throw new IllegalArgumentException("Rules are not in order");
   1.283 +                status = U_PARSE_ERROR;
   1.284 +                return;
   1.285 +            }
   1.286 +            defaultBaseValue = rule->getBaseValue();
   1.287 +            if (!isFractionRuleSet()) {
   1.288 +                ++defaultBaseValue;
   1.289 +            }
   1.290 +            ++i;
   1.291 +            break;
   1.292 +        }
   1.293 +    }
   1.294 +}
   1.295 +
   1.296 +NFRuleSet::~NFRuleSet()
   1.297 +{
   1.298 +    delete negativeNumberRule;
   1.299 +    delete fractionRules[0];
   1.300 +    delete fractionRules[1];
   1.301 +    delete fractionRules[2];
   1.302 +}
   1.303 +
   1.304 +static UBool
   1.305 +util_equalRules(const NFRule* rule1, const NFRule* rule2)
   1.306 +{
   1.307 +    if (rule1) {
   1.308 +        if (rule2) {
   1.309 +            return *rule1 == *rule2;
   1.310 +        }
   1.311 +    } else if (!rule2) {
   1.312 +        return TRUE;
   1.313 +    }
   1.314 +    return FALSE;
   1.315 +}
   1.316 +
   1.317 +UBool
   1.318 +NFRuleSet::operator==(const NFRuleSet& rhs) const
   1.319 +{
   1.320 +    if (rules.size() == rhs.rules.size() &&
   1.321 +        fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
   1.322 +        name == rhs.name &&
   1.323 +        util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
   1.324 +        util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
   1.325 +        util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
   1.326 +        util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
   1.327 +
   1.328 +        for (uint32_t i = 0; i < rules.size(); ++i) {
   1.329 +            if (*rules[i] != *rhs.rules[i]) {
   1.330 +                return FALSE;
   1.331 +            }
   1.332 +        }
   1.333 +        return TRUE;
   1.334 +    }
   1.335 +    return FALSE;
   1.336 +}
   1.337 +
   1.338 +#define RECURSION_LIMIT 50
   1.339 +
   1.340 +void
   1.341 +NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const
   1.342 +{
   1.343 +    NFRule *rule = findNormalRule(number);
   1.344 +    if (rule) { // else error, but can't report it
   1.345 +        NFRuleSet* ncThis = (NFRuleSet*)this;
   1.346 +        if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
   1.347 +            // stop recursion
   1.348 +            ncThis->fRecursionCount = 0;
   1.349 +        } else {
   1.350 +            rule->doFormat(number, toAppendTo, pos);
   1.351 +            ncThis->fRecursionCount--;
   1.352 +        }
   1.353 +    }
   1.354 +}
   1.355 +
   1.356 +void
   1.357 +NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
   1.358 +{
   1.359 +    NFRule *rule = findDoubleRule(number);
   1.360 +    if (rule) { // else error, but can't report it
   1.361 +        NFRuleSet* ncThis = (NFRuleSet*)this;
   1.362 +        if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
   1.363 +            // stop recursion
   1.364 +            ncThis->fRecursionCount = 0;
   1.365 +        } else {
   1.366 +            rule->doFormat(number, toAppendTo, pos);
   1.367 +            ncThis->fRecursionCount--;
   1.368 +        }
   1.369 +    }
   1.370 +}
   1.371 +
   1.372 +NFRule*
   1.373 +NFRuleSet::findDoubleRule(double number) const
   1.374 +{
   1.375 +    // if this is a fraction rule set, use findFractionRuleSetRule()
   1.376 +    if (isFractionRuleSet()) {
   1.377 +        return findFractionRuleSetRule(number);
   1.378 +    }
   1.379 +
   1.380 +    // if the number is negative, return the negative number rule
   1.381 +    // (if there isn't a negative-number rule, we pretend it's a
   1.382 +    // positive number)
   1.383 +    if (number < 0) {
   1.384 +        if (negativeNumberRule) {
   1.385 +            return  negativeNumberRule;
   1.386 +        } else {
   1.387 +            number = -number;
   1.388 +        }
   1.389 +    }
   1.390 +
   1.391 +    // if the number isn't an integer, we use one of the fraction rules...
   1.392 +    if (number != uprv_floor(number)) {
   1.393 +        // if the number is between 0 and 1, return the proper
   1.394 +        // fraction rule
   1.395 +        if (number < 1 && fractionRules[1]) {
   1.396 +            return fractionRules[1];
   1.397 +        }
   1.398 +        // otherwise, return the improper fraction rule
   1.399 +        else if (fractionRules[0]) {
   1.400 +            return fractionRules[0];
   1.401 +        }
   1.402 +    }
   1.403 +
   1.404 +    // if there's a master rule, use it to format the number
   1.405 +    if (fractionRules[2]) {
   1.406 +        return fractionRules[2];
   1.407 +    }
   1.408 +
   1.409 +    // and if we haven't yet returned a rule, use findNormalRule()
   1.410 +    // to find the applicable rule
   1.411 +    int64_t r = util64_fromDouble(number + 0.5);
   1.412 +    return findNormalRule(r);
   1.413 +}
   1.414 +
   1.415 +NFRule *
   1.416 +NFRuleSet::findNormalRule(int64_t number) const
   1.417 +{
   1.418 +    // if this is a fraction rule set, use findFractionRuleSetRule()
   1.419 +    // to find the rule (we should only go into this clause if the
   1.420 +    // value is 0)
   1.421 +    if (fIsFractionRuleSet) {
   1.422 +        return findFractionRuleSetRule((double)number);
   1.423 +    }
   1.424 +
   1.425 +    // if the number is negative, return the negative-number rule
   1.426 +    // (if there isn't one, pretend the number is positive)
   1.427 +    if (number < 0) {
   1.428 +        if (negativeNumberRule) {
   1.429 +            return negativeNumberRule;
   1.430 +        } else {
   1.431 +            number = -number;
   1.432 +        }
   1.433 +    }
   1.434 +
   1.435 +    // we have to repeat the preceding two checks, even though we
   1.436 +    // do them in findRule(), because the version of format() that
   1.437 +    // takes a long bypasses findRule() and goes straight to this
   1.438 +    // function.  This function does skip the fraction rules since
   1.439 +    // we know the value is an integer (it also skips the master
   1.440 +    // rule, since it's considered a fraction rule.  Skipping the
   1.441 +    // master rule in this function is also how we avoid infinite
   1.442 +    // recursion)
   1.443 +
   1.444 +    // {dlf} unfortunately this fails if there are no rules except
   1.445 +    // special rules.  If there are no rules, use the master rule.
   1.446 +
   1.447 +    // binary-search the rule list for the applicable rule
   1.448 +    // (a rule is used for all values from its base value to
   1.449 +    // the next rule's base value)
   1.450 +    int32_t hi = rules.size();
   1.451 +    if (hi > 0) {
   1.452 +        int32_t lo = 0;
   1.453 +
   1.454 +        while (lo < hi) {
   1.455 +            int32_t mid = (lo + hi) / 2;
   1.456 +            if (rules[mid]->getBaseValue() == number) {
   1.457 +                return rules[mid];
   1.458 +            }
   1.459 +            else if (rules[mid]->getBaseValue() > number) {
   1.460 +                hi = mid;
   1.461 +            }
   1.462 +            else {
   1.463 +                lo = mid + 1;
   1.464 +            }
   1.465 +        }
   1.466 +        if (hi == 0) { // bad rule set, minimum base > 0
   1.467 +            return NULL; // want to throw exception here
   1.468 +        }
   1.469 +
   1.470 +        NFRule *result = rules[hi - 1];
   1.471 +
   1.472 +        // use shouldRollBack() to see whether we need to invoke the
   1.473 +        // rollback rule (see shouldRollBack()'s documentation for
   1.474 +        // an explanation of the rollback rule).  If we do, roll back
   1.475 +        // one rule and return that one instead of the one we'd normally
   1.476 +        // return
   1.477 +        if (result->shouldRollBack((double)number)) {
   1.478 +            if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
   1.479 +                return NULL;
   1.480 +            }
   1.481 +            result = rules[hi - 2];
   1.482 +        }
   1.483 +        return result;
   1.484 +    }
   1.485 +    // else use the master rule
   1.486 +    return fractionRules[2];
   1.487 +}
   1.488 +
   1.489 +/**
   1.490 + * If this rule is a fraction rule set, this function is used by
   1.491 + * findRule() to select the most appropriate rule for formatting
   1.492 + * the number.  Basically, the base value of each rule in the rule
   1.493 + * set is treated as the denominator of a fraction.  Whichever
   1.494 + * denominator can produce the fraction closest in value to the
   1.495 + * number passed in is the result.  If there's a tie, the earlier
   1.496 + * one in the list wins.  (If there are two rules in a row with the
   1.497 + * same base value, the first one is used when the numerator of the
   1.498 + * fraction would be 1, and the second rule is used the rest of the
   1.499 + * time.
   1.500 + * @param number The number being formatted (which will always be
   1.501 + * a number between 0 and 1)
   1.502 + * @return The rule to use to format this number
   1.503 + */
   1.504 +NFRule*
   1.505 +NFRuleSet::findFractionRuleSetRule(double number) const
   1.506 +{
   1.507 +    // the obvious way to do this (multiply the value being formatted
   1.508 +    // by each rule's base value until you get an integral result)
   1.509 +    // doesn't work because of rounding error.  This method is more
   1.510 +    // accurate
   1.511 +
   1.512 +    // find the least common multiple of the rules' base values
   1.513 +    // and multiply this by the number being formatted.  This is
   1.514 +    // all the precision we need, and we can do all of the rest
   1.515 +    // of the math using integer arithmetic
   1.516 +    int64_t leastCommonMultiple = rules[0]->getBaseValue();
   1.517 +    int64_t numerator;
   1.518 +    {
   1.519 +        for (uint32_t i = 1; i < rules.size(); ++i) {
   1.520 +            leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
   1.521 +        }
   1.522 +        numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
   1.523 +    }
   1.524 +    // for each rule, do the following...
   1.525 +    int64_t tempDifference;
   1.526 +    int64_t difference = util64_fromDouble(uprv_maxMantissa());
   1.527 +    int32_t winner = 0;
   1.528 +    for (uint32_t i = 0; i < rules.size(); ++i) {
   1.529 +        // "numerator" is the numerator of the fraction if the
   1.530 +        // denominator is the LCD.  The numerator if the rule's
   1.531 +        // base value is the denominator is "numerator" times the
   1.532 +        // base value divided bythe LCD.  Here we check to see if
   1.533 +        // that's an integer, and if not, how close it is to being
   1.534 +        // an integer.
   1.535 +        tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
   1.536 +
   1.537 +
   1.538 +        // normalize the result of the above calculation: we want
   1.539 +        // the numerator's distance from the CLOSEST multiple
   1.540 +        // of the LCD
   1.541 +        if (leastCommonMultiple - tempDifference < tempDifference) {
   1.542 +            tempDifference = leastCommonMultiple - tempDifference;
   1.543 +        }
   1.544 +
   1.545 +        // if this is as close as we've come, keep track of how close
   1.546 +        // that is, and the line number of the rule that did it.  If
   1.547 +        // we've scored a direct hit, we don't have to look at any more
   1.548 +        // rules
   1.549 +        if (tempDifference < difference) {
   1.550 +            difference = tempDifference;
   1.551 +            winner = i;
   1.552 +            if (difference == 0) {
   1.553 +                break;
   1.554 +            }
   1.555 +        }
   1.556 +    }
   1.557 +
   1.558 +    // if we have two successive rules that both have the winning base
   1.559 +    // value, then the first one (the one we found above) is used if
   1.560 +    // the numerator of the fraction is 1 and the second one is used if
   1.561 +    // the numerator of the fraction is anything else (this lets us
   1.562 +    // do things like "one third"/"two thirds" without haveing to define
   1.563 +    // a whole bunch of extra rule sets)
   1.564 +    if ((unsigned)(winner + 1) < rules.size() &&
   1.565 +        rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
   1.566 +        double n = ((double)rules[winner]->getBaseValue()) * number;
   1.567 +        if (n < 0.5 || n >= 2) {
   1.568 +            ++winner;
   1.569 +        }
   1.570 +    }
   1.571 +
   1.572 +    // finally, return the winning rule
   1.573 +    return rules[winner];
   1.574 +}
   1.575 +
   1.576 +/**
   1.577 + * Parses a string.  Matches the string to be parsed against each
   1.578 + * of its rules (with a base value less than upperBound) and returns
   1.579 + * the value produced by the rule that matched the most charcters
   1.580 + * in the source string.
   1.581 + * @param text The string to parse
   1.582 + * @param parsePosition The initial position is ignored and assumed
   1.583 + * to be 0.  On exit, this object has been updated to point to the
   1.584 + * first character position this rule set didn't consume.
   1.585 + * @param upperBound Limits the rules that can be allowed to match.
   1.586 + * Only rules whose base values are strictly less than upperBound
   1.587 + * are considered.
   1.588 + * @return The numerical result of parsing this string.  This will
   1.589 + * be the matching rule's base value, composed appropriately with
   1.590 + * the results of matching any of its substitutions.  The object
   1.591 + * will be an instance of Long if it's an integral value; otherwise,
   1.592 + * it will be an instance of Double.  This function always returns
   1.593 + * a valid object: If nothing matched the input string at all,
   1.594 + * this function returns new Long(0), and the parse position is
   1.595 + * left unchanged.
   1.596 + */
   1.597 +#ifdef RBNF_DEBUG
   1.598 +#include <stdio.h>
   1.599 +
   1.600 +static void dumpUS(FILE* f, const UnicodeString& us) {
   1.601 +  int len = us.length();
   1.602 +  char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
   1.603 +  if (buf != NULL) {
   1.604 +	  us.extract(0, len, buf);
   1.605 +	  buf[len] = 0;
   1.606 +	  fprintf(f, "%s", buf);
   1.607 +	  uprv_free(buf); //delete[] buf;
   1.608 +  }
   1.609 +}
   1.610 +#endif
   1.611 +
   1.612 +UBool
   1.613 +NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
   1.614 +{
   1.615 +    // try matching each rule in the rule set against the text being
   1.616 +    // parsed.  Whichever one matches the most characters is the one
   1.617 +    // that determines the value we return.
   1.618 +
   1.619 +    result.setLong(0);
   1.620 +
   1.621 +    // dump out if there's no text to parse
   1.622 +    if (text.length() == 0) {
   1.623 +        return 0;
   1.624 +    }
   1.625 +
   1.626 +    ParsePosition highWaterMark;
   1.627 +    ParsePosition workingPos = pos;
   1.628 +
   1.629 +#ifdef RBNF_DEBUG
   1.630 +    fprintf(stderr, "<nfrs> %x '", this);
   1.631 +    dumpUS(stderr, name);
   1.632 +    fprintf(stderr, "' text '");
   1.633 +    dumpUS(stderr, text);
   1.634 +    fprintf(stderr, "'\n");
   1.635 +    fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
   1.636 +#endif
   1.637 +
   1.638 +    // start by trying the negative number rule (if there is one)
   1.639 +    if (negativeNumberRule) {
   1.640 +        Formattable tempResult;
   1.641 +#ifdef RBNF_DEBUG
   1.642 +        fprintf(stderr, "  <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
   1.643 +#endif
   1.644 +        UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
   1.645 +#ifdef RBNF_DEBUG
   1.646 +        fprintf(stderr, "  <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
   1.647 +#endif
   1.648 +        if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
   1.649 +            result = tempResult;
   1.650 +            highWaterMark = workingPos;
   1.651 +        }
   1.652 +        workingPos = pos;
   1.653 +    }
   1.654 +#ifdef RBNF_DEBUG
   1.655 +    fprintf(stderr, "<nfrs> continue fractional with text '");
   1.656 +    dumpUS(stderr, text);
   1.657 +    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
   1.658 +#endif
   1.659 +    // then try each of the fraction rules
   1.660 +    {
   1.661 +        for (int i = 0; i < 3; i++) {
   1.662 +            if (fractionRules[i]) {
   1.663 +                Formattable tempResult;
   1.664 +                UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
   1.665 +                if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
   1.666 +                    result = tempResult;
   1.667 +                    highWaterMark = workingPos;
   1.668 +                }
   1.669 +                workingPos = pos;
   1.670 +            }
   1.671 +        }
   1.672 +    }
   1.673 +#ifdef RBNF_DEBUG
   1.674 +    fprintf(stderr, "<nfrs> continue other with text '");
   1.675 +    dumpUS(stderr, text);
   1.676 +    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
   1.677 +#endif
   1.678 +
   1.679 +    // finally, go through the regular rules one at a time.  We start
   1.680 +    // at the end of the list because we want to try matching the most
   1.681 +    // sigificant rule first (this helps ensure that we parse
   1.682 +    // "five thousand three hundred six" as
   1.683 +    // "(five thousand) (three hundred) (six)" rather than
   1.684 +    // "((five thousand three) hundred) (six)").  Skip rules whose
   1.685 +    // base values are higher than the upper bound (again, this helps
   1.686 +    // limit ambiguity by making sure the rules that match a rule's
   1.687 +    // are less significant than the rule containing the substitutions)/
   1.688 +    {
   1.689 +        int64_t ub = util64_fromDouble(upperBound);
   1.690 +#ifdef RBNF_DEBUG
   1.691 +        {
   1.692 +            char ubstr[64];
   1.693 +            util64_toa(ub, ubstr, 64);
   1.694 +            char ubstrhex[64];
   1.695 +            util64_toa(ub, ubstrhex, 64, 16);
   1.696 +            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
   1.697 +        }
   1.698 +#endif
   1.699 +        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
   1.700 +            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
   1.701 +                continue;
   1.702 +            }
   1.703 +            Formattable tempResult;
   1.704 +            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
   1.705 +            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
   1.706 +                result = tempResult;
   1.707 +                highWaterMark = workingPos;
   1.708 +            }
   1.709 +            workingPos = pos;
   1.710 +        }
   1.711 +    }
   1.712 +#ifdef RBNF_DEBUG
   1.713 +    fprintf(stderr, "<nfrs> exit\n");
   1.714 +#endif
   1.715 +    // finally, update the parse postion we were passed to point to the
   1.716 +    // first character we didn't use, and return the result that
   1.717 +    // corresponds to that string of characters
   1.718 +    pos = highWaterMark;
   1.719 +
   1.720 +    return 1;
   1.721 +}
   1.722 +
   1.723 +void
   1.724 +NFRuleSet::appendRules(UnicodeString& result) const
   1.725 +{
   1.726 +    // the rule set name goes first...
   1.727 +    result.append(name);
   1.728 +    result.append(gColon);
   1.729 +    result.append(gLineFeed);
   1.730 +
   1.731 +    // followed by the regular rules...
   1.732 +    for (uint32_t i = 0; i < rules.size(); i++) {
   1.733 +        result.append(gFourSpaces, 4);
   1.734 +        rules[i]->_appendRuleText(result);
   1.735 +        result.append(gLineFeed);
   1.736 +    }
   1.737 +
   1.738 +    // followed by the special rules (if they exist)
   1.739 +    if (negativeNumberRule) {
   1.740 +        result.append(gFourSpaces, 4);
   1.741 +        negativeNumberRule->_appendRuleText(result);
   1.742 +        result.append(gLineFeed);
   1.743 +    }
   1.744 +
   1.745 +    {
   1.746 +        for (uint32_t i = 0; i < 3; ++i) {
   1.747 +            if (fractionRules[i]) {
   1.748 +                result.append(gFourSpaces, 4);
   1.749 +                fractionRules[i]->_appendRuleText(result);
   1.750 +                result.append(gLineFeed);
   1.751 +            }
   1.752 +        }
   1.753 +    }
   1.754 +}
   1.755 +
   1.756 +// utility functions
   1.757 +
   1.758 +int64_t util64_fromDouble(double d) {
   1.759 +    int64_t result = 0;
   1.760 +    if (!uprv_isNaN(d)) {
   1.761 +        double mant = uprv_maxMantissa();
   1.762 +        if (d < -mant) {
   1.763 +            d = -mant;
   1.764 +        } else if (d > mant) {
   1.765 +            d = mant;
   1.766 +        }
   1.767 +        UBool neg = d < 0; 
   1.768 +        if (neg) {
   1.769 +            d = -d;
   1.770 +        }
   1.771 +        result = (int64_t)uprv_floor(d);
   1.772 +        if (neg) {
   1.773 +            result = -result;
   1.774 +        }
   1.775 +    }
   1.776 +    return result;
   1.777 +}
   1.778 +
   1.779 +int64_t util64_pow(int32_t r, uint32_t e)  { 
   1.780 +    if (r == 0) {
   1.781 +        return 0;
   1.782 +    } else if (e == 0) {
   1.783 +        return 1;
   1.784 +    } else {
   1.785 +        int64_t n = r;
   1.786 +        while (--e > 0) {
   1.787 +            n *= r;
   1.788 +        }
   1.789 +        return n;
   1.790 +    }
   1.791 +}
   1.792 +
   1.793 +static const uint8_t asciiDigits[] = { 
   1.794 +    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
   1.795 +    0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
   1.796 +    0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
   1.797 +    0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
   1.798 +    0x77u, 0x78u, 0x79u, 0x7au,  
   1.799 +};
   1.800 +
   1.801 +static const UChar kUMinus = (UChar)0x002d;
   1.802 +
   1.803 +#ifdef RBNF_DEBUG
   1.804 +static const char kMinus = '-';
   1.805 +
   1.806 +static const uint8_t digitInfo[] = {
   1.807 +        0,     0,     0,     0,     0,     0,     0,     0,
   1.808 +        0,     0,     0,     0,     0,     0,     0,     0,
   1.809 +        0,     0,     0,     0,     0,     0,     0,     0,
   1.810 +        0,     0,     0,     0,     0,     0,     0,     0,
   1.811 +        0,     0,     0,     0,     0,     0,     0,     0,
   1.812 +        0,     0,     0,     0,     0,     0,     0,     0,
   1.813 +    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
   1.814 +    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
   1.815 +        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
   1.816 +    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
   1.817 +    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
   1.818 +    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
   1.819 +        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
   1.820 +    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
   1.821 +    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
   1.822 +    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
   1.823 +};
   1.824 +
   1.825 +int64_t util64_atoi(const char* str, uint32_t radix)
   1.826 +{
   1.827 +    if (radix > 36) {
   1.828 +        radix = 36;
   1.829 +    } else if (radix < 2) {
   1.830 +        radix = 2;
   1.831 +    }
   1.832 +    int64_t lradix = radix;
   1.833 +
   1.834 +    int neg = 0;
   1.835 +    if (*str == kMinus) {
   1.836 +        ++str;
   1.837 +        neg = 1;
   1.838 +    }
   1.839 +    int64_t result = 0;
   1.840 +    uint8_t b;
   1.841 +    while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
   1.842 +        result *= lradix;
   1.843 +        result += (int32_t)b;
   1.844 +    }
   1.845 +    if (neg) {
   1.846 +        result = -result;
   1.847 +    }
   1.848 +    return result;
   1.849 +}
   1.850 +
   1.851 +int64_t util64_utoi(const UChar* str, uint32_t radix)
   1.852 +{
   1.853 +    if (radix > 36) {
   1.854 +        radix = 36;
   1.855 +    } else if (radix < 2) {
   1.856 +        radix = 2;
   1.857 +    }
   1.858 +    int64_t lradix = radix;
   1.859 +
   1.860 +    int neg = 0;
   1.861 +    if (*str == kUMinus) {
   1.862 +        ++str;
   1.863 +        neg = 1;
   1.864 +    }
   1.865 +    int64_t result = 0;
   1.866 +    UChar c;
   1.867 +    uint8_t b;
   1.868 +    while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
   1.869 +        result *= lradix;
   1.870 +        result += (int32_t)b;
   1.871 +    }
   1.872 +    if (neg) {
   1.873 +        result = -result;
   1.874 +    }
   1.875 +    return result;
   1.876 +}
   1.877 +
   1.878 +uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
   1.879 +{    
   1.880 +    if (radix > 36) {
   1.881 +        radix = 36;
   1.882 +    } else if (radix < 2) {
   1.883 +        radix = 2;
   1.884 +    }
   1.885 +    int64_t base = radix;
   1.886 +
   1.887 +    char* p = buf;
   1.888 +    if (len && (w < 0) && (radix == 10) && !raw) {
   1.889 +        w = -w;
   1.890 +        *p++ = kMinus;
   1.891 +        --len;
   1.892 +    } else if (len && (w == 0)) {
   1.893 +        *p++ = (char)raw ? 0 : asciiDigits[0];
   1.894 +        --len;
   1.895 +    }
   1.896 +
   1.897 +    while (len && w != 0) {
   1.898 +        int64_t n = w / base;
   1.899 +        int64_t m = n * base;
   1.900 +        int32_t d = (int32_t)(w-m);
   1.901 +        *p++ = raw ? (char)d : asciiDigits[d];
   1.902 +        w = n;
   1.903 +        --len;
   1.904 +    }
   1.905 +    if (len) {
   1.906 +        *p = 0; // null terminate if room for caller convenience
   1.907 +    }
   1.908 +
   1.909 +    len = p - buf;
   1.910 +    if (*buf == kMinus) {
   1.911 +        ++buf;
   1.912 +    }
   1.913 +    while (--p > buf) {
   1.914 +        char c = *p;
   1.915 +        *p = *buf;
   1.916 +        *buf = c;
   1.917 +        ++buf;
   1.918 +    }
   1.919 +
   1.920 +    return len;
   1.921 +}
   1.922 +#endif
   1.923 +
   1.924 +uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
   1.925 +{    
   1.926 +    if (radix > 36) {
   1.927 +        radix = 36;
   1.928 +    } else if (radix < 2) {
   1.929 +        radix = 2;
   1.930 +    }
   1.931 +    int64_t base = radix;
   1.932 +
   1.933 +    UChar* p = buf;
   1.934 +    if (len && (w < 0) && (radix == 10) && !raw) {
   1.935 +        w = -w;
   1.936 +        *p++ = kUMinus;
   1.937 +        --len;
   1.938 +    } else if (len && (w == 0)) {
   1.939 +        *p++ = (UChar)raw ? 0 : asciiDigits[0];
   1.940 +        --len;
   1.941 +    }
   1.942 +
   1.943 +    while (len && (w != 0)) {
   1.944 +        int64_t n = w / base;
   1.945 +        int64_t m = n * base;
   1.946 +        int32_t d = (int32_t)(w-m);
   1.947 +        *p++ = (UChar)(raw ? d : asciiDigits[d]);
   1.948 +        w = n;
   1.949 +        --len;
   1.950 +    }
   1.951 +    if (len) {
   1.952 +        *p = 0; // null terminate if room for caller convenience
   1.953 +    }
   1.954 +
   1.955 +    len = (uint32_t)(p - buf);
   1.956 +    if (*buf == kUMinus) {
   1.957 +        ++buf;
   1.958 +    }
   1.959 +    while (--p > buf) {
   1.960 +        UChar c = *p;
   1.961 +        *p = *buf;
   1.962 +        *buf = c;
   1.963 +        ++buf;
   1.964 +    }
   1.965 +
   1.966 +    return len;
   1.967 +}
   1.968 +
   1.969 +
   1.970 +U_NAMESPACE_END
   1.971 +
   1.972 +/* U_HAVE_RBNF */
   1.973 +#endif
   1.974 +

mercurial