1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/nfrs.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,971 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* Copyright (C) 1997-2012, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +****************************************************************************** 1.9 +* file name: nfrs.cpp 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* Modification history 1.15 +* Date Name Comments 1.16 +* 10/11/2001 Doug Ported from ICU4J 1.17 +*/ 1.18 + 1.19 +#include "nfrs.h" 1.20 + 1.21 +#if U_HAVE_RBNF 1.22 + 1.23 +#include "unicode/uchar.h" 1.24 +#include "nfrule.h" 1.25 +#include "nfrlist.h" 1.26 +#include "patternprops.h" 1.27 + 1.28 +#ifdef RBNF_DEBUG 1.29 +#include "cmemory.h" 1.30 +#endif 1.31 + 1.32 +U_NAMESPACE_BEGIN 1.33 + 1.34 +#if 0 1.35 +// euclid's algorithm works with doubles 1.36 +// note, doubles only get us up to one quadrillion or so, which 1.37 +// isn't as much range as we get with longs. We probably still 1.38 +// want either 64-bit math, or BigInteger. 1.39 + 1.40 +static int64_t 1.41 +util_lcm(int64_t x, int64_t y) 1.42 +{ 1.43 + x.abs(); 1.44 + y.abs(); 1.45 + 1.46 + if (x == 0 || y == 0) { 1.47 + return 0; 1.48 + } else { 1.49 + do { 1.50 + if (x < y) { 1.51 + int64_t t = x; x = y; y = t; 1.52 + } 1.53 + x -= y * (x/y); 1.54 + } while (x != 0); 1.55 + 1.56 + return y; 1.57 + } 1.58 +} 1.59 + 1.60 +#else 1.61 +/** 1.62 + * Calculates the least common multiple of x and y. 1.63 + */ 1.64 +static int64_t 1.65 +util_lcm(int64_t x, int64_t y) 1.66 +{ 1.67 + // binary gcd algorithm from Knuth, "The Art of Computer Programming," 1.68 + // vol. 2, 1st ed., pp. 298-299 1.69 + int64_t x1 = x; 1.70 + int64_t y1 = y; 1.71 + 1.72 + int p2 = 0; 1.73 + while ((x1 & 1) == 0 && (y1 & 1) == 0) { 1.74 + ++p2; 1.75 + x1 >>= 1; 1.76 + y1 >>= 1; 1.77 + } 1.78 + 1.79 + int64_t t; 1.80 + if ((x1 & 1) == 1) { 1.81 + t = -y1; 1.82 + } else { 1.83 + t = x1; 1.84 + } 1.85 + 1.86 + while (t != 0) { 1.87 + while ((t & 1) == 0) { 1.88 + t = t >> 1; 1.89 + } 1.90 + if (t > 0) { 1.91 + x1 = t; 1.92 + } else { 1.93 + y1 = -t; 1.94 + } 1.95 + t = x1 - y1; 1.96 + } 1.97 + 1.98 + int64_t gcd = x1 << p2; 1.99 + 1.100 + // x * y == gcd(x, y) * lcm(x, y) 1.101 + return x / gcd * y; 1.102 +} 1.103 +#endif 1.104 + 1.105 +static const UChar gPercent = 0x0025; 1.106 +static const UChar gColon = 0x003a; 1.107 +static const UChar gSemicolon = 0x003b; 1.108 +static const UChar gLineFeed = 0x000a; 1.109 + 1.110 +static const UChar gFourSpaces[] = 1.111 +{ 1.112 + 0x20, 0x20, 0x20, 0x20, 0 1.113 +}; /* " " */ 1.114 +static const UChar gPercentPercent[] = 1.115 +{ 1.116 + 0x25, 0x25, 0 1.117 +}; /* "%%" */ 1.118 + 1.119 +static const UChar gNoparse[] = 1.120 +{ 1.121 + 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0 1.122 +}; /* "@noparse" */ 1.123 + 1.124 +NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status) 1.125 + : name() 1.126 + , rules(0) 1.127 + , negativeNumberRule(NULL) 1.128 + , fIsFractionRuleSet(FALSE) 1.129 + , fIsPublic(FALSE) 1.130 + , fIsParseable(TRUE) 1.131 + , fRecursionCount(0) 1.132 +{ 1.133 + for (int i = 0; i < 3; ++i) { 1.134 + fractionRules[i] = NULL; 1.135 + } 1.136 + 1.137 + if (U_FAILURE(status)) { 1.138 + return; 1.139 + } 1.140 + 1.141 + UnicodeString& description = descriptions[index]; // !!! make sure index is valid 1.142 + 1.143 + if (description.length() == 0) { 1.144 + // throw new IllegalArgumentException("Empty rule set description"); 1.145 + status = U_PARSE_ERROR; 1.146 + return; 1.147 + } 1.148 + 1.149 + // if the description begins with a rule set name (the rule set 1.150 + // name can be omitted in formatter descriptions that consist 1.151 + // of only one rule set), copy it out into our "name" member 1.152 + // and delete it from the description 1.153 + if (description.charAt(0) == gPercent) { 1.154 + int32_t pos = description.indexOf(gColon); 1.155 + if (pos == -1) { 1.156 + // throw new IllegalArgumentException("Rule set name doesn't end in colon"); 1.157 + status = U_PARSE_ERROR; 1.158 + } else { 1.159 + name.setTo(description, 0, pos); 1.160 + while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { 1.161 + } 1.162 + description.remove(0, pos); 1.163 + } 1.164 + } else { 1.165 + name.setTo(UNICODE_STRING_SIMPLE("%default")); 1.166 + } 1.167 + 1.168 + if (description.length() == 0) { 1.169 + // throw new IllegalArgumentException("Empty rule set description"); 1.170 + status = U_PARSE_ERROR; 1.171 + } 1.172 + 1.173 + fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; 1.174 + 1.175 + if ( name.endsWith(gNoparse,8) ) { 1.176 + fIsParseable = FALSE; 1.177 + name.truncate(name.length()-8); // remove the @noparse from the name 1.178 + } 1.179 + 1.180 + // all of the other members of NFRuleSet are initialized 1.181 + // by parseRules() 1.182 +} 1.183 + 1.184 +void 1.185 +NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status) 1.186 +{ 1.187 + // start by creating a Vector whose elements are Strings containing 1.188 + // the descriptions of the rules (one rule per element). The rules 1.189 + // are separated by semicolons (there's no escape facility: ALL 1.190 + // semicolons are rule delimiters) 1.191 + 1.192 + if (U_FAILURE(status)) { 1.193 + return; 1.194 + } 1.195 + 1.196 + // ensure we are starting with an empty rule list 1.197 + rules.deleteAll(); 1.198 + 1.199 + // dlf - the original code kept a separate description array for no reason, 1.200 + // so I got rid of it. The loop was too complex so I simplified it. 1.201 + 1.202 + UnicodeString currentDescription; 1.203 + int32_t oldP = 0; 1.204 + while (oldP < description.length()) { 1.205 + int32_t p = description.indexOf(gSemicolon, oldP); 1.206 + if (p == -1) { 1.207 + p = description.length(); 1.208 + } 1.209 + currentDescription.setTo(description, oldP, p - oldP); 1.210 + NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); 1.211 + oldP = p + 1; 1.212 + } 1.213 + 1.214 + // for rules that didn't specify a base value, their base values 1.215 + // were initialized to 0. Make another pass through the list and 1.216 + // set all those rules' base values. We also remove any special 1.217 + // rules from the list and put them into their own member variables 1.218 + int64_t defaultBaseValue = 0; 1.219 + 1.220 + // (this isn't a for loop because we might be deleting items from 1.221 + // the vector-- we want to make sure we only increment i when 1.222 + // we _didn't_ delete aything from the vector) 1.223 + uint32_t i = 0; 1.224 + while (i < rules.size()) { 1.225 + NFRule* rule = rules[i]; 1.226 + 1.227 + switch (rule->getType()) { 1.228 + // if the rule's base value is 0, fill in a default 1.229 + // base value (this will be 1 plus the preceding 1.230 + // rule's base value for regular rule sets, and the 1.231 + // same as the preceding rule's base value in fraction 1.232 + // rule sets) 1.233 + case NFRule::kNoBase: 1.234 + rule->setBaseValue(defaultBaseValue, status); 1.235 + if (!isFractionRuleSet()) { 1.236 + ++defaultBaseValue; 1.237 + } 1.238 + ++i; 1.239 + break; 1.240 + 1.241 + // if it's the negative-number rule, copy it into its own 1.242 + // data member and delete it from the list 1.243 + case NFRule::kNegativeNumberRule: 1.244 + if (negativeNumberRule) { 1.245 + delete negativeNumberRule; 1.246 + } 1.247 + negativeNumberRule = rules.remove(i); 1.248 + break; 1.249 + 1.250 + // if it's the improper fraction rule, copy it into the 1.251 + // correct element of fractionRules 1.252 + case NFRule::kImproperFractionRule: 1.253 + if (fractionRules[0]) { 1.254 + delete fractionRules[0]; 1.255 + } 1.256 + fractionRules[0] = rules.remove(i); 1.257 + break; 1.258 + 1.259 + // if it's the proper fraction rule, copy it into the 1.260 + // correct element of fractionRules 1.261 + case NFRule::kProperFractionRule: 1.262 + if (fractionRules[1]) { 1.263 + delete fractionRules[1]; 1.264 + } 1.265 + fractionRules[1] = rules.remove(i); 1.266 + break; 1.267 + 1.268 + // if it's the master rule, copy it into the 1.269 + // correct element of fractionRules 1.270 + case NFRule::kMasterRule: 1.271 + if (fractionRules[2]) { 1.272 + delete fractionRules[2]; 1.273 + } 1.274 + fractionRules[2] = rules.remove(i); 1.275 + break; 1.276 + 1.277 + // if it's a regular rule that already knows its base value, 1.278 + // check to make sure the rules are in order, and update 1.279 + // the default base value for the next rule 1.280 + default: 1.281 + if (rule->getBaseValue() < defaultBaseValue) { 1.282 + // throw new IllegalArgumentException("Rules are not in order"); 1.283 + status = U_PARSE_ERROR; 1.284 + return; 1.285 + } 1.286 + defaultBaseValue = rule->getBaseValue(); 1.287 + if (!isFractionRuleSet()) { 1.288 + ++defaultBaseValue; 1.289 + } 1.290 + ++i; 1.291 + break; 1.292 + } 1.293 + } 1.294 +} 1.295 + 1.296 +NFRuleSet::~NFRuleSet() 1.297 +{ 1.298 + delete negativeNumberRule; 1.299 + delete fractionRules[0]; 1.300 + delete fractionRules[1]; 1.301 + delete fractionRules[2]; 1.302 +} 1.303 + 1.304 +static UBool 1.305 +util_equalRules(const NFRule* rule1, const NFRule* rule2) 1.306 +{ 1.307 + if (rule1) { 1.308 + if (rule2) { 1.309 + return *rule1 == *rule2; 1.310 + } 1.311 + } else if (!rule2) { 1.312 + return TRUE; 1.313 + } 1.314 + return FALSE; 1.315 +} 1.316 + 1.317 +UBool 1.318 +NFRuleSet::operator==(const NFRuleSet& rhs) const 1.319 +{ 1.320 + if (rules.size() == rhs.rules.size() && 1.321 + fIsFractionRuleSet == rhs.fIsFractionRuleSet && 1.322 + name == rhs.name && 1.323 + util_equalRules(negativeNumberRule, rhs.negativeNumberRule) && 1.324 + util_equalRules(fractionRules[0], rhs.fractionRules[0]) && 1.325 + util_equalRules(fractionRules[1], rhs.fractionRules[1]) && 1.326 + util_equalRules(fractionRules[2], rhs.fractionRules[2])) { 1.327 + 1.328 + for (uint32_t i = 0; i < rules.size(); ++i) { 1.329 + if (*rules[i] != *rhs.rules[i]) { 1.330 + return FALSE; 1.331 + } 1.332 + } 1.333 + return TRUE; 1.334 + } 1.335 + return FALSE; 1.336 +} 1.337 + 1.338 +#define RECURSION_LIMIT 50 1.339 + 1.340 +void 1.341 +NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const 1.342 +{ 1.343 + NFRule *rule = findNormalRule(number); 1.344 + if (rule) { // else error, but can't report it 1.345 + NFRuleSet* ncThis = (NFRuleSet*)this; 1.346 + if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { 1.347 + // stop recursion 1.348 + ncThis->fRecursionCount = 0; 1.349 + } else { 1.350 + rule->doFormat(number, toAppendTo, pos); 1.351 + ncThis->fRecursionCount--; 1.352 + } 1.353 + } 1.354 +} 1.355 + 1.356 +void 1.357 +NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const 1.358 +{ 1.359 + NFRule *rule = findDoubleRule(number); 1.360 + if (rule) { // else error, but can't report it 1.361 + NFRuleSet* ncThis = (NFRuleSet*)this; 1.362 + if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { 1.363 + // stop recursion 1.364 + ncThis->fRecursionCount = 0; 1.365 + } else { 1.366 + rule->doFormat(number, toAppendTo, pos); 1.367 + ncThis->fRecursionCount--; 1.368 + } 1.369 + } 1.370 +} 1.371 + 1.372 +NFRule* 1.373 +NFRuleSet::findDoubleRule(double number) const 1.374 +{ 1.375 + // if this is a fraction rule set, use findFractionRuleSetRule() 1.376 + if (isFractionRuleSet()) { 1.377 + return findFractionRuleSetRule(number); 1.378 + } 1.379 + 1.380 + // if the number is negative, return the negative number rule 1.381 + // (if there isn't a negative-number rule, we pretend it's a 1.382 + // positive number) 1.383 + if (number < 0) { 1.384 + if (negativeNumberRule) { 1.385 + return negativeNumberRule; 1.386 + } else { 1.387 + number = -number; 1.388 + } 1.389 + } 1.390 + 1.391 + // if the number isn't an integer, we use one of the fraction rules... 1.392 + if (number != uprv_floor(number)) { 1.393 + // if the number is between 0 and 1, return the proper 1.394 + // fraction rule 1.395 + if (number < 1 && fractionRules[1]) { 1.396 + return fractionRules[1]; 1.397 + } 1.398 + // otherwise, return the improper fraction rule 1.399 + else if (fractionRules[0]) { 1.400 + return fractionRules[0]; 1.401 + } 1.402 + } 1.403 + 1.404 + // if there's a master rule, use it to format the number 1.405 + if (fractionRules[2]) { 1.406 + return fractionRules[2]; 1.407 + } 1.408 + 1.409 + // and if we haven't yet returned a rule, use findNormalRule() 1.410 + // to find the applicable rule 1.411 + int64_t r = util64_fromDouble(number + 0.5); 1.412 + return findNormalRule(r); 1.413 +} 1.414 + 1.415 +NFRule * 1.416 +NFRuleSet::findNormalRule(int64_t number) const 1.417 +{ 1.418 + // if this is a fraction rule set, use findFractionRuleSetRule() 1.419 + // to find the rule (we should only go into this clause if the 1.420 + // value is 0) 1.421 + if (fIsFractionRuleSet) { 1.422 + return findFractionRuleSetRule((double)number); 1.423 + } 1.424 + 1.425 + // if the number is negative, return the negative-number rule 1.426 + // (if there isn't one, pretend the number is positive) 1.427 + if (number < 0) { 1.428 + if (negativeNumberRule) { 1.429 + return negativeNumberRule; 1.430 + } else { 1.431 + number = -number; 1.432 + } 1.433 + } 1.434 + 1.435 + // we have to repeat the preceding two checks, even though we 1.436 + // do them in findRule(), because the version of format() that 1.437 + // takes a long bypasses findRule() and goes straight to this 1.438 + // function. This function does skip the fraction rules since 1.439 + // we know the value is an integer (it also skips the master 1.440 + // rule, since it's considered a fraction rule. Skipping the 1.441 + // master rule in this function is also how we avoid infinite 1.442 + // recursion) 1.443 + 1.444 + // {dlf} unfortunately this fails if there are no rules except 1.445 + // special rules. If there are no rules, use the master rule. 1.446 + 1.447 + // binary-search the rule list for the applicable rule 1.448 + // (a rule is used for all values from its base value to 1.449 + // the next rule's base value) 1.450 + int32_t hi = rules.size(); 1.451 + if (hi > 0) { 1.452 + int32_t lo = 0; 1.453 + 1.454 + while (lo < hi) { 1.455 + int32_t mid = (lo + hi) / 2; 1.456 + if (rules[mid]->getBaseValue() == number) { 1.457 + return rules[mid]; 1.458 + } 1.459 + else if (rules[mid]->getBaseValue() > number) { 1.460 + hi = mid; 1.461 + } 1.462 + else { 1.463 + lo = mid + 1; 1.464 + } 1.465 + } 1.466 + if (hi == 0) { // bad rule set, minimum base > 0 1.467 + return NULL; // want to throw exception here 1.468 + } 1.469 + 1.470 + NFRule *result = rules[hi - 1]; 1.471 + 1.472 + // use shouldRollBack() to see whether we need to invoke the 1.473 + // rollback rule (see shouldRollBack()'s documentation for 1.474 + // an explanation of the rollback rule). If we do, roll back 1.475 + // one rule and return that one instead of the one we'd normally 1.476 + // return 1.477 + if (result->shouldRollBack((double)number)) { 1.478 + if (hi == 1) { // bad rule set, no prior rule to rollback to from this base 1.479 + return NULL; 1.480 + } 1.481 + result = rules[hi - 2]; 1.482 + } 1.483 + return result; 1.484 + } 1.485 + // else use the master rule 1.486 + return fractionRules[2]; 1.487 +} 1.488 + 1.489 +/** 1.490 + * If this rule is a fraction rule set, this function is used by 1.491 + * findRule() to select the most appropriate rule for formatting 1.492 + * the number. Basically, the base value of each rule in the rule 1.493 + * set is treated as the denominator of a fraction. Whichever 1.494 + * denominator can produce the fraction closest in value to the 1.495 + * number passed in is the result. If there's a tie, the earlier 1.496 + * one in the list wins. (If there are two rules in a row with the 1.497 + * same base value, the first one is used when the numerator of the 1.498 + * fraction would be 1, and the second rule is used the rest of the 1.499 + * time. 1.500 + * @param number The number being formatted (which will always be 1.501 + * a number between 0 and 1) 1.502 + * @return The rule to use to format this number 1.503 + */ 1.504 +NFRule* 1.505 +NFRuleSet::findFractionRuleSetRule(double number) const 1.506 +{ 1.507 + // the obvious way to do this (multiply the value being formatted 1.508 + // by each rule's base value until you get an integral result) 1.509 + // doesn't work because of rounding error. This method is more 1.510 + // accurate 1.511 + 1.512 + // find the least common multiple of the rules' base values 1.513 + // and multiply this by the number being formatted. This is 1.514 + // all the precision we need, and we can do all of the rest 1.515 + // of the math using integer arithmetic 1.516 + int64_t leastCommonMultiple = rules[0]->getBaseValue(); 1.517 + int64_t numerator; 1.518 + { 1.519 + for (uint32_t i = 1; i < rules.size(); ++i) { 1.520 + leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); 1.521 + } 1.522 + numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); 1.523 + } 1.524 + // for each rule, do the following... 1.525 + int64_t tempDifference; 1.526 + int64_t difference = util64_fromDouble(uprv_maxMantissa()); 1.527 + int32_t winner = 0; 1.528 + for (uint32_t i = 0; i < rules.size(); ++i) { 1.529 + // "numerator" is the numerator of the fraction if the 1.530 + // denominator is the LCD. The numerator if the rule's 1.531 + // base value is the denominator is "numerator" times the 1.532 + // base value divided bythe LCD. Here we check to see if 1.533 + // that's an integer, and if not, how close it is to being 1.534 + // an integer. 1.535 + tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; 1.536 + 1.537 + 1.538 + // normalize the result of the above calculation: we want 1.539 + // the numerator's distance from the CLOSEST multiple 1.540 + // of the LCD 1.541 + if (leastCommonMultiple - tempDifference < tempDifference) { 1.542 + tempDifference = leastCommonMultiple - tempDifference; 1.543 + } 1.544 + 1.545 + // if this is as close as we've come, keep track of how close 1.546 + // that is, and the line number of the rule that did it. If 1.547 + // we've scored a direct hit, we don't have to look at any more 1.548 + // rules 1.549 + if (tempDifference < difference) { 1.550 + difference = tempDifference; 1.551 + winner = i; 1.552 + if (difference == 0) { 1.553 + break; 1.554 + } 1.555 + } 1.556 + } 1.557 + 1.558 + // if we have two successive rules that both have the winning base 1.559 + // value, then the first one (the one we found above) is used if 1.560 + // the numerator of the fraction is 1 and the second one is used if 1.561 + // the numerator of the fraction is anything else (this lets us 1.562 + // do things like "one third"/"two thirds" without haveing to define 1.563 + // a whole bunch of extra rule sets) 1.564 + if ((unsigned)(winner + 1) < rules.size() && 1.565 + rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { 1.566 + double n = ((double)rules[winner]->getBaseValue()) * number; 1.567 + if (n < 0.5 || n >= 2) { 1.568 + ++winner; 1.569 + } 1.570 + } 1.571 + 1.572 + // finally, return the winning rule 1.573 + return rules[winner]; 1.574 +} 1.575 + 1.576 +/** 1.577 + * Parses a string. Matches the string to be parsed against each 1.578 + * of its rules (with a base value less than upperBound) and returns 1.579 + * the value produced by the rule that matched the most charcters 1.580 + * in the source string. 1.581 + * @param text The string to parse 1.582 + * @param parsePosition The initial position is ignored and assumed 1.583 + * to be 0. On exit, this object has been updated to point to the 1.584 + * first character position this rule set didn't consume. 1.585 + * @param upperBound Limits the rules that can be allowed to match. 1.586 + * Only rules whose base values are strictly less than upperBound 1.587 + * are considered. 1.588 + * @return The numerical result of parsing this string. This will 1.589 + * be the matching rule's base value, composed appropriately with 1.590 + * the results of matching any of its substitutions. The object 1.591 + * will be an instance of Long if it's an integral value; otherwise, 1.592 + * it will be an instance of Double. This function always returns 1.593 + * a valid object: If nothing matched the input string at all, 1.594 + * this function returns new Long(0), and the parse position is 1.595 + * left unchanged. 1.596 + */ 1.597 +#ifdef RBNF_DEBUG 1.598 +#include <stdio.h> 1.599 + 1.600 +static void dumpUS(FILE* f, const UnicodeString& us) { 1.601 + int len = us.length(); 1.602 + char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; 1.603 + if (buf != NULL) { 1.604 + us.extract(0, len, buf); 1.605 + buf[len] = 0; 1.606 + fprintf(f, "%s", buf); 1.607 + uprv_free(buf); //delete[] buf; 1.608 + } 1.609 +} 1.610 +#endif 1.611 + 1.612 +UBool 1.613 +NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const 1.614 +{ 1.615 + // try matching each rule in the rule set against the text being 1.616 + // parsed. Whichever one matches the most characters is the one 1.617 + // that determines the value we return. 1.618 + 1.619 + result.setLong(0); 1.620 + 1.621 + // dump out if there's no text to parse 1.622 + if (text.length() == 0) { 1.623 + return 0; 1.624 + } 1.625 + 1.626 + ParsePosition highWaterMark; 1.627 + ParsePosition workingPos = pos; 1.628 + 1.629 +#ifdef RBNF_DEBUG 1.630 + fprintf(stderr, "<nfrs> %x '", this); 1.631 + dumpUS(stderr, name); 1.632 + fprintf(stderr, "' text '"); 1.633 + dumpUS(stderr, text); 1.634 + fprintf(stderr, "'\n"); 1.635 + fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); 1.636 +#endif 1.637 + 1.638 + // start by trying the negative number rule (if there is one) 1.639 + if (negativeNumberRule) { 1.640 + Formattable tempResult; 1.641 +#ifdef RBNF_DEBUG 1.642 + fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound); 1.643 +#endif 1.644 + UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult); 1.645 +#ifdef RBNF_DEBUG 1.646 + fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex()); 1.647 +#endif 1.648 + if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 1.649 + result = tempResult; 1.650 + highWaterMark = workingPos; 1.651 + } 1.652 + workingPos = pos; 1.653 + } 1.654 +#ifdef RBNF_DEBUG 1.655 + fprintf(stderr, "<nfrs> continue fractional with text '"); 1.656 + dumpUS(stderr, text); 1.657 + fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 1.658 +#endif 1.659 + // then try each of the fraction rules 1.660 + { 1.661 + for (int i = 0; i < 3; i++) { 1.662 + if (fractionRules[i]) { 1.663 + Formattable tempResult; 1.664 + UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); 1.665 + if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { 1.666 + result = tempResult; 1.667 + highWaterMark = workingPos; 1.668 + } 1.669 + workingPos = pos; 1.670 + } 1.671 + } 1.672 + } 1.673 +#ifdef RBNF_DEBUG 1.674 + fprintf(stderr, "<nfrs> continue other with text '"); 1.675 + dumpUS(stderr, text); 1.676 + fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 1.677 +#endif 1.678 + 1.679 + // finally, go through the regular rules one at a time. We start 1.680 + // at the end of the list because we want to try matching the most 1.681 + // sigificant rule first (this helps ensure that we parse 1.682 + // "five thousand three hundred six" as 1.683 + // "(five thousand) (three hundred) (six)" rather than 1.684 + // "((five thousand three) hundred) (six)"). Skip rules whose 1.685 + // base values are higher than the upper bound (again, this helps 1.686 + // limit ambiguity by making sure the rules that match a rule's 1.687 + // are less significant than the rule containing the substitutions)/ 1.688 + { 1.689 + int64_t ub = util64_fromDouble(upperBound); 1.690 +#ifdef RBNF_DEBUG 1.691 + { 1.692 + char ubstr[64]; 1.693 + util64_toa(ub, ubstr, 64); 1.694 + char ubstrhex[64]; 1.695 + util64_toa(ub, ubstrhex, 64, 16); 1.696 + fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); 1.697 + } 1.698 +#endif 1.699 + for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { 1.700 + if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { 1.701 + continue; 1.702 + } 1.703 + Formattable tempResult; 1.704 + UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); 1.705 + if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 1.706 + result = tempResult; 1.707 + highWaterMark = workingPos; 1.708 + } 1.709 + workingPos = pos; 1.710 + } 1.711 + } 1.712 +#ifdef RBNF_DEBUG 1.713 + fprintf(stderr, "<nfrs> exit\n"); 1.714 +#endif 1.715 + // finally, update the parse postion we were passed to point to the 1.716 + // first character we didn't use, and return the result that 1.717 + // corresponds to that string of characters 1.718 + pos = highWaterMark; 1.719 + 1.720 + return 1; 1.721 +} 1.722 + 1.723 +void 1.724 +NFRuleSet::appendRules(UnicodeString& result) const 1.725 +{ 1.726 + // the rule set name goes first... 1.727 + result.append(name); 1.728 + result.append(gColon); 1.729 + result.append(gLineFeed); 1.730 + 1.731 + // followed by the regular rules... 1.732 + for (uint32_t i = 0; i < rules.size(); i++) { 1.733 + result.append(gFourSpaces, 4); 1.734 + rules[i]->_appendRuleText(result); 1.735 + result.append(gLineFeed); 1.736 + } 1.737 + 1.738 + // followed by the special rules (if they exist) 1.739 + if (negativeNumberRule) { 1.740 + result.append(gFourSpaces, 4); 1.741 + negativeNumberRule->_appendRuleText(result); 1.742 + result.append(gLineFeed); 1.743 + } 1.744 + 1.745 + { 1.746 + for (uint32_t i = 0; i < 3; ++i) { 1.747 + if (fractionRules[i]) { 1.748 + result.append(gFourSpaces, 4); 1.749 + fractionRules[i]->_appendRuleText(result); 1.750 + result.append(gLineFeed); 1.751 + } 1.752 + } 1.753 + } 1.754 +} 1.755 + 1.756 +// utility functions 1.757 + 1.758 +int64_t util64_fromDouble(double d) { 1.759 + int64_t result = 0; 1.760 + if (!uprv_isNaN(d)) { 1.761 + double mant = uprv_maxMantissa(); 1.762 + if (d < -mant) { 1.763 + d = -mant; 1.764 + } else if (d > mant) { 1.765 + d = mant; 1.766 + } 1.767 + UBool neg = d < 0; 1.768 + if (neg) { 1.769 + d = -d; 1.770 + } 1.771 + result = (int64_t)uprv_floor(d); 1.772 + if (neg) { 1.773 + result = -result; 1.774 + } 1.775 + } 1.776 + return result; 1.777 +} 1.778 + 1.779 +int64_t util64_pow(int32_t r, uint32_t e) { 1.780 + if (r == 0) { 1.781 + return 0; 1.782 + } else if (e == 0) { 1.783 + return 1; 1.784 + } else { 1.785 + int64_t n = r; 1.786 + while (--e > 0) { 1.787 + n *= r; 1.788 + } 1.789 + return n; 1.790 + } 1.791 +} 1.792 + 1.793 +static const uint8_t asciiDigits[] = { 1.794 + 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 1.795 + 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 1.796 + 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, 1.797 + 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 1.798 + 0x77u, 0x78u, 0x79u, 0x7au, 1.799 +}; 1.800 + 1.801 +static const UChar kUMinus = (UChar)0x002d; 1.802 + 1.803 +#ifdef RBNF_DEBUG 1.804 +static const char kMinus = '-'; 1.805 + 1.806 +static const uint8_t digitInfo[] = { 1.807 + 0, 0, 0, 0, 0, 0, 0, 0, 1.808 + 0, 0, 0, 0, 0, 0, 0, 0, 1.809 + 0, 0, 0, 0, 0, 0, 0, 0, 1.810 + 0, 0, 0, 0, 0, 0, 0, 0, 1.811 + 0, 0, 0, 0, 0, 0, 0, 0, 1.812 + 0, 0, 0, 0, 0, 0, 0, 0, 1.813 + 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, 1.814 + 0x88u, 0x89u, 0, 0, 0, 0, 0, 0, 1.815 + 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 1.816 + 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 1.817 + 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 1.818 + 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 1.819 + 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 1.820 + 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 1.821 + 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 1.822 + 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 1.823 +}; 1.824 + 1.825 +int64_t util64_atoi(const char* str, uint32_t radix) 1.826 +{ 1.827 + if (radix > 36) { 1.828 + radix = 36; 1.829 + } else if (radix < 2) { 1.830 + radix = 2; 1.831 + } 1.832 + int64_t lradix = radix; 1.833 + 1.834 + int neg = 0; 1.835 + if (*str == kMinus) { 1.836 + ++str; 1.837 + neg = 1; 1.838 + } 1.839 + int64_t result = 0; 1.840 + uint8_t b; 1.841 + while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { 1.842 + result *= lradix; 1.843 + result += (int32_t)b; 1.844 + } 1.845 + if (neg) { 1.846 + result = -result; 1.847 + } 1.848 + return result; 1.849 +} 1.850 + 1.851 +int64_t util64_utoi(const UChar* str, uint32_t radix) 1.852 +{ 1.853 + if (radix > 36) { 1.854 + radix = 36; 1.855 + } else if (radix < 2) { 1.856 + radix = 2; 1.857 + } 1.858 + int64_t lradix = radix; 1.859 + 1.860 + int neg = 0; 1.861 + if (*str == kUMinus) { 1.862 + ++str; 1.863 + neg = 1; 1.864 + } 1.865 + int64_t result = 0; 1.866 + UChar c; 1.867 + uint8_t b; 1.868 + while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { 1.869 + result *= lradix; 1.870 + result += (int32_t)b; 1.871 + } 1.872 + if (neg) { 1.873 + result = -result; 1.874 + } 1.875 + return result; 1.876 +} 1.877 + 1.878 +uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) 1.879 +{ 1.880 + if (radix > 36) { 1.881 + radix = 36; 1.882 + } else if (radix < 2) { 1.883 + radix = 2; 1.884 + } 1.885 + int64_t base = radix; 1.886 + 1.887 + char* p = buf; 1.888 + if (len && (w < 0) && (radix == 10) && !raw) { 1.889 + w = -w; 1.890 + *p++ = kMinus; 1.891 + --len; 1.892 + } else if (len && (w == 0)) { 1.893 + *p++ = (char)raw ? 0 : asciiDigits[0]; 1.894 + --len; 1.895 + } 1.896 + 1.897 + while (len && w != 0) { 1.898 + int64_t n = w / base; 1.899 + int64_t m = n * base; 1.900 + int32_t d = (int32_t)(w-m); 1.901 + *p++ = raw ? (char)d : asciiDigits[d]; 1.902 + w = n; 1.903 + --len; 1.904 + } 1.905 + if (len) { 1.906 + *p = 0; // null terminate if room for caller convenience 1.907 + } 1.908 + 1.909 + len = p - buf; 1.910 + if (*buf == kMinus) { 1.911 + ++buf; 1.912 + } 1.913 + while (--p > buf) { 1.914 + char c = *p; 1.915 + *p = *buf; 1.916 + *buf = c; 1.917 + ++buf; 1.918 + } 1.919 + 1.920 + return len; 1.921 +} 1.922 +#endif 1.923 + 1.924 +uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) 1.925 +{ 1.926 + if (radix > 36) { 1.927 + radix = 36; 1.928 + } else if (radix < 2) { 1.929 + radix = 2; 1.930 + } 1.931 + int64_t base = radix; 1.932 + 1.933 + UChar* p = buf; 1.934 + if (len && (w < 0) && (radix == 10) && !raw) { 1.935 + w = -w; 1.936 + *p++ = kUMinus; 1.937 + --len; 1.938 + } else if (len && (w == 0)) { 1.939 + *p++ = (UChar)raw ? 0 : asciiDigits[0]; 1.940 + --len; 1.941 + } 1.942 + 1.943 + while (len && (w != 0)) { 1.944 + int64_t n = w / base; 1.945 + int64_t m = n * base; 1.946 + int32_t d = (int32_t)(w-m); 1.947 + *p++ = (UChar)(raw ? d : asciiDigits[d]); 1.948 + w = n; 1.949 + --len; 1.950 + } 1.951 + if (len) { 1.952 + *p = 0; // null terminate if room for caller convenience 1.953 + } 1.954 + 1.955 + len = (uint32_t)(p - buf); 1.956 + if (*buf == kUMinus) { 1.957 + ++buf; 1.958 + } 1.959 + while (--p > buf) { 1.960 + UChar c = *p; 1.961 + *p = *buf; 1.962 + *buf = c; 1.963 + ++buf; 1.964 + } 1.965 + 1.966 + return len; 1.967 +} 1.968 + 1.969 + 1.970 +U_NAMESPACE_END 1.971 + 1.972 +/* U_HAVE_RBNF */ 1.973 +#endif 1.974 +