intl/icu/source/i18n/nfrs.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 * Copyright (C) 1997-2012, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 ******************************************************************************
michael@0 6 * file name: nfrs.cpp
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * Modification history
michael@0 12 * Date Name Comments
michael@0 13 * 10/11/2001 Doug Ported from ICU4J
michael@0 14 */
michael@0 15
michael@0 16 #include "nfrs.h"
michael@0 17
michael@0 18 #if U_HAVE_RBNF
michael@0 19
michael@0 20 #include "unicode/uchar.h"
michael@0 21 #include "nfrule.h"
michael@0 22 #include "nfrlist.h"
michael@0 23 #include "patternprops.h"
michael@0 24
michael@0 25 #ifdef RBNF_DEBUG
michael@0 26 #include "cmemory.h"
michael@0 27 #endif
michael@0 28
michael@0 29 U_NAMESPACE_BEGIN
michael@0 30
michael@0 31 #if 0
// Euclid's algorithm (disabled alternative to the binary-GCD version in
// the #else branch).
// note, doubles only get us up to one quadrillion or so, which
// isn't as much range as we get with longs.  We probably still
// want either 64-bit math, or BigInteger.

/**
 * Calculates the least common multiple of x and y.
 *
 * The signs of the operands are ignored.  If either operand is 0 the
 * result is 0.
 *
 * @param x first operand
 * @param y second operand
 * @return the least common multiple of |x| and |y|
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // int64_t has no abs() member; take the magnitudes by hand
    int64_t a = (x < 0) ? -x : x;
    int64_t b = (y < 0) ? -y : y;

    if (a == 0 || b == 0) {
        return 0;
    }

    const int64_t absX = a;
    const int64_t absY = b;

    // classic Euclid: when the loop ends, a holds gcd(|x|, |y|)
    while (b != 0) {
        int64_t remainder = a % b;
        a = b;
        b = remainder;
    }

    // x * y == gcd(x, y) * lcm(x, y); divide first to limit overflow
    return absX / a * absY;
}
michael@0 56
michael@0 57 #else
/**
 * Calculates the least common multiple of x and y.
 *
 * If either operand is 0 the result is 0.  The binary GCD loop below is
 * written for positive operands; callers are expected not to pass
 * negative values.
 *
 * @param x first operand
 * @param y second operand
 * @return the least common multiple of x and y, or 0 if either is 0
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // Without this guard, x == 0 && y == 0 never leaves the common-factor
    // loop below, and x == 0 with y != 0 yields gcd == 0 and a division
    // by zero in the return statement.
    if (x == 0 || y == 0) {
        return 0;
    }

    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
    // vol. 2, 1st ed., pp. 298-299
    int64_t x1 = x;
    int64_t y1 = y;

    // strip (and count) the factors of two common to both operands
    int p2 = 0;
    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
        ++p2;
        x1 >>= 1;
        y1 >>= 1;
    }

    // at least one of x1, y1 is odd at this point
    int64_t t;
    if ((x1 & 1) == 1) {
        t = -y1;
    } else {
        t = x1;
    }

    while (t != 0) {
        // remaining factors of two are not part of the gcd
        while ((t & 1) == 0) {
            t = t >> 1;
        }
        // a positive t replaces x1, a negative t replaces y1
        if (t > 0) {
            x1 = t;
        } else {
            y1 = -t;
        }
        t = x1 - y1;
    }

    // put the common factors of two back
    int64_t gcd = x1 << p2;

    // x * y == gcd(x, y) * lcm(x, y)
    return x / gcd * y;
}
michael@0 100 #endif
michael@0 101
michael@0 102 static const UChar gPercent = 0x0025;
michael@0 103 static const UChar gColon = 0x003a;
michael@0 104 static const UChar gSemicolon = 0x003b;
michael@0 105 static const UChar gLineFeed = 0x000a;
michael@0 106
michael@0 107 static const UChar gFourSpaces[] =
michael@0 108 {
michael@0 109 0x20, 0x20, 0x20, 0x20, 0
michael@0 110 }; /* " " */
michael@0 111 static const UChar gPercentPercent[] =
michael@0 112 {
michael@0 113 0x25, 0x25, 0
michael@0 114 }; /* "%%" */
michael@0 115
michael@0 116 static const UChar gNoparse[] =
michael@0 117 {
michael@0 118 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
michael@0 119 }; /* "@noparse" */
michael@0 120
michael@0 121 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
michael@0 122 : name()
michael@0 123 , rules(0)
michael@0 124 , negativeNumberRule(NULL)
michael@0 125 , fIsFractionRuleSet(FALSE)
michael@0 126 , fIsPublic(FALSE)
michael@0 127 , fIsParseable(TRUE)
michael@0 128 , fRecursionCount(0)
michael@0 129 {
michael@0 130 for (int i = 0; i < 3; ++i) {
michael@0 131 fractionRules[i] = NULL;
michael@0 132 }
michael@0 133
michael@0 134 if (U_FAILURE(status)) {
michael@0 135 return;
michael@0 136 }
michael@0 137
michael@0 138 UnicodeString& description = descriptions[index]; // !!! make sure index is valid
michael@0 139
michael@0 140 if (description.length() == 0) {
michael@0 141 // throw new IllegalArgumentException("Empty rule set description");
michael@0 142 status = U_PARSE_ERROR;
michael@0 143 return;
michael@0 144 }
michael@0 145
michael@0 146 // if the description begins with a rule set name (the rule set
michael@0 147 // name can be omitted in formatter descriptions that consist
michael@0 148 // of only one rule set), copy it out into our "name" member
michael@0 149 // and delete it from the description
michael@0 150 if (description.charAt(0) == gPercent) {
michael@0 151 int32_t pos = description.indexOf(gColon);
michael@0 152 if (pos == -1) {
michael@0 153 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
michael@0 154 status = U_PARSE_ERROR;
michael@0 155 } else {
michael@0 156 name.setTo(description, 0, pos);
michael@0 157 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
michael@0 158 }
michael@0 159 description.remove(0, pos);
michael@0 160 }
michael@0 161 } else {
michael@0 162 name.setTo(UNICODE_STRING_SIMPLE("%default"));
michael@0 163 }
michael@0 164
michael@0 165 if (description.length() == 0) {
michael@0 166 // throw new IllegalArgumentException("Empty rule set description");
michael@0 167 status = U_PARSE_ERROR;
michael@0 168 }
michael@0 169
michael@0 170 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
michael@0 171
michael@0 172 if ( name.endsWith(gNoparse,8) ) {
michael@0 173 fIsParseable = FALSE;
michael@0 174 name.truncate(name.length()-8); // remove the @noparse from the name
michael@0 175 }
michael@0 176
michael@0 177 // all of the other members of NFRuleSet are initialized
michael@0 178 // by parseRules()
michael@0 179 }
michael@0 180
michael@0 181 void
michael@0 182 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
michael@0 183 {
michael@0 184 // start by creating a Vector whose elements are Strings containing
michael@0 185 // the descriptions of the rules (one rule per element). The rules
michael@0 186 // are separated by semicolons (there's no escape facility: ALL
michael@0 187 // semicolons are rule delimiters)
michael@0 188
michael@0 189 if (U_FAILURE(status)) {
michael@0 190 return;
michael@0 191 }
michael@0 192
michael@0 193 // ensure we are starting with an empty rule list
michael@0 194 rules.deleteAll();
michael@0 195
michael@0 196 // dlf - the original code kept a separate description array for no reason,
michael@0 197 // so I got rid of it. The loop was too complex so I simplified it.
michael@0 198
michael@0 199 UnicodeString currentDescription;
michael@0 200 int32_t oldP = 0;
michael@0 201 while (oldP < description.length()) {
michael@0 202 int32_t p = description.indexOf(gSemicolon, oldP);
michael@0 203 if (p == -1) {
michael@0 204 p = description.length();
michael@0 205 }
michael@0 206 currentDescription.setTo(description, oldP, p - oldP);
michael@0 207 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
michael@0 208 oldP = p + 1;
michael@0 209 }
michael@0 210
michael@0 211 // for rules that didn't specify a base value, their base values
michael@0 212 // were initialized to 0. Make another pass through the list and
michael@0 213 // set all those rules' base values. We also remove any special
michael@0 214 // rules from the list and put them into their own member variables
michael@0 215 int64_t defaultBaseValue = 0;
michael@0 216
michael@0 217 // (this isn't a for loop because we might be deleting items from
michael@0 218 // the vector-- we want to make sure we only increment i when
michael@0 219 // we _didn't_ delete aything from the vector)
michael@0 220 uint32_t i = 0;
michael@0 221 while (i < rules.size()) {
michael@0 222 NFRule* rule = rules[i];
michael@0 223
michael@0 224 switch (rule->getType()) {
michael@0 225 // if the rule's base value is 0, fill in a default
michael@0 226 // base value (this will be 1 plus the preceding
michael@0 227 // rule's base value for regular rule sets, and the
michael@0 228 // same as the preceding rule's base value in fraction
michael@0 229 // rule sets)
michael@0 230 case NFRule::kNoBase:
michael@0 231 rule->setBaseValue(defaultBaseValue, status);
michael@0 232 if (!isFractionRuleSet()) {
michael@0 233 ++defaultBaseValue;
michael@0 234 }
michael@0 235 ++i;
michael@0 236 break;
michael@0 237
michael@0 238 // if it's the negative-number rule, copy it into its own
michael@0 239 // data member and delete it from the list
michael@0 240 case NFRule::kNegativeNumberRule:
michael@0 241 if (negativeNumberRule) {
michael@0 242 delete negativeNumberRule;
michael@0 243 }
michael@0 244 negativeNumberRule = rules.remove(i);
michael@0 245 break;
michael@0 246
michael@0 247 // if it's the improper fraction rule, copy it into the
michael@0 248 // correct element of fractionRules
michael@0 249 case NFRule::kImproperFractionRule:
michael@0 250 if (fractionRules[0]) {
michael@0 251 delete fractionRules[0];
michael@0 252 }
michael@0 253 fractionRules[0] = rules.remove(i);
michael@0 254 break;
michael@0 255
michael@0 256 // if it's the proper fraction rule, copy it into the
michael@0 257 // correct element of fractionRules
michael@0 258 case NFRule::kProperFractionRule:
michael@0 259 if (fractionRules[1]) {
michael@0 260 delete fractionRules[1];
michael@0 261 }
michael@0 262 fractionRules[1] = rules.remove(i);
michael@0 263 break;
michael@0 264
michael@0 265 // if it's the master rule, copy it into the
michael@0 266 // correct element of fractionRules
michael@0 267 case NFRule::kMasterRule:
michael@0 268 if (fractionRules[2]) {
michael@0 269 delete fractionRules[2];
michael@0 270 }
michael@0 271 fractionRules[2] = rules.remove(i);
michael@0 272 break;
michael@0 273
michael@0 274 // if it's a regular rule that already knows its base value,
michael@0 275 // check to make sure the rules are in order, and update
michael@0 276 // the default base value for the next rule
michael@0 277 default:
michael@0 278 if (rule->getBaseValue() < defaultBaseValue) {
michael@0 279 // throw new IllegalArgumentException("Rules are not in order");
michael@0 280 status = U_PARSE_ERROR;
michael@0 281 return;
michael@0 282 }
michael@0 283 defaultBaseValue = rule->getBaseValue();
michael@0 284 if (!isFractionRuleSet()) {
michael@0 285 ++defaultBaseValue;
michael@0 286 }
michael@0 287 ++i;
michael@0 288 break;
michael@0 289 }
michael@0 290 }
michael@0 291 }
michael@0 292
michael@0 293 NFRuleSet::~NFRuleSet()
michael@0 294 {
michael@0 295 delete negativeNumberRule;
michael@0 296 delete fractionRules[0];
michael@0 297 delete fractionRules[1];
michael@0 298 delete fractionRules[2];
michael@0 299 }
michael@0 300
michael@0 301 static UBool
michael@0 302 util_equalRules(const NFRule* rule1, const NFRule* rule2)
michael@0 303 {
michael@0 304 if (rule1) {
michael@0 305 if (rule2) {
michael@0 306 return *rule1 == *rule2;
michael@0 307 }
michael@0 308 } else if (!rule2) {
michael@0 309 return TRUE;
michael@0 310 }
michael@0 311 return FALSE;
michael@0 312 }
michael@0 313
michael@0 314 UBool
michael@0 315 NFRuleSet::operator==(const NFRuleSet& rhs) const
michael@0 316 {
michael@0 317 if (rules.size() == rhs.rules.size() &&
michael@0 318 fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
michael@0 319 name == rhs.name &&
michael@0 320 util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
michael@0 321 util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
michael@0 322 util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
michael@0 323 util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
michael@0 324
michael@0 325 for (uint32_t i = 0; i < rules.size(); ++i) {
michael@0 326 if (*rules[i] != *rhs.rules[i]) {
michael@0 327 return FALSE;
michael@0 328 }
michael@0 329 }
michael@0 330 return TRUE;
michael@0 331 }
michael@0 332 return FALSE;
michael@0 333 }
michael@0 334
michael@0 335 #define RECURSION_LIMIT 50
michael@0 336
michael@0 337 void
michael@0 338 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const
michael@0 339 {
michael@0 340 NFRule *rule = findNormalRule(number);
michael@0 341 if (rule) { // else error, but can't report it
michael@0 342 NFRuleSet* ncThis = (NFRuleSet*)this;
michael@0 343 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
michael@0 344 // stop recursion
michael@0 345 ncThis->fRecursionCount = 0;
michael@0 346 } else {
michael@0 347 rule->doFormat(number, toAppendTo, pos);
michael@0 348 ncThis->fRecursionCount--;
michael@0 349 }
michael@0 350 }
michael@0 351 }
michael@0 352
michael@0 353 void
michael@0 354 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
michael@0 355 {
michael@0 356 NFRule *rule = findDoubleRule(number);
michael@0 357 if (rule) { // else error, but can't report it
michael@0 358 NFRuleSet* ncThis = (NFRuleSet*)this;
michael@0 359 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
michael@0 360 // stop recursion
michael@0 361 ncThis->fRecursionCount = 0;
michael@0 362 } else {
michael@0 363 rule->doFormat(number, toAppendTo, pos);
michael@0 364 ncThis->fRecursionCount--;
michael@0 365 }
michael@0 366 }
michael@0 367 }
michael@0 368
michael@0 369 NFRule*
michael@0 370 NFRuleSet::findDoubleRule(double number) const
michael@0 371 {
michael@0 372 // if this is a fraction rule set, use findFractionRuleSetRule()
michael@0 373 if (isFractionRuleSet()) {
michael@0 374 return findFractionRuleSetRule(number);
michael@0 375 }
michael@0 376
michael@0 377 // if the number is negative, return the negative number rule
michael@0 378 // (if there isn't a negative-number rule, we pretend it's a
michael@0 379 // positive number)
michael@0 380 if (number < 0) {
michael@0 381 if (negativeNumberRule) {
michael@0 382 return negativeNumberRule;
michael@0 383 } else {
michael@0 384 number = -number;
michael@0 385 }
michael@0 386 }
michael@0 387
michael@0 388 // if the number isn't an integer, we use one of the fraction rules...
michael@0 389 if (number != uprv_floor(number)) {
michael@0 390 // if the number is between 0 and 1, return the proper
michael@0 391 // fraction rule
michael@0 392 if (number < 1 && fractionRules[1]) {
michael@0 393 return fractionRules[1];
michael@0 394 }
michael@0 395 // otherwise, return the improper fraction rule
michael@0 396 else if (fractionRules[0]) {
michael@0 397 return fractionRules[0];
michael@0 398 }
michael@0 399 }
michael@0 400
michael@0 401 // if there's a master rule, use it to format the number
michael@0 402 if (fractionRules[2]) {
michael@0 403 return fractionRules[2];
michael@0 404 }
michael@0 405
michael@0 406 // and if we haven't yet returned a rule, use findNormalRule()
michael@0 407 // to find the applicable rule
michael@0 408 int64_t r = util64_fromDouble(number + 0.5);
michael@0 409 return findNormalRule(r);
michael@0 410 }
michael@0 411
michael@0 412 NFRule *
michael@0 413 NFRuleSet::findNormalRule(int64_t number) const
michael@0 414 {
michael@0 415 // if this is a fraction rule set, use findFractionRuleSetRule()
michael@0 416 // to find the rule (we should only go into this clause if the
michael@0 417 // value is 0)
michael@0 418 if (fIsFractionRuleSet) {
michael@0 419 return findFractionRuleSetRule((double)number);
michael@0 420 }
michael@0 421
michael@0 422 // if the number is negative, return the negative-number rule
michael@0 423 // (if there isn't one, pretend the number is positive)
michael@0 424 if (number < 0) {
michael@0 425 if (negativeNumberRule) {
michael@0 426 return negativeNumberRule;
michael@0 427 } else {
michael@0 428 number = -number;
michael@0 429 }
michael@0 430 }
michael@0 431
michael@0 432 // we have to repeat the preceding two checks, even though we
michael@0 433 // do them in findRule(), because the version of format() that
michael@0 434 // takes a long bypasses findRule() and goes straight to this
michael@0 435 // function. This function does skip the fraction rules since
michael@0 436 // we know the value is an integer (it also skips the master
michael@0 437 // rule, since it's considered a fraction rule. Skipping the
michael@0 438 // master rule in this function is also how we avoid infinite
michael@0 439 // recursion)
michael@0 440
michael@0 441 // {dlf} unfortunately this fails if there are no rules except
michael@0 442 // special rules. If there are no rules, use the master rule.
michael@0 443
michael@0 444 // binary-search the rule list for the applicable rule
michael@0 445 // (a rule is used for all values from its base value to
michael@0 446 // the next rule's base value)
michael@0 447 int32_t hi = rules.size();
michael@0 448 if (hi > 0) {
michael@0 449 int32_t lo = 0;
michael@0 450
michael@0 451 while (lo < hi) {
michael@0 452 int32_t mid = (lo + hi) / 2;
michael@0 453 if (rules[mid]->getBaseValue() == number) {
michael@0 454 return rules[mid];
michael@0 455 }
michael@0 456 else if (rules[mid]->getBaseValue() > number) {
michael@0 457 hi = mid;
michael@0 458 }
michael@0 459 else {
michael@0 460 lo = mid + 1;
michael@0 461 }
michael@0 462 }
michael@0 463 if (hi == 0) { // bad rule set, minimum base > 0
michael@0 464 return NULL; // want to throw exception here
michael@0 465 }
michael@0 466
michael@0 467 NFRule *result = rules[hi - 1];
michael@0 468
michael@0 469 // use shouldRollBack() to see whether we need to invoke the
michael@0 470 // rollback rule (see shouldRollBack()'s documentation for
michael@0 471 // an explanation of the rollback rule). If we do, roll back
michael@0 472 // one rule and return that one instead of the one we'd normally
michael@0 473 // return
michael@0 474 if (result->shouldRollBack((double)number)) {
michael@0 475 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
michael@0 476 return NULL;
michael@0 477 }
michael@0 478 result = rules[hi - 2];
michael@0 479 }
michael@0 480 return result;
michael@0 481 }
michael@0 482 // else use the master rule
michael@0 483 return fractionRules[2];
michael@0 484 }
michael@0 485
michael@0 486 /**
michael@0 487 * If this rule is a fraction rule set, this function is used by
michael@0 488 * findRule() to select the most appropriate rule for formatting
michael@0 489 * the number. Basically, the base value of each rule in the rule
michael@0 490 * set is treated as the denominator of a fraction. Whichever
michael@0 491 * denominator can produce the fraction closest in value to the
michael@0 492 * number passed in is the result. If there's a tie, the earlier
michael@0 493 * one in the list wins. (If there are two rules in a row with the
michael@0 494 * same base value, the first one is used when the numerator of the
michael@0 495 * fraction would be 1, and the second rule is used the rest of the
michael@0 496 * time.
michael@0 497 * @param number The number being formatted (which will always be
michael@0 498 * a number between 0 and 1)
michael@0 499 * @return The rule to use to format this number
michael@0 500 */
michael@0 501 NFRule*
michael@0 502 NFRuleSet::findFractionRuleSetRule(double number) const
michael@0 503 {
michael@0 504 // the obvious way to do this (multiply the value being formatted
michael@0 505 // by each rule's base value until you get an integral result)
michael@0 506 // doesn't work because of rounding error. This method is more
michael@0 507 // accurate
michael@0 508
michael@0 509 // find the least common multiple of the rules' base values
michael@0 510 // and multiply this by the number being formatted. This is
michael@0 511 // all the precision we need, and we can do all of the rest
michael@0 512 // of the math using integer arithmetic
michael@0 513 int64_t leastCommonMultiple = rules[0]->getBaseValue();
michael@0 514 int64_t numerator;
michael@0 515 {
michael@0 516 for (uint32_t i = 1; i < rules.size(); ++i) {
michael@0 517 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
michael@0 518 }
michael@0 519 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
michael@0 520 }
michael@0 521 // for each rule, do the following...
michael@0 522 int64_t tempDifference;
michael@0 523 int64_t difference = util64_fromDouble(uprv_maxMantissa());
michael@0 524 int32_t winner = 0;
michael@0 525 for (uint32_t i = 0; i < rules.size(); ++i) {
michael@0 526 // "numerator" is the numerator of the fraction if the
michael@0 527 // denominator is the LCD. The numerator if the rule's
michael@0 528 // base value is the denominator is "numerator" times the
michael@0 529 // base value divided bythe LCD. Here we check to see if
michael@0 530 // that's an integer, and if not, how close it is to being
michael@0 531 // an integer.
michael@0 532 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
michael@0 533
michael@0 534
michael@0 535 // normalize the result of the above calculation: we want
michael@0 536 // the numerator's distance from the CLOSEST multiple
michael@0 537 // of the LCD
michael@0 538 if (leastCommonMultiple - tempDifference < tempDifference) {
michael@0 539 tempDifference = leastCommonMultiple - tempDifference;
michael@0 540 }
michael@0 541
michael@0 542 // if this is as close as we've come, keep track of how close
michael@0 543 // that is, and the line number of the rule that did it. If
michael@0 544 // we've scored a direct hit, we don't have to look at any more
michael@0 545 // rules
michael@0 546 if (tempDifference < difference) {
michael@0 547 difference = tempDifference;
michael@0 548 winner = i;
michael@0 549 if (difference == 0) {
michael@0 550 break;
michael@0 551 }
michael@0 552 }
michael@0 553 }
michael@0 554
michael@0 555 // if we have two successive rules that both have the winning base
michael@0 556 // value, then the first one (the one we found above) is used if
michael@0 557 // the numerator of the fraction is 1 and the second one is used if
michael@0 558 // the numerator of the fraction is anything else (this lets us
michael@0 559 // do things like "one third"/"two thirds" without haveing to define
michael@0 560 // a whole bunch of extra rule sets)
michael@0 561 if ((unsigned)(winner + 1) < rules.size() &&
michael@0 562 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
michael@0 563 double n = ((double)rules[winner]->getBaseValue()) * number;
michael@0 564 if (n < 0.5 || n >= 2) {
michael@0 565 ++winner;
michael@0 566 }
michael@0 567 }
michael@0 568
michael@0 569 // finally, return the winning rule
michael@0 570 return rules[winner];
michael@0 571 }
michael@0 572
michael@0 573 /**
michael@0 574 * Parses a string. Matches the string to be parsed against each
michael@0 575 * of its rules (with a base value less than upperBound) and returns
michael@0 576 * the value produced by the rule that matched the most characters
michael@0 577 * in the source string.
michael@0 578 * @param text The string to parse
michael@0 579 * @param parsePosition The initial position is ignored and assumed
michael@0 580 * to be 0. On exit, this object has been updated to point to the
michael@0 581 * first character position this rule set didn't consume.
michael@0 582 * @param upperBound Limits the rules that can be allowed to match.
michael@0 583 * Only rules whose base values are strictly less than upperBound
michael@0 584 * are considered.
michael@0 585 * @return The numerical result of parsing this string. This will
michael@0 586 * be the matching rule's base value, composed appropriately with
michael@0 587 * the results of matching any of its substitutions. The object
michael@0 588 * will be an instance of Long if it's an integral value; otherwise,
michael@0 589 * it will be an instance of Double. This function always returns
michael@0 590 * a valid object: If nothing matched the input string at all,
michael@0 591 * this function returns new Long(0), and the parse position is
michael@0 592 * left unchanged.
michael@0 593 */
michael@0 594 #ifdef RBNF_DEBUG
michael@0 595 #include <stdio.h>
michael@0 596
michael@0 597 static void dumpUS(FILE* f, const UnicodeString& us) {
michael@0 598 int len = us.length();
michael@0 599 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
michael@0 600 if (buf != NULL) {
michael@0 601 us.extract(0, len, buf);
michael@0 602 buf[len] = 0;
michael@0 603 fprintf(f, "%s", buf);
michael@0 604 uprv_free(buf); //delete[] buf;
michael@0 605 }
michael@0 606 }
michael@0 607 #endif
michael@0 608
michael@0 609 UBool
michael@0 610 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
michael@0 611 {
michael@0 612 // try matching each rule in the rule set against the text being
michael@0 613 // parsed. Whichever one matches the most characters is the one
michael@0 614 // that determines the value we return.
michael@0 615
michael@0 616 result.setLong(0);
michael@0 617
michael@0 618 // dump out if there's no text to parse
michael@0 619 if (text.length() == 0) {
michael@0 620 return 0;
michael@0 621 }
michael@0 622
michael@0 623 ParsePosition highWaterMark;
michael@0 624 ParsePosition workingPos = pos;
michael@0 625
michael@0 626 #ifdef RBNF_DEBUG
michael@0 627 fprintf(stderr, "<nfrs> %x '", this);
michael@0 628 dumpUS(stderr, name);
michael@0 629 fprintf(stderr, "' text '");
michael@0 630 dumpUS(stderr, text);
michael@0 631 fprintf(stderr, "'\n");
michael@0 632 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
michael@0 633 #endif
michael@0 634
michael@0 635 // start by trying the negative number rule (if there is one)
michael@0 636 if (negativeNumberRule) {
michael@0 637 Formattable tempResult;
michael@0 638 #ifdef RBNF_DEBUG
michael@0 639 fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
michael@0 640 #endif
michael@0 641 UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
michael@0 642 #ifdef RBNF_DEBUG
michael@0 643 fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
michael@0 644 #endif
michael@0 645 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
michael@0 646 result = tempResult;
michael@0 647 highWaterMark = workingPos;
michael@0 648 }
michael@0 649 workingPos = pos;
michael@0 650 }
michael@0 651 #ifdef RBNF_DEBUG
michael@0 652 fprintf(stderr, "<nfrs> continue fractional with text '");
michael@0 653 dumpUS(stderr, text);
michael@0 654 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
michael@0 655 #endif
michael@0 656 // then try each of the fraction rules
michael@0 657 {
michael@0 658 for (int i = 0; i < 3; i++) {
michael@0 659 if (fractionRules[i]) {
michael@0 660 Formattable tempResult;
michael@0 661 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
michael@0 662 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
michael@0 663 result = tempResult;
michael@0 664 highWaterMark = workingPos;
michael@0 665 }
michael@0 666 workingPos = pos;
michael@0 667 }
michael@0 668 }
michael@0 669 }
michael@0 670 #ifdef RBNF_DEBUG
michael@0 671 fprintf(stderr, "<nfrs> continue other with text '");
michael@0 672 dumpUS(stderr, text);
michael@0 673 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
michael@0 674 #endif
michael@0 675
michael@0 676 // finally, go through the regular rules one at a time. We start
michael@0 677 // at the end of the list because we want to try matching the most
michael@0 678 // sigificant rule first (this helps ensure that we parse
michael@0 679 // "five thousand three hundred six" as
michael@0 680 // "(five thousand) (three hundred) (six)" rather than
michael@0 681 // "((five thousand three) hundred) (six)"). Skip rules whose
michael@0 682 // base values are higher than the upper bound (again, this helps
michael@0 683 // limit ambiguity by making sure the rules that match a rule's
michael@0 684 // are less significant than the rule containing the substitutions)/
michael@0 685 {
michael@0 686 int64_t ub = util64_fromDouble(upperBound);
michael@0 687 #ifdef RBNF_DEBUG
michael@0 688 {
michael@0 689 char ubstr[64];
michael@0 690 util64_toa(ub, ubstr, 64);
michael@0 691 char ubstrhex[64];
michael@0 692 util64_toa(ub, ubstrhex, 64, 16);
michael@0 693 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
michael@0 694 }
michael@0 695 #endif
michael@0 696 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
michael@0 697 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
michael@0 698 continue;
michael@0 699 }
michael@0 700 Formattable tempResult;
michael@0 701 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
michael@0 702 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
michael@0 703 result = tempResult;
michael@0 704 highWaterMark = workingPos;
michael@0 705 }
michael@0 706 workingPos = pos;
michael@0 707 }
michael@0 708 }
michael@0 709 #ifdef RBNF_DEBUG
michael@0 710 fprintf(stderr, "<nfrs> exit\n");
michael@0 711 #endif
michael@0 712 // finally, update the parse postion we were passed to point to the
michael@0 713 // first character we didn't use, and return the result that
michael@0 714 // corresponds to that string of characters
michael@0 715 pos = highWaterMark;
michael@0 716
michael@0 717 return 1;
michael@0 718 }
michael@0 719
michael@0 720 void
michael@0 721 NFRuleSet::appendRules(UnicodeString& result) const
michael@0 722 {
michael@0 723 // the rule set name goes first...
michael@0 724 result.append(name);
michael@0 725 result.append(gColon);
michael@0 726 result.append(gLineFeed);
michael@0 727
michael@0 728 // followed by the regular rules...
michael@0 729 for (uint32_t i = 0; i < rules.size(); i++) {
michael@0 730 result.append(gFourSpaces, 4);
michael@0 731 rules[i]->_appendRuleText(result);
michael@0 732 result.append(gLineFeed);
michael@0 733 }
michael@0 734
michael@0 735 // followed by the special rules (if they exist)
michael@0 736 if (negativeNumberRule) {
michael@0 737 result.append(gFourSpaces, 4);
michael@0 738 negativeNumberRule->_appendRuleText(result);
michael@0 739 result.append(gLineFeed);
michael@0 740 }
michael@0 741
michael@0 742 {
michael@0 743 for (uint32_t i = 0; i < 3; ++i) {
michael@0 744 if (fractionRules[i]) {
michael@0 745 result.append(gFourSpaces, 4);
michael@0 746 fractionRules[i]->_appendRuleText(result);
michael@0 747 result.append(gLineFeed);
michael@0 748 }
michael@0 749 }
michael@0 750 }
michael@0 751 }
michael@0 752
michael@0 753 // utility functions
michael@0 754
michael@0 755 int64_t util64_fromDouble(double d) {
michael@0 756 int64_t result = 0;
michael@0 757 if (!uprv_isNaN(d)) {
michael@0 758 double mant = uprv_maxMantissa();
michael@0 759 if (d < -mant) {
michael@0 760 d = -mant;
michael@0 761 } else if (d > mant) {
michael@0 762 d = mant;
michael@0 763 }
michael@0 764 UBool neg = d < 0;
michael@0 765 if (neg) {
michael@0 766 d = -d;
michael@0 767 }
michael@0 768 result = (int64_t)uprv_floor(d);
michael@0 769 if (neg) {
michael@0 770 result = -result;
michael@0 771 }
michael@0 772 }
michael@0 773 return result;
michael@0 774 }
michael@0 775
/**
 * Raises r to the power e using 64-bit integer arithmetic.
 *
 * A base of 0 yields 0 for every exponent, including 0.  Any other base
 * raised to the power 0 yields 1.  The result is not checked for overflow.
 *
 * @param r the base
 * @param e the exponent
 * @return r to the power e
 */
int64_t util64_pow(int32_t r, uint32_t e) {
    if (r == 0) {
        return 0;
    }

    // square-and-multiply: O(log e) multiplications instead of O(e)
    int64_t result = 1;
    int64_t base = r;
    while (e != 0) {
        if ((e & 1) != 0) {
            result *= base;
        }
        e >>= 1;
        // only square while more exponent bits remain, so no product is
        // formed that the final result does not need
        if (e != 0) {
            base *= base;
        }
    }
    return result;
}
michael@0 789
// Output characters for digit values 0..35, indexed by digit value.
// The entries are written as numeric ASCII code points rather than as
// character literals.
static const uint8_t asciiDigits[] = {
    // digit values 0..9  -> '0'..'9'
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u,
    0x35u, 0x36u, 0x37u, 0x38u, 0x39u,
    // digit values 10..35 -> 'a'..'z'
    0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 0x67u,
    0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
    0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u,
    0x76u, 0x77u, 0x78u, 0x79u, 0x7au,
};
michael@0 797
michael@0 798 static const UChar kUMinus = (UChar)0x002d;
michael@0 799
michael@0 800 #ifdef RBNF_DEBUG
// Sign character recognised and produced by the char-based conversions.
static const char kMinus = '-';

// Digit lookup table indexed by a character code in the range 0..127.
// An entry of 0 means "not a digit".  Otherwise the high bit (0x80) marks
// the entry as valid and the low seven bits hold the digit value
// ('0'..'9' -> 0..9, 'A'..'Z' and 'a'..'z' -> 10..35).  The flag bit is
// what lets the digit value 0 be told apart from "not a digit".
static const uint8_t digitInfo[] = {
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
};

/**
 * Parses a NUL-terminated char string as an integer in the given radix.
 *
 * An optional leading kMinus negates the result.  Parsing stops at the
 * first character that is not a digit of the radix (this includes the
 * terminating NUL); the digits consumed up to that point form the result,
 * so a string with no leading digits yields 0.  Overflow is not detected.
 *
 * @param str    the text to parse; must not be NULL
 * @param radix  the radix; values outside 2..36 are clamped into that range
 * @return       the parsed value
 */
int64_t util64_atoi(const char* str, uint32_t radix)
{
    if (radix > 36) {
        radix = 36;
    } else if (radix < 2) {
        radix = 2;
    }
    const int64_t lradix = radix;

    const bool neg = (*str == kMinus);
    if (neg) {
        ++str;
    }

    int64_t result = 0;
    for (;;) {
        // Go through uint8_t so a char with the high bit set can never
        // become a negative index, and reject anything past the end of
        // the 128-entry table instead of reading out of bounds.
        const uint8_t c = static_cast<uint8_t>(*str++);
        if (c >= 0x80) {
            break;
        }
        const uint8_t info = digitInfo[c];
        if (info == 0) {
            break;  // not a digit in any radix (also the terminating NUL)
        }
        const uint8_t digit = static_cast<uint8_t>(info & 0x7f);
        if (digit >= radix) {
            break;  // a digit, but not one of this radix
        }
        result *= lradix;
        result += static_cast<int32_t>(digit);
    }

    return neg ? -result : result;
}
michael@0 847
michael@0 848 int64_t util64_utoi(const UChar* str, uint32_t radix)
michael@0 849 {
michael@0 850 if (radix > 36) {
michael@0 851 radix = 36;
michael@0 852 } else if (radix < 2) {
michael@0 853 radix = 2;
michael@0 854 }
michael@0 855 int64_t lradix = radix;
michael@0 856
michael@0 857 int neg = 0;
michael@0 858 if (*str == kUMinus) {
michael@0 859 ++str;
michael@0 860 neg = 1;
michael@0 861 }
michael@0 862 int64_t result = 0;
michael@0 863 UChar c;
michael@0 864 uint8_t b;
michael@0 865 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
michael@0 866 result *= lradix;
michael@0 867 result += (int32_t)b;
michael@0 868 }
michael@0 869 if (neg) {
michael@0 870 result = -result;
michael@0 871 }
michael@0 872 return result;
michael@0 873 }
michael@0 874
michael@0 875 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
michael@0 876 {
michael@0 877 if (radix > 36) {
michael@0 878 radix = 36;
michael@0 879 } else if (radix < 2) {
michael@0 880 radix = 2;
michael@0 881 }
michael@0 882 int64_t base = radix;
michael@0 883
michael@0 884 char* p = buf;
michael@0 885 if (len && (w < 0) && (radix == 10) && !raw) {
michael@0 886 w = -w;
michael@0 887 *p++ = kMinus;
michael@0 888 --len;
michael@0 889 } else if (len && (w == 0)) {
michael@0 890 *p++ = (char)raw ? 0 : asciiDigits[0];
michael@0 891 --len;
michael@0 892 }
michael@0 893
michael@0 894 while (len && w != 0) {
michael@0 895 int64_t n = w / base;
michael@0 896 int64_t m = n * base;
michael@0 897 int32_t d = (int32_t)(w-m);
michael@0 898 *p++ = raw ? (char)d : asciiDigits[d];
michael@0 899 w = n;
michael@0 900 --len;
michael@0 901 }
michael@0 902 if (len) {
michael@0 903 *p = 0; // null terminate if room for caller convenience
michael@0 904 }
michael@0 905
michael@0 906 len = p - buf;
michael@0 907 if (*buf == kMinus) {
michael@0 908 ++buf;
michael@0 909 }
michael@0 910 while (--p > buf) {
michael@0 911 char c = *p;
michael@0 912 *p = *buf;
michael@0 913 *buf = c;
michael@0 914 ++buf;
michael@0 915 }
michael@0 916
michael@0 917 return len;
michael@0 918 }
michael@0 919 #endif
michael@0 920
michael@0 921 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
michael@0 922 {
michael@0 923 if (radix > 36) {
michael@0 924 radix = 36;
michael@0 925 } else if (radix < 2) {
michael@0 926 radix = 2;
michael@0 927 }
michael@0 928 int64_t base = radix;
michael@0 929
michael@0 930 UChar* p = buf;
michael@0 931 if (len && (w < 0) && (radix == 10) && !raw) {
michael@0 932 w = -w;
michael@0 933 *p++ = kUMinus;
michael@0 934 --len;
michael@0 935 } else if (len && (w == 0)) {
michael@0 936 *p++ = (UChar)raw ? 0 : asciiDigits[0];
michael@0 937 --len;
michael@0 938 }
michael@0 939
michael@0 940 while (len && (w != 0)) {
michael@0 941 int64_t n = w / base;
michael@0 942 int64_t m = n * base;
michael@0 943 int32_t d = (int32_t)(w-m);
michael@0 944 *p++ = (UChar)(raw ? d : asciiDigits[d]);
michael@0 945 w = n;
michael@0 946 --len;
michael@0 947 }
michael@0 948 if (len) {
michael@0 949 *p = 0; // null terminate if room for caller convenience
michael@0 950 }
michael@0 951
michael@0 952 len = (uint32_t)(p - buf);
michael@0 953 if (*buf == kUMinus) {
michael@0 954 ++buf;
michael@0 955 }
michael@0 956 while (--p > buf) {
michael@0 957 UChar c = *p;
michael@0 958 *p = *buf;
michael@0 959 *buf = c;
michael@0 960 ++buf;
michael@0 961 }
michael@0 962
michael@0 963 return len;
michael@0 964 }
michael@0 965
michael@0 966
michael@0 967 U_NAMESPACE_END
michael@0 968
michael@0 969 /* U_HAVE_RBNF */
michael@0 970 #endif
michael@0 971

mercurial