michael@0: /* michael@0: ****************************************************************************** michael@0: * Copyright (C) 1997-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ****************************************************************************** michael@0: * file name: nfrs.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * Modification history michael@0: * Date Name Comments michael@0: * 10/11/2001 Doug Ported from ICU4J michael@0: */ michael@0: michael@0: #include "nfrs.h" michael@0: michael@0: #if U_HAVE_RBNF michael@0: michael@0: #include "unicode/uchar.h" michael@0: #include "nfrule.h" michael@0: #include "nfrlist.h" michael@0: #include "patternprops.h" michael@0: michael@0: #ifdef RBNF_DEBUG michael@0: #include "cmemory.h" michael@0: #endif michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: #if 0 michael@0: // euclid's algorithm works with doubles michael@0: // note, doubles only get us up to one quadrillion or so, which michael@0: // isn't as much range as we get with longs. We probably still michael@0: // want either 64-bit math, or BigInteger. michael@0: michael@0: static int64_t michael@0: util_lcm(int64_t x, int64_t y) michael@0: { michael@0: x.abs(); michael@0: y.abs(); michael@0: michael@0: if (x == 0 || y == 0) { michael@0: return 0; michael@0: } else { michael@0: do { michael@0: if (x < y) { michael@0: int64_t t = x; x = y; y = t; michael@0: } michael@0: x -= y * (x/y); michael@0: } while (x != 0); michael@0: michael@0: return y; michael@0: } michael@0: } michael@0: michael@0: #else michael@0: /** michael@0: * Calculates the least common multiple of x and y. michael@0: */ michael@0: static int64_t michael@0: util_lcm(int64_t x, int64_t y) michael@0: { michael@0: // binary gcd algorithm from Knuth, "The Art of Computer Programming," michael@0: // vol. 2, 1st ed., pp. 298-299 michael@0: int64_t x1 = x; michael@0: int64_t y1 = y; michael@0: michael@0: int p2 = 0; michael@0: while ((x1 & 1) == 0 && (y1 & 1) == 0) { michael@0: ++p2; michael@0: x1 >>= 1; michael@0: y1 >>= 1; michael@0: } michael@0: michael@0: int64_t t; michael@0: if ((x1 & 1) == 1) { michael@0: t = -y1; michael@0: } else { michael@0: t = x1; michael@0: } michael@0: michael@0: while (t != 0) { michael@0: while ((t & 1) == 0) { michael@0: t = t >> 1; michael@0: } michael@0: if (t > 0) { michael@0: x1 = t; michael@0: } else { michael@0: y1 = -t; michael@0: } michael@0: t = x1 - y1; michael@0: } michael@0: michael@0: int64_t gcd = x1 << p2; michael@0: michael@0: // x * y == gcd(x, y) * lcm(x, y) michael@0: return x / gcd * y; michael@0: } michael@0: #endif michael@0: michael@0: static const UChar gPercent = 0x0025; michael@0: static const UChar gColon = 0x003a; michael@0: static const UChar gSemicolon = 0x003b; michael@0: static const UChar gLineFeed = 0x000a; michael@0: michael@0: static const UChar gFourSpaces[] = michael@0: { michael@0: 0x20, 0x20, 0x20, 0x20, 0 michael@0: }; /* " " */ michael@0: static const UChar gPercentPercent[] = michael@0: { michael@0: 0x25, 0x25, 0 michael@0: }; /* "%%" */ michael@0: michael@0: static const UChar gNoparse[] = michael@0: { michael@0: 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0 michael@0: }; /* "@noparse" */ michael@0: michael@0: NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status) michael@0: : name() michael@0: , rules(0) michael@0: , negativeNumberRule(NULL) michael@0: , fIsFractionRuleSet(FALSE) michael@0: , fIsPublic(FALSE) michael@0: , fIsParseable(TRUE) michael@0: , fRecursionCount(0) michael@0: { michael@0: for (int i = 0; i < 3; ++i) { michael@0: fractionRules[i] = NULL; michael@0: } michael@0: michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: michael@0: UnicodeString& description = descriptions[index]; // !!! make sure index is valid michael@0: michael@0: if (description.length() == 0) { michael@0: // throw new IllegalArgumentException("Empty rule set description"); michael@0: status = U_PARSE_ERROR; michael@0: return; michael@0: } michael@0: michael@0: // if the description begins with a rule set name (the rule set michael@0: // name can be omitted in formatter descriptions that consist michael@0: // of only one rule set), copy it out into our "name" member michael@0: // and delete it from the description michael@0: if (description.charAt(0) == gPercent) { michael@0: int32_t pos = description.indexOf(gColon); michael@0: if (pos == -1) { michael@0: // throw new IllegalArgumentException("Rule set name doesn't end in colon"); michael@0: status = U_PARSE_ERROR; michael@0: } else { michael@0: name.setTo(description, 0, pos); michael@0: while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { michael@0: } michael@0: description.remove(0, pos); michael@0: } michael@0: } else { michael@0: name.setTo(UNICODE_STRING_SIMPLE("%default")); michael@0: } michael@0: michael@0: if (description.length() == 0) { michael@0: // throw new IllegalArgumentException("Empty rule set description"); michael@0: status = U_PARSE_ERROR; michael@0: } michael@0: michael@0: fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; michael@0: michael@0: if ( name.endsWith(gNoparse,8) ) { michael@0: fIsParseable = FALSE; michael@0: name.truncate(name.length()-8); // remove the @noparse from the name michael@0: } michael@0: michael@0: // all of the other members of NFRuleSet are initialized michael@0: // by parseRules() michael@0: } michael@0: michael@0: void michael@0: NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status) michael@0: { michael@0: // start by creating a Vector whose elements are Strings containing michael@0: // the descriptions of the rules (one rule per element). The rules michael@0: // are separated by semicolons (there's no escape facility: ALL michael@0: // semicolons are rule delimiters) michael@0: michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: michael@0: // ensure we are starting with an empty rule list michael@0: rules.deleteAll(); michael@0: michael@0: // dlf - the original code kept a separate description array for no reason, michael@0: // so I got rid of it. The loop was too complex so I simplified it. michael@0: michael@0: UnicodeString currentDescription; michael@0: int32_t oldP = 0; michael@0: while (oldP < description.length()) { michael@0: int32_t p = description.indexOf(gSemicolon, oldP); michael@0: if (p == -1) { michael@0: p = description.length(); michael@0: } michael@0: currentDescription.setTo(description, oldP, p - oldP); michael@0: NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); michael@0: oldP = p + 1; michael@0: } michael@0: michael@0: // for rules that didn't specify a base value, their base values michael@0: // were initialized to 0. Make another pass through the list and michael@0: // set all those rules' base values. We also remove any special michael@0: // rules from the list and put them into their own member variables michael@0: int64_t defaultBaseValue = 0; michael@0: michael@0: // (this isn't a for loop because we might be deleting items from michael@0: // the vector-- we want to make sure we only increment i when michael@0: // we _didn't_ delete aything from the vector) michael@0: uint32_t i = 0; michael@0: while (i < rules.size()) { michael@0: NFRule* rule = rules[i]; michael@0: michael@0: switch (rule->getType()) { michael@0: // if the rule's base value is 0, fill in a default michael@0: // base value (this will be 1 plus the preceding michael@0: // rule's base value for regular rule sets, and the michael@0: // same as the preceding rule's base value in fraction michael@0: // rule sets) michael@0: case NFRule::kNoBase: michael@0: rule->setBaseValue(defaultBaseValue, status); michael@0: if (!isFractionRuleSet()) { michael@0: ++defaultBaseValue; michael@0: } michael@0: ++i; michael@0: break; michael@0: michael@0: // if it's the negative-number rule, copy it into its own michael@0: // data member and delete it from the list michael@0: case NFRule::kNegativeNumberRule: michael@0: if (negativeNumberRule) { michael@0: delete negativeNumberRule; michael@0: } michael@0: negativeNumberRule = rules.remove(i); michael@0: break; michael@0: michael@0: // if it's the improper fraction rule, copy it into the michael@0: // correct element of fractionRules michael@0: case NFRule::kImproperFractionRule: michael@0: if (fractionRules[0]) { michael@0: delete fractionRules[0]; michael@0: } michael@0: fractionRules[0] = rules.remove(i); michael@0: break; michael@0: michael@0: // if it's the proper fraction rule, copy it into the michael@0: // correct element of fractionRules michael@0: case NFRule::kProperFractionRule: michael@0: if (fractionRules[1]) { michael@0: delete fractionRules[1]; michael@0: } michael@0: fractionRules[1] = rules.remove(i); michael@0: break; michael@0: michael@0: // if it's the master rule, copy it into the michael@0: // correct element of fractionRules michael@0: case NFRule::kMasterRule: michael@0: if (fractionRules[2]) { michael@0: delete fractionRules[2]; michael@0: } michael@0: fractionRules[2] = rules.remove(i); michael@0: break; michael@0: michael@0: // if it's a regular rule that already knows its base value, michael@0: // check to make sure the rules are in order, and update michael@0: // the default base value for the next rule michael@0: default: michael@0: if (rule->getBaseValue() < defaultBaseValue) { michael@0: // throw new IllegalArgumentException("Rules are not in order"); michael@0: status = U_PARSE_ERROR; michael@0: return; michael@0: } michael@0: defaultBaseValue = rule->getBaseValue(); michael@0: if (!isFractionRuleSet()) { michael@0: ++defaultBaseValue; michael@0: } michael@0: ++i; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: NFRuleSet::~NFRuleSet() michael@0: { michael@0: delete negativeNumberRule; michael@0: delete fractionRules[0]; michael@0: delete fractionRules[1]; michael@0: delete fractionRules[2]; michael@0: } michael@0: michael@0: static UBool michael@0: util_equalRules(const NFRule* rule1, const NFRule* rule2) michael@0: { michael@0: if (rule1) { michael@0: if (rule2) { michael@0: return *rule1 == *rule2; michael@0: } michael@0: } else if (!rule2) { michael@0: return TRUE; michael@0: } michael@0: return FALSE; michael@0: } michael@0: michael@0: UBool michael@0: NFRuleSet::operator==(const NFRuleSet& rhs) const michael@0: { michael@0: if (rules.size() == rhs.rules.size() && michael@0: fIsFractionRuleSet == rhs.fIsFractionRuleSet && michael@0: name == rhs.name && michael@0: util_equalRules(negativeNumberRule, rhs.negativeNumberRule) && michael@0: util_equalRules(fractionRules[0], rhs.fractionRules[0]) && michael@0: util_equalRules(fractionRules[1], rhs.fractionRules[1]) && michael@0: util_equalRules(fractionRules[2], rhs.fractionRules[2])) { michael@0: michael@0: for (uint32_t i = 0; i < rules.size(); ++i) { michael@0: if (*rules[i] != *rhs.rules[i]) { michael@0: return FALSE; michael@0: } michael@0: } michael@0: return TRUE; michael@0: } michael@0: return FALSE; michael@0: } michael@0: michael@0: #define RECURSION_LIMIT 50 michael@0: michael@0: void michael@0: NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const michael@0: { michael@0: NFRule *rule = findNormalRule(number); michael@0: if (rule) { // else error, but can't report it michael@0: NFRuleSet* ncThis = (NFRuleSet*)this; michael@0: if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { michael@0: // stop recursion michael@0: ncThis->fRecursionCount = 0; michael@0: } else { michael@0: rule->doFormat(number, toAppendTo, pos); michael@0: ncThis->fRecursionCount--; michael@0: } michael@0: } michael@0: } michael@0: michael@0: void michael@0: NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const michael@0: { michael@0: NFRule *rule = findDoubleRule(number); michael@0: if (rule) { // else error, but can't report it michael@0: NFRuleSet* ncThis = (NFRuleSet*)this; michael@0: if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { michael@0: // stop recursion michael@0: ncThis->fRecursionCount = 0; michael@0: } else { michael@0: rule->doFormat(number, toAppendTo, pos); michael@0: ncThis->fRecursionCount--; michael@0: } michael@0: } michael@0: } michael@0: michael@0: NFRule* michael@0: NFRuleSet::findDoubleRule(double number) const michael@0: { michael@0: // if this is a fraction rule set, use findFractionRuleSetRule() michael@0: if (isFractionRuleSet()) { michael@0: return findFractionRuleSetRule(number); michael@0: } michael@0: michael@0: // if the number is negative, return the negative number rule michael@0: // (if there isn't a negative-number rule, we pretend it's a michael@0: // positive number) michael@0: if (number < 0) { michael@0: if (negativeNumberRule) { michael@0: return negativeNumberRule; michael@0: } else { michael@0: number = -number; michael@0: } michael@0: } michael@0: michael@0: // if the number isn't an integer, we use one of the fraction rules... michael@0: if (number != uprv_floor(number)) { michael@0: // if the number is between 0 and 1, return the proper michael@0: // fraction rule michael@0: if (number < 1 && fractionRules[1]) { michael@0: return fractionRules[1]; michael@0: } michael@0: // otherwise, return the improper fraction rule michael@0: else if (fractionRules[0]) { michael@0: return fractionRules[0]; michael@0: } michael@0: } michael@0: michael@0: // if there's a master rule, use it to format the number michael@0: if (fractionRules[2]) { michael@0: return fractionRules[2]; michael@0: } michael@0: michael@0: // and if we haven't yet returned a rule, use findNormalRule() michael@0: // to find the applicable rule michael@0: int64_t r = util64_fromDouble(number + 0.5); michael@0: return findNormalRule(r); michael@0: } michael@0: michael@0: NFRule * michael@0: NFRuleSet::findNormalRule(int64_t number) const michael@0: { michael@0: // if this is a fraction rule set, use findFractionRuleSetRule() michael@0: // to find the rule (we should only go into this clause if the michael@0: // value is 0) michael@0: if (fIsFractionRuleSet) { michael@0: return findFractionRuleSetRule((double)number); michael@0: } michael@0: michael@0: // if the number is negative, return the negative-number rule michael@0: // (if there isn't one, pretend the number is positive) michael@0: if (number < 0) { michael@0: if (negativeNumberRule) { michael@0: return negativeNumberRule; michael@0: } else { michael@0: number = -number; michael@0: } michael@0: } michael@0: michael@0: // we have to repeat the preceding two checks, even though we michael@0: // do them in findRule(), because the version of format() that michael@0: // takes a long bypasses findRule() and goes straight to this michael@0: // function. This function does skip the fraction rules since michael@0: // we know the value is an integer (it also skips the master michael@0: // rule, since it's considered a fraction rule. Skipping the michael@0: // master rule in this function is also how we avoid infinite michael@0: // recursion) michael@0: michael@0: // {dlf} unfortunately this fails if there are no rules except michael@0: // special rules. If there are no rules, use the master rule. michael@0: michael@0: // binary-search the rule list for the applicable rule michael@0: // (a rule is used for all values from its base value to michael@0: // the next rule's base value) michael@0: int32_t hi = rules.size(); michael@0: if (hi > 0) { michael@0: int32_t lo = 0; michael@0: michael@0: while (lo < hi) { michael@0: int32_t mid = (lo + hi) / 2; michael@0: if (rules[mid]->getBaseValue() == number) { michael@0: return rules[mid]; michael@0: } michael@0: else if (rules[mid]->getBaseValue() > number) { michael@0: hi = mid; michael@0: } michael@0: else { michael@0: lo = mid + 1; michael@0: } michael@0: } michael@0: if (hi == 0) { // bad rule set, minimum base > 0 michael@0: return NULL; // want to throw exception here michael@0: } michael@0: michael@0: NFRule *result = rules[hi - 1]; michael@0: michael@0: // use shouldRollBack() to see whether we need to invoke the michael@0: // rollback rule (see shouldRollBack()'s documentation for michael@0: // an explanation of the rollback rule). If we do, roll back michael@0: // one rule and return that one instead of the one we'd normally michael@0: // return michael@0: if (result->shouldRollBack((double)number)) { michael@0: if (hi == 1) { // bad rule set, no prior rule to rollback to from this base michael@0: return NULL; michael@0: } michael@0: result = rules[hi - 2]; michael@0: } michael@0: return result; michael@0: } michael@0: // else use the master rule michael@0: return fractionRules[2]; michael@0: } michael@0: michael@0: /** michael@0: * If this rule is a fraction rule set, this function is used by michael@0: * findRule() to select the most appropriate rule for formatting michael@0: * the number. Basically, the base value of each rule in the rule michael@0: * set is treated as the denominator of a fraction. Whichever michael@0: * denominator can produce the fraction closest in value to the michael@0: * number passed in is the result. If there's a tie, the earlier michael@0: * one in the list wins. (If there are two rules in a row with the michael@0: * same base value, the first one is used when the numerator of the michael@0: * fraction would be 1, and the second rule is used the rest of the michael@0: * time. michael@0: * @param number The number being formatted (which will always be michael@0: * a number between 0 and 1) michael@0: * @return The rule to use to format this number michael@0: */ michael@0: NFRule* michael@0: NFRuleSet::findFractionRuleSetRule(double number) const michael@0: { michael@0: // the obvious way to do this (multiply the value being formatted michael@0: // by each rule's base value until you get an integral result) michael@0: // doesn't work because of rounding error. This method is more michael@0: // accurate michael@0: michael@0: // find the least common multiple of the rules' base values michael@0: // and multiply this by the number being formatted. This is michael@0: // all the precision we need, and we can do all of the rest michael@0: // of the math using integer arithmetic michael@0: int64_t leastCommonMultiple = rules[0]->getBaseValue(); michael@0: int64_t numerator; michael@0: { michael@0: for (uint32_t i = 1; i < rules.size(); ++i) { michael@0: leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); michael@0: } michael@0: numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); michael@0: } michael@0: // for each rule, do the following... michael@0: int64_t tempDifference; michael@0: int64_t difference = util64_fromDouble(uprv_maxMantissa()); michael@0: int32_t winner = 0; michael@0: for (uint32_t i = 0; i < rules.size(); ++i) { michael@0: // "numerator" is the numerator of the fraction if the michael@0: // denominator is the LCD. The numerator if the rule's michael@0: // base value is the denominator is "numerator" times the michael@0: // base value divided bythe LCD. Here we check to see if michael@0: // that's an integer, and if not, how close it is to being michael@0: // an integer. michael@0: tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; michael@0: michael@0: michael@0: // normalize the result of the above calculation: we want michael@0: // the numerator's distance from the CLOSEST multiple michael@0: // of the LCD michael@0: if (leastCommonMultiple - tempDifference < tempDifference) { michael@0: tempDifference = leastCommonMultiple - tempDifference; michael@0: } michael@0: michael@0: // if this is as close as we've come, keep track of how close michael@0: // that is, and the line number of the rule that did it. If michael@0: // we've scored a direct hit, we don't have to look at any more michael@0: // rules michael@0: if (tempDifference < difference) { michael@0: difference = tempDifference; michael@0: winner = i; michael@0: if (difference == 0) { michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: // if we have two successive rules that both have the winning base michael@0: // value, then the first one (the one we found above) is used if michael@0: // the numerator of the fraction is 1 and the second one is used if michael@0: // the numerator of the fraction is anything else (this lets us michael@0: // do things like "one third"/"two thirds" without haveing to define michael@0: // a whole bunch of extra rule sets) michael@0: if ((unsigned)(winner + 1) < rules.size() && michael@0: rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { michael@0: double n = ((double)rules[winner]->getBaseValue()) * number; michael@0: if (n < 0.5 || n >= 2) { michael@0: ++winner; michael@0: } michael@0: } michael@0: michael@0: // finally, return the winning rule michael@0: return rules[winner]; michael@0: } michael@0: michael@0: /** michael@0: * Parses a string. Matches the string to be parsed against each michael@0: * of its rules (with a base value less than upperBound) and returns michael@0: * the value produced by the rule that matched the most charcters michael@0: * in the source string. michael@0: * @param text The string to parse michael@0: * @param parsePosition The initial position is ignored and assumed michael@0: * to be 0. On exit, this object has been updated to point to the michael@0: * first character position this rule set didn't consume. michael@0: * @param upperBound Limits the rules that can be allowed to match. michael@0: * Only rules whose base values are strictly less than upperBound michael@0: * are considered. michael@0: * @return The numerical result of parsing this string. This will michael@0: * be the matching rule's base value, composed appropriately with michael@0: * the results of matching any of its substitutions. The object michael@0: * will be an instance of Long if it's an integral value; otherwise, michael@0: * it will be an instance of Double. This function always returns michael@0: * a valid object: If nothing matched the input string at all, michael@0: * this function returns new Long(0), and the parse position is michael@0: * left unchanged. michael@0: */ michael@0: #ifdef RBNF_DEBUG michael@0: #include michael@0: michael@0: static void dumpUS(FILE* f, const UnicodeString& us) { michael@0: int len = us.length(); michael@0: char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; michael@0: if (buf != NULL) { michael@0: us.extract(0, len, buf); michael@0: buf[len] = 0; michael@0: fprintf(f, "%s", buf); michael@0: uprv_free(buf); //delete[] buf; michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: UBool michael@0: NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const michael@0: { michael@0: // try matching each rule in the rule set against the text being michael@0: // parsed. Whichever one matches the most characters is the one michael@0: // that determines the value we return. michael@0: michael@0: result.setLong(0); michael@0: michael@0: // dump out if there's no text to parse michael@0: if (text.length() == 0) { michael@0: return 0; michael@0: } michael@0: michael@0: ParsePosition highWaterMark; michael@0: ParsePosition workingPos = pos; michael@0: michael@0: #ifdef RBNF_DEBUG michael@0: fprintf(stderr, " %x '", this); michael@0: dumpUS(stderr, name); michael@0: fprintf(stderr, "' text '"); michael@0: dumpUS(stderr, text); michael@0: fprintf(stderr, "'\n"); michael@0: fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); michael@0: #endif michael@0: michael@0: // start by trying the negative number rule (if there is one) michael@0: if (negativeNumberRule) { michael@0: Formattable tempResult; michael@0: #ifdef RBNF_DEBUG michael@0: fprintf(stderr, " %x ub: %g\n", negativeNumberRule, upperBound); michael@0: #endif michael@0: UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult); michael@0: #ifdef RBNF_DEBUG michael@0: fprintf(stderr, " success: %d wpi: %d\n", success, workingPos.getIndex()); michael@0: #endif michael@0: if (success && workingPos.getIndex() > highWaterMark.getIndex()) { michael@0: result = tempResult; michael@0: highWaterMark = workingPos; michael@0: } michael@0: workingPos = pos; michael@0: } michael@0: #ifdef RBNF_DEBUG michael@0: fprintf(stderr, " continue fractional with text '"); michael@0: dumpUS(stderr, text); michael@0: fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); michael@0: #endif michael@0: // then try each of the fraction rules michael@0: { michael@0: for (int i = 0; i < 3; i++) { michael@0: if (fractionRules[i]) { michael@0: Formattable tempResult; michael@0: UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); michael@0: if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { michael@0: result = tempResult; michael@0: highWaterMark = workingPos; michael@0: } michael@0: workingPos = pos; michael@0: } michael@0: } michael@0: } michael@0: #ifdef RBNF_DEBUG michael@0: fprintf(stderr, " continue other with text '"); michael@0: dumpUS(stderr, text); michael@0: fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); michael@0: #endif michael@0: michael@0: // finally, go through the regular rules one at a time. We start michael@0: // at the end of the list because we want to try matching the most michael@0: // sigificant rule first (this helps ensure that we parse michael@0: // "five thousand three hundred six" as michael@0: // "(five thousand) (three hundred) (six)" rather than michael@0: // "((five thousand three) hundred) (six)"). Skip rules whose michael@0: // base values are higher than the upper bound (again, this helps michael@0: // limit ambiguity by making sure the rules that match a rule's michael@0: // are less significant than the rule containing the substitutions)/ michael@0: { michael@0: int64_t ub = util64_fromDouble(upperBound); michael@0: #ifdef RBNF_DEBUG michael@0: { michael@0: char ubstr[64]; michael@0: util64_toa(ub, ubstr, 64); michael@0: char ubstrhex[64]; michael@0: util64_toa(ub, ubstrhex, 64, 16); michael@0: fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); michael@0: } michael@0: #endif michael@0: for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { michael@0: if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { michael@0: continue; michael@0: } michael@0: Formattable tempResult; michael@0: UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); michael@0: if (success && workingPos.getIndex() > highWaterMark.getIndex()) { michael@0: result = tempResult; michael@0: highWaterMark = workingPos; michael@0: } michael@0: workingPos = pos; michael@0: } michael@0: } michael@0: #ifdef RBNF_DEBUG michael@0: fprintf(stderr, " exit\n"); michael@0: #endif michael@0: // finally, update the parse postion we were passed to point to the michael@0: // first character we didn't use, and return the result that michael@0: // corresponds to that string of characters michael@0: pos = highWaterMark; michael@0: michael@0: return 1; michael@0: } michael@0: michael@0: void michael@0: NFRuleSet::appendRules(UnicodeString& result) const michael@0: { michael@0: // the rule set name goes first... michael@0: result.append(name); michael@0: result.append(gColon); michael@0: result.append(gLineFeed); michael@0: michael@0: // followed by the regular rules... michael@0: for (uint32_t i = 0; i < rules.size(); i++) { michael@0: result.append(gFourSpaces, 4); michael@0: rules[i]->_appendRuleText(result); michael@0: result.append(gLineFeed); michael@0: } michael@0: michael@0: // followed by the special rules (if they exist) michael@0: if (negativeNumberRule) { michael@0: result.append(gFourSpaces, 4); michael@0: negativeNumberRule->_appendRuleText(result); michael@0: result.append(gLineFeed); michael@0: } michael@0: michael@0: { michael@0: for (uint32_t i = 0; i < 3; ++i) { michael@0: if (fractionRules[i]) { michael@0: result.append(gFourSpaces, 4); michael@0: fractionRules[i]->_appendRuleText(result); michael@0: result.append(gLineFeed); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: // utility functions michael@0: michael@0: int64_t util64_fromDouble(double d) { michael@0: int64_t result = 0; michael@0: if (!uprv_isNaN(d)) { michael@0: double mant = uprv_maxMantissa(); michael@0: if (d < -mant) { michael@0: d = -mant; michael@0: } else if (d > mant) { michael@0: d = mant; michael@0: } michael@0: UBool neg = d < 0; michael@0: if (neg) { michael@0: d = -d; michael@0: } michael@0: result = (int64_t)uprv_floor(d); michael@0: if (neg) { michael@0: result = -result; michael@0: } michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: int64_t util64_pow(int32_t r, uint32_t e) { michael@0: if (r == 0) { michael@0: return 0; michael@0: } else if (e == 0) { michael@0: return 1; michael@0: } else { michael@0: int64_t n = r; michael@0: while (--e > 0) { michael@0: n *= r; michael@0: } michael@0: return n; michael@0: } michael@0: } michael@0: michael@0: static const uint8_t asciiDigits[] = { michael@0: 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, michael@0: 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, michael@0: 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, michael@0: 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, michael@0: 0x77u, 0x78u, 0x79u, 0x7au, michael@0: }; michael@0: michael@0: static const UChar kUMinus = (UChar)0x002d; michael@0: michael@0: #ifdef RBNF_DEBUG michael@0: static const char kMinus = '-'; michael@0: michael@0: static const uint8_t digitInfo[] = { michael@0: 0, 0, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, michael@0: 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, michael@0: 0x88u, 0x89u, 0, 0, 0, 0, 0, 0, michael@0: 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, michael@0: 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, michael@0: 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, michael@0: 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, michael@0: 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, michael@0: 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, michael@0: 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, michael@0: 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, michael@0: }; michael@0: michael@0: int64_t util64_atoi(const char* str, uint32_t radix) michael@0: { michael@0: if (radix > 36) { michael@0: radix = 36; michael@0: } else if (radix < 2) { michael@0: radix = 2; michael@0: } michael@0: int64_t lradix = radix; michael@0: michael@0: int neg = 0; michael@0: if (*str == kMinus) { michael@0: ++str; michael@0: neg = 1; michael@0: } michael@0: int64_t result = 0; michael@0: uint8_t b; michael@0: while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { michael@0: result *= lradix; michael@0: result += (int32_t)b; michael@0: } michael@0: if (neg) { michael@0: result = -result; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: int64_t util64_utoi(const UChar* str, uint32_t radix) michael@0: { michael@0: if (radix > 36) { michael@0: radix = 36; michael@0: } else if (radix < 2) { michael@0: radix = 2; michael@0: } michael@0: int64_t lradix = radix; michael@0: michael@0: int neg = 0; michael@0: if (*str == kUMinus) { michael@0: ++str; michael@0: neg = 1; michael@0: } michael@0: int64_t result = 0; michael@0: UChar c; michael@0: uint8_t b; michael@0: while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { michael@0: result *= lradix; michael@0: result += (int32_t)b; michael@0: } michael@0: if (neg) { michael@0: result = -result; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) michael@0: { michael@0: if (radix > 36) { michael@0: radix = 36; michael@0: } else if (radix < 2) { michael@0: radix = 2; michael@0: } michael@0: int64_t base = radix; michael@0: michael@0: char* p = buf; michael@0: if (len && (w < 0) && (radix == 10) && !raw) { michael@0: w = -w; michael@0: *p++ = kMinus; michael@0: --len; michael@0: } else if (len && (w == 0)) { michael@0: *p++ = (char)raw ? 0 : asciiDigits[0]; michael@0: --len; michael@0: } michael@0: michael@0: while (len && w != 0) { michael@0: int64_t n = w / base; michael@0: int64_t m = n * base; michael@0: int32_t d = (int32_t)(w-m); michael@0: *p++ = raw ? (char)d : asciiDigits[d]; michael@0: w = n; michael@0: --len; michael@0: } michael@0: if (len) { michael@0: *p = 0; // null terminate if room for caller convenience michael@0: } michael@0: michael@0: len = p - buf; michael@0: if (*buf == kMinus) { michael@0: ++buf; michael@0: } michael@0: while (--p > buf) { michael@0: char c = *p; michael@0: *p = *buf; michael@0: *buf = c; michael@0: ++buf; michael@0: } michael@0: michael@0: return len; michael@0: } michael@0: #endif michael@0: michael@0: uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) michael@0: { michael@0: if (radix > 36) { michael@0: radix = 36; michael@0: } else if (radix < 2) { michael@0: radix = 2; michael@0: } michael@0: int64_t base = radix; michael@0: michael@0: UChar* p = buf; michael@0: if (len && (w < 0) && (radix == 10) && !raw) { michael@0: w = -w; michael@0: *p++ = kUMinus; michael@0: --len; michael@0: } else if (len && (w == 0)) { michael@0: *p++ = (UChar)raw ? 0 : asciiDigits[0]; michael@0: --len; michael@0: } michael@0: michael@0: while (len && (w != 0)) { michael@0: int64_t n = w / base; michael@0: int64_t m = n * base; michael@0: int32_t d = (int32_t)(w-m); michael@0: *p++ = (UChar)(raw ? d : asciiDigits[d]); michael@0: w = n; michael@0: --len; michael@0: } michael@0: if (len) { michael@0: *p = 0; // null terminate if room for caller convenience michael@0: } michael@0: michael@0: len = (uint32_t)(p - buf); michael@0: if (*buf == kUMinus) { michael@0: ++buf; michael@0: } michael@0: while (--p > buf) { michael@0: UChar c = *p; michael@0: *p = *buf; michael@0: *buf = c; michael@0: ++buf; michael@0: } michael@0: michael@0: return len; michael@0: } michael@0: michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: /* U_HAVE_RBNF */ michael@0: #endif michael@0: