michael@0: /*
michael@0: ******************************************************************************
michael@0: *   Copyright (C) 1997-2012, International Business Machines
michael@0: *   Corporation and others.  All Rights Reserved.
michael@0: ******************************************************************************
michael@0: *   file name:  nfrs.cpp
michael@0: *   encoding:   US-ASCII
michael@0: *   tab size:   8 (not used)
michael@0: *   indentation:4
michael@0: *
michael@0: * Modification history
michael@0: * Date        Name      Comments
michael@0: * 10/11/2001  Doug      Ported from ICU4J
michael@0: */
michael@0: 
michael@0: #include "nfrs.h"
michael@0: 
michael@0: #if U_HAVE_RBNF
michael@0: 
michael@0: #include "unicode/uchar.h"
michael@0: #include "nfrule.h"
michael@0: #include "nfrlist.h"
michael@0: #include "patternprops.h"
michael@0: 
michael@0: #ifdef RBNF_DEBUG
michael@0: #include "cmemory.h"
michael@0: #endif
michael@0: 
michael@0: U_NAMESPACE_BEGIN
michael@0: 
michael@0: #if 0
/**
 * Calculates the least common multiple of x and y using Euclid's
 * algorithm for the greatest common divisor.  This variant is compiled
 * out by the enclosing "#if 0"; the binary-gcd version in the #else
 * branch is the one that is actually built.
 *
 * Historical note: doubles only get us up to one quadrillion or so,
 * which isn't as much range as we get with 64-bit integers, so all of
 * the math here is done on int64_t.
 *
 * The signs of the arguments are ignored.
 *
 * @param x First value.
 * @param y Second value.
 * @return The least common multiple of |x| and |y|, or 0 if either
 * argument is 0.
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // int64_t is a plain integer type, so take absolute values by hand
    if (x < 0) {
        x = -x;
    }
    if (y < 0) {
        y = -y;
    }

    if (x == 0 || y == 0) {
        return 0;
    }

    // Euclid: keep replacing (a, b) with (b, a mod b) until b reaches 0;
    // at that point a holds gcd(x, y)
    int64_t a = x;
    int64_t b = y;
    while (b != 0) {
        int64_t r = a % b;
        a = b;
        b = r;
    }

    // x * y == gcd(x, y) * lcm(x, y); divide before multiplying to keep
    // the intermediate value small
    return x / a * y;
}
michael@0: 
michael@0: #else
/**
 * Calculates the least common multiple of x and y.
 *
 * NOTE: negative arguments get no special handling here; the visible
 * caller passes rule base values.
 *
 * @param x First value.
 * @param y Second value.
 * @return The least common multiple of x and y, or 0 if either
 * argument is 0.
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // The lcm of 0 and anything is 0.  This has to be handled up front:
    // the power-of-two stripping loop below never terminates when both
    // values are 0, and a gcd of 0 would make the final division fault.
    if (x == 0 || y == 0) {
        return 0;
    }

    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
    // vol. 2, 1st ed., pp. 298-299
    int64_t x1 = x;
    int64_t y1 = y;

    // strip (and count) the factors of two common to both values
    int p2 = 0;
    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
        ++p2;
        x1 >>= 1;
        y1 >>= 1;
    }

    // t is the value currently being reduced; a negative t stands for
    // a candidate replacement for y1, a positive t for x1
    int64_t t;
    if ((x1 & 1) == 1) {
        t = -y1;
    } else {
        t = x1;
    }

    while (t != 0) {
        // remaining factors of two are not part of the gcd
        while ((t & 1) == 0) {
            t = t >> 1;
        }
        if (t > 0) {
            x1 = t;
        } else {
            y1 = -t;
        }
        t = x1 - y1;
    }

    // put the common factors of two back
    int64_t gcd = x1 << p2;

    // x * y == gcd(x, y) * lcm(x, y)
    return x / gcd * y;
}
michael@0: #endif
michael@0: 
michael@0: static const UChar gPercent = 0x0025;
michael@0: static const UChar gColon = 0x003a;
michael@0: static const UChar gSemicolon = 0x003b;
michael@0: static const UChar gLineFeed = 0x000a;
michael@0: 
michael@0: static const UChar gFourSpaces[] =
michael@0: {
michael@0:     0x20, 0x20, 0x20, 0x20, 0
michael@0: }; /* "    " */
michael@0: static const UChar gPercentPercent[] =
michael@0: {
michael@0:     0x25, 0x25, 0
michael@0: }; /* "%%" */
michael@0: 
michael@0: static const UChar gNoparse[] =
michael@0: {
michael@0:     0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
michael@0: }; /* "@noparse" */
michael@0: 
michael@0: NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
michael@0:   : name()
michael@0:   , rules(0)
michael@0:   , negativeNumberRule(NULL)
michael@0:   , fIsFractionRuleSet(FALSE)
michael@0:   , fIsPublic(FALSE)
michael@0:   , fIsParseable(TRUE)
michael@0:   , fRecursionCount(0)
michael@0: {
michael@0:     for (int i = 0; i < 3; ++i) {
michael@0:         fractionRules[i] = NULL;
michael@0:     }
michael@0: 
michael@0:     if (U_FAILURE(status)) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
michael@0: 
michael@0:     if (description.length() == 0) {
michael@0:         // throw new IllegalArgumentException("Empty rule set description");
michael@0:         status = U_PARSE_ERROR;
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     // if the description begins with a rule set name (the rule set
michael@0:     // name can be omitted in formatter descriptions that consist
michael@0:     // of only one rule set), copy it out into our "name" member
michael@0:     // and delete it from the description
michael@0:     if (description.charAt(0) == gPercent) {
michael@0:         int32_t pos = description.indexOf(gColon);
michael@0:         if (pos == -1) {
michael@0:             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
michael@0:             status = U_PARSE_ERROR;
michael@0:         } else {
michael@0:             name.setTo(description, 0, pos);
michael@0:             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
michael@0:             }
michael@0:             description.remove(0, pos);
michael@0:         }
michael@0:     } else {
michael@0:         name.setTo(UNICODE_STRING_SIMPLE("%default"));
michael@0:     }
michael@0: 
michael@0:     if (description.length() == 0) {
michael@0:         // throw new IllegalArgumentException("Empty rule set description");
michael@0:         status = U_PARSE_ERROR;
michael@0:     }
michael@0: 
michael@0:     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
michael@0: 
michael@0:     if ( name.endsWith(gNoparse,8) ) {
michael@0:         fIsParseable = FALSE;
michael@0:         name.truncate(name.length()-8); // remove the @noparse from the name
michael@0:     }
michael@0: 
michael@0:     // all of the other members of NFRuleSet are initialized
michael@0:     // by parseRules()
michael@0: }
michael@0: 
michael@0: void
michael@0: NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
michael@0: {
michael@0:     // start by creating a Vector whose elements are Strings containing
michael@0:     // the descriptions of the rules (one rule per element).  The rules
michael@0:     // are separated by semicolons (there's no escape facility: ALL
michael@0:     // semicolons are rule delimiters)
michael@0: 
michael@0:     if (U_FAILURE(status)) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     // ensure we are starting with an empty rule list
michael@0:     rules.deleteAll();
michael@0: 
michael@0:     // dlf - the original code kept a separate description array for no reason,
michael@0:     // so I got rid of it.  The loop was too complex so I simplified it.
michael@0: 
michael@0:     UnicodeString currentDescription;
michael@0:     int32_t oldP = 0;
michael@0:     while (oldP < description.length()) {
michael@0:         int32_t p = description.indexOf(gSemicolon, oldP);
michael@0:         if (p == -1) {
michael@0:             p = description.length();
michael@0:         }
michael@0:         currentDescription.setTo(description, oldP, p - oldP);
michael@0:         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
michael@0:         oldP = p + 1;
michael@0:     }
michael@0: 
michael@0:     // for rules that didn't specify a base value, their base values
michael@0:     // were initialized to 0.  Make another pass through the list and
michael@0:     // set all those rules' base values.  We also remove any special
michael@0:     // rules from the list and put them into their own member variables
michael@0:     int64_t defaultBaseValue = 0;
michael@0: 
michael@0:     // (this isn't a for loop because we might be deleting items from
michael@0:     // the vector-- we want to make sure we only increment i when
michael@0:     // we _didn't_ delete aything from the vector)
michael@0:     uint32_t i = 0;
michael@0:     while (i < rules.size()) {
michael@0:         NFRule* rule = rules[i];
michael@0: 
michael@0:         switch (rule->getType()) {
michael@0:             // if the rule's base value is 0, fill in a default
michael@0:             // base value (this will be 1 plus the preceding
michael@0:             // rule's base value for regular rule sets, and the
michael@0:             // same as the preceding rule's base value in fraction
michael@0:             // rule sets)
michael@0:         case NFRule::kNoBase:
michael@0:             rule->setBaseValue(defaultBaseValue, status);
michael@0:             if (!isFractionRuleSet()) {
michael@0:                 ++defaultBaseValue;
michael@0:             }
michael@0:             ++i;
michael@0:             break;
michael@0: 
michael@0:             // if it's the negative-number rule, copy it into its own
michael@0:             // data member and delete it from the list
michael@0:         case NFRule::kNegativeNumberRule:
michael@0:             if (negativeNumberRule) {
michael@0:                 delete negativeNumberRule;
michael@0:             }
michael@0:             negativeNumberRule = rules.remove(i);
michael@0:             break;
michael@0: 
michael@0:             // if it's the improper fraction rule, copy it into the
michael@0:             // correct element of fractionRules
michael@0:         case NFRule::kImproperFractionRule:
michael@0:             if (fractionRules[0]) {
michael@0:                 delete fractionRules[0];
michael@0:             }
michael@0:             fractionRules[0] = rules.remove(i);
michael@0:             break;
michael@0: 
michael@0:             // if it's the proper fraction rule, copy it into the
michael@0:             // correct element of fractionRules
michael@0:         case NFRule::kProperFractionRule:
michael@0:             if (fractionRules[1]) {
michael@0:                 delete fractionRules[1];
michael@0:             }
michael@0:             fractionRules[1] = rules.remove(i);
michael@0:             break;
michael@0: 
michael@0:             // if it's the master rule, copy it into the
michael@0:             // correct element of fractionRules
michael@0:         case NFRule::kMasterRule:
michael@0:             if (fractionRules[2]) {
michael@0:                 delete fractionRules[2];
michael@0:             }
michael@0:             fractionRules[2] = rules.remove(i);
michael@0:             break;
michael@0: 
michael@0:             // if it's a regular rule that already knows its base value,
michael@0:             // check to make sure the rules are in order, and update
michael@0:             // the default base value for the next rule
michael@0:         default:
michael@0:             if (rule->getBaseValue() < defaultBaseValue) {
michael@0:                 // throw new IllegalArgumentException("Rules are not in order");
michael@0:                 status = U_PARSE_ERROR;
michael@0:                 return;
michael@0:             }
michael@0:             defaultBaseValue = rule->getBaseValue();
michael@0:             if (!isFractionRuleSet()) {
michael@0:                 ++defaultBaseValue;
michael@0:             }
michael@0:             ++i;
michael@0:             break;
michael@0:         }
michael@0:     }
michael@0: }
michael@0: 
michael@0: NFRuleSet::~NFRuleSet()
michael@0: {
michael@0:     delete negativeNumberRule;
michael@0:     delete fractionRules[0];
michael@0:     delete fractionRules[1];
michael@0:     delete fractionRules[2];
michael@0: }
michael@0: 
michael@0: static UBool
michael@0: util_equalRules(const NFRule* rule1, const NFRule* rule2)
michael@0: {
michael@0:     if (rule1) {
michael@0:         if (rule2) {
michael@0:             return *rule1 == *rule2;
michael@0:         }
michael@0:     } else if (!rule2) {
michael@0:         return TRUE;
michael@0:     }
michael@0:     return FALSE;
michael@0: }
michael@0: 
michael@0: UBool
michael@0: NFRuleSet::operator==(const NFRuleSet& rhs) const
michael@0: {
michael@0:     if (rules.size() == rhs.rules.size() &&
michael@0:         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
michael@0:         name == rhs.name &&
michael@0:         util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
michael@0:         util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
michael@0:         util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
michael@0:         util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
michael@0: 
michael@0:         for (uint32_t i = 0; i < rules.size(); ++i) {
michael@0:             if (*rules[i] != *rhs.rules[i]) {
michael@0:                 return FALSE;
michael@0:             }
michael@0:         }
michael@0:         return TRUE;
michael@0:     }
michael@0:     return FALSE;
michael@0: }
michael@0: 
michael@0: #define RECURSION_LIMIT 50
michael@0: 
michael@0: void
michael@0: NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const
michael@0: {
michael@0:     NFRule *rule = findNormalRule(number);
michael@0:     if (rule) { // else error, but can't report it
michael@0:         NFRuleSet* ncThis = (NFRuleSet*)this;
michael@0:         if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
michael@0:             // stop recursion
michael@0:             ncThis->fRecursionCount = 0;
michael@0:         } else {
michael@0:             rule->doFormat(number, toAppendTo, pos);
michael@0:             ncThis->fRecursionCount--;
michael@0:         }
michael@0:     }
michael@0: }
michael@0: 
michael@0: void
michael@0: NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
michael@0: {
michael@0:     NFRule *rule = findDoubleRule(number);
michael@0:     if (rule) { // else error, but can't report it
michael@0:         NFRuleSet* ncThis = (NFRuleSet*)this;
michael@0:         if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
michael@0:             // stop recursion
michael@0:             ncThis->fRecursionCount = 0;
michael@0:         } else {
michael@0:             rule->doFormat(number, toAppendTo, pos);
michael@0:             ncThis->fRecursionCount--;
michael@0:         }
michael@0:     }
michael@0: }
michael@0: 
michael@0: NFRule*
michael@0: NFRuleSet::findDoubleRule(double number) const
michael@0: {
michael@0:     // if this is a fraction rule set, use findFractionRuleSetRule()
michael@0:     if (isFractionRuleSet()) {
michael@0:         return findFractionRuleSetRule(number);
michael@0:     }
michael@0: 
michael@0:     // if the number is negative, return the negative number rule
michael@0:     // (if there isn't a negative-number rule, we pretend it's a
michael@0:     // positive number)
michael@0:     if (number < 0) {
michael@0:         if (negativeNumberRule) {
michael@0:             return  negativeNumberRule;
michael@0:         } else {
michael@0:             number = -number;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     // if the number isn't an integer, we use one of the fraction rules...
michael@0:     if (number != uprv_floor(number)) {
michael@0:         // if the number is between 0 and 1, return the proper
michael@0:         // fraction rule
michael@0:         if (number < 1 && fractionRules[1]) {
michael@0:             return fractionRules[1];
michael@0:         }
michael@0:         // otherwise, return the improper fraction rule
michael@0:         else if (fractionRules[0]) {
michael@0:             return fractionRules[0];
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     // if there's a master rule, use it to format the number
michael@0:     if (fractionRules[2]) {
michael@0:         return fractionRules[2];
michael@0:     }
michael@0: 
michael@0:     // and if we haven't yet returned a rule, use findNormalRule()
michael@0:     // to find the applicable rule
michael@0:     int64_t r = util64_fromDouble(number + 0.5);
michael@0:     return findNormalRule(r);
michael@0: }
michael@0: 
michael@0: NFRule *
michael@0: NFRuleSet::findNormalRule(int64_t number) const
michael@0: {
michael@0:     // if this is a fraction rule set, use findFractionRuleSetRule()
michael@0:     // to find the rule (we should only go into this clause if the
michael@0:     // value is 0)
michael@0:     if (fIsFractionRuleSet) {
michael@0:         return findFractionRuleSetRule((double)number);
michael@0:     }
michael@0: 
michael@0:     // if the number is negative, return the negative-number rule
michael@0:     // (if there isn't one, pretend the number is positive)
michael@0:     if (number < 0) {
michael@0:         if (negativeNumberRule) {
michael@0:             return negativeNumberRule;
michael@0:         } else {
michael@0:             number = -number;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     // we have to repeat the preceding two checks, even though we
michael@0:     // do them in findRule(), because the version of format() that
michael@0:     // takes a long bypasses findRule() and goes straight to this
michael@0:     // function.  This function does skip the fraction rules since
michael@0:     // we know the value is an integer (it also skips the master
michael@0:     // rule, since it's considered a fraction rule.  Skipping the
michael@0:     // master rule in this function is also how we avoid infinite
michael@0:     // recursion)
michael@0: 
michael@0:     // {dlf} unfortunately this fails if there are no rules except
michael@0:     // special rules.  If there are no rules, use the master rule.
michael@0: 
michael@0:     // binary-search the rule list for the applicable rule
michael@0:     // (a rule is used for all values from its base value to
michael@0:     // the next rule's base value)
michael@0:     int32_t hi = rules.size();
michael@0:     if (hi > 0) {
michael@0:         int32_t lo = 0;
michael@0: 
michael@0:         while (lo < hi) {
michael@0:             int32_t mid = (lo + hi) / 2;
michael@0:             if (rules[mid]->getBaseValue() == number) {
michael@0:                 return rules[mid];
michael@0:             }
michael@0:             else if (rules[mid]->getBaseValue() > number) {
michael@0:                 hi = mid;
michael@0:             }
michael@0:             else {
michael@0:                 lo = mid + 1;
michael@0:             }
michael@0:         }
michael@0:         if (hi == 0) { // bad rule set, minimum base > 0
michael@0:             return NULL; // want to throw exception here
michael@0:         }
michael@0: 
michael@0:         NFRule *result = rules[hi - 1];
michael@0: 
michael@0:         // use shouldRollBack() to see whether we need to invoke the
michael@0:         // rollback rule (see shouldRollBack()'s documentation for
michael@0:         // an explanation of the rollback rule).  If we do, roll back
michael@0:         // one rule and return that one instead of the one we'd normally
michael@0:         // return
michael@0:         if (result->shouldRollBack((double)number)) {
michael@0:             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
michael@0:                 return NULL;
michael@0:             }
michael@0:             result = rules[hi - 2];
michael@0:         }
michael@0:         return result;
michael@0:     }
michael@0:     // else use the master rule
michael@0:     return fractionRules[2];
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * If this rule is a fraction rule set, this function is used by
michael@0:  * findRule() to select the most appropriate rule for formatting
michael@0:  * the number.  Basically, the base value of each rule in the rule
michael@0:  * set is treated as the denominator of a fraction.  Whichever
michael@0:  * denominator can produce the fraction closest in value to the
michael@0:  * number passed in is the result.  If there's a tie, the earlier
michael@0:  * one in the list wins.  (If there are two rules in a row with the
michael@0:  * same base value, the first one is used when the numerator of the
michael@0:  * fraction would be 1, and the second rule is used the rest of the
michael@0:  * time.
michael@0:  * @param number The number being formatted (which will always be
michael@0:  * a number between 0 and 1)
michael@0:  * @return The rule to use to format this number
michael@0:  */
michael@0: NFRule*
michael@0: NFRuleSet::findFractionRuleSetRule(double number) const
michael@0: {
michael@0:     // the obvious way to do this (multiply the value being formatted
michael@0:     // by each rule's base value until you get an integral result)
michael@0:     // doesn't work because of rounding error.  This method is more
michael@0:     // accurate
michael@0: 
michael@0:     // find the least common multiple of the rules' base values
michael@0:     // and multiply this by the number being formatted.  This is
michael@0:     // all the precision we need, and we can do all of the rest
michael@0:     // of the math using integer arithmetic
michael@0:     int64_t leastCommonMultiple = rules[0]->getBaseValue();
michael@0:     int64_t numerator;
michael@0:     {
michael@0:         for (uint32_t i = 1; i < rules.size(); ++i) {
michael@0:             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
michael@0:         }
michael@0:         numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
michael@0:     }
michael@0:     // for each rule, do the following...
michael@0:     int64_t tempDifference;
michael@0:     int64_t difference = util64_fromDouble(uprv_maxMantissa());
michael@0:     int32_t winner = 0;
michael@0:     for (uint32_t i = 0; i < rules.size(); ++i) {
michael@0:         // "numerator" is the numerator of the fraction if the
michael@0:         // denominator is the LCD.  The numerator if the rule's
michael@0:         // base value is the denominator is "numerator" times the
michael@0:         // base value divided bythe LCD.  Here we check to see if
michael@0:         // that's an integer, and if not, how close it is to being
michael@0:         // an integer.
michael@0:         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
michael@0: 
michael@0: 
michael@0:         // normalize the result of the above calculation: we want
michael@0:         // the numerator's distance from the CLOSEST multiple
michael@0:         // of the LCD
michael@0:         if (leastCommonMultiple - tempDifference < tempDifference) {
michael@0:             tempDifference = leastCommonMultiple - tempDifference;
michael@0:         }
michael@0: 
michael@0:         // if this is as close as we've come, keep track of how close
michael@0:         // that is, and the line number of the rule that did it.  If
michael@0:         // we've scored a direct hit, we don't have to look at any more
michael@0:         // rules
michael@0:         if (tempDifference < difference) {
michael@0:             difference = tempDifference;
michael@0:             winner = i;
michael@0:             if (difference == 0) {
michael@0:                 break;
michael@0:             }
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     // if we have two successive rules that both have the winning base
michael@0:     // value, then the first one (the one we found above) is used if
michael@0:     // the numerator of the fraction is 1 and the second one is used if
michael@0:     // the numerator of the fraction is anything else (this lets us
michael@0:     // do things like "one third"/"two thirds" without haveing to define
michael@0:     // a whole bunch of extra rule sets)
michael@0:     if ((unsigned)(winner + 1) < rules.size() &&
michael@0:         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
michael@0:         double n = ((double)rules[winner]->getBaseValue()) * number;
michael@0:         if (n < 0.5 || n >= 2) {
michael@0:             ++winner;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     // finally, return the winning rule
michael@0:     return rules[winner];
michael@0: }
michael@0: 
/**
 * Parses a string.  Matches the string to be parsed against each
 * of its rules (with a base value less than upperBound) and returns
 * the value produced by the rule that matched the most characters
 * in the source string.
 * @param text The string to parse
 * @param parsePosition The initial position is ignored and assumed
 * to be 0.  On exit, this object has been updated to point to the
 * first character position this rule set didn't consume.
 * @param upperBound Limits the rules that can be allowed to match.
 * Only rules whose base values are strictly less than upperBound
 * are considered.
 * @return The numerical result of parsing this string.  This will
 * be the matching rule's base value, composed appropriately with
 * the results of matching any of its substitutions.  The object
 * will be an instance of Long if it's an integral value; otherwise,
 * it will be an instance of Double.  This function always returns
 * a valid object: If nothing matched the input string at all,
 * this function returns new Long(0), and the parse position is
 * left unchanged.
 */
michael@0: #ifdef RBNF_DEBUG
michael@0: #include <stdio.h>
michael@0: 
michael@0: static void dumpUS(FILE* f, const UnicodeString& us) {
michael@0:   int len = us.length();
michael@0:   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
michael@0:   if (buf != NULL) {
michael@0: 	  us.extract(0, len, buf);
michael@0: 	  buf[len] = 0;
michael@0: 	  fprintf(f, "%s", buf);
michael@0: 	  uprv_free(buf); //delete[] buf;
michael@0:   }
michael@0: }
michael@0: #endif
michael@0: 
michael@0: UBool
michael@0: NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
michael@0: {
michael@0:     // try matching each rule in the rule set against the text being
michael@0:     // parsed.  Whichever one matches the most characters is the one
michael@0:     // that determines the value we return.
michael@0: 
michael@0:     result.setLong(0);
michael@0: 
michael@0:     // dump out if there's no text to parse
michael@0:     if (text.length() == 0) {
michael@0:         return 0;
michael@0:     }
michael@0: 
michael@0:     ParsePosition highWaterMark;
michael@0:     ParsePosition workingPos = pos;
michael@0: 
michael@0: #ifdef RBNF_DEBUG
michael@0:     fprintf(stderr, "<nfrs> %x '", this);
michael@0:     dumpUS(stderr, name);
michael@0:     fprintf(stderr, "' text '");
michael@0:     dumpUS(stderr, text);
michael@0:     fprintf(stderr, "'\n");
michael@0:     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
michael@0: #endif
michael@0: 
michael@0:     // start by trying the negative number rule (if there is one)
michael@0:     if (negativeNumberRule) {
michael@0:         Formattable tempResult;
michael@0: #ifdef RBNF_DEBUG
michael@0:         fprintf(stderr, "  <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
michael@0: #endif
michael@0:         UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
michael@0: #ifdef RBNF_DEBUG
michael@0:         fprintf(stderr, "  <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
michael@0: #endif
michael@0:         if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
michael@0:             result = tempResult;
michael@0:             highWaterMark = workingPos;
michael@0:         }
michael@0:         workingPos = pos;
michael@0:     }
michael@0: #ifdef RBNF_DEBUG
michael@0:     fprintf(stderr, "<nfrs> continue fractional with text '");
michael@0:     dumpUS(stderr, text);
michael@0:     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
michael@0: #endif
michael@0:     // then try each of the fraction rules
michael@0:     {
michael@0:         for (int i = 0; i < 3; i++) {
michael@0:             if (fractionRules[i]) {
michael@0:                 Formattable tempResult;
michael@0:                 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
michael@0:                 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
michael@0:                     result = tempResult;
michael@0:                     highWaterMark = workingPos;
michael@0:                 }
michael@0:                 workingPos = pos;
michael@0:             }
michael@0:         }
michael@0:     }
michael@0: #ifdef RBNF_DEBUG
michael@0:     fprintf(stderr, "<nfrs> continue other with text '");
michael@0:     dumpUS(stderr, text);
michael@0:     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
michael@0: #endif
michael@0: 
michael@0:     // finally, go through the regular rules one at a time.  We start
michael@0:     // at the end of the list because we want to try matching the most
michael@0:     // sigificant rule first (this helps ensure that we parse
michael@0:     // "five thousand three hundred six" as
michael@0:     // "(five thousand) (three hundred) (six)" rather than
michael@0:     // "((five thousand three) hundred) (six)").  Skip rules whose
michael@0:     // base values are higher than the upper bound (again, this helps
michael@0:     // limit ambiguity by making sure the rules that match a rule's
michael@0:     // are less significant than the rule containing the substitutions)/
michael@0:     {
michael@0:         int64_t ub = util64_fromDouble(upperBound);
michael@0: #ifdef RBNF_DEBUG
michael@0:         {
michael@0:             char ubstr[64];
michael@0:             util64_toa(ub, ubstr, 64);
michael@0:             char ubstrhex[64];
michael@0:             util64_toa(ub, ubstrhex, 64, 16);
michael@0:             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
michael@0:         }
michael@0: #endif
michael@0:         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
michael@0:             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
michael@0:                 continue;
michael@0:             }
michael@0:             Formattable tempResult;
michael@0:             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
michael@0:             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
michael@0:                 result = tempResult;
michael@0:                 highWaterMark = workingPos;
michael@0:             }
michael@0:             workingPos = pos;
michael@0:         }
michael@0:     }
michael@0: #ifdef RBNF_DEBUG
michael@0:     fprintf(stderr, "<nfrs> exit\n");
michael@0: #endif
michael@0:     // finally, update the parse postion we were passed to point to the
michael@0:     // first character we didn't use, and return the result that
michael@0:     // corresponds to that string of characters
michael@0:     pos = highWaterMark;
michael@0: 
michael@0:     return 1;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Appends a textual listing of this rule set to result.  The listing
michael@0:  * starts with the set's name followed by gColon and gLineFeed; after
michael@0:  * that every rule contributes one line made of gFourSpaces, the rule's
michael@0:  * own text and gLineFeed.  The regular rules are listed first, then the
michael@0:  * negative-number rule and the three fraction-rule slots, each only if
michael@0:  * it is set.
michael@0:  *
michael@0:  * @param result the string the listing is appended to
michael@0:  */
michael@0: void
michael@0: NFRuleSet::appendRules(UnicodeString& result) const
michael@0: {
michael@0:     // heading: the name of the rule set
michael@0:     result.append(name);
michael@0:     result.append(gColon);
michael@0:     result.append(gLineFeed);
michael@0: 
michael@0:     // body: the regular rules, in list order
michael@0:     const uint32_t ruleCount = rules.size();
michael@0:     uint32_t ruleIndex = 0;
michael@0:     while (ruleIndex < ruleCount) {
michael@0:         result.append(gFourSpaces, 4);
michael@0:         rules[ruleIndex]->_appendRuleText(result);
michael@0:         result.append(gLineFeed);
michael@0:         ++ruleIndex;
michael@0:     }
michael@0: 
michael@0:     // trailer: whichever special rules are present
michael@0:     if (negativeNumberRule) {
michael@0:         result.append(gFourSpaces, 4);
michael@0:         negativeNumberRule->_appendRuleText(result);
michael@0:         result.append(gLineFeed);
michael@0:     }
michael@0: 
michael@0:     for (uint32_t slot = 0; slot != 3; ++slot) {
michael@0:         if (!fractionRules[slot]) {
michael@0:             continue; // this fraction-rule slot is unused
michael@0:         }
michael@0:         result.append(gFourSpaces, 4);
michael@0:         fractionRules[slot]->_appendRuleText(result);
michael@0:         result.append(gLineFeed);
michael@0:     }
michael@0: }
michael@0: 
michael@0: // utility functions
michael@0: 
michael@0: /**
michael@0:  * Converts a double to an int64_t.
michael@0:  *
michael@0:  * NaN converts to 0.  Any other value is first clamped to the range
michael@0:  * [-uprv_maxMantissa(), uprv_maxMantissa()].  The sign is then removed
michael@0:  * before uprv_floor() is applied and restored afterwards, so the
michael@0:  * fractional part is dropped toward zero for negative inputs as well.
michael@0:  *
michael@0:  * @param d the value to convert
michael@0:  * @return the clamped value with its fraction removed, or 0 for NaN
michael@0:  */
michael@0: int64_t util64_fromDouble(double d) {
michael@0:     if (uprv_isNaN(d)) {
michael@0:         return 0;
michael@0:     }
michael@0: 
michael@0:     // keep the value inside the range reported by uprv_maxMantissa()
michael@0:     const double limit = uprv_maxMantissa();
michael@0:     if (d < -limit) {
michael@0:         d = -limit;
michael@0:     } else if (d > limit) {
michael@0:         d = limit;
michael@0:     }
michael@0: 
michael@0:     // work on the magnitude, then put the sign back
michael@0:     const UBool negative = d < 0;
michael@0:     const double magnitude = negative ? -d : d;
michael@0:     const int64_t whole = (int64_t)uprv_floor(magnitude);
michael@0:     return negative ? -whole : whole;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Raises r to the power e using 64-bit integer arithmetic.
michael@0:  *
michael@0:  * A base of 0 yields 0 for every exponent (including 0); any other base
michael@0:  * raised to the power 0 yields 1.
michael@0:  *
michael@0:  * The power is computed by repeated squaring, so the number of
michael@0:  * multiplications grows with the number of bits in e rather than with e
michael@0:  * itself (a base of 1 or -1 with a huge exponent no longer loops e times).
michael@0:  * The arithmetic is done on uint64_t so that a result too large for
michael@0:  * int64_t wraps modulo 2^64 instead of being signed-overflow undefined
michael@0:  * behavior; results that fit in int64_t are exact.
michael@0:  *
michael@0:  * @param r the base
michael@0:  * @param e the exponent
michael@0:  * @return r to the power e
michael@0:  */
michael@0: int64_t util64_pow(int32_t r, uint32_t e)  { 
michael@0:     if (r == 0) {
michael@0:         return 0;
michael@0:     }
michael@0: 
michael@0:     uint64_t result = 1;
michael@0:     // sign-extend first so that a negative base keeps its value modulo 2^64
michael@0:     uint64_t factor = (uint64_t)(int64_t)r;
michael@0:     while (e != 0) {
michael@0:         if ((e & 1) != 0) {
michael@0:             result *= factor;
michael@0:         }
michael@0:         e >>= 1;
michael@0:         if (e != 0) {
michael@0:             // only square while bits remain, so no power larger than
michael@0:             // the final result is ever formed
michael@0:             factor *= factor;
michael@0:         }
michael@0:     }
michael@0:     return (int64_t)result;
michael@0: }
michael@0: 
michael@0: static const uint8_t asciiDigits[] = { // character code for each digit value 0..35, indexed by digit value
michael@0:     0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, // values 0-7:   '0'-'7'
michael@0:     0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, // values 8-15:  '8', '9', 'a'-'f'
michael@0:     0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, // values 16-23: 'g'-'n'
michael@0:     0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, // values 24-31: 'o'-'v'
michael@0:     0x77u, 0x78u, 0x79u, 0x7au,  // values 32-35: 'w'-'z'
michael@0: }; // NOTE(review): presumably written as hex codes so the table stays ASCII on non-ASCII hosts -- confirm
michael@0: 
michael@0: static const UChar kUMinus = (UChar)0x002d; // U+002D HYPHEN-MINUS: sign written by util64_tou and recognized by util64_utoi
michael@0: 
michael@0: #ifdef RBNF_DEBUG
michael@0: static const char kMinus = '-'; // sign character recognized by util64_atoi and written by util64_toa
michael@0: 
michael@0: static const uint8_t digitInfo[] = { // indexed by character code 0x00-0x7f: 0 = not a digit, otherwise 0x80 | digit value
michael@0:         0,     0,     0,     0,     0,     0,     0,     0, // 0x00-0x07
michael@0:         0,     0,     0,     0,     0,     0,     0,     0, // 0x08-0x0f
michael@0:         0,     0,     0,     0,     0,     0,     0,     0, // 0x10-0x17
michael@0:         0,     0,     0,     0,     0,     0,     0,     0, // 0x18-0x1f
michael@0:         0,     0,     0,     0,     0,     0,     0,     0, // 0x20-0x27
michael@0:         0,     0,     0,     0,     0,     0,     0,     0, // 0x28-0x2f
michael@0:     0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, // 0x30-0x37: '0'-'7' -> 0-7
michael@0:     0x88u, 0x89u,     0,     0,     0,     0,     0,     0, // 0x38-0x3f: '8', '9' -> 8, 9
michael@0:         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, // 0x40-0x47: 'A'-'G' -> 10-16
michael@0:     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, // 0x48-0x4f: 'H'-'O' -> 17-24
michael@0:     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, // 0x50-0x57: 'P'-'W' -> 25-32
michael@0:     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0, // 0x58-0x5f: 'X'-'Z' -> 33-35
michael@0:         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, // 0x60-0x67: 'a'-'g' -> 10-16
michael@0:     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, // 0x68-0x6f: 'h'-'o' -> 17-24
michael@0:     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, // 0x70-0x77: 'p'-'w' -> 25-32
michael@0:     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0, // 0x78-0x7f: 'x'-'z' -> 33-35
michael@0: };
michael@0: 
michael@0: /**
michael@0:  * Parses a NUL-terminated char string as an integer in the given radix.
michael@0:  *
michael@0:  * An optional leading kMinus negates the result.  Digits are looked up
michael@0:  * in digitInfo, so letters are accepted in either case.  Parsing stops
michael@0:  * at the first character that is not a digit of the radix (the
michael@0:  * terminating NUL included).  Overflow is not detected.
michael@0:  *
michael@0:  * @param str   the text to parse
michael@0:  * @param radix the number base; values outside 2..36 are clamped to
michael@0:  *              that range
michael@0:  * @return the parsed value, or 0 if no digit was found
michael@0:  */
michael@0: int64_t util64_atoi(const char* str, uint32_t radix)
michael@0: {
michael@0:     if (radix > 36) {
michael@0:         radix = 36;
michael@0:     } else if (radix < 2) {
michael@0:         radix = 2;
michael@0:     }
michael@0:     int64_t lradix = radix;
michael@0: 
michael@0:     int neg = 0;
michael@0:     if (*str == kMinus) {
michael@0:         ++str;
michael@0:         neg = 1;
michael@0:     }
michael@0:     int64_t result = 0;
michael@0:     for (;;) {
michael@0:         // Read the character as an unsigned byte: plain char may be
michael@0:         // signed, and digitInfo only has entries for 0x00-0x7f, so a
michael@0:         // byte of 0x80 or above must end the number instead of being
michael@0:         // used as a table index.
michael@0:         const uint8_t ch = (uint8_t)*str++;
michael@0:         if (ch >= 0x80) {
michael@0:             break;
michael@0:         }
michael@0:         uint8_t b = digitInfo[ch];
michael@0:         if (b == 0) {
michael@0:             break; // not a digit in any radix
michael@0:         }
michael@0:         b &= 0x7f; // strip the "is a digit" flag, leaving the digit value
michael@0:         if (b >= radix) {
michael@0:             break; // a digit, but not of this radix
michael@0:         }
michael@0:         result *= lradix;
michael@0:         result += (int32_t)b;
michael@0:     }
michael@0:     if (neg) {
michael@0:         result = -result;
michael@0:     }
michael@0:     return result;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Parses a NUL-terminated UChar string as an integer in the given radix.
michael@0:  *
michael@0:  * An optional leading kUMinus negates the result.  Only code units
michael@0:  * below 0x0080 are looked up in digitInfo; parsing stops at the first
michael@0:  * code unit that is outside that range or is not a digit of the radix
michael@0:  * (the terminating NUL included).  Overflow is not detected.
michael@0:  *
michael@0:  * @param str   the text to parse
michael@0:  * @param radix the number base; values outside 2..36 are clamped to
michael@0:  *              that range
michael@0:  * @return the parsed value, or 0 if no digit was found
michael@0:  */
michael@0: int64_t util64_utoi(const UChar* str, uint32_t radix)
michael@0: {
michael@0:     if (radix < 2) {
michael@0:         radix = 2;
michael@0:     } else if (radix > 36) {
michael@0:         radix = 36;
michael@0:     }
michael@0:     const int64_t multiplier = radix;
michael@0: 
michael@0:     const UChar* cursor = str;
michael@0:     const int negative = (*cursor == kUMinus) ? 1 : 0;
michael@0:     if (negative) {
michael@0:         ++cursor;
michael@0:     }
michael@0: 
michael@0:     int64_t value = 0;
michael@0:     for (;;) {
michael@0:         const UChar unit = *cursor++;
michael@0:         if (unit >= 0x0080) {
michael@0:             break; // no digitInfo entry for this code unit
michael@0:         }
michael@0:         uint8_t info = digitInfo[unit];
michael@0:         if (info == 0) {
michael@0:             break; // not a digit in any radix
michael@0:         }
michael@0:         info &= 0x7f; // keep only the digit value
michael@0:         if (info >= radix) {
michael@0:             break; // a digit, but not of this radix
michael@0:         }
michael@0:         value *= multiplier;
michael@0:         value += (int32_t)info;
michael@0:     }
michael@0: 
michael@0:     return negative ? -value : value;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Writes w into buf as digits of the given radix, most significant
michael@0:  * digit first.
michael@0:  *
michael@0:  * Text mode (raw is false): digits are the characters of asciiDigits.
michael@0:  * A negative value in radix 10 is written as kMinus followed by its
michael@0:  * magnitude.  A negative value in any other radix is written as the
michael@0:  * digits of its unsigned 64-bit two's-complement bit pattern; the
michael@0:  * remainder is therefore never negative and always a valid index into
michael@0:  * asciiDigits.  Negating through unsigned arithmetic also makes the
michael@0:  * most negative int64_t value safe.
michael@0:  *
michael@0:  * Raw mode (raw is true): each output element holds the numeric digit
michael@0:  * value rather than a character, no sign is written, and the digits are
michael@0:  * produced with signed division (so a negative w yields negative digit
michael@0:  * values).
michael@0:  *
michael@0:  * At most len elements are written; if the digits do not fit, only the
michael@0:  * lowest-order ones that fit are kept.  A terminating 0 is stored after
michael@0:  * the digits only when there is room left for it.
michael@0:  *
michael@0:  * @param w     the value to convert
michael@0:  * @param buf   the output buffer
michael@0:  * @param len   the capacity of buf, in elements
michael@0:  * @param radix the number base; values outside 2..36 are clamped to
michael@0:  *              that range
michael@0:  * @param raw   if true, store digit values instead of digit characters
michael@0:  * @return the number of elements written, not counting the terminator
michael@0:  */
michael@0: uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
michael@0: {    
michael@0:     if (radix > 36) {
michael@0:         radix = 36;
michael@0:     } else if (radix < 2) {
michael@0:         radix = 2;
michael@0:     }
michael@0: 
michael@0:     char* p = buf;
michael@0:     UBool wroteSign = 0;
michael@0: 
michael@0:     // Digits are produced least significant first and reversed below.
michael@0:     if (raw) {
michael@0:         const int64_t base = radix;
michael@0:         if (len && (w == 0)) {
michael@0:             *p++ = 0;
michael@0:             --len;
michael@0:         }
michael@0:         while (len && (w != 0)) {
michael@0:             const int64_t n = w / base;
michael@0:             *p++ = (char)(int32_t)(w - n * base);
michael@0:             w = n;
michael@0:             --len;
michael@0:         }
michael@0:     } else {
michael@0:         const uint64_t base = radix;
michael@0:         uint64_t u = (uint64_t)w;
michael@0:         if (len && (w < 0) && (radix == 10)) {
michael@0:             u = 0 - u; // magnitude; well defined even for the minimum value
michael@0:             *p++ = kMinus;
michael@0:             wroteSign = 1;
michael@0:             --len;
michael@0:         } else if (len && (u == 0)) {
michael@0:             *p++ = asciiDigits[0];
michael@0:             --len;
michael@0:         }
michael@0:         while (len && (u != 0)) {
michael@0:             *p++ = asciiDigits[u % base];
michael@0:             u /= base;
michael@0:             --len;
michael@0:         }
michael@0:     }
michael@0:     if (len) {
michael@0:         *p = 0; // null terminate if room for caller convenience
michael@0:     }
michael@0: 
michael@0:     const uint32_t written = (uint32_t)(p - buf);
michael@0: 
michael@0:     // Reverse the digits in place, leaving a leading sign where it is.
michael@0:     // Nothing is read or moved when nothing was written (len was 0).
michael@0:     char* lo = wroteSign ? buf + 1 : buf;
michael@0:     if (p > lo) {
michael@0:         char* hi = p - 1;
michael@0:         while (lo < hi) {
michael@0:             const char c = *hi;
michael@0:             *hi = *lo;
michael@0:             *lo = c;
michael@0:             ++lo;
michael@0:             --hi;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     return written;
michael@0: }
michael@0: #endif
michael@0: 
michael@0: /**
michael@0:  * Writes w into buf as UChar digits of the given radix, most
michael@0:  * significant digit first.
michael@0:  *
michael@0:  * Text mode (raw is false): digits are the characters of asciiDigits.
michael@0:  * A negative value in radix 10 is written as kUMinus followed by its
michael@0:  * magnitude.  A negative value in any other radix is written as the
michael@0:  * digits of its unsigned 64-bit two's-complement bit pattern; the
michael@0:  * remainder is therefore never negative and always a valid index into
michael@0:  * asciiDigits.  Negating through unsigned arithmetic also makes the
michael@0:  * most negative int64_t value safe.
michael@0:  *
michael@0:  * Raw mode (raw is true): each output element holds the numeric digit
michael@0:  * value rather than a character, no sign is written, and the digits are
michael@0:  * produced with signed division (so a negative w yields negative digit
michael@0:  * values converted to UChar).
michael@0:  *
michael@0:  * At most len elements are written; if the digits do not fit, only the
michael@0:  * lowest-order ones that fit are kept.  A terminating 0 is stored after
michael@0:  * the digits only when there is room left for it.
michael@0:  *
michael@0:  * @param w     the value to convert
michael@0:  * @param buf   the output buffer
michael@0:  * @param len   the capacity of buf, in elements
michael@0:  * @param radix the number base; values outside 2..36 are clamped to
michael@0:  *              that range
michael@0:  * @param raw   if true, store digit values instead of digit characters
michael@0:  * @return the number of elements written, not counting the terminator
michael@0:  */
michael@0: uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
michael@0: {    
michael@0:     if (radix > 36) {
michael@0:         radix = 36;
michael@0:     } else if (radix < 2) {
michael@0:         radix = 2;
michael@0:     }
michael@0: 
michael@0:     UChar* p = buf;
michael@0:     UBool wroteSign = 0;
michael@0: 
michael@0:     // Digits are produced least significant first and reversed below.
michael@0:     if (raw) {
michael@0:         const int64_t base = radix;
michael@0:         if (len && (w == 0)) {
michael@0:             *p++ = 0;
michael@0:             --len;
michael@0:         }
michael@0:         while (len && (w != 0)) {
michael@0:             const int64_t n = w / base;
michael@0:             *p++ = (UChar)(int32_t)(w - n * base);
michael@0:             w = n;
michael@0:             --len;
michael@0:         }
michael@0:     } else {
michael@0:         const uint64_t base = radix;
michael@0:         uint64_t u = (uint64_t)w;
michael@0:         if (len && (w < 0) && (radix == 10)) {
michael@0:             u = 0 - u; // magnitude; well defined even for the minimum value
michael@0:             *p++ = kUMinus;
michael@0:             wroteSign = 1;
michael@0:             --len;
michael@0:         } else if (len && (u == 0)) {
michael@0:             *p++ = (UChar)asciiDigits[0];
michael@0:             --len;
michael@0:         }
michael@0:         while (len && (u != 0)) {
michael@0:             *p++ = (UChar)asciiDigits[u % base];
michael@0:             u /= base;
michael@0:             --len;
michael@0:         }
michael@0:     }
michael@0:     if (len) {
michael@0:         *p = 0; // null terminate if room for caller convenience
michael@0:     }
michael@0: 
michael@0:     const uint32_t written = (uint32_t)(p - buf);
michael@0: 
michael@0:     // Reverse the digits in place, leaving a leading sign where it is.
michael@0:     // Nothing is read or moved when nothing was written (len was 0).
michael@0:     UChar* lo = wroteSign ? buf + 1 : buf;
michael@0:     if (p > lo) {
michael@0:         UChar* hi = p - 1;
michael@0:         while (lo < hi) {
michael@0:             const UChar c = *hi;
michael@0:             *hi = *lo;
michael@0:             *lo = c;
michael@0:             ++lo;
michael@0:             --hi;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     return written;
michael@0: }
michael@0: 
michael@0: 
michael@0: U_NAMESPACE_END
michael@0: 
michael@0: /* U_HAVE_RBNF */
michael@0: #endif
michael@0: