michael@0: /*
michael@0: *******************************************************************************
michael@0: * Copyright (C) 2007-2013, International Business Machines Corporation and
michael@0: * others. All Rights Reserved.
michael@0: *******************************************************************************
michael@0: *
michael@0: * File plurrule.cpp
michael@0: */
michael@0: 
michael@0: #include <math.h>
michael@0: #include <stdio.h>
michael@0: 
michael@0: #include "unicode/utypes.h"
michael@0: #include "unicode/localpointer.h"
michael@0: #include "unicode/plurrule.h"
michael@0: #include "unicode/upluralrules.h"
michael@0: #include "unicode/ures.h"
michael@0: #include "charstr.h"
michael@0: #include "cmemory.h"
michael@0: #include "cstring.h"
michael@0: #include "digitlst.h"
michael@0: #include "hash.h"
michael@0: #include "locutil.h"
michael@0: #include "mutex.h"
michael@0: #include "patternprops.h"
michael@0: #include "plurrule_impl.h"
michael@0: #include "putilimp.h"
michael@0: #include "ucln_in.h"
michael@0: #include "ustrfmt.h"
michael@0: #include "uassert.h"
michael@0: #include "uvectr32.h"
michael@0: 
michael@0: #if !UCONFIG_NO_FORMATTING
michael@0: 
michael@0: U_NAMESPACE_BEGIN
michael@0: 
michael@0: // Number of elements in a statically sized array, as an int32_t.
michael@0: // The argument must be a real array (not a pointer). It is parenthesized and the
michael@0: // whole expansion is wrapped so the macro is safe inside larger expressions.
michael@0: #define ARRAY_SIZE(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
michael@0: 
michael@0: // NUL-terminated UTF-16 strings, spelled out one code unit at a time with the
michael@0: // LOW_* / COLON / SPACE constants: the keyword "other" and the default rule "other: n".
michael@0: static const UChar PLURAL_KEYWORD_OTHER[] = { LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0 };
michael@0: static const UChar PLURAL_DEFAULT_RULE[]  = { LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, COLON, SPACE, LOW_N, 0 };
michael@0: 
michael@0: // NUL-terminated UTF-16 spellings of the words of the rule language.
michael@0: // NOTE(review): none of the PK_* arrays is referenced in the part of the file
michael@0: // reviewed here; presumably the tokenizer further down uses them.
michael@0: static const UChar PK_IN[]      = { LOW_I, LOW_N, 0 };
michael@0: static const UChar PK_NOT[]     = { LOW_N, LOW_O, LOW_T, 0 };
michael@0: static const UChar PK_IS[]      = { LOW_I, LOW_S, 0 };
michael@0: static const UChar PK_MOD[]     = { LOW_M, LOW_O, LOW_D, 0 };
michael@0: static const UChar PK_AND[]     = { LOW_A, LOW_N, LOW_D, 0 };
michael@0: static const UChar PK_OR[]      = { LOW_O, LOW_R, 0 };
michael@0: static const UChar PK_VAR_N[]   = { LOW_N, 0 };
michael@0: static const UChar PK_VAR_I[]   = { LOW_I, 0 };
michael@0: static const UChar PK_VAR_F[]   = { LOW_F, 0 };
michael@0: static const UChar PK_VAR_T[]   = { LOW_T, 0 };
michael@0: static const UChar PK_VAR_V[]   = { LOW_V, 0 };
michael@0: static const UChar PK_VAR_J[]   = { LOW_J, 0 };
michael@0: static const UChar PK_WITHIN[]  = { LOW_W, LOW_I, LOW_T, LOW_H, LOW_I, LOW_N, 0 };
michael@0: static const UChar PK_DECIMAL[] = { LOW_D, LOW_E, LOW_C, LOW_I, LOW_M, LOW_A, LOW_L, 0 };
michael@0: static const UChar PK_INTEGER[] = { LOW_I, LOW_N, LOW_T, LOW_E, LOW_G, LOW_E, LOW_R, 0 };
michael@0: 
michael@0: // ICU run-time type identification boilerplate for the two UObject subclasses.
michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
michael@0: 
michael@0: // Builds a PluralRules object that does not own a rule chain yet (mRules == NULL).
michael@0: // The status argument is neither read nor written by this constructor.
michael@0: PluralRules::PluralRules(UErrorCode& /*status*/)
michael@0:     : UObject(), mRules(NULL) {
michael@0: }
michael@0: 
michael@0: // Copy constructor. mRules is set to NULL first so that the assignment
michael@0: // operator, which does the actual copying, deletes nothing stale.
michael@0: PluralRules::PluralRules(const PluralRules& other)
michael@0:     : UObject(other), mRules(NULL) {
michael@0:     operator=(other);
michael@0: }
michael@0: 
michael@0: // Releases the rule chain owned by this object (deleting NULL is a no-op).
michael@0: PluralRules::~PluralRules()
michael@0: {
michael@0:     delete mRules;
michael@0: }
michael@0: 
michael@0: // Returns a heap-allocated copy of this object, made with the copy constructor.
michael@0: // Whatever operator new yields is handed back unchanged.
michael@0: PluralRules*
michael@0: PluralRules::clone() const {
michael@0:     PluralRules *duplicate = new PluralRules(*this);
michael@0:     return duplicate;
michael@0: }
michael@0: 
michael@0: // Assignment: drops the current rule chain and replaces it with a copy of
michael@0: // other's chain (or NULL when other has none). Self-assignment is a no-op.
michael@0: PluralRules&
michael@0: PluralRules::operator=(const PluralRules& other) {
michael@0:     if (this == &other) {
michael@0:         return *this;
michael@0:     }
michael@0:     delete mRules;
michael@0:     mRules = (other.mRules != NULL) ? new RuleChain(*other.mRules) : NULL;
michael@0:     return *this;
michael@0: }
michael@0: 
michael@0: // Creates a PluralAvailableLocalesEnumeration and returns it.
michael@0: // Returns NULL when the allocation fails (status becomes
michael@0: // U_MEMORY_ALLOCATION_ERROR unless it already held a failure) or when status
michael@0: // holds a failure after construction; in the latter case the object is deleted.
michael@0: StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) {
michael@0:     StringEnumeration *locales = new PluralAvailableLocalesEnumeration(status);
michael@0:     if (locales == NULL) {
michael@0:         if (U_SUCCESS(status)) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:         }
michael@0:         return NULL;
michael@0:     }
michael@0:     if (U_FAILURE(status)) {
michael@0:         delete locales;
michael@0:         return NULL;
michael@0:     }
michael@0:     return locales;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Parses a rule description into a new PluralRules object.
michael@0: // Returns NULL if status is a failure on entry, if the allocation fails
michael@0: // (U_MEMORY_ALLOCATION_ERROR), or if parsing fails; otherwise the caller
michael@0: // receives the new object.
michael@0: PluralRules* U_EXPORT2
michael@0: PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
michael@0:     if (U_FAILURE(status)) {
michael@0:         return NULL;
michael@0:     }
michael@0: 
michael@0:     PluralRuleParser parser;
michael@0:     PluralRules *rules = new PluralRules(status);
michael@0:     if (rules == NULL && U_SUCCESS(status)) {
michael@0:         status = U_MEMORY_ALLOCATION_ERROR;
michael@0:     }
michael@0:     // parse() returns immediately when status already holds a failure.
michael@0:     parser.parse(description, rules, status);
michael@0:     if (U_SUCCESS(status)) {
michael@0:         return rules;
michael@0:     }
michael@0:     delete rules;
michael@0:     return NULL;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Creates rules from the built-in default description (PLURAL_DEFAULT_RULE).
michael@0: PluralRules* U_EXPORT2
michael@0: PluralRules::createDefaultRules(UErrorCode& status) {
michael@0:     // Read-only alias of the NUL-terminated constant; no copy of the text is made here.
michael@0:     const UnicodeString defaultDescription(TRUE, PLURAL_DEFAULT_RULE, -1);
michael@0:     return createRules(defaultDescription, status);
michael@0: }
michael@0: 
michael@0: // Convenience overload: cardinal plural rules for the given locale.
michael@0: PluralRules* U_EXPORT2
michael@0: PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
michael@0:     const UPluralType cardinal = UPLURAL_TYPE_CARDINAL;
michael@0:     return forLocale(locale, cardinal, status);
michael@0: }
michael@0: 
michael@0: PluralRules* U_EXPORT2
michael@0: PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
michael@0:     if (U_FAILURE(status)) {
michael@0:         return NULL;
michael@0:     }
michael@0:     if (type >= UPLURAL_TYPE_COUNT) {
michael@0:         status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:         return NULL;
michael@0:     }
michael@0:     PluralRules *newObj = new PluralRules(status);
michael@0:     if (newObj==NULL || U_FAILURE(status)) {
michael@0:         delete newObj;
michael@0:         return NULL;
michael@0:     }
michael@0:     UnicodeString locRule = newObj->getRuleFromResource(locale, type, status);
michael@0:     // TODO: which errors, if any, should be returned?
michael@0:     if (locRule.length() == 0) {
michael@0:         // Locales with no specific rules (all numbers have the "other" category
michael@0:         //   will return a U_MISSING_RESOURCE_ERROR at this point. This is not
michael@0:         //   an error.
michael@0:         locRule =  UnicodeString(PLURAL_DEFAULT_RULE);
michael@0:         status = U_ZERO_ERROR;
michael@0:     }
michael@0:     PluralRuleParser parser;
michael@0:     parser.parse(locRule, newObj, status);
michael@0:         //  TODO: should rule parse errors be returned, or
michael@0:         //        should we silently use default rules?
michael@0:         //        Original impl used default rules.
michael@0:         //        Ask the question to ICU Core.
michael@0: 
michael@0:     return newObj;
michael@0: }
michael@0: 
michael@0: // Selects the keyword for an integer by wrapping it in a FixedDecimal.
michael@0: UnicodeString
michael@0: PluralRules::select(int32_t number) const {
michael@0:     const FixedDecimal fixed(number);
michael@0:     return select(fixed);
michael@0: }
michael@0: 
michael@0: // Selects the keyword for a double by wrapping it in a FixedDecimal.
michael@0: UnicodeString
michael@0: PluralRules::select(double number) const {
michael@0:     const FixedDecimal fixed(number);
michael@0:     return select(fixed);
michael@0: }
michael@0: 
michael@0: // Returns the keyword selected for the number.
michael@0: // With no rule chain at all (for example after parsing an empty description)
michael@0: // every number belongs to "other", so the keyword "other" is returned. The
michael@0: // previous code returned the rule text "other: n" here, which is not a keyword.
michael@0: UnicodeString
michael@0: PluralRules::select(const FixedDecimal &number) const {
michael@0:     if (mRules == NULL) {
michael@0:         // Read-only alias of the constant; 5 is the length of "other".
michael@0:         return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
michael@0:     }
michael@0:     return mRules->select(number);
michael@0: }
michael@0: 
michael@0: // Returns a new enumeration built by PluralKeywordEnumeration from this
michael@0: // object's rule chain.
michael@0: // Returns NULL if status is a failure on entry, if the allocation fails
michael@0: // (status is then set to U_MEMORY_ALLOCATION_ERROR, matching
michael@0: // getAvailableLocales()), or if building the enumeration fails.
michael@0: StringEnumeration*
michael@0: PluralRules::getKeywords(UErrorCode& status) const {
michael@0:     if (U_FAILURE(status))  return NULL;
michael@0:     StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
michael@0:     if (nameEnumerator == NULL && U_SUCCESS(status)) {
michael@0:         // Do not hand back NULL together with a success code.
michael@0:         status = U_MEMORY_ALLOCATION_ERROR;
michael@0:     }
michael@0:     if (U_FAILURE(status)) {
michael@0:       delete nameEnumerator;
michael@0:       return NULL;
michael@0:     }
michael@0: 
michael@0:     return nameEnumerator;
michael@0: }
michael@0: 
michael@0: // Not implemented: the keyword is ignored and the "no unique value" marker
michael@0: // UPLRULES_NO_UNIQUE_VALUE is always returned.
michael@0: double
michael@0: PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) {
michael@0:   const double noUniqueValue = UPLRULES_NO_UNIQUE_VALUE;
michael@0:   return noUniqueValue;
michael@0: }
michael@0: 
michael@0: int32_t
michael@0: PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */,
michael@0:                                  int32_t /* destCapacity */, UErrorCode& error) {
michael@0:     error = U_UNSUPPORTED_ERROR;
michael@0:     return 0;
michael@0: }
michael@0: 
michael@0:     
michael@0: // Returns the power of ten (1, 10, 100, ...) found by multiplying d by ten
michael@0: // until the product has no fractional part; returns 1 for values that are
michael@0: // already integral.
michael@0: // NaN also yields 1: NaN never equals its own floor, so without the guard
michael@0: // the loop below would never terminate.
michael@0: static double scaleForInt(double d) {
michael@0:     double scale = 1.0;
michael@0:     if (d != d) {       // true only for NaN
michael@0:         return scale;
michael@0:     }
michael@0:     while (d != floor(d)) {
michael@0:         d = d * 10.0;
michael@0:         scale = scale * 10.0;
michael@0:     }
michael@0:     return scale;
michael@0: }
michael@0: 
michael@0: static int32_t
michael@0: getSamplesFromString(const UnicodeString &samples, double *dest,
michael@0:                         int32_t destCapacity, UErrorCode& status) {
michael@0:     int32_t sampleCount = 0;
michael@0:     int32_t sampleStartIdx = 0;
michael@0:     int32_t sampleEndIdx = 0;
michael@0: 
michael@0:     //std::string ss;  // TODO: debugging.
michael@0:     // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n";
michael@0:     for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) {
michael@0:         sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx);
michael@0:         if (sampleEndIdx == -1) {
michael@0:             sampleEndIdx = samples.length();
michael@0:         }
michael@0:         const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx);
michael@0:         // ss.erase();
michael@0:         // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n";
michael@0:         int32_t tildeIndex = sampleRange.indexOf(TILDE);
michael@0:         if (tildeIndex < 0) {
michael@0:             FixedDecimal fixed(sampleRange, status);
michael@0:             double sampleValue = fixed.source;
michael@0:             if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) {
michael@0:                 dest[sampleCount++] = sampleValue;
michael@0:             }
michael@0:         } else {
michael@0:             
michael@0:             FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status);
michael@0:             FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status);
michael@0:             double rangeLo = fixedLo.source;
michael@0:             double rangeHi = fixedHi.source;
michael@0:             if (U_FAILURE(status)) {
michael@0:                 break;
michael@0:             }
michael@0:             if (rangeHi < rangeLo) {
michael@0:                 status = U_INVALID_FORMAT_ERROR;
michael@0:                 break;
michael@0:             }
michael@0: 
michael@0:             // For ranges of samples with fraction decimal digits, scale the number up so that we
michael@0:             //   are adding one in the units place. Avoids roundoffs from repetitive adds of tenths.
michael@0: 
michael@0:             double scale = scaleForInt(rangeLo); 
michael@0:             double t = scaleForInt(rangeHi);
michael@0:             if (t > scale) {
michael@0:                 scale = t;
michael@0:             }
michael@0:             rangeLo *= scale;
michael@0:             rangeHi *= scale;
michael@0:             for (double n=rangeLo; n<=rangeHi; n+=1) {
michael@0:                 // Hack Alert: don't return any decimal samples with integer values that
michael@0:                 //    originated from a format with trailing decimals.
michael@0:                 //    This API is returning doubles, which can't distinguish having displayed
michael@0:                 //    zeros to the right of the decimal.
michael@0:                 //    This results in test failures with values mapping back to a different keyword.
michael@0:                 double sampleValue = n/scale;
michael@0:                 if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) {
michael@0:                     dest[sampleCount++] = sampleValue;
michael@0:                 }
michael@0:                 if (sampleCount >= destCapacity) {
michael@0:                     break;
michael@0:                 }
michael@0:             }
michael@0:         }
michael@0:         sampleStartIdx = sampleEndIdx + 1;
michael@0:     }
michael@0:     return sampleCount;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Writes sample values for a keyword into dest and returns how many were
michael@0: // written. Integer samples are tried first; the decimal samples are used
michael@0: // only when the integer list produced nothing.
michael@0: // Returns 0 when the keyword has no rule chain, when destCapacity is 0, or
michael@0: // when status is a failure on entry.
michael@0: int32_t
michael@0: PluralRules::getSamples(const UnicodeString &keyword, double *dest,
michael@0:                         int32_t destCapacity, UErrorCode& status) {
michael@0:     RuleChain *chain = rulesForKeyword(keyword);
michael@0:     if (chain == NULL) {
michael@0:         return 0;
michael@0:     }
michael@0:     if (destCapacity == 0) {
michael@0:         return 0;
michael@0:     }
michael@0:     if (U_FAILURE(status)) {
michael@0:         return 0;
michael@0:     }
michael@0:     const int32_t integerCount = getSamplesFromString(chain->fIntegerSamples, dest, destCapacity, status);
michael@0:     if (integerCount != 0) {
michael@0:         return integerCount;
michael@0:     }
michael@0:     return getSamplesFromString(chain->fDecimalSamples, dest, destCapacity, status);
michael@0: }
michael@0:     
michael@0: 
michael@0: // Walks the linked list of rule chains and returns the first one whose
michael@0: // keyword equals the argument, or NULL when there is none.
michael@0: RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const {
michael@0:     RuleChain *chain = mRules;
michael@0:     while (chain != NULL && !(chain->fKeyword == keyword)) {
michael@0:         chain = chain->fNext;
michael@0:     }
michael@0:     return chain;
michael@0: }
michael@0: 
michael@0: 
michael@0: // True for "other" (accepted even without a rule chain of that name) and
michael@0: // for every keyword that has a rule chain in this object.
michael@0: UBool
michael@0: PluralRules::isKeyword(const UnicodeString& keyword) const {
michael@0:     const UBool isOther = (keyword.compare(PLURAL_KEYWORD_OTHER, 5) == 0);
michael@0:     return isOther || rulesForKeyword(keyword) != NULL;
michael@0: }
michael@0: 
michael@0: // Returns the keyword "other" as a read-only alias of PLURAL_KEYWORD_OTHER.
michael@0: UnicodeString
michael@0: PluralRules::getKeywordOther() const {
michael@0:     const int32_t otherLength = 5;   // code units in "other", terminator excluded
michael@0:     return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, otherLength);
michael@0: }
michael@0: 
michael@0: // Compares two rule sets by their keywords: they are equal when both report
michael@0: // the same number of keywords and each keyword of one is a keyword of the
michael@0: // other. The rule expressions themselves are not compared.
michael@0: // Returns FALSE when a keyword enumeration cannot be obtained or iterated.
michael@0: UBool
michael@0: PluralRules::operator==(const PluralRules& other) const  {
michael@0:     const UnicodeString *ptrKeyword;
michael@0:     UErrorCode status= U_ZERO_ERROR;
michael@0: 
michael@0:     if ( this == &other ) {
michael@0:         return TRUE;
michael@0:     }
michael@0:     LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
michael@0:     LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
michael@0:     // getKeywords() can hand back NULL; never dereference an empty pointer below.
michael@0:     if (U_FAILURE(status) || myKeywordList.isNull() || otherKeywordList.isNull()) {
michael@0:         return FALSE;
michael@0:     }
michael@0: 
michael@0:     if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
michael@0:         return FALSE;
michael@0:     }
michael@0:     myKeywordList->reset(status);
michael@0:     while ((ptrKeyword=myKeywordList->snext(status))!=NULL) {
michael@0:         if (!other.isKeyword(*ptrKeyword)) {
michael@0:             return FALSE;
michael@0:         }
michael@0:     }
michael@0:     otherKeywordList->reset(status);
michael@0:     while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) {
michael@0:         if (!this->isKeyword(*ptrKeyword)) {
michael@0:             return FALSE;
michael@0:         }
michael@0:     }
michael@0:     if (U_FAILURE(status)) {
michael@0:         return FALSE;
michael@0:     }
michael@0: 
michael@0:     return TRUE;
michael@0: }
michael@0: 
michael@0: 
michael@0: void
michael@0: PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status)
michael@0: {
michael@0:     if (U_FAILURE(status)) {
michael@0:         return;
michael@0:     }
michael@0:     U_ASSERT(ruleIndex == 0);    // Parsers are good for a single use only!
michael@0:     ruleSrc = &ruleData;
michael@0: 
michael@0:     while (ruleIndex< ruleSrc->length()) {
michael@0:         getNextToken(status);
michael@0:         if (U_FAILURE(status)) {
michael@0:             return;
michael@0:         }
michael@0:         checkSyntax(status);
michael@0:         if (U_FAILURE(status)) {
michael@0:             return;
michael@0:         }
michael@0:         switch (type) {
michael@0:         case tAnd:
michael@0:             U_ASSERT(curAndConstraint != NULL);
michael@0:             curAndConstraint = curAndConstraint->add();
michael@0:             break;
michael@0:         case tOr:
michael@0:             {
michael@0:                 U_ASSERT(currentChain != NULL);
michael@0:                 OrConstraint *orNode=currentChain->ruleHeader;
michael@0:                 while (orNode->next != NULL) {
michael@0:                     orNode = orNode->next;
michael@0:                 }
michael@0:                 orNode->next= new OrConstraint();
michael@0:                 orNode=orNode->next;
michael@0:                 orNode->next=NULL;
michael@0:                 curAndConstraint = orNode->add();
michael@0:             }
michael@0:             break;
michael@0:         case tIs:
michael@0:             U_ASSERT(curAndConstraint != NULL);
michael@0:             U_ASSERT(curAndConstraint->value == -1);
michael@0:             U_ASSERT(curAndConstraint->rangeList == NULL);
michael@0:             break;
michael@0:         case tNot:
michael@0:             U_ASSERT(curAndConstraint != NULL);
michael@0:             curAndConstraint->negated=TRUE;
michael@0:             break;
michael@0: 
michael@0:         case tNotEqual:
michael@0:             curAndConstraint->negated=TRUE;
michael@0:         case tIn:
michael@0:         case tWithin:
michael@0:         case tEqual:
michael@0:             U_ASSERT(curAndConstraint != NULL);
michael@0:             curAndConstraint->rangeList = new UVector32(status);
michael@0:             curAndConstraint->rangeList->addElement(-1, status);  // range Low
michael@0:             curAndConstraint->rangeList->addElement(-1, status);  // range Hi
michael@0:             rangeLowIdx = 0;
michael@0:             rangeHiIdx  = 1;
michael@0:             curAndConstraint->value=PLURAL_RANGE_HIGH;
michael@0:             curAndConstraint->integerOnly = (type != tWithin);
michael@0:             break;
michael@0:         case tNumber:
michael@0:             U_ASSERT(curAndConstraint != NULL);
michael@0:             if ( (curAndConstraint->op==AndConstraint::MOD)&&
michael@0:                  (curAndConstraint->opNum == -1 ) ) {
michael@0:                 curAndConstraint->opNum=getNumberValue(token);
michael@0:             }
michael@0:             else {
michael@0:                 if (curAndConstraint->rangeList == NULL) {
michael@0:                     // this is for an 'is' rule
michael@0:                     curAndConstraint->value = getNumberValue(token);
michael@0:                 } else {
michael@0:                     // this is for an 'in' or 'within' rule
michael@0:                     if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) {
michael@0:                         curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx);
michael@0:                         curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
michael@0:                     }
michael@0:                     else {
michael@0:                         curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
michael@0:                         if (curAndConstraint->rangeList->elementAti(rangeLowIdx) > 
michael@0:                                 curAndConstraint->rangeList->elementAti(rangeHiIdx)) {
michael@0:                             // Range Lower bound > Range Upper bound.
michael@0:                             // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
michael@0:                             // used for all plural rule parse errors.
michael@0:                             status = U_UNEXPECTED_TOKEN;
michael@0:                             break;
michael@0:                         }
michael@0:                     }
michael@0:                 }
michael@0:             }
michael@0:             break;
michael@0:         case tComma:
michael@0:             // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
michael@0:             //       Catch cases like "n mod 10, is 1" here instead.
michael@0:             if (curAndConstraint == NULL || curAndConstraint->rangeList == NULL) {
michael@0:                 status = U_UNEXPECTED_TOKEN;
michael@0:                 break;
michael@0:             }
michael@0:             U_ASSERT(curAndConstraint->rangeList->size() >= 2);
michael@0:             rangeLowIdx = curAndConstraint->rangeList->size();
michael@0:             curAndConstraint->rangeList->addElement(-1, status);  // range Low
michael@0:             rangeHiIdx = curAndConstraint->rangeList->size();
michael@0:             curAndConstraint->rangeList->addElement(-1, status);  // range Hi
michael@0:             break;
michael@0:         case tMod:
michael@0:             U_ASSERT(curAndConstraint != NULL);
michael@0:             curAndConstraint->op=AndConstraint::MOD;
michael@0:             break;
michael@0:         case tVariableN:
michael@0:         case tVariableI:
michael@0:         case tVariableF:
michael@0:         case tVariableT:
michael@0:         case tVariableV:
michael@0:             U_ASSERT(curAndConstraint != NULL);
michael@0:             curAndConstraint->digitsType = type;
michael@0:             break;
michael@0:         case tKeyword:
michael@0:             {
michael@0:             RuleChain *newChain = new RuleChain;
michael@0:             if (newChain == NULL) {
michael@0:                 status = U_MEMORY_ALLOCATION_ERROR;
michael@0:                 break;
michael@0:             }
michael@0:             newChain->fKeyword = token;
michael@0:             if (prules->mRules == NULL) {
michael@0:                 prules->mRules = newChain;
michael@0:             } else {
michael@0:                 // The new rule chain goes at the end of the linked list of rule chains,
michael@0:                 //   unless there is an "other" keyword & chain. "other" must remain last.
michael@0:                 RuleChain *insertAfter = prules->mRules;
michael@0:                 while (insertAfter->fNext!=NULL && 
michael@0:                        insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){
michael@0:                     insertAfter=insertAfter->fNext;
michael@0:                 }
michael@0:                 newChain->fNext = insertAfter->fNext;
michael@0:                 insertAfter->fNext = newChain;
michael@0:             }
michael@0:             OrConstraint *orNode = new OrConstraint();
michael@0:             newChain->ruleHeader = orNode;
michael@0:             curAndConstraint = orNode->add();
michael@0:             currentChain = newChain;
michael@0:             }
michael@0:             break;
michael@0: 
michael@0:         case tInteger:
michael@0:             for (;;) {
michael@0:                 getNextToken(status);
michael@0:                 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
michael@0:                     break;
michael@0:                 }
michael@0:                 if (type == tEllipsis) {
michael@0:                     currentChain->fIntegerSamplesUnbounded = TRUE;
michael@0:                     continue;
michael@0:                 }
michael@0:                 currentChain->fIntegerSamples.append(token);
michael@0:             }
michael@0:             break;
michael@0: 
michael@0:         case tDecimal:
michael@0:             for (;;) {
michael@0:                 getNextToken(status);
michael@0:                 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
michael@0:                     break;
michael@0:                 }
michael@0:                 if (type == tEllipsis) {
michael@0:                     currentChain->fDecimalSamplesUnbounded = TRUE;
michael@0:                     continue;
michael@0:                 }
michael@0:                 currentChain->fDecimalSamples.append(token);
michael@0:             }
michael@0:             break;
michael@0:                 
michael@0:         default:
michael@0:             break;
michael@0:         }
michael@0:         prevType=type;
michael@0:         if (U_FAILURE(status)) {
michael@0:             break;
michael@0:         }
michael@0:     }
michael@0: }
michael@0: 
michael@0: UnicodeString
michael@0: PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) {
michael@0:     UnicodeString emptyStr;
michael@0: 
michael@0:     if (U_FAILURE(errCode)) {
michael@0:         return emptyStr;
michael@0:     }
michael@0:     LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode));
michael@0:     if(U_FAILURE(errCode)) {
michael@0:         return emptyStr;
michael@0:     }
michael@0:     const char *typeKey;
michael@0:     switch (type) {
michael@0:     case UPLURAL_TYPE_CARDINAL:
michael@0:         typeKey = "locales";
michael@0:         break;
michael@0:     case UPLURAL_TYPE_ORDINAL:
michael@0:         typeKey = "locales_ordinals";
michael@0:         break;
michael@0:     default:
michael@0:         // Must not occur: The caller should have checked for valid types.
michael@0:         errCode = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:         return emptyStr;
michael@0:     }
michael@0:     LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode));
michael@0:     if(U_FAILURE(errCode)) {
michael@0:         return emptyStr;
michael@0:     }
michael@0:     int32_t resLen=0;
michael@0:     const char *curLocaleName=locale.getName();
michael@0:     const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode);
michael@0: 
michael@0:     if (s == NULL) {
michael@0:         // Check parent locales.
michael@0:         UErrorCode status = U_ZERO_ERROR;
michael@0:         char parentLocaleName[ULOC_FULLNAME_CAPACITY];
michael@0:         const char *curLocaleName=locale.getName();
michael@0:         uprv_strcpy(parentLocaleName, curLocaleName);
michael@0: 
michael@0:         while (uloc_getParent(parentLocaleName, parentLocaleName,
michael@0:                                        ULOC_FULLNAME_CAPACITY, &status) > 0) {
michael@0:             resLen=0;
michael@0:             s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status);
michael@0:             if (s != NULL) {
michael@0:                 errCode = U_ZERO_ERROR;
michael@0:                 break;
michael@0:             }
michael@0:             status = U_ZERO_ERROR;
michael@0:         }
michael@0:     }
michael@0:     if (s==NULL) {
michael@0:         return emptyStr;
michael@0:     }
michael@0: 
michael@0:     char setKey[256];
michael@0:     u_UCharsToChars(s, setKey, resLen + 1);
michael@0:     // printf("\n PluralRule: %s\n", setKey);
michael@0: 
michael@0:     LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode));
michael@0:     if(U_FAILURE(errCode)) {
michael@0:         return emptyStr;
michael@0:     }
michael@0:     LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode));
michael@0:     if (U_FAILURE(errCode)) {
michael@0:         return emptyStr;
michael@0:     }
michael@0: 
michael@0:     int32_t numberKeys = ures_getSize(setRes.getAlias());
michael@0:     UnicodeString result;
michael@0:     const char *key=NULL;
michael@0:     for(int32_t i=0; i<numberKeys; ++i) {   // Keys are zero, one, few, ...
michael@0:         UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode);
michael@0:         UnicodeString uKey(key, -1, US_INV);
michael@0:         result.append(uKey);
michael@0:         result.append(COLON);
michael@0:         result.append(rules);
michael@0:         result.append(SEMI_COLON);
michael@0:     }
michael@0:     return result;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Returns the text produced by RuleChain::dumpRules for this object's rule
michael@0: // chain, or an empty string when there is no chain.
michael@0: UnicodeString
michael@0: PluralRules::getRules() const {
michael@0:     UnicodeString text;
michael@0:     if (mRules == NULL) {
michael@0:         return text;
michael@0:     }
michael@0:     mRules->dumpRules(text);
michael@0:     return text;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Creates an empty constraint: no operation, no operand, no value, no range
michael@0: // list, not negated, not integer-only, digitsType none, and no successor.
michael@0: AndConstraint::AndConstraint() {
michael@0:     // -1 means "not set" for both numeric fields.
michael@0:     opNum = -1;
michael@0:     value = -1;
michael@0:     op = AndConstraint::NONE;
michael@0:     digitsType = none;
michael@0:     negated = FALSE;
michael@0:     integerOnly = FALSE;
michael@0:     rangeList = NULL;
michael@0:     next = NULL;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Deep copy: duplicates the range list and, recursively, every constraint
michael@0: // linked through 'next'.
michael@0: // A failed range list allocation is no longer dereferenced; the copy is then
michael@0: // left without a range list. There is no status parameter to report it through.
michael@0: AndConstraint::AndConstraint(const AndConstraint& other) {
michael@0:     this->op = other.op;
michael@0:     this->opNum=other.opNum;
michael@0:     this->value=other.value;
michael@0:     this->rangeList=NULL;
michael@0:     if (other.rangeList != NULL) {
michael@0:         UErrorCode status = U_ZERO_ERROR;
michael@0:         this->rangeList = new UVector32(status);
michael@0:         if (this->rangeList != NULL) {
michael@0:             this->rangeList->assign(*other.rangeList, status);
michael@0:         }
michael@0:     }
michael@0:     this->integerOnly=other.integerOnly;
michael@0:     this->negated=other.negated;
michael@0:     this->digitsType = other.digitsType;
michael@0:     if (other.next==NULL) {
michael@0:         this->next=NULL;
michael@0:     }
michael@0:     else {
michael@0:         this->next = new AndConstraint(*other.next);
michael@0:     }
michael@0: }
michael@0: 
michael@0: // Frees the range list and then the rest of the constraint list; deleting
michael@0: // 'next' destroys its successors in turn. Deleting NULL is a no-op, so no
michael@0: // checks are needed.
michael@0: AndConstraint::~AndConstraint()
michael@0: {
michael@0:     delete rangeList;
michael@0:     delete next;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Evaluates this single constraint (not the ones linked through 'next')
michael@0: // against a number.
michael@0: //   - A constraint without an operand type (digitsType == none) always holds.
michael@0: //   - Otherwise the operand selected by digitsType is fetched, optionally
michael@0: //     reduced with 'mod', and compared with 'value' or with the range list.
michael@0: //   - 'negated' inverts the outcome, including the integer-only rejection.
michael@0: UBool
michael@0: AndConstraint::isFulfilled(const FixedDecimal &number) {
michael@0:     if (digitsType == none) {
michael@0:         // An empty AndConstraint, created by a rule with a keyword but no following expression.
michael@0:         return TRUE;
michael@0:     }
michael@0:     // Pulls the n | i | v | f value for the number; may be non-integer (n option only).
michael@0:     double operand = number.get(digitsType);
michael@0:     UBool matched;
michael@0: 
michael@0:     if (integerOnly && operand != uprv_floor(operand)) {
michael@0:         matched = FALSE;
michael@0:     } else {
michael@0:         if (op == MOD) {
michael@0:             operand = fmod(operand, opNum);
michael@0:         }
michael@0:         if (rangeList == NULL) {
michael@0:             // value == -1: empty rule; otherwise an 'is' rule.
michael@0:             matched = (value == -1 || operand == value);
michael@0:         } else {
michael@0:             // 'in' or 'within' rule: the list holds consecutive low/high pairs.
michael@0:             matched = FALSE;
michael@0:             for (int32_t lowIdx = 0; lowIdx < rangeList->size(); lowIdx += 2) {
michael@0:                 if (rangeList->elementAti(lowIdx) <= operand &&
michael@0:                         operand <= rangeList->elementAti(lowIdx + 1)) {
michael@0:                     matched = TRUE;
michael@0:                     break;
michael@0:                 }
michael@0:             }
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     if (negated) {
michael@0:         matched = !matched;
michael@0:     }
michael@0:     return matched;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Allocates a fresh constraint, stores it as this node's successor (the
michael@0: // previous 'next' pointer is overwritten, not deleted) and returns it.
michael@0: AndConstraint*
michael@0: AndConstraint::add()
michael@0: {
michael@0:     AndConstraint *appended = new AndConstraint();
michael@0:     this->next = appended;
michael@0:     return appended;
michael@0: }
michael@0: 
michael@0: // Creates an OR node with no AND list and no successor.
michael@0: OrConstraint::OrConstraint()
michael@0: {
michael@0:     next = NULL;
michael@0:     childNode = NULL;
michael@0: }
michael@0: 
michael@0: // Deep copy: duplicates the AND list hanging off this node and, recursively,
michael@0: // every following OR node.
michael@0: OrConstraint::OrConstraint(const OrConstraint& other) {
michael@0:     this->childNode = (other.childNode != NULL) ? new AndConstraint(*(other.childNode)) : NULL;
michael@0:     this->next = (other.next != NULL) ? new OrConstraint(*(other.next)) : NULL;
michael@0: }
michael@0: 
michael@0: // Frees the AND list and then the following OR nodes. Deleting NULL is a
michael@0: // no-op, so no checks are needed.
michael@0: OrConstraint::~OrConstraint()
michael@0: {
michael@0:     delete childNode;
michael@0:     delete next;
michael@0: }
michael@0: 
michael@0: // Walks to the last OR node of the list starting at this node, attaches a
michael@0: // newly allocated AndConstraint as its child and returns that child.
michael@0: // The last node is expected (asserted) not to have a child yet.
michael@0: AndConstraint*
michael@0: OrConstraint::add()
michael@0: {
michael@0:     OrConstraint *tail = this;
michael@0:     for (; tail->next != NULL; tail = tail->next) {
michael@0:         // just advancing to the end of the list
michael@0:     }
michael@0:     U_ASSERT(tail->childNode == NULL);
michael@0:     tail->childNode = new AndConstraint();
michael@0:     return tail->childNode;
michael@0: }
michael@0: 
michael@0: // Evaluates the OR list starting at this node against `number`.
michael@0: // Each OR node holds a chain of AND constraints; the node matches when every
michael@0: // constraint in its chain matches (an empty chain therefore matches).
michael@0: // Returns a true value as soon as one OR node matches, FALSE otherwise.
michael@0: UBool
michael@0: OrConstraint::isFulfilled(const FixedDecimal &number) {
michael@0:     for (OrConstraint *alternative = this; alternative != NULL; alternative = alternative->next) {
michael@0:         UBool allHold = TRUE;
michael@0:         for (AndConstraint *term = alternative->childNode;
michael@0:              term != NULL && allHold;
michael@0:              term = term->next) {
michael@0:             allHold = term->isFulfilled(number);
michael@0:         }
michael@0:         if (allHold) {
michael@0:             return allHold;
michael@0:         }
michael@0:     }
michael@0:     return FALSE;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Creates an empty chain node: empty keyword and sample strings, no
michael@0: // constraint tree, no successor, and both "unbounded" sample flags cleared.
michael@0: RuleChain::RuleChain()
michael@0:         : fKeyword(),
michael@0:           fNext(NULL),
michael@0:           ruleHeader(NULL),
michael@0:           fDecimalSamples(),
michael@0:           fIntegerSamples(),
michael@0:           fDecimalSamplesUnbounded(FALSE),
michael@0:           fIntegerSamplesUnbounded(FALSE) {
michael@0: }
michael@0: 
michael@0: // Deep copy: duplicates the keyword, the sample data, the constraint tree and,
michael@0: // via the recursive copy of `other.fNext`, all following chain nodes.
michael@0: RuleChain::RuleChain(const RuleChain& other)
michael@0:         : fKeyword(other.fKeyword),
michael@0:           fNext(NULL),
michael@0:           ruleHeader(NULL),
michael@0:           fDecimalSamples(other.fDecimalSamples),
michael@0:           fIntegerSamples(other.fIntegerSamples),
michael@0:           fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded),
michael@0:           fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded) {
michael@0:     // The pointers start out NULL (see initializer list); clone only what exists.
michael@0:     this->ruleHeader = (other.ruleHeader == NULL) ? NULL : new OrConstraint(*(other.ruleHeader));
michael@0:     this->fNext = (other.fNext == NULL) ? NULL : new RuleChain(*other.fNext);
michael@0: }
michael@0: 
michael@0: // Destroys the remainder of the chain, then this node's constraint tree.
michael@0: RuleChain::~RuleChain() {
michael@0:     delete fNext;        // recursively frees every following node
michael@0:     delete ruleHeader;   // frees the OR/AND constraint tree of this node
michael@0: }
michael@0: 
michael@0: 
michael@0: // Returns the keyword of the first rule in the chain whose constraints are
michael@0: // fulfilled by `number`. NaN and infinite numbers are never tested against the
michael@0: // rules. When nothing matches the result is the keyword "other".
michael@0: UnicodeString
michael@0: RuleChain::select(const FixedDecimal &number) const {
michael@0:     if (!number.isNanOrInfinity) {
michael@0:         for (const RuleChain *rules = this; rules != NULL; rules = rules->fNext) {
michael@0:             // A node may have no constraint tree (the default constructor leaves
michael@0:             // ruleHeader NULL); treat it as "does not match" instead of calling a
michael@0:             // member function through a NULL pointer.
michael@0:             if (rules->ruleHeader != NULL && rules->ruleHeader->isFulfilled(number)) {
michael@0:                 return rules->fKeyword;
michael@0:             }
michael@0:         }
michael@0:     }
michael@0:     // Read-only alias of the 5-character constant "other".
michael@0:     return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
michael@0: }
michael@0: 
michael@0: // Maps an operand token (n, i, f, v, t) to its one-letter spelling.
michael@0: // Any other token type is rendered as '~'.
michael@0: static UnicodeString tokenString(tokenType tok) {
michael@0:     UChar symbol;
michael@0:     switch (tok) {
michael@0:       case tVariableN: symbol = LOW_N; break;
michael@0:       case tVariableI: symbol = LOW_I; break;
michael@0:       case tVariableF: symbol = LOW_F; break;
michael@0:       case tVariableV: symbol = LOW_V; break;
michael@0:       case tVariableT: symbol = LOW_T; break;
michael@0:       default:         symbol = TILDE; break;
michael@0:     }
michael@0:     UnicodeString s;
michael@0:     s.append(symbol);
michael@0:     return s;
michael@0: }
michael@0: 
michael@0: // Appends a textual rendering of this rule, and recursively of all following
michael@0: // rules, to `result`. Each rule is written as "keyword: constraints"; AND-ed
michael@0: // constraints are joined by " and ", OR-ed groups by " or ", rules by "; ".
michael@0: // A node without a constraint tree contributes no text of its own.
michael@0: void
michael@0: RuleChain::dumpRules(UnicodeString& result) {
michael@0:     UChar numBuf[16];   // scratch buffer for uprv_itou() output
michael@0: 
michael@0:     if (ruleHeader != NULL) {
michael@0:         result += fKeyword;
michael@0:         result += COLON;
michael@0:         result += SPACE;
michael@0:         OrConstraint *orRule = ruleHeader;
michael@0:         while (orRule != NULL) {
michael@0:             AndConstraint *andRule = orRule->childNode;
michael@0:             while (andRule != NULL) {
michael@0:                 const UBool noOperator  = (andRule->op == AndConstraint::NONE);
michael@0:                 const UBool singleValue = (andRule->rangeList == NULL);
michael@0: 
michael@0:                 if (noOperator && singleValue) {
michael@0:                     // value == -1 marks an empty rule, which prints nothing.
michael@0:                     if (andRule->value != -1) {
michael@0:                         result += tokenString(andRule->digitsType);
michael@0:                         result += UNICODE_STRING_SIMPLE(" is ");
michael@0:                         if (andRule->negated) {
michael@0:                             result += UNICODE_STRING_SIMPLE("not ");
michael@0:                         }
michael@0:                         uprv_itou(numBuf, 16, andRule->value, 10, 0);
michael@0:                         result += UnicodeString(numBuf);
michael@0:                     }
michael@0:                 } else {
michael@0:                     result += tokenString(andRule->digitsType);
michael@0:                     result += SPACE;
michael@0:                     if (andRule->op == AndConstraint::MOD) {
michael@0:                         result += UNICODE_STRING_SIMPLE("mod ");
michael@0:                         uprv_itou(numBuf, 16, andRule->opNum, 10, 0);
michael@0:                         result += UnicodeString(numBuf);
michael@0:                     }
michael@0:                     if (singleValue) {
michael@0:                         // "is" / "is not" comparison against one value.
michael@0:                         if (andRule->negated) {
michael@0:                             result += UNICODE_STRING_SIMPLE(" is not ");
michael@0:                         } else {
michael@0:                             result += UNICODE_STRING_SIMPLE(" is ");
michael@0:                         }
michael@0:                         uprv_itou(numBuf, 16, andRule->value, 10, 0);
michael@0:                         result += UnicodeString(numBuf);
michael@0:                     } else {
michael@0:                         // Range list: "in" when integerOnly is set, "within" otherwise.
michael@0:                         if (andRule->integerOnly) {
michael@0:                             if (andRule->negated) {
michael@0:                                 result += UNICODE_STRING_SIMPLE(" not in ");
michael@0:                             } else {
michael@0:                                 result += UNICODE_STRING_SIMPLE(" in ");
michael@0:                             }
michael@0:                         } else {
michael@0:                             if (andRule->negated) {
michael@0:                                 result += UNICODE_STRING_SIMPLE(" not within ");
michael@0:                             } else {
michael@0:                                 result += UNICODE_STRING_SIMPLE(" within ");
michael@0:                             }
michael@0:                         }
michael@0:                         // rangeList stores (low, high) pairs back to back.
michael@0:                         const int32_t boundCount = andRule->rangeList->size();
michael@0:                         for (int32_t r = 0; r < boundCount; r += 2) {
michael@0:                             int32_t rangeLo = andRule->rangeList->elementAti(r);
michael@0:                             int32_t rangeHi = andRule->rangeList->elementAti(r+1);
michael@0:                             uprv_itou(numBuf, 16, rangeLo, 10, 0);
michael@0:                             result += UnicodeString(numBuf);
michael@0:                             result += UNICODE_STRING_SIMPLE("..");
michael@0:                             uprv_itou(numBuf, 16, rangeHi, 10, 0);
michael@0:                             result += UnicodeString(numBuf);
michael@0:                             if (r + 2 < boundCount) {
michael@0:                                 result += UNICODE_STRING_SIMPLE(", ");
michael@0:                             }
michael@0:                         }
michael@0:                     }
michael@0:                 }
michael@0: 
michael@0:                 andRule = andRule->next;
michael@0:                 if (andRule != NULL) {
michael@0:                     result += UNICODE_STRING_SIMPLE(" and ");
michael@0:                 }
michael@0:             }
michael@0:             orRule = orRule->next;
michael@0:             if (orRule != NULL) {
michael@0:                 result += UNICODE_STRING_SIMPLE(" or ");
michael@0:             }
michael@0:         }
michael@0:     }
michael@0:     if (fNext != NULL) {
michael@0:         result += UNICODE_STRING_SIMPLE("; ");
michael@0:         fNext->dumpRules(result);
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: // Copies the keyword of this rule and of every following rule into
michael@0: // `keywords`, starting at index `arraySize` and advancing `arraySize` for each
michael@0: // keyword stored. One slot of the array is always left unused by this function:
michael@0: // storing stops with U_BUFFER_OVERFLOW_ERROR once arraySize reaches
michael@0: // capacityOfKeywords-1. Returns U_ZERO_ERROR when the whole chain was copied.
michael@0: UErrorCode
michael@0: RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
michael@0:     for (const RuleChain *node = this; node != NULL; node = node->fNext) {
michael@0:         if (arraySize >= capacityOfKeywords-1) {
michael@0:             return U_BUFFER_OVERFLOW_ERROR;
michael@0:         }
michael@0:         keywords[arraySize++] = node->fKeyword;
michael@0:     }
michael@0:     return U_ZERO_ERROR;
michael@0: }
michael@0: 
michael@0: // Returns TRUE when `keywordParam` equals the keyword of this rule or of any
michael@0: // rule that follows it in the chain.
michael@0: UBool
michael@0: RuleChain::isKeyword(const UnicodeString& keywordParam) const {
michael@0:     for (const RuleChain *node = this; node != NULL; node = node->fNext) {
michael@0:         if (node->fKeyword == keywordParam) {
michael@0:             return TRUE;
michael@0:         }
michael@0:     }
michael@0:     return FALSE;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Starts the parser at offset 0 with an empty token, no current/previous token
michael@0: // type, no constraint or chain under construction and both range indexes at -1.
michael@0: PluralRuleParser::PluralRuleParser()
michael@0:         : ruleIndex(0),
michael@0:           token(),
michael@0:           type(none),
michael@0:           prevType(none),
michael@0:           curAndConstraint(NULL),
michael@0:           currentChain(NULL),
michael@0:           rangeLowIdx(-1),
michael@0:           rangeHiIdx(-1)
michael@0: {
michael@0: }
michael@0: 
michael@0: // Nothing is released here; the destructor body is intentionally empty.
michael@0: PluralRuleParser::~PluralRuleParser()
michael@0: {
michael@0: }
michael@0: 
michael@0: 
michael@0: // Converts a token made of invariant characters (decimal digits) to an int32_t.
michael@0: //
michael@0: // UnicodeString::extract() never writes more than the given capacity, but it
michael@0: // returns the length of the complete converted string even when the output was
michael@0: // truncated. The returned length therefore has to be clamped before it is used
michael@0: // as an index; otherwise a token of 128 or more characters would store the
michael@0: // terminating NUL outside of `digits`.
michael@0: int32_t
michael@0: PluralRuleParser::getNumberValue(const UnicodeString& token) {
michael@0:     char digits[128];
michael@0:     const int32_t capacity = ARRAY_SIZE(digits);
michael@0: 
michael@0:     int32_t length = token.extract(0, token.length(), digits, capacity, US_INV);
michael@0:     if (length >= capacity) {
michael@0:         length = capacity - 1;      // over-long token: keep only what fits
michael@0:     }
michael@0:     digits[length] = '\0';
michael@0: 
michael@0:     // NOTE(review): atoi() gives no overflow indication for values that do not
michael@0:     // fit in an int; over-long digit strings still produce an unspecified value.
michael@0:     return((int32_t)atoi(digits));
michael@0: }
michael@0: 
michael@0: 
michael@0: // Checks that the token just scanned (`type`) may legally follow the previous
michael@0: // token (`prevType`). Sets status to U_UNEXPECTED_TOKEN when it may not.
michael@0: // Does nothing when status already signals a failure.
michael@0: void
michael@0: PluralRuleParser::checkSyntax(UErrorCode &status)
michael@0: {
michael@0:     if (U_FAILURE(status)) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     // At the start of a rule a keyword is expected, so the token is left alone
michael@0:     // there. Everywhere else a scanned word that is a reserved word has its
michael@0:     // type switched from tKeyword to the reserved word's own type.
michael@0:     const UBool atRuleStart = (prevType == none || prevType == tSemiColon);
michael@0:     if (!atRuleStart) {
michael@0:         type = getKeyType(token, type);
michael@0:     }
michael@0: 
michael@0:     // True when the current token is one of the operand names n, i, f, t, v.
michael@0:     const UBool isOperand = (type == tVariableN ||
michael@0:                              type == tVariableI ||
michael@0:                              type == tVariableF ||
michael@0:                              type == tVariableT ||
michael@0:                              type == tVariableV);
michael@0: 
michael@0:     UBool accepted;
michael@0:     switch (prevType) {
michael@0:     case none:
michael@0:     case tSemiColon:
michael@0:         accepted = (type == tKeyword || type == tEOF);
michael@0:         break;
michael@0: 
michael@0:     case tVariableN:
michael@0:     case tVariableI:
michael@0:     case tVariableF:
michael@0:     case tVariableT:
michael@0:     case tVariableV:
michael@0:         accepted = (type == tIs     || type == tMod   || type == tIn ||
michael@0:                     type == tNot    || type == tWithin ||
michael@0:                     type == tEqual  || type == tNotEqual);
michael@0:         break;
michael@0: 
michael@0:     case tKeyword:
michael@0:         accepted = (type == tColon);
michael@0:         break;
michael@0: 
michael@0:     case tColon:
michael@0:         accepted = (isOperand || type == tAt);
michael@0:         break;
michael@0: 
michael@0:     case tIs:
michael@0:         accepted = (type == tNumber || type == tNot);
michael@0:         break;
michael@0: 
michael@0:     case tNot:
michael@0:         accepted = (type == tNumber || type == tIn || type == tWithin);
michael@0:         break;
michael@0: 
michael@0:     case tMod:
michael@0:     case tDot2:
michael@0:     case tIn:
michael@0:     case tWithin:
michael@0:     case tEqual:
michael@0:     case tNotEqual:
michael@0:     case tComma:
michael@0:         accepted = (type == tNumber);
michael@0:         break;
michael@0: 
michael@0:     case tAnd:
michael@0:     case tOr:
michael@0:         accepted = isOperand;
michael@0:         break;
michael@0: 
michael@0:     case tNumber:
michael@0:         // TODO: a comma following a number that is not part of a range will be allowed.
michael@0:         //       It's not the only case of this sort of thing. Parser needs a re-write.
michael@0:         accepted = (type == tDot2  || type == tSemiColon || type == tIs       || type == tNot    ||
michael@0:                     type == tIn    || type == tEqual     || type == tNotEqual || type == tWithin ||
michael@0:                     type == tAnd   || type == tOr        || type == tComma    || type == tAt     ||
michael@0:                     type == tEOF);
michael@0:         break;
michael@0: 
michael@0:     case tAt:
michael@0:         accepted = (type == tDecimal || type == tInteger);
michael@0:         break;
michael@0: 
michael@0:     default:
michael@0:         // No token is allowed after any other kind of previous token.
michael@0:         accepted = FALSE;
michael@0:         break;
michael@0:     }
michael@0: 
michael@0:     if (!accepted) {
michael@0:         status = U_UNEXPECTED_TOKEN;
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: /*
michael@0:  *  Scans the next token from the rule source.
michael@0:  *     Input position and results live in the parser state: scanning starts at
michael@0:  *     ruleIndex; on return `type` holds the token type, `token` its text and
michael@0:  *     ruleIndex points just past it. At end of input only `type` is set (tEOF).
michael@0:  *     An unrecognized character sets status to U_UNEXPECTED_TOKEN.
michael@0:  */
michael@0: void
michael@0: PluralRuleParser::getNextToken(UErrorCode &status)
michael@0: {
michael@0:     if (U_FAILURE(status)) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     const int32_t srcLength = ruleSrc->length();
michael@0: 
michael@0:     // Skip leading spaces; `type` ends up as the class of the first other character.
michael@0:     for (; ruleIndex < srcLength; ++ruleIndex) {
michael@0:         type = charType(ruleSrc->charAt(ruleIndex));
michael@0:         if (type != tSpace) {
michael@0:             break;
michael@0:         }
michael@0:     }
michael@0:     if (ruleIndex >= srcLength) {
michael@0:         type = tEOF;
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     int32_t tokenEnd = ruleIndex;   // becomes the index one past the token
michael@0: 
michael@0:     switch (type) {
michael@0:       case tColon:
michael@0:       case tSemiColon:
michael@0:       case tComma:
michael@0:       case tEllipsis:
michael@0:       case tTilde:   // scanned '~'
michael@0:       case tAt:      // scanned '@'
michael@0:       case tEqual:   // scanned '='
michael@0:       case tMod:     // scanned '%'
michael@0:         // Tokens that consist of exactly one character.
michael@0:         tokenEnd += 1;
michael@0:         break;
michael@0: 
michael@0:       case tNotEqual:  // scanned '!'
michael@0:         // Only "!=" is a token; a '!' followed by anything else gets type none.
michael@0:         if (ruleSrc->charAt(tokenEnd+1) == EQUALS) {
michael@0:             tokenEnd += 2;
michael@0:         } else {
michael@0:             type = none;
michael@0:             tokenEnd += 1;
michael@0:         }
michael@0:         break;
michael@0: 
michael@0:       case tKeyword:
michael@0:       case tNumber:
michael@0:         {
michael@0:             // Consume the whole run of characters of the same class
michael@0:             // (letters for a keyword, digits for a number).
michael@0:             const tokenType runType = type;
michael@0:             do {
michael@0:                 ++tokenEnd;
michael@0:             } while (tokenEnd < srcLength && charType(ruleSrc->charAt(tokenEnd)) == runType);
michael@0:         }
michael@0:         break;
michael@0: 
michael@0:       case tDot:
michael@0:         // Either a single ".", ".." in a range, or "..." at the end of a sample.
michael@0:         if (tokenEnd+1 >= srcLength || ruleSrc->charAt(tokenEnd+1) != DOT) {
michael@0:             tokenEnd += 1;              // single dot, type stays tDot
michael@0:         } else if (tokenEnd+2 >= srcLength || ruleSrc->charAt(tokenEnd+2) != DOT) {
michael@0:             tokenEnd += 2;              // double dot
michael@0:             type = tDot2;
michael@0:         } else {
michael@0:             tokenEnd += 3;              // triple dot
michael@0:             type = tEllipsis;
michael@0:         }
michael@0:         break;
michael@0: 
michael@0:       default:
michael@0:         status = U_UNEXPECTED_TOKEN;
michael@0:         tokenEnd += 1;
michael@0:         break;
michael@0:     }
michael@0: 
michael@0:     U_ASSERT(ruleIndex <= ruleSrc->length());
michael@0:     U_ASSERT(tokenEnd <= ruleSrc->length());
michael@0:     token = UnicodeString(*ruleSrc, ruleIndex, tokenEnd-ruleIndex);
michael@0:     ruleIndex = tokenEnd;
michael@0: }
michael@0: 
michael@0: // Classifies a single character of the rule source: digits map to tNumber,
michael@0: // lower-case a-z to tKeyword, the punctuation listed below to its own token
michael@0: // type, and every other character to none.
michael@0: tokenType
michael@0: PluralRuleParser::charType(UChar ch) {
michael@0:     if (U_ZERO <= ch && ch <= U_NINE) {
michael@0:         return tNumber;
michael@0:     }
michael@0:     if (LOW_A <= ch && ch <= LOW_Z) {
michael@0:         return tKeyword;
michael@0:     }
michael@0: 
michael@0:     tokenType kind;
michael@0:     switch (ch) {
michael@0:     case COLON:        kind = tColon;     break;
michael@0:     case SPACE:        kind = tSpace;     break;
michael@0:     case SEMI_COLON:   kind = tSemiColon; break;
michael@0:     case DOT:          kind = tDot;       break;
michael@0:     case COMMA:        kind = tComma;     break;
michael@0:     case EXCLAMATION:  kind = tNotEqual;  break;
michael@0:     case EQUALS:       kind = tEqual;     break;
michael@0:     case PERCENT_SIGN: kind = tMod;       break;
michael@0:     case AT:           kind = tAt;        break;
michael@0:     case ELLIPSIS:     kind = tEllipsis;  break;
michael@0:     case TILDE:        kind = tTilde;     break;
michael@0:     default:           kind = none;       break;
michael@0:     }
michael@0:     return kind;
michael@0: }
michael@0: 
michael@0: 
michael@0: //  Maps reserved words of the Plural Rule syntax to their token types.
michael@0: //    Only tokens currently typed tKeyword are looked up; any other keyType is
michael@0: //    returned unchanged, as is tKeyword when the word is not reserved.
michael@0: 
michael@0: tokenType 
michael@0: PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType)
michael@0: {
michael@0:     if (keyType != tKeyword) {
michael@0:         return keyType;
michael@0:     }
michael@0: 
michael@0:     // Reserved word text, its length in UChars, and the token type it denotes.
michael@0:     static const struct {
michael@0:         const UChar *text;
michael@0:         int32_t      length;
michael@0:         tokenType    type;
michael@0:     } reservedWords[] = {
michael@0:         { PK_VAR_N,   1, tVariableN },
michael@0:         { PK_VAR_I,   1, tVariableI },
michael@0:         { PK_VAR_F,   1, tVariableF },
michael@0:         { PK_VAR_T,   1, tVariableT },
michael@0:         { PK_VAR_V,   1, tVariableV },
michael@0:         { PK_IS,      2, tIs        },
michael@0:         { PK_AND,     3, tAnd       },
michael@0:         { PK_IN,      2, tIn        },
michael@0:         { PK_WITHIN,  6, tWithin    },
michael@0:         { PK_NOT,     3, tNot       },
michael@0:         { PK_MOD,     3, tMod       },
michael@0:         { PK_OR,      2, tOr        },
michael@0:         { PK_DECIMAL, 7, tDecimal   },
michael@0:         { PK_INTEGER, 7, tInteger   }
michael@0:     };
michael@0: 
michael@0:     for (int32_t i = 0; i < ARRAY_SIZE(reservedWords); ++i) {
michael@0:         if (0 == token.compare(reservedWords[i].text, reservedWords[i].length)) {
michael@0:             return reservedWords[i].type;
michael@0:         }
michael@0:     }
michael@0:     return keyType;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Collects a copy of the keyword of every rule in the chain starting at
michael@0: // `header`, and adds "other" at the end unless the chain already contains it.
michael@0: // The copies are owned by fKeywordNames (deleter: uprv_deleteUObject).
michael@0: //
michael@0: // On failure `status` is set and the enumeration holds only the keywords added
michael@0: // so far. A failed allocation is reported as U_MEMORY_ALLOCATION_ERROR instead
michael@0: // of storing a NULL element, and a string that could not be added to the
michael@0: // vector is freed rather than leaked.
michael@0: PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
michael@0:         : pos(0), fKeywordNames(status) {
michael@0:     if (U_FAILURE(status)) {
michael@0:         return;
michael@0:     }
michael@0:     fKeywordNames.setDeleter(uprv_deleteUObject);
michael@0:     UBool  addKeywordOther=TRUE;
michael@0:     for (RuleChain *node=header; node!=NULL; node=node->fNext) {
michael@0:         UnicodeString *name = new UnicodeString(node->fKeyword);
michael@0:         if (name == NULL) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         fKeywordNames.addElement(name, status);
michael@0:         if (U_FAILURE(status)) {
michael@0:             // NOTE(review): assumes UVector::addElement() does not take ownership
michael@0:             // of the object when it fails - confirm against uvector.cpp.
michael@0:             delete name;
michael@0:             return;
michael@0:         }
michael@0:         if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
michael@0:             addKeywordOther= FALSE;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     if (addKeywordOther) {
michael@0:         UnicodeString *other = new UnicodeString(PLURAL_KEYWORD_OTHER);
michael@0:         if (other == NULL) {
michael@0:             status = U_MEMORY_ALLOCATION_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         fKeywordNames.addElement(other, status);
michael@0:         if (U_FAILURE(status)) {
michael@0:             delete other;   // same ownership assumption as above
michael@0:         }
michael@0:     }
michael@0: }
michael@0: 
michael@0: // Returns the keyword at the current position and advances the position.
michael@0: // Returns NULL when status signals a failure or all keywords were returned.
michael@0: const UnicodeString*
michael@0: PluralKeywordEnumeration::snext(UErrorCode& status) {
michael@0:     if (U_FAILURE(status) || pos >= fKeywordNames.size()) {
michael@0:         return NULL;
michael@0:     }
michael@0:     return (const UnicodeString*)fKeywordNames.elementAt(pos++);
michael@0: }
michael@0: 
michael@0: // Rewinds the enumeration to its first keyword. The status argument is unused.
michael@0: void
michael@0: PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
michael@0:     this->pos = 0;
michael@0: }
michael@0: 
michael@0: // Returns the total number of keywords held, independent of the current
michael@0: // position. The status argument is unused.
michael@0: int32_t
michael@0: PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
michael@0:     const int32_t keywordCount = fKeywordNames.size();
michael@0:     return keywordCount;
michael@0: }
michael@0: 
michael@0: // Empty body: no explicit cleanup is performed here.
michael@0: PluralKeywordEnumeration::~PluralKeywordEnumeration()
michael@0: {
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: // Builds a FixedDecimal from a number `n`, its count of visible fraction
michael@0: // digits `v` and those fraction digits as an integer `f`.
michael@0: FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) {
michael@0:     init(n, v, f);
michael@0:     // TODO: consistency checks on the arguments, to be turned into a unit test.
michael@0:     //       Sketch kept from the original (Java-style) note:
michael@0:     //
michael@0:     //            long visiblePower = (int) Math.pow(10, v);
michael@0:     //            if (decimalDigits > visiblePower) {
michael@0:     //                throw new IllegalArgumentException();
michael@0:     //            }
michael@0:     //            double fraction = intValue + (decimalDigits / (double) visiblePower);
michael@0:     //            if (fraction != source) {
michael@0:     //                double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source));
michael@0:     //                if (diff > 0.00000001d) {
michael@0:     //                    throw new IllegalArgumentException();
michael@0:     //                }
michael@0:     //            }
michael@0: }
michael@0: 
michael@0: // Builds a FixedDecimal from `n` shown with `v` fraction digits; the fraction
michael@0: // digits themselves are derived from `n`.
michael@0: FixedDecimal::FixedDecimal(double n, int32_t v) {
michael@0:     // Not pretty, but good enough for samples.
michael@0:     const int64_t fractionDigits = getFractionalDigits(n, v);
michael@0:     init(n, v, fractionDigits);
michael@0: }
michael@0: 
michael@0: // Builds a FixedDecimal from `n` alone; init(double) works out the number of
michael@0: // fraction digits.
michael@0: FixedDecimal::FixedDecimal(double n)
michael@0: {
michael@0:     init(n);
michael@0: }
michael@0: 
michael@0: // Default value: the number 0 with no visible fraction digits.
michael@0: FixedDecimal::FixedDecimal()
michael@0: {
michael@0:     init(0, 0, 0);
michael@0: }
michael@0: 
michael@0: 
michael@0: // Builds a FixedDecimal from the textual form of a number.
michael@0: //    The count of visible fraction digits is the number of characters after the
michael@0: //    first '.' in the string (0 when there is none).
michael@0: //    On a conversion failure the object is initialized to 0 and status is set.
michael@0: //    Not efficient, but it is only used for samples, where simplicity wins.
michael@0: 
michael@0: FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) {
michael@0:     CharString cs;
michael@0:     cs.appendInvariantChars(num, status);
michael@0:     DigitList dl;
michael@0:     dl.set(cs.toStringPiece(), status);
michael@0:     if (U_FAILURE(status)) {
michael@0:         init(0, 0, 0);
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     int32_t dotIndex = num.indexOf(DOT);
michael@0:     double n = dl.getDouble();
michael@0:     int32_t v = 0;
michael@0:     int64_t f = 0;
michael@0:     if (dotIndex != -1) {
michael@0:         v = num.length() - dotIndex - 1;
michael@0:         f = getFractionalDigits(n, v);
michael@0:     }
michael@0:     init(n, v, f);
michael@0: }
michael@0: 
michael@0: 
michael@0: // Copy constructor: member-by-member copy of all eight fields.
michael@0: FixedDecimal::FixedDecimal(const FixedDecimal &other) {
michael@0:     // the number itself and its integer part
michael@0:     this->source          = other.source;
michael@0:     this->intValue        = other.intValue;
michael@0:     this->hasIntegerValue = other.hasIntegerValue;
michael@0: 
michael@0:     // fraction digit information
michael@0:     this->visibleDecimalDigitCount          = other.visibleDecimalDigitCount;
michael@0:     this->decimalDigits                     = other.decimalDigits;
michael@0:     this->decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros;
michael@0: 
michael@0:     // flags
michael@0:     this->isNegative      = other.isNegative;
michael@0:     this->isNanOrInfinity = other.isNanOrInfinity;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Initializes from `n` alone: the fraction digit count comes from decimals(n)
michael@0: // and the fraction digits from getFractionalDigits().
michael@0: void FixedDecimal::init(double n) {
michael@0:     const int32_t fractionDigitCount = decimals(n);
michael@0:     const int64_t fractionDigits = getFractionalDigits(n, fractionDigitCount);
michael@0:     init(n, fractionDigitCount, fractionDigits);
michael@0: }
michael@0: 
michael@0: 
michael@0: // Sets every field from the number `n`, its visible fraction digit count `v`
michael@0: // and its fraction digits `f`.
michael@0: //   - The sign is recorded in isNegative; source holds the absolute value.
michael@0: //   - For NaN or infinity, v and f are forced to 0, intValue to 0 and
michael@0: //     hasIntegerValue to FALSE.
michael@0: //   - decimalDigitsWithoutTrailingZeros is f with trailing decimal zeros removed.
michael@0: void FixedDecimal::init(double n, int32_t v, int64_t f) {
michael@0:     isNegative = n < 0.0;
michael@0:     source = fabs(n);
michael@0:     isNanOrInfinity = uprv_isNaN(source) || uprv_isPositiveInfinity(source);
michael@0: 
michael@0:     if (!isNanOrInfinity) {
michael@0:         intValue = (int64_t)source;
michael@0:         hasIntegerValue = (source == intValue);
michael@0:     } else {
michael@0:         v = 0;
michael@0:         f = 0;
michael@0:         intValue = 0;
michael@0:         hasIntegerValue = FALSE;
michael@0:     }
michael@0: 
michael@0:     visibleDecimalDigitCount = v;
michael@0:     decimalDigits = f;
michael@0: 
michael@0:     // Strip trailing zeros; f == 0 is kept as 0 (and must skip the loop,
michael@0:     // which would never terminate for 0).
michael@0:     int64_t trimmed = f;
michael@0:     if (trimmed != 0) {
michael@0:         while ((trimmed % 10) == 0) {
michael@0:             trimmed /= 10;
michael@0:         }
michael@0:     }
michael@0:     decimalDigitsWithoutTrailingZeros = trimmed;
michael@0: }
michael@0: 
michael@0: 
michael@0: //  Fast path only exact initialization. Return true if successful.
michael@0: //     Succeeds when |n| scaled by 1, 10, 100 or 1000 is an integer, i.e. when n
michael@0: //     has at most three fraction digits that a double multiply reproduces
michael@0: //     exactly. On failure the object is left untouched.
michael@0: //     Note: Do not multiply by 10 each time through loop, rounding cruft can build
michael@0: //           up that makes the check for an integer result fail.
michael@0: //           A single multiply of the original number works more reliably.
michael@0: //     The signed value of n is handed to init(), which takes the absolute value
michael@0: //     itself, so that isNegative is recorded for negative input (it used to be
michael@0: //     lost because fabs(n) was passed on).
michael@0: static const int32_t p10[] = {1, 10, 100, 1000, 10000};
michael@0: UBool FixedDecimal::quickInit(double n) {
michael@0:     const double magnitude = fabs(n);
michael@0:     for (int32_t numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) {
michael@0:         double scaledN = magnitude * p10[numFractionDigits];
michael@0:         if (scaledN == floor(scaledN)) {
michael@0:             // getFractionalDigits() also works on the absolute value internally,
michael@0:             // so passing the signed n does not change the digits it returns.
michael@0:             init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
michael@0:             return TRUE;
michael@0:         }
michael@0:     }
michael@0:     return FALSE;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: // Returns the number of decimal digits in the fraction part of |n|, not
michael@0: // counting trailing zeros.
michael@0: int32_t FixedDecimal::decimals(double n) {
michael@0:     n = fabs(n);
michael@0: 
michael@0:     // Fast path for the common cases: integers, or fractions with up to three
michael@0:     // digits, detected by a single multiply with a power of ten.
michael@0:     for (int digitCount = 0; digitCount <= 3; ++digitCount) {
michael@0:         double scaledN = n * p10[digitCount];
michael@0:         if (scaledN == floor(scaledN)) {
michael@0:             return digitCount;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     // Slow path: format with sprintf and inspect the text.
michael@0:     // The formatted number looks like this: 1.234567890123457e-01
michael@0:     //   buf[0] leading digit, buf[1] '.', buf[2..16] 15 mantissa digits,
michael@0:     //   buf[17] 'e', buf[18..] signed exponent.
michael@0:     char  buf[30] = {0};
michael@0:     sprintf(buf, "%1.15e", n);
michael@0:     int exponent = atoi(buf+18);
michael@0: 
michael@0:     // Find the last mantissa digit that is not a trailing zero.
michael@0:     int lastSignificant = 16;
michael@0:     while (buf[lastSignificant] == '0') {
michael@0:         --lastSignificant;
michael@0:     }
michael@0:     // Mantissa fraction digits that remain: 15 - (16 - lastSignificant).
michael@0:     int numFractionDigits = lastSignificant - 1;
michael@0: 
michael@0:     // Shift by the exponent to get the fraction part of the fixed point form.
michael@0:     return numFractionDigits - exponent;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Get the fraction digits of a double, represented as an integer.
michael@0: //    v is the number of visible fraction digits in the displayed form of the number.
michael@0: //       Example: n = 1001.234, v = 6, result = 234000
michael@0: //    Returns 0 when v is 0 or when n is an integer, NaN or infinite.
michael@0: //    The scaled fraction is rounded to nearest (+0.5, then truncated/floored).
michael@0: //    Results that do not fit in an int64_t are pinned to U_INT64_MAX.
michael@0: //    TODO: need to think through how this is used in the plural rule context.
michael@0: //          This function can easily return noise digits when the precision of
michael@0: //          a double is exceeded.
michael@0: 
michael@0: int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) {
michael@0:     if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) {
michael@0:         return 0;
michael@0:     }
michael@0:     n = fabs(n);
michael@0:     double fract = n - floor(n);
michael@0:     switch (v) {
michael@0:       case 1: return (int64_t)(fract*10.0 + 0.5);
michael@0:       case 2: return (int64_t)(fract*100.0 + 0.5);
michael@0:       case 3: return (int64_t)(fract*1000.0 + 0.5);
michael@0:       default:
michael@0:           {
michael@0:               double scaled = floor(fract * pow(10.0, (double)v) + 0.5);
michael@0:               // U_INT64_MAX is not representable as a double; converting it
michael@0:               // rounds up to 2^63, which is itself out of range for int64_t.
michael@0:               // Hence ">=": a value equal to the converted limit must be
michael@0:               // pinned as well instead of reaching the cast below.
michael@0:               if (scaled >= (double)U_INT64_MAX) {
michael@0:                   return U_INT64_MAX;
michael@0:               }
michael@0:               return (int64_t)scaled;
michael@0:           }
michael@0:       }
michael@0: }
michael@0: 
michael@0: 
michael@0: // Pads the number with trailing fraction zeros so that at least
michael@0: // `minFractionDigits` fraction digits are visible. Nothing changes when that
michael@0: // many digits are visible already.
michael@0: void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) {
michael@0:     const int32_t zerosToAdd = minFractionDigits - visibleDecimalDigitCount;
michael@0:     if (zerosToAdd <= 0) {
michael@0:         return;
michael@0:     }
michael@0:     // Each added zero multiplies decimalDigits by ten. Stop scaling once the
michael@0:     // value has 18 digits, the most that a 64 bit int can fully represent, so
michael@0:     // that many trailing zeros cannot make it overflow.
michael@0:     for (int32_t added = 0;
michael@0:          added < zerosToAdd && decimalDigits < 100000000000000000LL;
michael@0:          ++added) {
michael@0:         decimalDigits *= 10;
michael@0:     }
michael@0:     // The visible count always grows by the full amount, even if scaling stopped early.
michael@0:     visibleDecimalDigitCount += zerosToAdd;
michael@0: }
michael@0:         
michael@0: 
michael@0: // Returns the value of the plural operand named by `operand` (n, i, f, t or v)
michael@0: // as a double. Any other token type is unexpected: it asserts in debug builds
michael@0: // and otherwise yields the same value as n.
michael@0: double FixedDecimal::get(tokenType operand) const {
michael@0:     double value = source;      // value of n; also the fallback
michael@0:     switch(operand) {
michael@0:         case tVariableN:
michael@0:             break;
michael@0:         case tVariableI:
michael@0:             value = (double)intValue;
michael@0:             break;
michael@0:         case tVariableF:
michael@0:             value = (double)decimalDigits;
michael@0:             break;
michael@0:         case tVariableT:
michael@0:             value = (double)decimalDigitsWithoutTrailingZeros;
michael@0:             break;
michael@0:         case tVariableV:
michael@0:             value = visibleDecimalDigitCount;
michael@0:             break;
michael@0:         default:
michael@0:             U_ASSERT(FALSE);  // unexpected.
michael@0:             break;
michael@0:     }
michael@0:     return value;
michael@0: }
michael@0: 
michael@0: // Accessor for the number of visible fraction digits (the operand v).
michael@0: int32_t FixedDecimal::getVisibleFractionDigitCount() const
michael@0: {
michael@0:     return this->visibleDecimalDigitCount;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: // Opens the "locales" table of the "plurals" resource bundle for enumeration.
michael@0: // The outcome is remembered in fOpenStatus rather than written back to
michael@0: // `status`; if `status` already signals a failure, nothing is opened and that
michael@0: // failure is what fOpenStatus records.
michael@0: PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) {
michael@0:     fLocales = NULL;
michael@0:     fRes = NULL;
michael@0:     fOpenStatus = status;
michael@0:     if (U_SUCCESS(status)) {
michael@0:         // Start from a clean code so an incoming warning is not carried over.
michael@0:         fOpenStatus = U_ZERO_ERROR;
michael@0:         // The bundle itself is closed when `rb` goes out of scope.
michael@0:         LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &fOpenStatus));
michael@0:         fLocales = ures_getByKey(rb.getAlias(), "locales", NULL, &fOpenStatus);
michael@0:     }
michael@0: }
michael@0: 
michael@0: // Close both resource bundle handles held by the enumeration and clear the pointers.
michael@0: PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() {
michael@0:     // fLocales is closed before fRes, the handle used for the current entry.
michael@0:     ures_close(fLocales);
michael@0:     fLocales = NULL;
michael@0:     ures_close(fRes);
michael@0:     fRes = NULL;
michael@0: }
michael@0: 
michael@0: // Advance to the next entry of fLocales and return its key.
michael@0: //
michael@0: // resultLength: if not NULL, receives the length of the returned key.
michael@0: // status:       nothing is done if it already signals failure; a failure recorded
michael@0: //               in fOpenStatus by the constructor is copied into it.
michael@0: // Returns the key of the next entry, or NULL if no entry could be fetched.
michael@0: const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) {
michael@0:     if (U_FAILURE(status)) {
michael@0:         return NULL;
michael@0:     }
michael@0:     if (U_FAILURE(fOpenStatus)) {
michael@0:         status = fOpenStatus;
michael@0:         return NULL;
michael@0:     }
michael@0:     // fRes is handed back in so the same handle is reused for each entry.
michael@0:     fRes = ures_getNextResource(fLocales, fRes, &status);
michael@0:     if (fRes != NULL && !U_FAILURE(status)) {
michael@0:         const char *localeKey = ures_getKey(fRes);
michael@0:         if (resultLength != NULL) {
michael@0:             *resultLength = uprv_strlen(localeKey);
michael@0:         }
michael@0:         return localeKey;
michael@0:     }
michael@0:     // U_INDEX_OUTOFBOUNDS_ERROR is cleared rather than reported to the caller;
michael@0:     // presumably it is how ures_getNextResource signals the end of the entries.
michael@0:     if (status == U_INDEX_OUTOFBOUNDS_ERROR) {
michael@0:         status = U_ZERO_ERROR;
michael@0:     }
michael@0:     return NULL;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Reset the iterator of fLocales.
michael@0: //
michael@0: // Does nothing if status already signals failure. If the constructor recorded a
michael@0: // failure in fOpenStatus, that code is copied into status and fLocales is left alone.
michael@0: void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) {
michael@0:     if (!U_FAILURE(status)) {
michael@0:         if (U_FAILURE(fOpenStatus)) {
michael@0:             status = fOpenStatus;
michael@0:         } else {
michael@0:             ures_resetIterator(fLocales);
michael@0:         }
michael@0:     }
michael@0: }
michael@0: 
michael@0: // Report the size of fLocales as given by ures_getSize.
michael@0: //
michael@0: // Returns 0 if status already signals failure, or if the constructor recorded a
michael@0: // failure in fOpenStatus (that code is then copied into status).
michael@0: int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const {
michael@0:     int32_t localeCount = 0;
michael@0:     if (!U_FAILURE(status)) {
michael@0:         if (U_FAILURE(fOpenStatus)) {
michael@0:             status = fOpenStatus;
michael@0:         } else {
michael@0:             localeCount = ures_getSize(fLocales);
michael@0:         }
michael@0:     }
michael@0:     return localeCount;
michael@0: }
michael@0: 
michael@0: U_NAMESPACE_END
michael@0: 
michael@0: 
michael@0: #endif /* #if !UCONFIG_NO_FORMATTING */
michael@0: 
michael@0: //eof