1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/nfrule.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1474 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* Copyright (C) 1997-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +****************************************************************************** 1.9 +* file name: nfrule.cpp 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* Modification history 1.15 +* Date Name Comments 1.16 +* 10/11/2001 Doug Ported from ICU4J 1.17 +*/ 1.18 + 1.19 +#include "nfrule.h" 1.20 + 1.21 +#if U_HAVE_RBNF 1.22 + 1.23 +#include "unicode/rbnf.h" 1.24 +#include "unicode/tblcoll.h" 1.25 +#include "unicode/coleitr.h" 1.26 +#include "unicode/uchar.h" 1.27 +#include "nfrs.h" 1.28 +#include "nfrlist.h" 1.29 +#include "nfsubs.h" 1.30 +#include "patternprops.h" 1.31 + 1.32 +U_NAMESPACE_BEGIN 1.33 + 1.34 +NFRule::NFRule(const RuleBasedNumberFormat* _rbnf) 1.35 + : baseValue((int32_t)0) 1.36 + , radix(0) 1.37 + , exponent(0) 1.38 + , ruleText() 1.39 + , sub1(NULL) 1.40 + , sub2(NULL) 1.41 + , formatter(_rbnf) 1.42 +{ 1.43 +} 1.44 + 1.45 +NFRule::~NFRule() 1.46 +{ 1.47 + delete sub1; 1.48 + delete sub2; 1.49 +} 1.50 + 1.51 +static const UChar gLeftBracket = 0x005b; 1.52 +static const UChar gRightBracket = 0x005d; 1.53 +static const UChar gColon = 0x003a; 1.54 +static const UChar gZero = 0x0030; 1.55 +static const UChar gNine = 0x0039; 1.56 +static const UChar gSpace = 0x0020; 1.57 +static const UChar gSlash = 0x002f; 1.58 +static const UChar gGreaterThan = 0x003e; 1.59 +static const UChar gLessThan = 0x003c; 1.60 +static const UChar gComma = 0x002c; 1.61 +static const UChar gDot = 0x002e; 1.62 +static const UChar gTick = 0x0027; 1.63 +//static const UChar gMinus = 0x002d; 1.64 +static const UChar gSemicolon = 0x003b; 1.65 + 1.66 +static const UChar gMinusX[] = {0x2D, 0x78, 0}; /* "-x" */ 1.67 +static const UChar gXDotX[] = {0x78, 0x2E, 0x78, 0}; /* "x.x" */ 1.68 +static const UChar gXDotZero[] = {0x78, 0x2E, 0x30, 0}; /* "x.0" */ 1.69 +static const UChar gZeroDotX[] = {0x30, 0x2E, 0x78, 0}; /* "0.x" */ 1.70 + 1.71 +static const UChar gLessLess[] = {0x3C, 0x3C, 0}; /* "<<" */ 1.72 +static const UChar gLessPercent[] = {0x3C, 0x25, 0}; /* "<%" */ 1.73 +static const UChar gLessHash[] = {0x3C, 0x23, 0}; /* "<#" */ 1.74 +static const UChar gLessZero[] = {0x3C, 0x30, 0}; /* "<0" */ 1.75 +static const UChar gGreaterGreater[] = {0x3E, 0x3E, 0}; /* ">>" */ 1.76 +static const UChar gGreaterPercent[] = {0x3E, 0x25, 0}; /* ">%" */ 1.77 +static const UChar gGreaterHash[] = {0x3E, 0x23, 0}; /* ">#" */ 1.78 +static const UChar gGreaterZero[] = {0x3E, 0x30, 0}; /* ">0" */ 1.79 +static const UChar gEqualPercent[] = {0x3D, 0x25, 0}; /* "=%" */ 1.80 +static const UChar gEqualHash[] = {0x3D, 0x23, 0}; /* "=#" */ 1.81 +static const UChar gEqualZero[] = {0x3D, 0x30, 0}; /* "=0" */ 1.82 +static const UChar gGreaterGreaterGreater[] = {0x3E, 0x3E, 0x3E, 0}; /* ">>>" */ 1.83 + 1.84 +static const UChar * const tokenStrings[] = { 1.85 + gLessLess, gLessPercent, gLessHash, gLessZero, 1.86 + gGreaterGreater, gGreaterPercent,gGreaterHash, gGreaterZero, 1.87 + gEqualPercent, gEqualHash, gEqualZero, NULL 1.88 +}; 1.89 + 1.90 +void 1.91 +NFRule::makeRules(UnicodeString& description, 1.92 + const NFRuleSet *ruleSet, 1.93 + const NFRule *predecessor, 1.94 + const RuleBasedNumberFormat *rbnf, 1.95 + NFRuleList& rules, 1.96 + UErrorCode& status) 1.97 +{ 1.98 + // we know we're making at least one rule, so go ahead and 1.99 + // new it up and initialize its basevalue and divisor 1.100 + // (this also strips the rule descriptor, if any, off the 1.101 + // descripton string) 1.102 + NFRule* rule1 = new NFRule(rbnf); 1.103 + /* test for NULL */ 1.104 + if (rule1 == 0) { 1.105 + status = U_MEMORY_ALLOCATION_ERROR; 1.106 + return; 1.107 + } 1.108 + rule1->parseRuleDescriptor(description, status); 1.109 + 1.110 + // check the description to see whether there's text enclosed 1.111 + // in brackets 1.112 + int32_t brack1 = description.indexOf(gLeftBracket); 1.113 + int32_t brack2 = description.indexOf(gRightBracket); 1.114 + 1.115 + // if the description doesn't contain a matched pair of brackets, 1.116 + // or if it's of a type that doesn't recognize bracketed text, 1.117 + // then leave the description alone, initialize the rule's 1.118 + // rule text and substitutions, and return that rule 1.119 + if (brack1 == -1 || brack2 == -1 || brack1 > brack2 1.120 + || rule1->getType() == kProperFractionRule 1.121 + || rule1->getType() == kNegativeNumberRule) { 1.122 + rule1->ruleText = description; 1.123 + rule1->extractSubstitutions(ruleSet, predecessor, rbnf, status); 1.124 + rules.add(rule1); 1.125 + } else { 1.126 + // if the description does contain a matched pair of brackets, 1.127 + // then it's really shorthand for two rules (with one exception) 1.128 + NFRule* rule2 = NULL; 1.129 + UnicodeString sbuf; 1.130 + 1.131 + // we'll actually only split the rule into two rules if its 1.132 + // base value is an even multiple of its divisor (or it's one 1.133 + // of the special rules) 1.134 + if ((rule1->baseValue > 0 1.135 + && (rule1->baseValue % util64_pow(rule1->radix, rule1->exponent)) == 0) 1.136 + || rule1->getType() == kImproperFractionRule 1.137 + || rule1->getType() == kMasterRule) { 1.138 + 1.139 + // if it passes that test, new up the second rule. If the 1.140 + // rule set both rules will belong to is a fraction rule 1.141 + // set, they both have the same base value; otherwise, 1.142 + // increment the original rule's base value ("rule1" actually 1.143 + // goes SECOND in the rule set's rule list) 1.144 + rule2 = new NFRule(rbnf); 1.145 + /* test for NULL */ 1.146 + if (rule2 == 0) { 1.147 + status = U_MEMORY_ALLOCATION_ERROR; 1.148 + return; 1.149 + } 1.150 + if (rule1->baseValue >= 0) { 1.151 + rule2->baseValue = rule1->baseValue; 1.152 + if (!ruleSet->isFractionRuleSet()) { 1.153 + ++rule1->baseValue; 1.154 + } 1.155 + } 1.156 + 1.157 + // if the description began with "x.x" and contains bracketed 1.158 + // text, it describes both the improper fraction rule and 1.159 + // the proper fraction rule 1.160 + else if (rule1->getType() == kImproperFractionRule) { 1.161 + rule2->setType(kProperFractionRule); 1.162 + } 1.163 + 1.164 + // if the description began with "x.0" and contains bracketed 1.165 + // text, it describes both the master rule and the 1.166 + // improper fraction rule 1.167 + else if (rule1->getType() == kMasterRule) { 1.168 + rule2->baseValue = rule1->baseValue; 1.169 + rule1->setType(kImproperFractionRule); 1.170 + } 1.171 + 1.172 + // both rules have the same radix and exponent (i.e., the 1.173 + // same divisor) 1.174 + rule2->radix = rule1->radix; 1.175 + rule2->exponent = rule1->exponent; 1.176 + 1.177 + // rule2's rule text omits the stuff in brackets: initalize 1.178 + // its rule text and substitutions accordingly 1.179 + sbuf.append(description, 0, brack1); 1.180 + if (brack2 + 1 < description.length()) { 1.181 + sbuf.append(description, brack2 + 1, description.length() - brack2 - 1); 1.182 + } 1.183 + rule2->ruleText.setTo(sbuf); 1.184 + rule2->extractSubstitutions(ruleSet, predecessor, rbnf, status); 1.185 + } 1.186 + 1.187 + // rule1's text includes the text in the brackets but omits 1.188 + // the brackets themselves: initialize _its_ rule text and 1.189 + // substitutions accordingly 1.190 + sbuf.setTo(description, 0, brack1); 1.191 + sbuf.append(description, brack1 + 1, brack2 - brack1 - 1); 1.192 + if (brack2 + 1 < description.length()) { 1.193 + sbuf.append(description, brack2 + 1, description.length() - brack2 - 1); 1.194 + } 1.195 + rule1->ruleText.setTo(sbuf); 1.196 + rule1->extractSubstitutions(ruleSet, predecessor, rbnf, status); 1.197 + 1.198 + // if we only have one rule, return it; if we have two, return 1.199 + // a two-element array containing them (notice that rule2 goes 1.200 + // BEFORE rule1 in the list: in all cases, rule2 OMITS the 1.201 + // material in the brackets and rule1 INCLUDES the material 1.202 + // in the brackets) 1.203 + if (rule2 != NULL) { 1.204 + rules.add(rule2); 1.205 + } 1.206 + rules.add(rule1); 1.207 + } 1.208 +} 1.209 + 1.210 +/** 1.211 + * This function parses the rule's rule descriptor (i.e., the base 1.212 + * value and/or other tokens that precede the rule's rule text 1.213 + * in the description) and sets the rule's base value, radix, and 1.214 + * exponent according to the descriptor. (If the description doesn't 1.215 + * include a rule descriptor, then this function sets everything to 1.216 + * default values and the rule set sets the rule's real base value). 1.217 + * @param description The rule's description 1.218 + * @return If "description" included a rule descriptor, this is 1.219 + * "description" with the descriptor and any trailing whitespace 1.220 + * stripped off. Otherwise; it's "descriptor" unchangd. 1.221 + */ 1.222 +void 1.223 +NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) 1.224 +{ 1.225 + // the description consists of a rule descriptor and a rule body, 1.226 + // separated by a colon. The rule descriptor is optional. If 1.227 + // it's omitted, just set the base value to 0. 1.228 + int32_t p = description.indexOf(gColon); 1.229 + if (p == -1) { 1.230 + setBaseValue((int32_t)0, status); 1.231 + } else { 1.232 + // copy the descriptor out into its own string and strip it, 1.233 + // along with any trailing whitespace, out of the original 1.234 + // description 1.235 + UnicodeString descriptor; 1.236 + descriptor.setTo(description, 0, p); 1.237 + 1.238 + ++p; 1.239 + while (p < description.length() && PatternProps::isWhiteSpace(description.charAt(p))) { 1.240 + ++p; 1.241 + } 1.242 + description.removeBetween(0, p); 1.243 + 1.244 + // check first to see if the rule descriptor matches the token 1.245 + // for one of the special rules. If it does, set the base 1.246 + // value to the correct identfier value 1.247 + if (0 == descriptor.compare(gMinusX, 2)) { 1.248 + setType(kNegativeNumberRule); 1.249 + } 1.250 + else if (0 == descriptor.compare(gXDotX, 3)) { 1.251 + setType(kImproperFractionRule); 1.252 + } 1.253 + else if (0 == descriptor.compare(gZeroDotX, 3)) { 1.254 + setType(kProperFractionRule); 1.255 + } 1.256 + else if (0 == descriptor.compare(gXDotZero, 3)) { 1.257 + setType(kMasterRule); 1.258 + } 1.259 + 1.260 + // if the rule descriptor begins with a digit, it's a descriptor 1.261 + // for a normal rule 1.262 + // since we don't have Long.parseLong, and this isn't much work anyway, 1.263 + // just build up the value as we encounter the digits. 1.264 + else if (descriptor.charAt(0) >= gZero && descriptor.charAt(0) <= gNine) { 1.265 + int64_t val = 0; 1.266 + p = 0; 1.267 + UChar c = gSpace; 1.268 + 1.269 + // begin parsing the descriptor: copy digits 1.270 + // into "tempValue", skip periods, commas, and spaces, 1.271 + // stop on a slash or > sign (or at the end of the string), 1.272 + // and throw an exception on any other character 1.273 + int64_t ll_10 = 10; 1.274 + while (p < descriptor.length()) { 1.275 + c = descriptor.charAt(p); 1.276 + if (c >= gZero && c <= gNine) { 1.277 + val = val * ll_10 + (int32_t)(c - gZero); 1.278 + } 1.279 + else if (c == gSlash || c == gGreaterThan) { 1.280 + break; 1.281 + } 1.282 + else if (PatternProps::isWhiteSpace(c) || c == gComma || c == gDot) { 1.283 + } 1.284 + else { 1.285 + // throw new IllegalArgumentException("Illegal character in rule descriptor"); 1.286 + status = U_PARSE_ERROR; 1.287 + return; 1.288 + } 1.289 + ++p; 1.290 + } 1.291 + 1.292 + // we have the base value, so set it 1.293 + setBaseValue(val, status); 1.294 + 1.295 + // if we stopped the previous loop on a slash, we're 1.296 + // now parsing the rule's radix. Again, accumulate digits 1.297 + // in tempValue, skip punctuation, stop on a > mark, and 1.298 + // throw an exception on anything else 1.299 + if (c == gSlash) { 1.300 + val = 0; 1.301 + ++p; 1.302 + int64_t ll_10 = 10; 1.303 + while (p < descriptor.length()) { 1.304 + c = descriptor.charAt(p); 1.305 + if (c >= gZero && c <= gNine) { 1.306 + val = val * ll_10 + (int32_t)(c - gZero); 1.307 + } 1.308 + else if (c == gGreaterThan) { 1.309 + break; 1.310 + } 1.311 + else if (PatternProps::isWhiteSpace(c) || c == gComma || c == gDot) { 1.312 + } 1.313 + else { 1.314 + // throw new IllegalArgumentException("Illegal character is rule descriptor"); 1.315 + status = U_PARSE_ERROR; 1.316 + return; 1.317 + } 1.318 + ++p; 1.319 + } 1.320 + 1.321 + // tempValue now contain's the rule's radix. Set it 1.322 + // accordingly, and recalculate the rule's exponent 1.323 + radix = (int32_t)val; 1.324 + if (radix == 0) { 1.325 + // throw new IllegalArgumentException("Rule can't have radix of 0"); 1.326 + status = U_PARSE_ERROR; 1.327 + } 1.328 + 1.329 + exponent = expectedExponent(); 1.330 + } 1.331 + 1.332 + // if we stopped the previous loop on a > sign, then continue 1.333 + // for as long as we still see > signs. For each one, 1.334 + // decrement the exponent (unless the exponent is already 0). 1.335 + // If we see another character before reaching the end of 1.336 + // the descriptor, that's also a syntax error. 1.337 + if (c == gGreaterThan) { 1.338 + while (p < descriptor.length()) { 1.339 + c = descriptor.charAt(p); 1.340 + if (c == gGreaterThan && exponent > 0) { 1.341 + --exponent; 1.342 + } else { 1.343 + // throw new IllegalArgumentException("Illegal character in rule descriptor"); 1.344 + status = U_PARSE_ERROR; 1.345 + return; 1.346 + } 1.347 + ++p; 1.348 + } 1.349 + } 1.350 + } 1.351 + } 1.352 + 1.353 + // finally, if the rule body begins with an apostrophe, strip it off 1.354 + // (this is generally used to put whitespace at the beginning of 1.355 + // a rule's rule text) 1.356 + if (description.length() > 0 && description.charAt(0) == gTick) { 1.357 + description.removeBetween(0, 1); 1.358 + } 1.359 + 1.360 + // return the description with all the stuff we've just waded through 1.361 + // stripped off the front. It now contains just the rule body. 1.362 + // return description; 1.363 +} 1.364 + 1.365 +/** 1.366 +* Searches the rule's rule text for the substitution tokens, 1.367 +* creates the substitutions, and removes the substitution tokens 1.368 +* from the rule's rule text. 1.369 +* @param owner The rule set containing this rule 1.370 +* @param predecessor The rule preseding this one in "owners" rule list 1.371 +* @param ownersOwner The RuleBasedFormat that owns this rule 1.372 +*/ 1.373 +void 1.374 +NFRule::extractSubstitutions(const NFRuleSet* ruleSet, 1.375 + const NFRule* predecessor, 1.376 + const RuleBasedNumberFormat* rbnf, 1.377 + UErrorCode& status) 1.378 +{ 1.379 + if (U_SUCCESS(status)) { 1.380 + sub1 = extractSubstitution(ruleSet, predecessor, rbnf, status); 1.381 + sub2 = extractSubstitution(ruleSet, predecessor, rbnf, status); 1.382 + } 1.383 +} 1.384 + 1.385 +/** 1.386 +* Searches the rule's rule text for the first substitution token, 1.387 +* creates a substitution based on it, and removes the token from 1.388 +* the rule's rule text. 1.389 +* @param owner The rule set containing this rule 1.390 +* @param predecessor The rule preceding this one in the rule set's 1.391 +* rule list 1.392 +* @param ownersOwner The RuleBasedNumberFormat that owns this rule 1.393 +* @return The newly-created substitution. This is never null; if 1.394 +* the rule text doesn't contain any substitution tokens, this will 1.395 +* be a NullSubstitution. 1.396 +*/ 1.397 +NFSubstitution * 1.398 +NFRule::extractSubstitution(const NFRuleSet* ruleSet, 1.399 + const NFRule* predecessor, 1.400 + const RuleBasedNumberFormat* rbnf, 1.401 + UErrorCode& status) 1.402 +{ 1.403 + NFSubstitution* result = NULL; 1.404 + 1.405 + // search the rule's rule text for the first two characters of 1.406 + // a substitution token 1.407 + int32_t subStart = indexOfAny(tokenStrings); 1.408 + int32_t subEnd = subStart; 1.409 + 1.410 + // if we didn't find one, create a null substitution positioned 1.411 + // at the end of the rule text 1.412 + if (subStart == -1) { 1.413 + return NFSubstitution::makeSubstitution(ruleText.length(), this, predecessor, 1.414 + ruleSet, rbnf, UnicodeString(), status); 1.415 + } 1.416 + 1.417 + // special-case the ">>>" token, since searching for the > at the 1.418 + // end will actually find the > in the middle 1.419 + if (ruleText.indexOf(gGreaterGreaterGreater, 3, 0) == subStart) { 1.420 + subEnd = subStart + 2; 1.421 + 1.422 + // otherwise the substitution token ends with the same character 1.423 + // it began with 1.424 + } else { 1.425 + UChar c = ruleText.charAt(subStart); 1.426 + subEnd = ruleText.indexOf(c, subStart + 1); 1.427 + // special case for '<%foo<<' 1.428 + if (c == gLessThan && subEnd != -1 && subEnd < ruleText.length() - 1 && ruleText.charAt(subEnd+1) == c) { 1.429 + // ordinals use "=#,##0==%abbrev=" as their rule. Notice that the '==' in the middle 1.430 + // occurs because of the juxtaposition of two different rules. The check for '<' is a hack 1.431 + // to get around this. Having the duplicate at the front would cause problems with 1.432 + // rules like "<<%" to format, say, percents... 1.433 + ++subEnd; 1.434 + } 1.435 + } 1.436 + 1.437 + // if we don't find the end of the token (i.e., if we're on a single, 1.438 + // unmatched token character), create a null substitution positioned 1.439 + // at the end of the rule 1.440 + if (subEnd == -1) { 1.441 + return NFSubstitution::makeSubstitution(ruleText.length(), this, predecessor, 1.442 + ruleSet, rbnf, UnicodeString(), status); 1.443 + } 1.444 + 1.445 + // if we get here, we have a real substitution token (or at least 1.446 + // some text bounded by substitution token characters). Use 1.447 + // makeSubstitution() to create the right kind of substitution 1.448 + UnicodeString subToken; 1.449 + subToken.setTo(ruleText, subStart, subEnd + 1 - subStart); 1.450 + result = NFSubstitution::makeSubstitution(subStart, this, predecessor, ruleSet, 1.451 + rbnf, subToken, status); 1.452 + 1.453 + // remove the substitution from the rule text 1.454 + ruleText.removeBetween(subStart, subEnd+1); 1.455 + 1.456 + return result; 1.457 +} 1.458 + 1.459 +/** 1.460 + * Sets the rule's base value, and causes the radix and exponent 1.461 + * to be recalculated. This is used during construction when we 1.462 + * don't know the rule's base value until after it's been 1.463 + * constructed. It should be used at any other time. 1.464 + * @param The new base value for the rule. 1.465 + */ 1.466 +void 1.467 +NFRule::setBaseValue(int64_t newBaseValue, UErrorCode& status) 1.468 +{ 1.469 + // set the base value 1.470 + baseValue = newBaseValue; 1.471 + 1.472 + // if this isn't a special rule, recalculate the radix and exponent 1.473 + // (the radix always defaults to 10; if it's supposed to be something 1.474 + // else, it's cleaned up by the caller and the exponent is 1.475 + // recalculated again-- the only function that does this is 1.476 + // NFRule.parseRuleDescriptor() ) 1.477 + if (baseValue >= 1) { 1.478 + radix = 10; 1.479 + exponent = expectedExponent(); 1.480 + 1.481 + // this function gets called on a fully-constructed rule whose 1.482 + // description didn't specify a base value. This means it 1.483 + // has substitutions, and some substitutions hold on to copies 1.484 + // of the rule's divisor. Fix their copies of the divisor. 1.485 + if (sub1 != NULL) { 1.486 + sub1->setDivisor(radix, exponent, status); 1.487 + } 1.488 + if (sub2 != NULL) { 1.489 + sub2->setDivisor(radix, exponent, status); 1.490 + } 1.491 + 1.492 + // if this is a special rule, its radix and exponent are basically 1.493 + // ignored. Set them to "safe" default values 1.494 + } else { 1.495 + radix = 10; 1.496 + exponent = 0; 1.497 + } 1.498 +} 1.499 + 1.500 +/** 1.501 +* This calculates the rule's exponent based on its radix and base 1.502 +* value. This will be the highest power the radix can be raised to 1.503 +* and still produce a result less than or equal to the base value. 1.504 +*/ 1.505 +int16_t 1.506 +NFRule::expectedExponent() const 1.507 +{ 1.508 + // since the log of 0, or the log base 0 of something, causes an 1.509 + // error, declare the exponent in these cases to be 0 (we also 1.510 + // deal with the special-rule identifiers here) 1.511 + if (radix == 0 || baseValue < 1) { 1.512 + return 0; 1.513 + } 1.514 + 1.515 + // we get rounding error in some cases-- for example, log 1000 / log 10 1.516 + // gives us 1.9999999996 instead of 2. The extra logic here is to take 1.517 + // that into account 1.518 + int16_t tempResult = (int16_t)(uprv_log((double)baseValue) / uprv_log((double)radix)); 1.519 + int64_t temp = util64_pow(radix, tempResult + 1); 1.520 + if (temp <= baseValue) { 1.521 + tempResult += 1; 1.522 + } 1.523 + return tempResult; 1.524 +} 1.525 + 1.526 +/** 1.527 + * Searches the rule's rule text for any of the specified strings. 1.528 + * @param strings An array of strings to search the rule's rule 1.529 + * text for 1.530 + * @return The index of the first match in the rule's rule text 1.531 + * (i.e., the first substring in the rule's rule text that matches 1.532 + * _any_ of the strings in "strings"). If none of the strings in 1.533 + * "strings" is found in the rule's rule text, returns -1. 1.534 + */ 1.535 +int32_t 1.536 +NFRule::indexOfAny(const UChar* const strings[]) const 1.537 +{ 1.538 + int result = -1; 1.539 + for (int i = 0; strings[i]; i++) { 1.540 + int32_t pos = ruleText.indexOf(*strings[i]); 1.541 + if (pos != -1 && (result == -1 || pos < result)) { 1.542 + result = pos; 1.543 + } 1.544 + } 1.545 + return result; 1.546 +} 1.547 + 1.548 +//----------------------------------------------------------------------- 1.549 +// boilerplate 1.550 +//----------------------------------------------------------------------- 1.551 + 1.552 +/** 1.553 +* Tests two rules for equality. 1.554 +* @param that The rule to compare this one against 1.555 +* @return True is the two rules are functionally equivalent 1.556 +*/ 1.557 +UBool 1.558 +NFRule::operator==(const NFRule& rhs) const 1.559 +{ 1.560 + return baseValue == rhs.baseValue 1.561 + && radix == rhs.radix 1.562 + && exponent == rhs.exponent 1.563 + && ruleText == rhs.ruleText 1.564 + && *sub1 == *rhs.sub1 1.565 + && *sub2 == *rhs.sub2; 1.566 +} 1.567 + 1.568 +/** 1.569 +* Returns a textual representation of the rule. This won't 1.570 +* necessarily be the same as the description that this rule 1.571 +* was created with, but it will produce the same result. 1.572 +* @return A textual description of the rule 1.573 +*/ 1.574 +static void util_append64(UnicodeString& result, int64_t n) 1.575 +{ 1.576 + UChar buffer[256]; 1.577 + int32_t len = util64_tou(n, buffer, sizeof(buffer)); 1.578 + UnicodeString temp(buffer, len); 1.579 + result.append(temp); 1.580 +} 1.581 + 1.582 +void 1.583 +NFRule::_appendRuleText(UnicodeString& result) const 1.584 +{ 1.585 + switch (getType()) { 1.586 + case kNegativeNumberRule: result.append(gMinusX, 2); break; 1.587 + case kImproperFractionRule: result.append(gXDotX, 3); break; 1.588 + case kProperFractionRule: result.append(gZeroDotX, 3); break; 1.589 + case kMasterRule: result.append(gXDotZero, 3); break; 1.590 + default: 1.591 + // for a normal rule, write out its base value, and if the radix is 1.592 + // something other than 10, write out the radix (with the preceding 1.593 + // slash, of course). Then calculate the expected exponent and if 1.594 + // if isn't the same as the actual exponent, write an appropriate 1.595 + // number of > signs. Finally, terminate the whole thing with 1.596 + // a colon. 1.597 + util_append64(result, baseValue); 1.598 + if (radix != 10) { 1.599 + result.append(gSlash); 1.600 + util_append64(result, radix); 1.601 + } 1.602 + int numCarets = expectedExponent() - exponent; 1.603 + for (int i = 0; i < numCarets; i++) { 1.604 + result.append(gGreaterThan); 1.605 + } 1.606 + break; 1.607 + } 1.608 + result.append(gColon); 1.609 + result.append(gSpace); 1.610 + 1.611 + // if the rule text begins with a space, write an apostrophe 1.612 + // (whitespace after the rule descriptor is ignored; the 1.613 + // apostrophe is used to make the whitespace significant) 1.614 + if (ruleText.charAt(0) == gSpace && sub1->getPos() != 0) { 1.615 + result.append(gTick); 1.616 + } 1.617 + 1.618 + // now, write the rule's rule text, inserting appropriate 1.619 + // substitution tokens in the appropriate places 1.620 + UnicodeString ruleTextCopy; 1.621 + ruleTextCopy.setTo(ruleText); 1.622 + 1.623 + UnicodeString temp; 1.624 + sub2->toString(temp); 1.625 + ruleTextCopy.insert(sub2->getPos(), temp); 1.626 + sub1->toString(temp); 1.627 + ruleTextCopy.insert(sub1->getPos(), temp); 1.628 + 1.629 + result.append(ruleTextCopy); 1.630 + 1.631 + // and finally, top the whole thing off with a semicolon and 1.632 + // return the result 1.633 + result.append(gSemicolon); 1.634 +} 1.635 + 1.636 +//----------------------------------------------------------------------- 1.637 +// formatting 1.638 +//----------------------------------------------------------------------- 1.639 + 1.640 +/** 1.641 +* Formats the number, and inserts the resulting text into 1.642 +* toInsertInto. 1.643 +* @param number The number being formatted 1.644 +* @param toInsertInto The string where the resultant text should 1.645 +* be inserted 1.646 +* @param pos The position in toInsertInto where the resultant text 1.647 +* should be inserted 1.648 +*/ 1.649 +void 1.650 +NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos) const 1.651 +{ 1.652 + // first, insert the rule's rule text into toInsertInto at the 1.653 + // specified position, then insert the results of the substitutions 1.654 + // into the right places in toInsertInto (notice we do the 1.655 + // substitutions in reverse order so that the offsets don't get 1.656 + // messed up) 1.657 + toInsertInto.insert(pos, ruleText); 1.658 + sub2->doSubstitution(number, toInsertInto, pos); 1.659 + sub1->doSubstitution(number, toInsertInto, pos); 1.660 +} 1.661 + 1.662 +/** 1.663 +* Formats the number, and inserts the resulting text into 1.664 +* toInsertInto. 1.665 +* @param number The number being formatted 1.666 +* @param toInsertInto The string where the resultant text should 1.667 +* be inserted 1.668 +* @param pos The position in toInsertInto where the resultant text 1.669 +* should be inserted 1.670 +*/ 1.671 +void 1.672 +NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos) const 1.673 +{ 1.674 + // first, insert the rule's rule text into toInsertInto at the 1.675 + // specified position, then insert the results of the substitutions 1.676 + // into the right places in toInsertInto 1.677 + // [again, we have two copies of this routine that do the same thing 1.678 + // so that we don't sacrifice precision in a long by casting it 1.679 + // to a double] 1.680 + toInsertInto.insert(pos, ruleText); 1.681 + sub2->doSubstitution(number, toInsertInto, pos); 1.682 + sub1->doSubstitution(number, toInsertInto, pos); 1.683 +} 1.684 + 1.685 +/** 1.686 +* Used by the owning rule set to determine whether to invoke the 1.687 +* rollback rule (i.e., whether this rule or the one that precedes 1.688 +* it in the rule set's list should be used to format the number) 1.689 +* @param The number being formatted 1.690 +* @return True if the rule set should use the rule that precedes 1.691 +* this one in its list; false if it should use this rule 1.692 +*/ 1.693 +UBool 1.694 +NFRule::shouldRollBack(double number) const 1.695 +{ 1.696 + // we roll back if the rule contains a modulus substitution, 1.697 + // the number being formatted is an even multiple of the rule's 1.698 + // divisor, and the rule's base value is NOT an even multiple 1.699 + // of its divisor 1.700 + // In other words, if the original description had 1.701 + // 100: << hundred[ >>]; 1.702 + // that expands into 1.703 + // 100: << hundred; 1.704 + // 101: << hundred >>; 1.705 + // internally. But when we're formatting 200, if we use the rule 1.706 + // at 101, which would normally apply, we get "two hundred zero". 1.707 + // To prevent this, we roll back and use the rule at 100 instead. 1.708 + // This is the logic that makes this happen: the rule at 101 has 1.709 + // a modulus substitution, its base value isn't an even multiple 1.710 + // of 100, and the value we're trying to format _is_ an even 1.711 + // multiple of 100. This is called the "rollback rule." 1.712 + if ((sub1->isModulusSubstitution()) || (sub2->isModulusSubstitution())) { 1.713 + int64_t re = util64_pow(radix, exponent); 1.714 + return uprv_fmod(number, (double)re) == 0 && (baseValue % re) != 0; 1.715 + } 1.716 + return FALSE; 1.717 +} 1.718 + 1.719 +//----------------------------------------------------------------------- 1.720 +// parsing 1.721 +//----------------------------------------------------------------------- 1.722 + 1.723 +/** 1.724 +* Attempts to parse the string with this rule. 1.725 +* @param text The string being parsed 1.726 +* @param parsePosition On entry, the value is ignored and assumed to 1.727 +* be 0. On exit, this has been updated with the position of the first 1.728 +* character not consumed by matching the text against this rule 1.729 +* (if this rule doesn't match the text at all, the parse position 1.730 +* if left unchanged (presumably at 0) and the function returns 1.731 +* new Long(0)). 1.732 +* @param isFractionRule True if this rule is contained within a 1.733 +* fraction rule set. This is only used if the rule has no 1.734 +* substitutions. 1.735 +* @return If this rule matched the text, this is the rule's base value 1.736 +* combined appropriately with the results of parsing the substitutions. 1.737 +* If nothing matched, this is new Long(0) and the parse position is 1.738 +* left unchanged. The result will be an instance of Long if the 1.739 +* result is an integer and Double otherwise. The result is never null. 1.740 +*/ 1.741 +#ifdef RBNF_DEBUG 1.742 +#include <stdio.h> 1.743 + 1.744 +static void dumpUS(FILE* f, const UnicodeString& us) { 1.745 + int len = us.length(); 1.746 + char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; 1.747 + if (buf != NULL) { 1.748 + us.extract(0, len, buf); 1.749 + buf[len] = 0; 1.750 + fprintf(f, "%s", buf); 1.751 + uprv_free(buf); //delete[] buf; 1.752 + } 1.753 +} 1.754 +#endif 1.755 + 1.756 +UBool 1.757 +NFRule::doParse(const UnicodeString& text, 1.758 + ParsePosition& parsePosition, 1.759 + UBool isFractionRule, 1.760 + double upperBound, 1.761 + Formattable& resVal) const 1.762 +{ 1.763 + // internally we operate on a copy of the string being parsed 1.764 + // (because we're going to change it) and use our own ParsePosition 1.765 + ParsePosition pp; 1.766 + UnicodeString workText(text); 1.767 + 1.768 + // check to see whether the text before the first substitution 1.769 + // matches the text at the beginning of the string being 1.770 + // parsed. If it does, strip that off the front of workText; 1.771 + // otherwise, dump out with a mismatch 1.772 + UnicodeString prefix; 1.773 + prefix.setTo(ruleText, 0, sub1->getPos()); 1.774 + 1.775 +#ifdef RBNF_DEBUG 1.776 + fprintf(stderr, "doParse %x ", this); 1.777 + { 1.778 + UnicodeString rt; 1.779 + _appendRuleText(rt); 1.780 + dumpUS(stderr, rt); 1.781 + } 1.782 + 1.783 + fprintf(stderr, " text: '", this); 1.784 + dumpUS(stderr, text); 1.785 + fprintf(stderr, "' prefix: '"); 1.786 + dumpUS(stderr, prefix); 1.787 +#endif 1.788 + stripPrefix(workText, prefix, pp); 1.789 + int32_t prefixLength = text.length() - workText.length(); 1.790 + 1.791 +#ifdef RBNF_DEBUG 1.792 + fprintf(stderr, "' pl: %d ppi: %d s1p: %d\n", prefixLength, pp.getIndex(), sub1->getPos()); 1.793 +#endif 1.794 + 1.795 + if (pp.getIndex() == 0 && sub1->getPos() != 0) { 1.796 + // commented out because ParsePosition doesn't have error index in 1.1.x 1.797 + // restored for ICU4C port 1.798 + parsePosition.setErrorIndex(pp.getErrorIndex()); 1.799 + resVal.setLong(0); 1.800 + return TRUE; 1.801 + } 1.802 + 1.803 + // this is the fun part. The basic guts of the rule-matching 1.804 + // logic is matchToDelimiter(), which is called twice. The first 1.805 + // time it searches the input string for the rule text BETWEEN 1.806 + // the substitutions and tries to match the intervening text 1.807 + // in the input string with the first substitution. If that 1.808 + // succeeds, it then calls it again, this time to look for the 1.809 + // rule text after the second substitution and to match the 1.810 + // intervening input text against the second substitution. 1.811 + // 1.812 + // For example, say we have a rule that looks like this: 1.813 + // first << middle >> last; 1.814 + // and input text that looks like this: 1.815 + // first one middle two last 1.816 + // First we use stripPrefix() to match "first " in both places and 1.817 + // strip it off the front, leaving 1.818 + // one middle two last 1.819 + // Then we use matchToDelimiter() to match " middle " and try to 1.820 + // match "one" against a substitution. If it's successful, we now 1.821 + // have 1.822 + // two last 1.823 + // We use matchToDelimiter() a second time to match " last" and 1.824 + // try to match "two" against a substitution. If "two" matches 1.825 + // the substitution, we have a successful parse. 1.826 + // 1.827 + // Since it's possible in many cases to find multiple instances 1.828 + // of each of these pieces of rule text in the input string, 1.829 + // we need to try all the possible combinations of these 1.830 + // locations. This prevents us from prematurely declaring a mismatch, 1.831 + // and makes sure we match as much input text as we can. 1.832 + int highWaterMark = 0; 1.833 + double result = 0; 1.834 + int start = 0; 1.835 + double tempBaseValue = (double)(baseValue <= 0 ? 0 : baseValue); 1.836 + 1.837 + UnicodeString temp; 1.838 + do { 1.839 + // our partial parse result starts out as this rule's base 1.840 + // value. If it finds a successful match, matchToDelimiter() 1.841 + // will compose this in some way with what it gets back from 1.842 + // the substitution, giving us a new partial parse result 1.843 + pp.setIndex(0); 1.844 + 1.845 + temp.setTo(ruleText, sub1->getPos(), sub2->getPos() - sub1->getPos()); 1.846 + double partialResult = matchToDelimiter(workText, start, tempBaseValue, 1.847 + temp, pp, sub1, 1.848 + upperBound); 1.849 + 1.850 + // if we got a successful match (or were trying to match a 1.851 + // null substitution), pp is now pointing at the first unmatched 1.852 + // character. Take note of that, and try matchToDelimiter() 1.853 + // on the input text again 1.854 + if (pp.getIndex() != 0 || sub1->isNullSubstitution()) { 1.855 + start = pp.getIndex(); 1.856 + 1.857 + UnicodeString workText2; 1.858 + workText2.setTo(workText, pp.getIndex(), workText.length() - pp.getIndex()); 1.859 + ParsePosition pp2; 1.860 + 1.861 + // the second matchToDelimiter() will compose our previous 1.862 + // partial result with whatever it gets back from its 1.863 + // substitution if there's a successful match, giving us 1.864 + // a real result 1.865 + temp.setTo(ruleText, sub2->getPos(), ruleText.length() - sub2->getPos()); 1.866 + partialResult = matchToDelimiter(workText2, 0, partialResult, 1.867 + temp, pp2, sub2, 1.868 + upperBound); 1.869 + 1.870 + // if we got a successful match on this second 1.871 + // matchToDelimiter() call, update the high-water mark 1.872 + // and result (if necessary) 1.873 + if (pp2.getIndex() != 0 || sub2->isNullSubstitution()) { 1.874 + if (prefixLength + pp.getIndex() + pp2.getIndex() > highWaterMark) { 1.875 + highWaterMark = prefixLength + pp.getIndex() + pp2.getIndex(); 1.876 + result = partialResult; 1.877 + } 1.878 + } 1.879 + // commented out because ParsePosition doesn't have error index in 1.1.x 1.880 + // restored for ICU4C port 1.881 + else { 1.882 + int32_t temp = pp2.getErrorIndex() + sub1->getPos() + pp.getIndex(); 1.883 + if (temp> parsePosition.getErrorIndex()) { 1.884 + parsePosition.setErrorIndex(temp); 1.885 + } 1.886 + } 1.887 + } 1.888 + // commented out because ParsePosition doesn't have error index in 1.1.x 1.889 + // restored for ICU4C port 1.890 + else { 1.891 + int32_t temp = sub1->getPos() + pp.getErrorIndex(); 1.892 + if (temp > parsePosition.getErrorIndex()) { 1.893 + parsePosition.setErrorIndex(temp); 1.894 + } 1.895 + } 1.896 + // keep trying to match things until the outer matchToDelimiter() 1.897 + // call fails to make a match (each time, it picks up where it 1.898 + // left off the previous time) 1.899 + } while (sub1->getPos() != sub2->getPos() 1.900 + && pp.getIndex() > 0 1.901 + && pp.getIndex() < workText.length() 1.902 + && pp.getIndex() != start); 1.903 + 1.904 + // update the caller's ParsePosition with our high-water mark 1.905 + // (i.e., it now points at the first character this function 1.906 + // didn't match-- the ParsePosition is therefore unchanged if 1.907 + // we didn't match anything) 1.908 + parsePosition.setIndex(highWaterMark); 1.909 + // commented out because ParsePosition doesn't have error index in 1.1.x 1.910 + // restored for ICU4C port 1.911 + if (highWaterMark > 0) { 1.912 + parsePosition.setErrorIndex(0); 1.913 + } 1.914 + 1.915 + // this is a hack for one unusual condition: Normally, whether this 1.916 + // rule belong to a fraction rule set or not is handled by its 1.917 + // substitutions. But if that rule HAS NO substitutions, then 1.918 + // we have to account for it here. By definition, if the matching 1.919 + // rule in a fraction rule set has no substitutions, its numerator 1.920 + // is 1, and so the result is the reciprocal of its base value. 1.921 + if (isFractionRule && 1.922 + highWaterMark > 0 && 1.923 + sub1->isNullSubstitution()) { 1.924 + result = 1 / result; 1.925 + } 1.926 + 1.927 + resVal.setDouble(result); 1.928 + return TRUE; // ??? do we need to worry if it is a long or a double? 1.929 +} 1.930 + 1.931 +/** 1.932 +* This function is used by parse() to match the text being parsed 1.933 +* against a possible prefix string. This function 1.934 +* matches characters from the beginning of the string being parsed 1.935 +* to characters from the prospective prefix. If they match, pp is 1.936 +* updated to the first character not matched, and the result is 1.937 +* the unparsed part of the string. If they don't match, the whole 1.938 +* string is returned, and pp is left unchanged. 1.939 +* @param text The string being parsed 1.940 +* @param prefix The text to match against 1.941 +* @param pp On entry, ignored and assumed to be 0. On exit, points 1.942 +* to the first unmatched character (assuming the whole prefix matched), 1.943 +* or is unchanged (if the whole prefix didn't match). 1.944 +* @return If things match, this is the unparsed part of "text"; 1.945 +* if they didn't match, this is "text". 1.946 +*/ 1.947 +void 1.948 +NFRule::stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const 1.949 +{ 1.950 + // if the prefix text is empty, dump out without doing anything 1.951 + if (prefix.length() != 0) { 1.952 + UErrorCode status = U_ZERO_ERROR; 1.953 + // use prefixLength() to match the beginning of 1.954 + // "text" against "prefix". This function returns the 1.955 + // number of characters from "text" that matched (or 0 if 1.956 + // we didn't match the whole prefix) 1.957 + int32_t pfl = prefixLength(text, prefix, status); 1.958 + if (U_FAILURE(status)) { // Memory allocation error. 1.959 + return; 1.960 + } 1.961 + if (pfl != 0) { 1.962 + // if we got a successful match, update the parse position 1.963 + // and strip the prefix off of "text" 1.964 + pp.setIndex(pp.getIndex() + pfl); 1.965 + text.remove(0, pfl); 1.966 + } 1.967 + } 1.968 +} 1.969 + 1.970 +/** 1.971 +* Used by parse() to match a substitution and any following text. 1.972 +* "text" is searched for instances of "delimiter". For each instance 1.973 +* of delimiter, the intervening text is tested to see whether it 1.974 +* matches the substitution. The longest match wins. 1.975 +* @param text The string being parsed 1.976 +* @param startPos The position in "text" where we should start looking 1.977 +* for "delimiter". 1.978 +* @param baseValue A partial parse result (often the rule's base value), 1.979 +* which is combined with the result from matching the substitution 1.980 +* @param delimiter The string to search "text" for. 1.981 +* @param pp Ignored and presumed to be 0 on entry. If there's a match, 1.982 +* on exit this will point to the first unmatched character. 1.983 +* @param sub If we find "delimiter" in "text", this substitution is used 1.984 +* to match the text between the beginning of the string and the 1.985 +* position of "delimiter." (If "delimiter" is the empty string, then 1.986 +* this function just matches against this substitution and updates 1.987 +* everything accordingly.) 1.988 +* @param upperBound When matching the substitution, it will only 1.989 +* consider rules with base values lower than this value. 1.990 +* @return If there's a match, this is the result of composing 1.991 +* baseValue with the result of matching the substitution. Otherwise, 1.992 +* this is new Long(0). It's never null. If the result is an integer, 1.993 +* this will be an instance of Long; otherwise, it's an instance of 1.994 +* Double. 1.995 +* 1.996 +* !!! note {dlf} in point of fact, in the java code the caller always converts 1.997 +* the result to a double, so we might as well return one. 1.998 +*/ 1.999 +double 1.1000 +NFRule::matchToDelimiter(const UnicodeString& text, 1.1001 + int32_t startPos, 1.1002 + double _baseValue, 1.1003 + const UnicodeString& delimiter, 1.1004 + ParsePosition& pp, 1.1005 + const NFSubstitution* sub, 1.1006 + double upperBound) const 1.1007 +{ 1.1008 + UErrorCode status = U_ZERO_ERROR; 1.1009 + // if "delimiter" contains real (i.e., non-ignorable) text, search 1.1010 + // it for "delimiter" beginning at "start". If that succeeds, then 1.1011 + // use "sub"'s doParse() method to match the text before the 1.1012 + // instance of "delimiter" we just found. 1.1013 + if (!allIgnorable(delimiter, status)) { 1.1014 + if (U_FAILURE(status)) { //Memory allocation error. 1.1015 + return 0; 1.1016 + } 1.1017 + ParsePosition tempPP; 1.1018 + Formattable result; 1.1019 + 1.1020 + // use findText() to search for "delimiter". It returns a two- 1.1021 + // element array: element 0 is the position of the match, and 1.1022 + // element 1 is the number of characters that matched 1.1023 + // "delimiter". 1.1024 + int32_t dLen; 1.1025 + int32_t dPos = findText(text, delimiter, startPos, &dLen); 1.1026 + 1.1027 + // if findText() succeeded, isolate the text preceding the 1.1028 + // match, and use "sub" to match that text 1.1029 + while (dPos >= 0) { 1.1030 + UnicodeString subText; 1.1031 + subText.setTo(text, 0, dPos); 1.1032 + if (subText.length() > 0) { 1.1033 + UBool success = sub->doParse(subText, tempPP, _baseValue, upperBound, 1.1034 +#if UCONFIG_NO_COLLATION 1.1035 + FALSE, 1.1036 +#else 1.1037 + formatter->isLenient(), 1.1038 +#endif 1.1039 + result); 1.1040 + 1.1041 + // if the substitution could match all the text up to 1.1042 + // where we found "delimiter", then this function has 1.1043 + // a successful match. Bump the caller's parse position 1.1044 + // to point to the first character after the text 1.1045 + // that matches "delimiter", and return the result 1.1046 + // we got from parsing the substitution. 1.1047 + if (success && tempPP.getIndex() == dPos) { 1.1048 + pp.setIndex(dPos + dLen); 1.1049 + return result.getDouble(); 1.1050 + } 1.1051 + // commented out because ParsePosition doesn't have error index in 1.1.x 1.1052 + // restored for ICU4C port 1.1053 + else { 1.1054 + if (tempPP.getErrorIndex() > 0) { 1.1055 + pp.setErrorIndex(tempPP.getErrorIndex()); 1.1056 + } else { 1.1057 + pp.setErrorIndex(tempPP.getIndex()); 1.1058 + } 1.1059 + } 1.1060 + } 1.1061 + 1.1062 + // if we didn't match the substitution, search for another 1.1063 + // copy of "delimiter" in "text" and repeat the loop if 1.1064 + // we find it 1.1065 + tempPP.setIndex(0); 1.1066 + dPos = findText(text, delimiter, dPos + dLen, &dLen); 1.1067 + } 1.1068 + // if we make it here, this was an unsuccessful match, and we 1.1069 + // leave pp unchanged and return 0 1.1070 + pp.setIndex(0); 1.1071 + return 0; 1.1072 + 1.1073 + // if "delimiter" is empty, or consists only of ignorable characters 1.1074 + // (i.e., is semantically empty), thwe we obviously can't search 1.1075 + // for "delimiter". Instead, just use "sub" to parse as much of 1.1076 + // "text" as possible. 1.1077 + } else { 1.1078 + ParsePosition tempPP; 1.1079 + Formattable result; 1.1080 + 1.1081 + // try to match the whole string against the substitution 1.1082 + UBool success = sub->doParse(text, tempPP, _baseValue, upperBound, 1.1083 +#if UCONFIG_NO_COLLATION 1.1084 + FALSE, 1.1085 +#else 1.1086 + formatter->isLenient(), 1.1087 +#endif 1.1088 + result); 1.1089 + if (success && (tempPP.getIndex() != 0 || sub->isNullSubstitution())) { 1.1090 + // if there's a successful match (or it's a null 1.1091 + // substitution), update pp to point to the first 1.1092 + // character we didn't match, and pass the result from 1.1093 + // sub.doParse() on through to the caller 1.1094 + pp.setIndex(tempPP.getIndex()); 1.1095 + return result.getDouble(); 1.1096 + } 1.1097 + // commented out because ParsePosition doesn't have error index in 1.1.x 1.1098 + // restored for ICU4C port 1.1099 + else { 1.1100 + pp.setErrorIndex(tempPP.getErrorIndex()); 1.1101 + } 1.1102 + 1.1103 + // and if we get to here, then nothing matched, so we return 1.1104 + // 0 and leave pp alone 1.1105 + return 0; 1.1106 + } 1.1107 +} 1.1108 + 1.1109 +/** 1.1110 +* Used by stripPrefix() to match characters. If lenient parse mode 1.1111 +* is off, this just calls startsWith(). If lenient parse mode is on, 1.1112 +* this function uses CollationElementIterators to match characters in 1.1113 +* the strings (only primary-order differences are significant in 1.1114 +* determining whether there's a match). 1.1115 +* @param str The string being tested 1.1116 +* @param prefix The text we're hoping to see at the beginning 1.1117 +* of "str" 1.1118 +* @return If "prefix" is found at the beginning of "str", this 1.1119 +* is the number of characters in "str" that were matched (this 1.1120 +* isn't necessarily the same as the length of "prefix" when matching 1.1121 +* text with a collator). If there's no match, this is 0. 1.1122 +*/ 1.1123 +int32_t 1.1124 +NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErrorCode& status) const 1.1125 +{ 1.1126 + // if we're looking for an empty prefix, it obviously matches 1.1127 + // zero characters. Just go ahead and return 0. 1.1128 + if (prefix.length() == 0) { 1.1129 + return 0; 1.1130 + } 1.1131 + 1.1132 +#if !UCONFIG_NO_COLLATION 1.1133 + // go through all this grief if we're in lenient-parse mode 1.1134 + if (formatter->isLenient()) { 1.1135 + // get the formatter's collator and use it to create two 1.1136 + // collation element iterators, one over the target string 1.1137 + // and another over the prefix (right now, we'll throw an 1.1138 + // exception if the collator we get back from the formatter 1.1139 + // isn't a RuleBasedCollator, because RuleBasedCollator defines 1.1140 + // the CollationElementIterator protocol. Hopefully, this 1.1141 + // will change someday.) 1.1142 + RuleBasedCollator* collator = (RuleBasedCollator*)formatter->getCollator(); 1.1143 + CollationElementIterator* strIter = collator->createCollationElementIterator(str); 1.1144 + CollationElementIterator* prefixIter = collator->createCollationElementIterator(prefix); 1.1145 + // Check for memory allocation error. 1.1146 + if (collator == NULL || strIter == NULL || prefixIter == NULL) { 1.1147 + delete collator; 1.1148 + delete strIter; 1.1149 + delete prefixIter; 1.1150 + status = U_MEMORY_ALLOCATION_ERROR; 1.1151 + return 0; 1.1152 + } 1.1153 + 1.1154 + UErrorCode err = U_ZERO_ERROR; 1.1155 + 1.1156 + // The original code was problematic. Consider this match: 1.1157 + // prefix = "fifty-" 1.1158 + // string = " fifty-7" 1.1159 + // The intent is to match string up to the '7', by matching 'fifty-' at position 1 1.1160 + // in the string. Unfortunately, we were getting a match, and then computing where 1.1161 + // the match terminated by rematching the string. The rematch code was using as an 1.1162 + // initial guess the substring of string between 0 and prefix.length. Because of 1.1163 + // the leading space and trailing hyphen (both ignorable) this was succeeding, leaving 1.1164 + // the position before the hyphen in the string. Recursing down, we then parsed the 1.1165 + // remaining string '-7' as numeric. The resulting number turned out as 43 (50 - 7). 1.1166 + // This was not pretty, especially since the string "fifty-7" parsed just fine. 1.1167 + // 1.1168 + // We have newer APIs now, so we can use calls on the iterator to determine what we 1.1169 + // matched up to. If we terminate because we hit the last element in the string, 1.1170 + // our match terminates at this length. If we terminate because we hit the last element 1.1171 + // in the target, our match terminates at one before the element iterator position. 1.1172 + 1.1173 + // match collation elements between the strings 1.1174 + int32_t oStr = strIter->next(err); 1.1175 + int32_t oPrefix = prefixIter->next(err); 1.1176 + 1.1177 + while (oPrefix != CollationElementIterator::NULLORDER) { 1.1178 + // skip over ignorable characters in the target string 1.1179 + while (CollationElementIterator::primaryOrder(oStr) == 0 1.1180 + && oStr != CollationElementIterator::NULLORDER) { 1.1181 + oStr = strIter->next(err); 1.1182 + } 1.1183 + 1.1184 + // skip over ignorable characters in the prefix 1.1185 + while (CollationElementIterator::primaryOrder(oPrefix) == 0 1.1186 + && oPrefix != CollationElementIterator::NULLORDER) { 1.1187 + oPrefix = prefixIter->next(err); 1.1188 + } 1.1189 + 1.1190 + // dlf: move this above following test, if we consume the 1.1191 + // entire target, aren't we ok even if the source was also 1.1192 + // entirely consumed? 1.1193 + 1.1194 + // if skipping over ignorables brought to the end of 1.1195 + // the prefix, we DID match: drop out of the loop 1.1196 + if (oPrefix == CollationElementIterator::NULLORDER) { 1.1197 + break; 1.1198 + } 1.1199 + 1.1200 + // if skipping over ignorables brought us to the end 1.1201 + // of the target string, we didn't match and return 0 1.1202 + if (oStr == CollationElementIterator::NULLORDER) { 1.1203 + delete prefixIter; 1.1204 + delete strIter; 1.1205 + return 0; 1.1206 + } 1.1207 + 1.1208 + // match collation elements from the two strings 1.1209 + // (considering only primary differences). If we 1.1210 + // get a mismatch, dump out and return 0 1.1211 + if (CollationElementIterator::primaryOrder(oStr) 1.1212 + != CollationElementIterator::primaryOrder(oPrefix)) { 1.1213 + delete prefixIter; 1.1214 + delete strIter; 1.1215 + return 0; 1.1216 + 1.1217 + // otherwise, advance to the next character in each string 1.1218 + // and loop (we drop out of the loop when we exhaust 1.1219 + // collation elements in the prefix) 1.1220 + } else { 1.1221 + oStr = strIter->next(err); 1.1222 + oPrefix = prefixIter->next(err); 1.1223 + } 1.1224 + } 1.1225 + 1.1226 + int32_t result = strIter->getOffset(); 1.1227 + if (oStr != CollationElementIterator::NULLORDER) { 1.1228 + --result; // back over character that we don't want to consume; 1.1229 + } 1.1230 + 1.1231 +#ifdef RBNF_DEBUG 1.1232 + fprintf(stderr, "prefix length: %d\n", result); 1.1233 +#endif 1.1234 + delete prefixIter; 1.1235 + delete strIter; 1.1236 + 1.1237 + return result; 1.1238 +#if 0 1.1239 + //---------------------------------------------------------------- 1.1240 + // JDK 1.2-specific API call 1.1241 + // return strIter.getOffset(); 1.1242 + //---------------------------------------------------------------- 1.1243 + // JDK 1.1 HACK (take out for 1.2-specific code) 1.1244 + 1.1245 + // if we make it to here, we have a successful match. Now we 1.1246 + // have to find out HOW MANY characters from the target string 1.1247 + // matched the prefix (there isn't necessarily a one-to-one 1.1248 + // mapping between collation elements and characters). 1.1249 + // In JDK 1.2, there's a simple getOffset() call we can use. 1.1250 + // In JDK 1.1, on the other hand, we have to go through some 1.1251 + // ugly contortions. First, use the collator to compare the 1.1252 + // same number of characters from the prefix and target string. 1.1253 + // If they're equal, we're done. 1.1254 + collator->setStrength(Collator::PRIMARY); 1.1255 + if (str.length() >= prefix.length()) { 1.1256 + UnicodeString temp; 1.1257 + temp.setTo(str, 0, prefix.length()); 1.1258 + if (collator->equals(temp, prefix)) { 1.1259 +#ifdef RBNF_DEBUG 1.1260 + fprintf(stderr, "returning: %d\n", prefix.length()); 1.1261 +#endif 1.1262 + return prefix.length(); 1.1263 + } 1.1264 + } 1.1265 + 1.1266 + // if they're not equal, then we have to compare successively 1.1267 + // larger and larger substrings of the target string until we 1.1268 + // get to one that matches the prefix. At that point, we know 1.1269 + // how many characters matched the prefix, and we can return. 1.1270 + int32_t p = 1; 1.1271 + while (p <= str.length()) { 1.1272 + UnicodeString temp; 1.1273 + temp.setTo(str, 0, p); 1.1274 + if (collator->equals(temp, prefix)) { 1.1275 + return p; 1.1276 + } else { 1.1277 + ++p; 1.1278 + } 1.1279 + } 1.1280 + 1.1281 + // SHOULD NEVER GET HERE!!! 1.1282 + return 0; 1.1283 + //---------------------------------------------------------------- 1.1284 +#endif 1.1285 + 1.1286 + // If lenient parsing is turned off, forget all that crap above. 1.1287 + // Just use String.startsWith() and be done with it. 1.1288 + } else 1.1289 +#endif 1.1290 + { 1.1291 + if (str.startsWith(prefix)) { 1.1292 + return prefix.length(); 1.1293 + } else { 1.1294 + return 0; 1.1295 + } 1.1296 + } 1.1297 +} 1.1298 + 1.1299 +/** 1.1300 +* Searches a string for another string. If lenient parsing is off, 1.1301 +* this just calls indexOf(). If lenient parsing is on, this function 1.1302 +* uses CollationElementIterator to match characters, and only 1.1303 +* primary-order differences are significant in determining whether 1.1304 +* there's a match. 1.1305 +* @param str The string to search 1.1306 +* @param key The string to search "str" for 1.1307 +* @param startingAt The index into "str" where the search is to 1.1308 +* begin 1.1309 +* @return A two-element array of ints. Element 0 is the position 1.1310 +* of the match, or -1 if there was no match. Element 1 is the 1.1311 +* number of characters in "str" that matched (which isn't necessarily 1.1312 +* the same as the length of "key") 1.1313 +*/ 1.1314 +int32_t 1.1315 +NFRule::findText(const UnicodeString& str, 1.1316 + const UnicodeString& key, 1.1317 + int32_t startingAt, 1.1318 + int32_t* length) const 1.1319 +{ 1.1320 +#if !UCONFIG_NO_COLLATION 1.1321 + // if lenient parsing is turned off, this is easy: just call 1.1322 + // String.indexOf() and we're done 1.1323 + if (!formatter->isLenient()) { 1.1324 + *length = key.length(); 1.1325 + return str.indexOf(key, startingAt); 1.1326 + 1.1327 + // but if lenient parsing is turned ON, we've got some work 1.1328 + // ahead of us 1.1329 + } else 1.1330 +#endif 1.1331 + { 1.1332 + //---------------------------------------------------------------- 1.1333 + // JDK 1.1 HACK (take out of 1.2-specific code) 1.1334 + 1.1335 + // in JDK 1.2, CollationElementIterator provides us with an 1.1336 + // API to map between character offsets and collation elements 1.1337 + // and we can do this by marching through the string comparing 1.1338 + // collation elements. We can't do that in JDK 1.1. Insted, 1.1339 + // we have to go through this horrible slow mess: 1.1340 + int32_t p = startingAt; 1.1341 + int32_t keyLen = 0; 1.1342 + 1.1343 + // basically just isolate smaller and smaller substrings of 1.1344 + // the target string (each running to the end of the string, 1.1345 + // and with the first one running from startingAt to the end) 1.1346 + // and then use prefixLength() to see if the search key is at 1.1347 + // the beginning of each substring. This is excruciatingly 1.1348 + // slow, but it will locate the key and tell use how long the 1.1349 + // matching text was. 1.1350 + UnicodeString temp; 1.1351 + UErrorCode status = U_ZERO_ERROR; 1.1352 + while (p < str.length() && keyLen == 0) { 1.1353 + temp.setTo(str, p, str.length() - p); 1.1354 + keyLen = prefixLength(temp, key, status); 1.1355 + if (U_FAILURE(status)) { 1.1356 + break; 1.1357 + } 1.1358 + if (keyLen != 0) { 1.1359 + *length = keyLen; 1.1360 + return p; 1.1361 + } 1.1362 + ++p; 1.1363 + } 1.1364 + // if we make it to here, we didn't find it. Return -1 for the 1.1365 + // location. The length should be ignored, but set it to 0, 1.1366 + // which should be "safe" 1.1367 + *length = 0; 1.1368 + return -1; 1.1369 + 1.1370 + //---------------------------------------------------------------- 1.1371 + // JDK 1.2 version of this routine 1.1372 + //RuleBasedCollator collator = (RuleBasedCollator)formatter.getCollator(); 1.1373 + // 1.1374 + //CollationElementIterator strIter = collator.getCollationElementIterator(str); 1.1375 + //CollationElementIterator keyIter = collator.getCollationElementIterator(key); 1.1376 + // 1.1377 + //int keyStart = -1; 1.1378 + // 1.1379 + //str.setOffset(startingAt); 1.1380 + // 1.1381 + //int oStr = strIter.next(); 1.1382 + //int oKey = keyIter.next(); 1.1383 + //while (oKey != CollationElementIterator.NULLORDER) { 1.1384 + // while (oStr != CollationElementIterator.NULLORDER && 1.1385 + // CollationElementIterator.primaryOrder(oStr) == 0) 1.1386 + // oStr = strIter.next(); 1.1387 + // 1.1388 + // while (oKey != CollationElementIterator.NULLORDER && 1.1389 + // CollationElementIterator.primaryOrder(oKey) == 0) 1.1390 + // oKey = keyIter.next(); 1.1391 + // 1.1392 + // if (oStr == CollationElementIterator.NULLORDER) { 1.1393 + // return new int[] { -1, 0 }; 1.1394 + // } 1.1395 + // 1.1396 + // if (oKey == CollationElementIterator.NULLORDER) { 1.1397 + // break; 1.1398 + // } 1.1399 + // 1.1400 + // if (CollationElementIterator.primaryOrder(oStr) == 1.1401 + // CollationElementIterator.primaryOrder(oKey)) { 1.1402 + // keyStart = strIter.getOffset(); 1.1403 + // oStr = strIter.next(); 1.1404 + // oKey = keyIter.next(); 1.1405 + // } else { 1.1406 + // if (keyStart != -1) { 1.1407 + // keyStart = -1; 1.1408 + // keyIter.reset(); 1.1409 + // } else { 1.1410 + // oStr = strIter.next(); 1.1411 + // } 1.1412 + // } 1.1413 + //} 1.1414 + // 1.1415 + //if (oKey == CollationElementIterator.NULLORDER) { 1.1416 + // return new int[] { keyStart, strIter.getOffset() - keyStart }; 1.1417 + //} else { 1.1418 + // return new int[] { -1, 0 }; 1.1419 + //} 1.1420 + } 1.1421 +} 1.1422 + 1.1423 +/** 1.1424 +* Checks to see whether a string consists entirely of ignorable 1.1425 +* characters. 1.1426 +* @param str The string to test. 1.1427 +* @return true if the string is empty of consists entirely of 1.1428 +* characters that the number formatter's collator says are 1.1429 +* ignorable at the primary-order level. false otherwise. 1.1430 +*/ 1.1431 +UBool 1.1432 +NFRule::allIgnorable(const UnicodeString& str, UErrorCode& status) const 1.1433 +{ 1.1434 + // if the string is empty, we can just return true 1.1435 + if (str.length() == 0) { 1.1436 + return TRUE; 1.1437 + } 1.1438 + 1.1439 +#if !UCONFIG_NO_COLLATION 1.1440 + // if lenient parsing is turned on, walk through the string with 1.1441 + // a collation element iterator and make sure each collation 1.1442 + // element is 0 (ignorable) at the primary level 1.1443 + if (formatter->isLenient()) { 1.1444 + RuleBasedCollator* collator = (RuleBasedCollator*)(formatter->getCollator()); 1.1445 + CollationElementIterator* iter = collator->createCollationElementIterator(str); 1.1446 + 1.1447 + // Memory allocation error check. 1.1448 + if (collator == NULL || iter == NULL) { 1.1449 + delete collator; 1.1450 + delete iter; 1.1451 + status = U_MEMORY_ALLOCATION_ERROR; 1.1452 + return FALSE; 1.1453 + } 1.1454 + 1.1455 + UErrorCode err = U_ZERO_ERROR; 1.1456 + int32_t o = iter->next(err); 1.1457 + while (o != CollationElementIterator::NULLORDER 1.1458 + && CollationElementIterator::primaryOrder(o) == 0) { 1.1459 + o = iter->next(err); 1.1460 + } 1.1461 + 1.1462 + delete iter; 1.1463 + return o == CollationElementIterator::NULLORDER; 1.1464 + } 1.1465 +#endif 1.1466 + 1.1467 + // if lenient parsing is turned off, there is no such thing as 1.1468 + // an ignorable character: return true only if the string is empty 1.1469 + return FALSE; 1.1470 +} 1.1471 + 1.1472 +U_NAMESPACE_END 1.1473 + 1.1474 +/* U_HAVE_RBNF */ 1.1475 +#endif 1.1476 + 1.1477 +