|
1 /* |
|
2 ****************************************************************************** |
|
3 * Copyright (C) 1997-2012, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ****************************************************************************** |
|
6 * file name: nfrs.cpp |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * Modification history |
|
12 * Date Name Comments |
|
13 * 10/11/2001 Doug Ported from ICU4J |
|
14 */ |
|
15 |
|
16 #include "nfrs.h" |
|
17 |
|
18 #if U_HAVE_RBNF |
|
19 |
|
20 #include "unicode/uchar.h" |
|
21 #include "nfrule.h" |
|
22 #include "nfrlist.h" |
|
23 #include "patternprops.h" |
|
24 |
|
25 #ifdef RBNF_DEBUG |
|
26 #include "cmemory.h" |
|
27 #endif |
|
28 |
|
29 U_NAMESPACE_BEGIN |
|
30 |
|
31 #if 0 |
|
32 // euclid's algorithm works with doubles |
|
33 // note, doubles only get us up to one quadrillion or so, which |
|
34 // isn't as much range as we get with longs. We probably still |
|
35 // want either 64-bit math, or BigInteger. |
|
36 |
|
37 static int64_t |
|
38 util_lcm(int64_t x, int64_t y) |
|
39 { |
|
40 x.abs(); |
|
41 y.abs(); |
|
42 |
|
43 if (x == 0 || y == 0) { |
|
44 return 0; |
|
45 } else { |
|
46 do { |
|
47 if (x < y) { |
|
48 int64_t t = x; x = y; y = t; |
|
49 } |
|
50 x -= y * (x/y); |
|
51 } while (x != 0); |
|
52 |
|
53 return y; |
|
54 } |
|
55 } |
|
56 |
|
57 #else |
|
58 /** |
|
59 * Calculates the least common multiple of x and y. |
|
60 */ |
|
61 static int64_t |
|
62 util_lcm(int64_t x, int64_t y) |
|
63 { |
|
64 // binary gcd algorithm from Knuth, "The Art of Computer Programming," |
|
65 // vol. 2, 1st ed., pp. 298-299 |
|
66 int64_t x1 = x; |
|
67 int64_t y1 = y; |
|
68 |
|
69 int p2 = 0; |
|
70 while ((x1 & 1) == 0 && (y1 & 1) == 0) { |
|
71 ++p2; |
|
72 x1 >>= 1; |
|
73 y1 >>= 1; |
|
74 } |
|
75 |
|
76 int64_t t; |
|
77 if ((x1 & 1) == 1) { |
|
78 t = -y1; |
|
79 } else { |
|
80 t = x1; |
|
81 } |
|
82 |
|
83 while (t != 0) { |
|
84 while ((t & 1) == 0) { |
|
85 t = t >> 1; |
|
86 } |
|
87 if (t > 0) { |
|
88 x1 = t; |
|
89 } else { |
|
90 y1 = -t; |
|
91 } |
|
92 t = x1 - y1; |
|
93 } |
|
94 |
|
95 int64_t gcd = x1 << p2; |
|
96 |
|
97 // x * y == gcd(x, y) * lcm(x, y) |
|
98 return x / gcd * y; |
|
99 } |
|
100 #endif |
|
101 |
|
102 static const UChar gPercent = 0x0025; |
|
103 static const UChar gColon = 0x003a; |
|
104 static const UChar gSemicolon = 0x003b; |
|
105 static const UChar gLineFeed = 0x000a; |
|
106 |
|
107 static const UChar gFourSpaces[] = |
|
108 { |
|
109 0x20, 0x20, 0x20, 0x20, 0 |
|
110 }; /* " " */ |
|
111 static const UChar gPercentPercent[] = |
|
112 { |
|
113 0x25, 0x25, 0 |
|
114 }; /* "%%" */ |
|
115 |
|
116 static const UChar gNoparse[] = |
|
117 { |
|
118 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0 |
|
119 }; /* "@noparse" */ |
|
120 |
|
121 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status) |
|
122 : name() |
|
123 , rules(0) |
|
124 , negativeNumberRule(NULL) |
|
125 , fIsFractionRuleSet(FALSE) |
|
126 , fIsPublic(FALSE) |
|
127 , fIsParseable(TRUE) |
|
128 , fRecursionCount(0) |
|
129 { |
|
130 for (int i = 0; i < 3; ++i) { |
|
131 fractionRules[i] = NULL; |
|
132 } |
|
133 |
|
134 if (U_FAILURE(status)) { |
|
135 return; |
|
136 } |
|
137 |
|
138 UnicodeString& description = descriptions[index]; // !!! make sure index is valid |
|
139 |
|
140 if (description.length() == 0) { |
|
141 // throw new IllegalArgumentException("Empty rule set description"); |
|
142 status = U_PARSE_ERROR; |
|
143 return; |
|
144 } |
|
145 |
|
146 // if the description begins with a rule set name (the rule set |
|
147 // name can be omitted in formatter descriptions that consist |
|
148 // of only one rule set), copy it out into our "name" member |
|
149 // and delete it from the description |
|
150 if (description.charAt(0) == gPercent) { |
|
151 int32_t pos = description.indexOf(gColon); |
|
152 if (pos == -1) { |
|
153 // throw new IllegalArgumentException("Rule set name doesn't end in colon"); |
|
154 status = U_PARSE_ERROR; |
|
155 } else { |
|
156 name.setTo(description, 0, pos); |
|
157 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { |
|
158 } |
|
159 description.remove(0, pos); |
|
160 } |
|
161 } else { |
|
162 name.setTo(UNICODE_STRING_SIMPLE("%default")); |
|
163 } |
|
164 |
|
165 if (description.length() == 0) { |
|
166 // throw new IllegalArgumentException("Empty rule set description"); |
|
167 status = U_PARSE_ERROR; |
|
168 } |
|
169 |
|
170 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; |
|
171 |
|
172 if ( name.endsWith(gNoparse,8) ) { |
|
173 fIsParseable = FALSE; |
|
174 name.truncate(name.length()-8); // remove the @noparse from the name |
|
175 } |
|
176 |
|
177 // all of the other members of NFRuleSet are initialized |
|
178 // by parseRules() |
|
179 } |
|
180 |
|
181 void |
|
182 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status) |
|
183 { |
|
184 // start by creating a Vector whose elements are Strings containing |
|
185 // the descriptions of the rules (one rule per element). The rules |
|
186 // are separated by semicolons (there's no escape facility: ALL |
|
187 // semicolons are rule delimiters) |
|
188 |
|
189 if (U_FAILURE(status)) { |
|
190 return; |
|
191 } |
|
192 |
|
193 // ensure we are starting with an empty rule list |
|
194 rules.deleteAll(); |
|
195 |
|
196 // dlf - the original code kept a separate description array for no reason, |
|
197 // so I got rid of it. The loop was too complex so I simplified it. |
|
198 |
|
199 UnicodeString currentDescription; |
|
200 int32_t oldP = 0; |
|
201 while (oldP < description.length()) { |
|
202 int32_t p = description.indexOf(gSemicolon, oldP); |
|
203 if (p == -1) { |
|
204 p = description.length(); |
|
205 } |
|
206 currentDescription.setTo(description, oldP, p - oldP); |
|
207 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); |
|
208 oldP = p + 1; |
|
209 } |
|
210 |
|
211 // for rules that didn't specify a base value, their base values |
|
212 // were initialized to 0. Make another pass through the list and |
|
213 // set all those rules' base values. We also remove any special |
|
214 // rules from the list and put them into their own member variables |
|
215 int64_t defaultBaseValue = 0; |
|
216 |
|
217 // (this isn't a for loop because we might be deleting items from |
|
218 // the vector-- we want to make sure we only increment i when |
|
219 // we _didn't_ delete aything from the vector) |
|
220 uint32_t i = 0; |
|
221 while (i < rules.size()) { |
|
222 NFRule* rule = rules[i]; |
|
223 |
|
224 switch (rule->getType()) { |
|
225 // if the rule's base value is 0, fill in a default |
|
226 // base value (this will be 1 plus the preceding |
|
227 // rule's base value for regular rule sets, and the |
|
228 // same as the preceding rule's base value in fraction |
|
229 // rule sets) |
|
230 case NFRule::kNoBase: |
|
231 rule->setBaseValue(defaultBaseValue, status); |
|
232 if (!isFractionRuleSet()) { |
|
233 ++defaultBaseValue; |
|
234 } |
|
235 ++i; |
|
236 break; |
|
237 |
|
238 // if it's the negative-number rule, copy it into its own |
|
239 // data member and delete it from the list |
|
240 case NFRule::kNegativeNumberRule: |
|
241 if (negativeNumberRule) { |
|
242 delete negativeNumberRule; |
|
243 } |
|
244 negativeNumberRule = rules.remove(i); |
|
245 break; |
|
246 |
|
247 // if it's the improper fraction rule, copy it into the |
|
248 // correct element of fractionRules |
|
249 case NFRule::kImproperFractionRule: |
|
250 if (fractionRules[0]) { |
|
251 delete fractionRules[0]; |
|
252 } |
|
253 fractionRules[0] = rules.remove(i); |
|
254 break; |
|
255 |
|
256 // if it's the proper fraction rule, copy it into the |
|
257 // correct element of fractionRules |
|
258 case NFRule::kProperFractionRule: |
|
259 if (fractionRules[1]) { |
|
260 delete fractionRules[1]; |
|
261 } |
|
262 fractionRules[1] = rules.remove(i); |
|
263 break; |
|
264 |
|
265 // if it's the master rule, copy it into the |
|
266 // correct element of fractionRules |
|
267 case NFRule::kMasterRule: |
|
268 if (fractionRules[2]) { |
|
269 delete fractionRules[2]; |
|
270 } |
|
271 fractionRules[2] = rules.remove(i); |
|
272 break; |
|
273 |
|
274 // if it's a regular rule that already knows its base value, |
|
275 // check to make sure the rules are in order, and update |
|
276 // the default base value for the next rule |
|
277 default: |
|
278 if (rule->getBaseValue() < defaultBaseValue) { |
|
279 // throw new IllegalArgumentException("Rules are not in order"); |
|
280 status = U_PARSE_ERROR; |
|
281 return; |
|
282 } |
|
283 defaultBaseValue = rule->getBaseValue(); |
|
284 if (!isFractionRuleSet()) { |
|
285 ++defaultBaseValue; |
|
286 } |
|
287 ++i; |
|
288 break; |
|
289 } |
|
290 } |
|
291 } |
|
292 |
|
293 NFRuleSet::~NFRuleSet() |
|
294 { |
|
295 delete negativeNumberRule; |
|
296 delete fractionRules[0]; |
|
297 delete fractionRules[1]; |
|
298 delete fractionRules[2]; |
|
299 } |
|
300 |
|
301 static UBool |
|
302 util_equalRules(const NFRule* rule1, const NFRule* rule2) |
|
303 { |
|
304 if (rule1) { |
|
305 if (rule2) { |
|
306 return *rule1 == *rule2; |
|
307 } |
|
308 } else if (!rule2) { |
|
309 return TRUE; |
|
310 } |
|
311 return FALSE; |
|
312 } |
|
313 |
|
314 UBool |
|
315 NFRuleSet::operator==(const NFRuleSet& rhs) const |
|
316 { |
|
317 if (rules.size() == rhs.rules.size() && |
|
318 fIsFractionRuleSet == rhs.fIsFractionRuleSet && |
|
319 name == rhs.name && |
|
320 util_equalRules(negativeNumberRule, rhs.negativeNumberRule) && |
|
321 util_equalRules(fractionRules[0], rhs.fractionRules[0]) && |
|
322 util_equalRules(fractionRules[1], rhs.fractionRules[1]) && |
|
323 util_equalRules(fractionRules[2], rhs.fractionRules[2])) { |
|
324 |
|
325 for (uint32_t i = 0; i < rules.size(); ++i) { |
|
326 if (*rules[i] != *rhs.rules[i]) { |
|
327 return FALSE; |
|
328 } |
|
329 } |
|
330 return TRUE; |
|
331 } |
|
332 return FALSE; |
|
333 } |
|
334 |
|
335 #define RECURSION_LIMIT 50 |
|
336 |
|
337 void |
|
338 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const |
|
339 { |
|
340 NFRule *rule = findNormalRule(number); |
|
341 if (rule) { // else error, but can't report it |
|
342 NFRuleSet* ncThis = (NFRuleSet*)this; |
|
343 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { |
|
344 // stop recursion |
|
345 ncThis->fRecursionCount = 0; |
|
346 } else { |
|
347 rule->doFormat(number, toAppendTo, pos); |
|
348 ncThis->fRecursionCount--; |
|
349 } |
|
350 } |
|
351 } |
|
352 |
|
353 void |
|
354 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const |
|
355 { |
|
356 NFRule *rule = findDoubleRule(number); |
|
357 if (rule) { // else error, but can't report it |
|
358 NFRuleSet* ncThis = (NFRuleSet*)this; |
|
359 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) { |
|
360 // stop recursion |
|
361 ncThis->fRecursionCount = 0; |
|
362 } else { |
|
363 rule->doFormat(number, toAppendTo, pos); |
|
364 ncThis->fRecursionCount--; |
|
365 } |
|
366 } |
|
367 } |
|
368 |
|
369 NFRule* |
|
370 NFRuleSet::findDoubleRule(double number) const |
|
371 { |
|
372 // if this is a fraction rule set, use findFractionRuleSetRule() |
|
373 if (isFractionRuleSet()) { |
|
374 return findFractionRuleSetRule(number); |
|
375 } |
|
376 |
|
377 // if the number is negative, return the negative number rule |
|
378 // (if there isn't a negative-number rule, we pretend it's a |
|
379 // positive number) |
|
380 if (number < 0) { |
|
381 if (negativeNumberRule) { |
|
382 return negativeNumberRule; |
|
383 } else { |
|
384 number = -number; |
|
385 } |
|
386 } |
|
387 |
|
388 // if the number isn't an integer, we use one of the fraction rules... |
|
389 if (number != uprv_floor(number)) { |
|
390 // if the number is between 0 and 1, return the proper |
|
391 // fraction rule |
|
392 if (number < 1 && fractionRules[1]) { |
|
393 return fractionRules[1]; |
|
394 } |
|
395 // otherwise, return the improper fraction rule |
|
396 else if (fractionRules[0]) { |
|
397 return fractionRules[0]; |
|
398 } |
|
399 } |
|
400 |
|
401 // if there's a master rule, use it to format the number |
|
402 if (fractionRules[2]) { |
|
403 return fractionRules[2]; |
|
404 } |
|
405 |
|
406 // and if we haven't yet returned a rule, use findNormalRule() |
|
407 // to find the applicable rule |
|
408 int64_t r = util64_fromDouble(number + 0.5); |
|
409 return findNormalRule(r); |
|
410 } |
|
411 |
|
412 NFRule * |
|
413 NFRuleSet::findNormalRule(int64_t number) const |
|
414 { |
|
415 // if this is a fraction rule set, use findFractionRuleSetRule() |
|
416 // to find the rule (we should only go into this clause if the |
|
417 // value is 0) |
|
418 if (fIsFractionRuleSet) { |
|
419 return findFractionRuleSetRule((double)number); |
|
420 } |
|
421 |
|
422 // if the number is negative, return the negative-number rule |
|
423 // (if there isn't one, pretend the number is positive) |
|
424 if (number < 0) { |
|
425 if (negativeNumberRule) { |
|
426 return negativeNumberRule; |
|
427 } else { |
|
428 number = -number; |
|
429 } |
|
430 } |
|
431 |
|
432 // we have to repeat the preceding two checks, even though we |
|
433 // do them in findRule(), because the version of format() that |
|
434 // takes a long bypasses findRule() and goes straight to this |
|
435 // function. This function does skip the fraction rules since |
|
436 // we know the value is an integer (it also skips the master |
|
437 // rule, since it's considered a fraction rule. Skipping the |
|
438 // master rule in this function is also how we avoid infinite |
|
439 // recursion) |
|
440 |
|
441 // {dlf} unfortunately this fails if there are no rules except |
|
442 // special rules. If there are no rules, use the master rule. |
|
443 |
|
444 // binary-search the rule list for the applicable rule |
|
445 // (a rule is used for all values from its base value to |
|
446 // the next rule's base value) |
|
447 int32_t hi = rules.size(); |
|
448 if (hi > 0) { |
|
449 int32_t lo = 0; |
|
450 |
|
451 while (lo < hi) { |
|
452 int32_t mid = (lo + hi) / 2; |
|
453 if (rules[mid]->getBaseValue() == number) { |
|
454 return rules[mid]; |
|
455 } |
|
456 else if (rules[mid]->getBaseValue() > number) { |
|
457 hi = mid; |
|
458 } |
|
459 else { |
|
460 lo = mid + 1; |
|
461 } |
|
462 } |
|
463 if (hi == 0) { // bad rule set, minimum base > 0 |
|
464 return NULL; // want to throw exception here |
|
465 } |
|
466 |
|
467 NFRule *result = rules[hi - 1]; |
|
468 |
|
469 // use shouldRollBack() to see whether we need to invoke the |
|
470 // rollback rule (see shouldRollBack()'s documentation for |
|
471 // an explanation of the rollback rule). If we do, roll back |
|
472 // one rule and return that one instead of the one we'd normally |
|
473 // return |
|
474 if (result->shouldRollBack((double)number)) { |
|
475 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base |
|
476 return NULL; |
|
477 } |
|
478 result = rules[hi - 2]; |
|
479 } |
|
480 return result; |
|
481 } |
|
482 // else use the master rule |
|
483 return fractionRules[2]; |
|
484 } |
|
485 |
|
486 /** |
|
487 * If this rule is a fraction rule set, this function is used by |
|
488 * findRule() to select the most appropriate rule for formatting |
|
489 * the number. Basically, the base value of each rule in the rule |
|
490 * set is treated as the denominator of a fraction. Whichever |
|
491 * denominator can produce the fraction closest in value to the |
|
492 * number passed in is the result. If there's a tie, the earlier |
|
493 * one in the list wins. (If there are two rules in a row with the |
|
494 * same base value, the first one is used when the numerator of the |
|
495 * fraction would be 1, and the second rule is used the rest of the |
|
496 * time. |
|
497 * @param number The number being formatted (which will always be |
|
498 * a number between 0 and 1) |
|
499 * @return The rule to use to format this number |
|
500 */ |
|
501 NFRule* |
|
502 NFRuleSet::findFractionRuleSetRule(double number) const |
|
503 { |
|
504 // the obvious way to do this (multiply the value being formatted |
|
505 // by each rule's base value until you get an integral result) |
|
506 // doesn't work because of rounding error. This method is more |
|
507 // accurate |
|
508 |
|
509 // find the least common multiple of the rules' base values |
|
510 // and multiply this by the number being formatted. This is |
|
511 // all the precision we need, and we can do all of the rest |
|
512 // of the math using integer arithmetic |
|
513 int64_t leastCommonMultiple = rules[0]->getBaseValue(); |
|
514 int64_t numerator; |
|
515 { |
|
516 for (uint32_t i = 1; i < rules.size(); ++i) { |
|
517 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); |
|
518 } |
|
519 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); |
|
520 } |
|
521 // for each rule, do the following... |
|
522 int64_t tempDifference; |
|
523 int64_t difference = util64_fromDouble(uprv_maxMantissa()); |
|
524 int32_t winner = 0; |
|
525 for (uint32_t i = 0; i < rules.size(); ++i) { |
|
526 // "numerator" is the numerator of the fraction if the |
|
527 // denominator is the LCD. The numerator if the rule's |
|
528 // base value is the denominator is "numerator" times the |
|
529 // base value divided bythe LCD. Here we check to see if |
|
530 // that's an integer, and if not, how close it is to being |
|
531 // an integer. |
|
532 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; |
|
533 |
|
534 |
|
535 // normalize the result of the above calculation: we want |
|
536 // the numerator's distance from the CLOSEST multiple |
|
537 // of the LCD |
|
538 if (leastCommonMultiple - tempDifference < tempDifference) { |
|
539 tempDifference = leastCommonMultiple - tempDifference; |
|
540 } |
|
541 |
|
542 // if this is as close as we've come, keep track of how close |
|
543 // that is, and the line number of the rule that did it. If |
|
544 // we've scored a direct hit, we don't have to look at any more |
|
545 // rules |
|
546 if (tempDifference < difference) { |
|
547 difference = tempDifference; |
|
548 winner = i; |
|
549 if (difference == 0) { |
|
550 break; |
|
551 } |
|
552 } |
|
553 } |
|
554 |
|
555 // if we have two successive rules that both have the winning base |
|
556 // value, then the first one (the one we found above) is used if |
|
557 // the numerator of the fraction is 1 and the second one is used if |
|
558 // the numerator of the fraction is anything else (this lets us |
|
559 // do things like "one third"/"two thirds" without haveing to define |
|
560 // a whole bunch of extra rule sets) |
|
561 if ((unsigned)(winner + 1) < rules.size() && |
|
562 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { |
|
563 double n = ((double)rules[winner]->getBaseValue()) * number; |
|
564 if (n < 0.5 || n >= 2) { |
|
565 ++winner; |
|
566 } |
|
567 } |
|
568 |
|
569 // finally, return the winning rule |
|
570 return rules[winner]; |
|
571 } |
|
572 |
|
573 /** |
|
574 * Parses a string. Matches the string to be parsed against each |
|
575 * of its rules (with a base value less than upperBound) and returns |
|
576 * the value produced by the rule that matched the most charcters |
|
577 * in the source string. |
|
578 * @param text The string to parse |
|
579 * @param parsePosition The initial position is ignored and assumed |
|
580 * to be 0. On exit, this object has been updated to point to the |
|
581 * first character position this rule set didn't consume. |
|
582 * @param upperBound Limits the rules that can be allowed to match. |
|
583 * Only rules whose base values are strictly less than upperBound |
|
584 * are considered. |
|
585 * @return The numerical result of parsing this string. This will |
|
586 * be the matching rule's base value, composed appropriately with |
|
587 * the results of matching any of its substitutions. The object |
|
588 * will be an instance of Long if it's an integral value; otherwise, |
|
589 * it will be an instance of Double. This function always returns |
|
590 * a valid object: If nothing matched the input string at all, |
|
591 * this function returns new Long(0), and the parse position is |
|
592 * left unchanged. |
|
593 */ |
|
594 #ifdef RBNF_DEBUG |
|
595 #include <stdio.h> |
|
596 |
|
597 static void dumpUS(FILE* f, const UnicodeString& us) { |
|
598 int len = us.length(); |
|
599 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; |
|
600 if (buf != NULL) { |
|
601 us.extract(0, len, buf); |
|
602 buf[len] = 0; |
|
603 fprintf(f, "%s", buf); |
|
604 uprv_free(buf); //delete[] buf; |
|
605 } |
|
606 } |
|
607 #endif |
|
608 |
|
609 UBool |
|
610 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const |
|
611 { |
|
612 // try matching each rule in the rule set against the text being |
|
613 // parsed. Whichever one matches the most characters is the one |
|
614 // that determines the value we return. |
|
615 |
|
616 result.setLong(0); |
|
617 |
|
618 // dump out if there's no text to parse |
|
619 if (text.length() == 0) { |
|
620 return 0; |
|
621 } |
|
622 |
|
623 ParsePosition highWaterMark; |
|
624 ParsePosition workingPos = pos; |
|
625 |
|
626 #ifdef RBNF_DEBUG |
|
627 fprintf(stderr, "<nfrs> %x '", this); |
|
628 dumpUS(stderr, name); |
|
629 fprintf(stderr, "' text '"); |
|
630 dumpUS(stderr, text); |
|
631 fprintf(stderr, "'\n"); |
|
632 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); |
|
633 #endif |
|
634 |
|
635 // start by trying the negative number rule (if there is one) |
|
636 if (negativeNumberRule) { |
|
637 Formattable tempResult; |
|
638 #ifdef RBNF_DEBUG |
|
639 fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound); |
|
640 #endif |
|
641 UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult); |
|
642 #ifdef RBNF_DEBUG |
|
643 fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex()); |
|
644 #endif |
|
645 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { |
|
646 result = tempResult; |
|
647 highWaterMark = workingPos; |
|
648 } |
|
649 workingPos = pos; |
|
650 } |
|
651 #ifdef RBNF_DEBUG |
|
652 fprintf(stderr, "<nfrs> continue fractional with text '"); |
|
653 dumpUS(stderr, text); |
|
654 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); |
|
655 #endif |
|
656 // then try each of the fraction rules |
|
657 { |
|
658 for (int i = 0; i < 3; i++) { |
|
659 if (fractionRules[i]) { |
|
660 Formattable tempResult; |
|
661 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); |
|
662 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { |
|
663 result = tempResult; |
|
664 highWaterMark = workingPos; |
|
665 } |
|
666 workingPos = pos; |
|
667 } |
|
668 } |
|
669 } |
|
670 #ifdef RBNF_DEBUG |
|
671 fprintf(stderr, "<nfrs> continue other with text '"); |
|
672 dumpUS(stderr, text); |
|
673 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); |
|
674 #endif |
|
675 |
|
676 // finally, go through the regular rules one at a time. We start |
|
677 // at the end of the list because we want to try matching the most |
|
678 // sigificant rule first (this helps ensure that we parse |
|
679 // "five thousand three hundred six" as |
|
680 // "(five thousand) (three hundred) (six)" rather than |
|
681 // "((five thousand three) hundred) (six)"). Skip rules whose |
|
682 // base values are higher than the upper bound (again, this helps |
|
683 // limit ambiguity by making sure the rules that match a rule's |
|
684 // are less significant than the rule containing the substitutions)/ |
|
685 { |
|
686 int64_t ub = util64_fromDouble(upperBound); |
|
687 #ifdef RBNF_DEBUG |
|
688 { |
|
689 char ubstr[64]; |
|
690 util64_toa(ub, ubstr, 64); |
|
691 char ubstrhex[64]; |
|
692 util64_toa(ub, ubstrhex, 64, 16); |
|
693 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); |
|
694 } |
|
695 #endif |
|
696 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { |
|
697 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { |
|
698 continue; |
|
699 } |
|
700 Formattable tempResult; |
|
701 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); |
|
702 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { |
|
703 result = tempResult; |
|
704 highWaterMark = workingPos; |
|
705 } |
|
706 workingPos = pos; |
|
707 } |
|
708 } |
|
709 #ifdef RBNF_DEBUG |
|
710 fprintf(stderr, "<nfrs> exit\n"); |
|
711 #endif |
|
712 // finally, update the parse postion we were passed to point to the |
|
713 // first character we didn't use, and return the result that |
|
714 // corresponds to that string of characters |
|
715 pos = highWaterMark; |
|
716 |
|
717 return 1; |
|
718 } |
|
719 |
|
720 void |
|
721 NFRuleSet::appendRules(UnicodeString& result) const |
|
722 { |
|
723 // the rule set name goes first... |
|
724 result.append(name); |
|
725 result.append(gColon); |
|
726 result.append(gLineFeed); |
|
727 |
|
728 // followed by the regular rules... |
|
729 for (uint32_t i = 0; i < rules.size(); i++) { |
|
730 result.append(gFourSpaces, 4); |
|
731 rules[i]->_appendRuleText(result); |
|
732 result.append(gLineFeed); |
|
733 } |
|
734 |
|
735 // followed by the special rules (if they exist) |
|
736 if (negativeNumberRule) { |
|
737 result.append(gFourSpaces, 4); |
|
738 negativeNumberRule->_appendRuleText(result); |
|
739 result.append(gLineFeed); |
|
740 } |
|
741 |
|
742 { |
|
743 for (uint32_t i = 0; i < 3; ++i) { |
|
744 if (fractionRules[i]) { |
|
745 result.append(gFourSpaces, 4); |
|
746 fractionRules[i]->_appendRuleText(result); |
|
747 result.append(gLineFeed); |
|
748 } |
|
749 } |
|
750 } |
|
751 } |
|
752 |
|
753 // utility functions |
|
754 |
|
755 int64_t util64_fromDouble(double d) { |
|
756 int64_t result = 0; |
|
757 if (!uprv_isNaN(d)) { |
|
758 double mant = uprv_maxMantissa(); |
|
759 if (d < -mant) { |
|
760 d = -mant; |
|
761 } else if (d > mant) { |
|
762 d = mant; |
|
763 } |
|
764 UBool neg = d < 0; |
|
765 if (neg) { |
|
766 d = -d; |
|
767 } |
|
768 result = (int64_t)uprv_floor(d); |
|
769 if (neg) { |
|
770 result = -result; |
|
771 } |
|
772 } |
|
773 return result; |
|
774 } |
|
775 |
|
776 int64_t util64_pow(int32_t r, uint32_t e) { |
|
777 if (r == 0) { |
|
778 return 0; |
|
779 } else if (e == 0) { |
|
780 return 1; |
|
781 } else { |
|
782 int64_t n = r; |
|
783 while (--e > 0) { |
|
784 n *= r; |
|
785 } |
|
786 return n; |
|
787 } |
|
788 } |
|
789 |
|
790 static const uint8_t asciiDigits[] = { |
|
791 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, |
|
792 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, |
|
793 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, |
|
794 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, |
|
795 0x77u, 0x78u, 0x79u, 0x7au, |
|
796 }; |
|
797 |
|
798 static const UChar kUMinus = (UChar)0x002d; |
|
799 |
|
800 #ifdef RBNF_DEBUG |
|
801 static const char kMinus = '-'; |
|
802 |
|
803 static const uint8_t digitInfo[] = { |
|
804 0, 0, 0, 0, 0, 0, 0, 0, |
|
805 0, 0, 0, 0, 0, 0, 0, 0, |
|
806 0, 0, 0, 0, 0, 0, 0, 0, |
|
807 0, 0, 0, 0, 0, 0, 0, 0, |
|
808 0, 0, 0, 0, 0, 0, 0, 0, |
|
809 0, 0, 0, 0, 0, 0, 0, 0, |
|
810 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, |
|
811 0x88u, 0x89u, 0, 0, 0, 0, 0, 0, |
|
812 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, |
|
813 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, |
|
814 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, |
|
815 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, |
|
816 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, |
|
817 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, |
|
818 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, |
|
819 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, |
|
820 }; |
|
821 |
|
822 int64_t util64_atoi(const char* str, uint32_t radix) |
|
823 { |
|
824 if (radix > 36) { |
|
825 radix = 36; |
|
826 } else if (radix < 2) { |
|
827 radix = 2; |
|
828 } |
|
829 int64_t lradix = radix; |
|
830 |
|
831 int neg = 0; |
|
832 if (*str == kMinus) { |
|
833 ++str; |
|
834 neg = 1; |
|
835 } |
|
836 int64_t result = 0; |
|
837 uint8_t b; |
|
838 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { |
|
839 result *= lradix; |
|
840 result += (int32_t)b; |
|
841 } |
|
842 if (neg) { |
|
843 result = -result; |
|
844 } |
|
845 return result; |
|
846 } |
|
847 |
|
848 int64_t util64_utoi(const UChar* str, uint32_t radix) |
|
849 { |
|
850 if (radix > 36) { |
|
851 radix = 36; |
|
852 } else if (radix < 2) { |
|
853 radix = 2; |
|
854 } |
|
855 int64_t lradix = radix; |
|
856 |
|
857 int neg = 0; |
|
858 if (*str == kUMinus) { |
|
859 ++str; |
|
860 neg = 1; |
|
861 } |
|
862 int64_t result = 0; |
|
863 UChar c; |
|
864 uint8_t b; |
|
865 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { |
|
866 result *= lradix; |
|
867 result += (int32_t)b; |
|
868 } |
|
869 if (neg) { |
|
870 result = -result; |
|
871 } |
|
872 return result; |
|
873 } |
|
874 |
|
875 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) |
|
876 { |
|
877 if (radix > 36) { |
|
878 radix = 36; |
|
879 } else if (radix < 2) { |
|
880 radix = 2; |
|
881 } |
|
882 int64_t base = radix; |
|
883 |
|
884 char* p = buf; |
|
885 if (len && (w < 0) && (radix == 10) && !raw) { |
|
886 w = -w; |
|
887 *p++ = kMinus; |
|
888 --len; |
|
889 } else if (len && (w == 0)) { |
|
890 *p++ = (char)raw ? 0 : asciiDigits[0]; |
|
891 --len; |
|
892 } |
|
893 |
|
894 while (len && w != 0) { |
|
895 int64_t n = w / base; |
|
896 int64_t m = n * base; |
|
897 int32_t d = (int32_t)(w-m); |
|
898 *p++ = raw ? (char)d : asciiDigits[d]; |
|
899 w = n; |
|
900 --len; |
|
901 } |
|
902 if (len) { |
|
903 *p = 0; // null terminate if room for caller convenience |
|
904 } |
|
905 |
|
906 len = p - buf; |
|
907 if (*buf == kMinus) { |
|
908 ++buf; |
|
909 } |
|
910 while (--p > buf) { |
|
911 char c = *p; |
|
912 *p = *buf; |
|
913 *buf = c; |
|
914 ++buf; |
|
915 } |
|
916 |
|
917 return len; |
|
918 } |
|
919 #endif |
|
920 |
|
921 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) |
|
922 { |
|
923 if (radix > 36) { |
|
924 radix = 36; |
|
925 } else if (radix < 2) { |
|
926 radix = 2; |
|
927 } |
|
928 int64_t base = radix; |
|
929 |
|
930 UChar* p = buf; |
|
931 if (len && (w < 0) && (radix == 10) && !raw) { |
|
932 w = -w; |
|
933 *p++ = kUMinus; |
|
934 --len; |
|
935 } else if (len && (w == 0)) { |
|
936 *p++ = (UChar)raw ? 0 : asciiDigits[0]; |
|
937 --len; |
|
938 } |
|
939 |
|
940 while (len && (w != 0)) { |
|
941 int64_t n = w / base; |
|
942 int64_t m = n * base; |
|
943 int32_t d = (int32_t)(w-m); |
|
944 *p++ = (UChar)(raw ? d : asciiDigits[d]); |
|
945 w = n; |
|
946 --len; |
|
947 } |
|
948 if (len) { |
|
949 *p = 0; // null terminate if room for caller convenience |
|
950 } |
|
951 |
|
952 len = (uint32_t)(p - buf); |
|
953 if (*buf == kUMinus) { |
|
954 ++buf; |
|
955 } |
|
956 while (--p > buf) { |
|
957 UChar c = *p; |
|
958 *p = *buf; |
|
959 *buf = c; |
|
960 ++buf; |
|
961 } |
|
962 |
|
963 return len; |
|
964 } |
|
965 |
|
966 |
|
967 U_NAMESPACE_END |
|
968 |
|
969 /* U_HAVE_RBNF */ |
|
970 #endif |
|
971 |