|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 2007-2013, International Business Machines Corporation and |
|
4 * others. All Rights Reserved. |
|
5 ******************************************************************************* |
|
6 * |
|
7 * File plurrule.cpp |
|
8 */ |
|
9 |
|
10 #include <math.h> |
|
11 #include <stdio.h> |
|
12 |
|
13 #include "unicode/utypes.h" |
|
14 #include "unicode/localpointer.h" |
|
15 #include "unicode/plurrule.h" |
|
16 #include "unicode/upluralrules.h" |
|
17 #include "unicode/ures.h" |
|
18 #include "charstr.h" |
|
19 #include "cmemory.h" |
|
20 #include "cstring.h" |
|
21 #include "digitlst.h" |
|
22 #include "hash.h" |
|
23 #include "locutil.h" |
|
24 #include "mutex.h" |
|
25 #include "patternprops.h" |
|
26 #include "plurrule_impl.h" |
|
27 #include "putilimp.h" |
|
28 #include "ucln_in.h" |
|
29 #include "ustrfmt.h" |
|
30 #include "uassert.h" |
|
31 #include "uvectr32.h" |
|
32 |
|
33 #if !UCONFIG_NO_FORMATTING |
|
34 |
|
35 U_NAMESPACE_BEGIN |
|
36 |
|
37 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0]) |
|
38 |
|
// NUL-terminated UChar spellings of the strings this file compares against or
// hands out. The letters come from the LOW_x / COLON / SPACE constants, which
// are declared outside this file (presumably the lowercase ASCII letters and
// punctuation their names suggest -- confirm in plurrule_impl.h).
// PLURAL_KEYWORD_OTHER spells the keyword "other"; code in this file passes
// an explicit length of 5 when comparing against it.
39 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0}; |
|
// PLURAL_DEFAULT_RULE spells the rule text "other: n", used when a locale has
// no rules of its own.
40 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0}; |
|
// PK_* spell words of the rule syntax: operators (in, not, is, mod, and, or,
// within), operand variables (n, i, f, t, v, j) and the sample-list markers
// (decimal, integer). None of them is referenced in the part of the file
// visible here.
41 static const UChar PK_IN[]={LOW_I,LOW_N,0}; |
|
42 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0}; |
|
43 static const UChar PK_IS[]={LOW_I,LOW_S,0}; |
|
44 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0}; |
|
45 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0}; |
|
46 static const UChar PK_OR[]={LOW_O,LOW_R,0}; |
|
47 static const UChar PK_VAR_N[]={LOW_N,0}; |
|
48 static const UChar PK_VAR_I[]={LOW_I,0}; |
|
49 static const UChar PK_VAR_F[]={LOW_F,0}; |
|
50 static const UChar PK_VAR_T[]={LOW_T,0}; |
|
51 static const UChar PK_VAR_V[]={LOW_V,0}; |
|
52 static const UChar PK_VAR_J[]={LOW_J,0}; |
|
53 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0}; |
|
54 static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0}; |
|
55 static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0}; |
|
56 |
|
57 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules) |
|
58 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) |
|
59 |
|
60 PluralRules::PluralRules(UErrorCode& /*status*/) |
|
61 : UObject(), |
|
62 mRules(NULL) |
|
63 { |
|
64 } |
|
65 |
|
66 PluralRules::PluralRules(const PluralRules& other) |
|
67 : UObject(other), |
|
68 mRules(NULL) |
|
69 { |
|
70 *this=other; |
|
71 } |
|
72 |
|
73 PluralRules::~PluralRules() { |
|
74 delete mRules; |
|
75 } |
|
76 |
|
77 PluralRules* |
|
78 PluralRules::clone() const { |
|
79 return new PluralRules(*this); |
|
80 } |
|
81 |
|
82 PluralRules& |
|
83 PluralRules::operator=(const PluralRules& other) { |
|
84 if (this != &other) { |
|
85 delete mRules; |
|
86 if (other.mRules==NULL) { |
|
87 mRules = NULL; |
|
88 } |
|
89 else { |
|
90 mRules = new RuleChain(*other.mRules); |
|
91 } |
|
92 } |
|
93 |
|
94 return *this; |
|
95 } |
|
96 |
|
97 StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) { |
|
98 StringEnumeration *result = new PluralAvailableLocalesEnumeration(status); |
|
99 if (result == NULL && U_SUCCESS(status)) { |
|
100 status = U_MEMORY_ALLOCATION_ERROR; |
|
101 } |
|
102 if (U_FAILURE(status)) { |
|
103 delete result; |
|
104 result = NULL; |
|
105 } |
|
106 return result; |
|
107 } |
|
108 |
|
109 |
|
110 PluralRules* U_EXPORT2 |
|
111 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { |
|
112 if (U_FAILURE(status)) { |
|
113 return NULL; |
|
114 } |
|
115 |
|
116 PluralRuleParser parser; |
|
117 PluralRules *newRules = new PluralRules(status); |
|
118 if (U_SUCCESS(status) && newRules == NULL) { |
|
119 status = U_MEMORY_ALLOCATION_ERROR; |
|
120 } |
|
121 parser.parse(description, newRules, status); |
|
122 if (U_FAILURE(status)) { |
|
123 delete newRules; |
|
124 newRules = NULL; |
|
125 } |
|
126 return newRules; |
|
127 } |
|
128 |
|
129 |
|
130 PluralRules* U_EXPORT2 |
|
131 PluralRules::createDefaultRules(UErrorCode& status) { |
|
132 return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status); |
|
133 } |
|
134 |
|
135 PluralRules* U_EXPORT2 |
|
136 PluralRules::forLocale(const Locale& locale, UErrorCode& status) { |
|
137 return forLocale(locale, UPLURAL_TYPE_CARDINAL, status); |
|
138 } |
|
139 |
|
140 PluralRules* U_EXPORT2 |
|
141 PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) { |
|
142 if (U_FAILURE(status)) { |
|
143 return NULL; |
|
144 } |
|
145 if (type >= UPLURAL_TYPE_COUNT) { |
|
146 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
147 return NULL; |
|
148 } |
|
149 PluralRules *newObj = new PluralRules(status); |
|
150 if (newObj==NULL || U_FAILURE(status)) { |
|
151 delete newObj; |
|
152 return NULL; |
|
153 } |
|
154 UnicodeString locRule = newObj->getRuleFromResource(locale, type, status); |
|
155 // TODO: which errors, if any, should be returned? |
|
156 if (locRule.length() == 0) { |
|
157 // Locales with no specific rules (all numbers have the "other" category |
|
158 // will return a U_MISSING_RESOURCE_ERROR at this point. This is not |
|
159 // an error. |
|
160 locRule = UnicodeString(PLURAL_DEFAULT_RULE); |
|
161 status = U_ZERO_ERROR; |
|
162 } |
|
163 PluralRuleParser parser; |
|
164 parser.parse(locRule, newObj, status); |
|
165 // TODO: should rule parse errors be returned, or |
|
166 // should we silently use default rules? |
|
167 // Original impl used default rules. |
|
168 // Ask the question to ICU Core. |
|
169 |
|
170 return newObj; |
|
171 } |
|
172 |
|
173 UnicodeString |
|
174 PluralRules::select(int32_t number) const { |
|
175 return select(FixedDecimal(number)); |
|
176 } |
|
177 |
|
178 UnicodeString |
|
179 PluralRules::select(double number) const { |
|
180 return select(FixedDecimal(number)); |
|
181 } |
|
182 |
|
183 UnicodeString |
|
184 PluralRules::select(const FixedDecimal &number) const { |
|
185 if (mRules == NULL) { |
|
186 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1); |
|
187 } |
|
188 else { |
|
189 return mRules->select(number); |
|
190 } |
|
191 } |
|
192 |
|
193 StringEnumeration* |
|
194 PluralRules::getKeywords(UErrorCode& status) const { |
|
195 if (U_FAILURE(status)) return NULL; |
|
196 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status); |
|
197 if (U_FAILURE(status)) { |
|
198 delete nameEnumerator; |
|
199 return NULL; |
|
200 } |
|
201 |
|
202 return nameEnumerator; |
|
203 } |
|
204 |
|
205 double |
|
206 PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) { |
|
207 // Not Implemented. |
|
208 return UPLRULES_NO_UNIQUE_VALUE; |
|
209 } |
|
210 |
|
211 int32_t |
|
212 PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */, |
|
213 int32_t /* destCapacity */, UErrorCode& error) { |
|
214 error = U_UNSUPPORTED_ERROR; |
|
215 return 0; |
|
216 } |
|
217 |
|
218 |
|
// Returns the power of ten by which d must be multiplied to become integral:
// 1.0 for a value that is already an integer, 10.0 for one fractional decimal
// digit, and so on.
//
// NaN yields 1.0. Without the guard the loop below would never terminate,
// because NaN compares unequal to everything, including floor(NaN).
static double scaleForInt(double d) {
    double scale = 1.0;
    if (d != d) {           // true only for NaN
        return scale;
    }
    while (d != floor(d)) {
        d = d * 10.0;
        scale = scale * 10.0;
    }
    return scale;
}
|
227 |
|
228 static int32_t |
|
229 getSamplesFromString(const UnicodeString &samples, double *dest, |
|
230 int32_t destCapacity, UErrorCode& status) { |
|
231 int32_t sampleCount = 0; |
|
232 int32_t sampleStartIdx = 0; |
|
233 int32_t sampleEndIdx = 0; |
|
234 |
|
235 //std::string ss; // TODO: debugging. |
|
236 // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n"; |
|
237 for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) { |
|
238 sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx); |
|
239 if (sampleEndIdx == -1) { |
|
240 sampleEndIdx = samples.length(); |
|
241 } |
|
242 const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx); |
|
243 // ss.erase(); |
|
244 // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n"; |
|
245 int32_t tildeIndex = sampleRange.indexOf(TILDE); |
|
246 if (tildeIndex < 0) { |
|
247 FixedDecimal fixed(sampleRange, status); |
|
248 double sampleValue = fixed.source; |
|
249 if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) { |
|
250 dest[sampleCount++] = sampleValue; |
|
251 } |
|
252 } else { |
|
253 |
|
254 FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status); |
|
255 FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status); |
|
256 double rangeLo = fixedLo.source; |
|
257 double rangeHi = fixedHi.source; |
|
258 if (U_FAILURE(status)) { |
|
259 break; |
|
260 } |
|
261 if (rangeHi < rangeLo) { |
|
262 status = U_INVALID_FORMAT_ERROR; |
|
263 break; |
|
264 } |
|
265 |
|
266 // For ranges of samples with fraction decimal digits, scale the number up so that we |
|
267 // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths. |
|
268 |
|
269 double scale = scaleForInt(rangeLo); |
|
270 double t = scaleForInt(rangeHi); |
|
271 if (t > scale) { |
|
272 scale = t; |
|
273 } |
|
274 rangeLo *= scale; |
|
275 rangeHi *= scale; |
|
276 for (double n=rangeLo; n<=rangeHi; n+=1) { |
|
277 // Hack Alert: don't return any decimal samples with integer values that |
|
278 // originated from a format with trailing decimals. |
|
279 // This API is returning doubles, which can't distinguish having displayed |
|
280 // zeros to the right of the decimal. |
|
281 // This results in test failures with values mapping back to a different keyword. |
|
282 double sampleValue = n/scale; |
|
283 if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) { |
|
284 dest[sampleCount++] = sampleValue; |
|
285 } |
|
286 if (sampleCount >= destCapacity) { |
|
287 break; |
|
288 } |
|
289 } |
|
290 } |
|
291 sampleStartIdx = sampleEndIdx + 1; |
|
292 } |
|
293 return sampleCount; |
|
294 } |
|
295 |
|
296 |
|
297 int32_t |
|
298 PluralRules::getSamples(const UnicodeString &keyword, double *dest, |
|
299 int32_t destCapacity, UErrorCode& status) { |
|
300 RuleChain *rc = rulesForKeyword(keyword); |
|
301 if (rc == NULL || destCapacity == 0 || U_FAILURE(status)) { |
|
302 return 0; |
|
303 } |
|
304 int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status); |
|
305 if (numSamples == 0) { |
|
306 numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status); |
|
307 } |
|
308 return numSamples; |
|
309 } |
|
310 |
|
311 |
|
312 RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const { |
|
313 RuleChain *rc; |
|
314 for (rc = mRules; rc != NULL; rc = rc->fNext) { |
|
315 if (rc->fKeyword == keyword) { |
|
316 break; |
|
317 } |
|
318 } |
|
319 return rc; |
|
320 } |
|
321 |
|
322 |
|
323 UBool |
|
324 PluralRules::isKeyword(const UnicodeString& keyword) const { |
|
325 if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { |
|
326 return true; |
|
327 } |
|
328 return rulesForKeyword(keyword) != NULL; |
|
329 } |
|
330 |
|
331 UnicodeString |
|
332 PluralRules::getKeywordOther() const { |
|
333 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); |
|
334 } |
|
335 |
|
336 UBool |
|
337 PluralRules::operator==(const PluralRules& other) const { |
|
338 const UnicodeString *ptrKeyword; |
|
339 UErrorCode status= U_ZERO_ERROR; |
|
340 |
|
341 if ( this == &other ) { |
|
342 return TRUE; |
|
343 } |
|
344 LocalPointer<StringEnumeration> myKeywordList(getKeywords(status)); |
|
345 LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status)); |
|
346 if (U_FAILURE(status)) { |
|
347 return FALSE; |
|
348 } |
|
349 |
|
350 if (myKeywordList->count(status)!=otherKeywordList->count(status)) { |
|
351 return FALSE; |
|
352 } |
|
353 myKeywordList->reset(status); |
|
354 while ((ptrKeyword=myKeywordList->snext(status))!=NULL) { |
|
355 if (!other.isKeyword(*ptrKeyword)) { |
|
356 return FALSE; |
|
357 } |
|
358 } |
|
359 otherKeywordList->reset(status); |
|
360 while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) { |
|
361 if (!this->isKeyword(*ptrKeyword)) { |
|
362 return FALSE; |
|
363 } |
|
364 } |
|
365 if (U_FAILURE(status)) { |
|
366 return FALSE; |
|
367 } |
|
368 |
|
369 return TRUE; |
|
370 } |
|
371 |
|
372 |
|
373 void |
|
374 PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status) |
|
375 { |
|
376 if (U_FAILURE(status)) { |
|
377 return; |
|
378 } |
|
379 U_ASSERT(ruleIndex == 0); // Parsers are good for a single use only! |
|
380 ruleSrc = &ruleData; |
|
381 |
|
382 while (ruleIndex< ruleSrc->length()) { |
|
383 getNextToken(status); |
|
384 if (U_FAILURE(status)) { |
|
385 return; |
|
386 } |
|
387 checkSyntax(status); |
|
388 if (U_FAILURE(status)) { |
|
389 return; |
|
390 } |
|
391 switch (type) { |
|
392 case tAnd: |
|
393 U_ASSERT(curAndConstraint != NULL); |
|
394 curAndConstraint = curAndConstraint->add(); |
|
395 break; |
|
396 case tOr: |
|
397 { |
|
398 U_ASSERT(currentChain != NULL); |
|
399 OrConstraint *orNode=currentChain->ruleHeader; |
|
400 while (orNode->next != NULL) { |
|
401 orNode = orNode->next; |
|
402 } |
|
403 orNode->next= new OrConstraint(); |
|
404 orNode=orNode->next; |
|
405 orNode->next=NULL; |
|
406 curAndConstraint = orNode->add(); |
|
407 } |
|
408 break; |
|
409 case tIs: |
|
410 U_ASSERT(curAndConstraint != NULL); |
|
411 U_ASSERT(curAndConstraint->value == -1); |
|
412 U_ASSERT(curAndConstraint->rangeList == NULL); |
|
413 break; |
|
414 case tNot: |
|
415 U_ASSERT(curAndConstraint != NULL); |
|
416 curAndConstraint->negated=TRUE; |
|
417 break; |
|
418 |
|
419 case tNotEqual: |
|
420 curAndConstraint->negated=TRUE; |
|
421 case tIn: |
|
422 case tWithin: |
|
423 case tEqual: |
|
424 U_ASSERT(curAndConstraint != NULL); |
|
425 curAndConstraint->rangeList = new UVector32(status); |
|
426 curAndConstraint->rangeList->addElement(-1, status); // range Low |
|
427 curAndConstraint->rangeList->addElement(-1, status); // range Hi |
|
428 rangeLowIdx = 0; |
|
429 rangeHiIdx = 1; |
|
430 curAndConstraint->value=PLURAL_RANGE_HIGH; |
|
431 curAndConstraint->integerOnly = (type != tWithin); |
|
432 break; |
|
433 case tNumber: |
|
434 U_ASSERT(curAndConstraint != NULL); |
|
435 if ( (curAndConstraint->op==AndConstraint::MOD)&& |
|
436 (curAndConstraint->opNum == -1 ) ) { |
|
437 curAndConstraint->opNum=getNumberValue(token); |
|
438 } |
|
439 else { |
|
440 if (curAndConstraint->rangeList == NULL) { |
|
441 // this is for an 'is' rule |
|
442 curAndConstraint->value = getNumberValue(token); |
|
443 } else { |
|
444 // this is for an 'in' or 'within' rule |
|
445 if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) { |
|
446 curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx); |
|
447 curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx); |
|
448 } |
|
449 else { |
|
450 curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx); |
|
451 if (curAndConstraint->rangeList->elementAti(rangeLowIdx) > |
|
452 curAndConstraint->rangeList->elementAti(rangeHiIdx)) { |
|
453 // Range Lower bound > Range Upper bound. |
|
454 // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently |
|
455 // used for all plural rule parse errors. |
|
456 status = U_UNEXPECTED_TOKEN; |
|
457 break; |
|
458 } |
|
459 } |
|
460 } |
|
461 } |
|
462 break; |
|
463 case tComma: |
|
464 // TODO: rule syntax checking is inadequate, can happen with badly formed rules. |
|
465 // Catch cases like "n mod 10, is 1" here instead. |
|
466 if (curAndConstraint == NULL || curAndConstraint->rangeList == NULL) { |
|
467 status = U_UNEXPECTED_TOKEN; |
|
468 break; |
|
469 } |
|
470 U_ASSERT(curAndConstraint->rangeList->size() >= 2); |
|
471 rangeLowIdx = curAndConstraint->rangeList->size(); |
|
472 curAndConstraint->rangeList->addElement(-1, status); // range Low |
|
473 rangeHiIdx = curAndConstraint->rangeList->size(); |
|
474 curAndConstraint->rangeList->addElement(-1, status); // range Hi |
|
475 break; |
|
476 case tMod: |
|
477 U_ASSERT(curAndConstraint != NULL); |
|
478 curAndConstraint->op=AndConstraint::MOD; |
|
479 break; |
|
480 case tVariableN: |
|
481 case tVariableI: |
|
482 case tVariableF: |
|
483 case tVariableT: |
|
484 case tVariableV: |
|
485 U_ASSERT(curAndConstraint != NULL); |
|
486 curAndConstraint->digitsType = type; |
|
487 break; |
|
488 case tKeyword: |
|
489 { |
|
490 RuleChain *newChain = new RuleChain; |
|
491 if (newChain == NULL) { |
|
492 status = U_MEMORY_ALLOCATION_ERROR; |
|
493 break; |
|
494 } |
|
495 newChain->fKeyword = token; |
|
496 if (prules->mRules == NULL) { |
|
497 prules->mRules = newChain; |
|
498 } else { |
|
499 // The new rule chain goes at the end of the linked list of rule chains, |
|
500 // unless there is an "other" keyword & chain. "other" must remain last. |
|
501 RuleChain *insertAfter = prules->mRules; |
|
502 while (insertAfter->fNext!=NULL && |
|
503 insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){ |
|
504 insertAfter=insertAfter->fNext; |
|
505 } |
|
506 newChain->fNext = insertAfter->fNext; |
|
507 insertAfter->fNext = newChain; |
|
508 } |
|
509 OrConstraint *orNode = new OrConstraint(); |
|
510 newChain->ruleHeader = orNode; |
|
511 curAndConstraint = orNode->add(); |
|
512 currentChain = newChain; |
|
513 } |
|
514 break; |
|
515 |
|
516 case tInteger: |
|
517 for (;;) { |
|
518 getNextToken(status); |
|
519 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) { |
|
520 break; |
|
521 } |
|
522 if (type == tEllipsis) { |
|
523 currentChain->fIntegerSamplesUnbounded = TRUE; |
|
524 continue; |
|
525 } |
|
526 currentChain->fIntegerSamples.append(token); |
|
527 } |
|
528 break; |
|
529 |
|
530 case tDecimal: |
|
531 for (;;) { |
|
532 getNextToken(status); |
|
533 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) { |
|
534 break; |
|
535 } |
|
536 if (type == tEllipsis) { |
|
537 currentChain->fDecimalSamplesUnbounded = TRUE; |
|
538 continue; |
|
539 } |
|
540 currentChain->fDecimalSamples.append(token); |
|
541 } |
|
542 break; |
|
543 |
|
544 default: |
|
545 break; |
|
546 } |
|
547 prevType=type; |
|
548 if (U_FAILURE(status)) { |
|
549 break; |
|
550 } |
|
551 } |
|
552 } |
|
553 |
|
554 UnicodeString |
|
555 PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) { |
|
556 UnicodeString emptyStr; |
|
557 |
|
558 if (U_FAILURE(errCode)) { |
|
559 return emptyStr; |
|
560 } |
|
561 LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode)); |
|
562 if(U_FAILURE(errCode)) { |
|
563 return emptyStr; |
|
564 } |
|
565 const char *typeKey; |
|
566 switch (type) { |
|
567 case UPLURAL_TYPE_CARDINAL: |
|
568 typeKey = "locales"; |
|
569 break; |
|
570 case UPLURAL_TYPE_ORDINAL: |
|
571 typeKey = "locales_ordinals"; |
|
572 break; |
|
573 default: |
|
574 // Must not occur: The caller should have checked for valid types. |
|
575 errCode = U_ILLEGAL_ARGUMENT_ERROR; |
|
576 return emptyStr; |
|
577 } |
|
578 LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode)); |
|
579 if(U_FAILURE(errCode)) { |
|
580 return emptyStr; |
|
581 } |
|
582 int32_t resLen=0; |
|
583 const char *curLocaleName=locale.getName(); |
|
584 const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode); |
|
585 |
|
586 if (s == NULL) { |
|
587 // Check parent locales. |
|
588 UErrorCode status = U_ZERO_ERROR; |
|
589 char parentLocaleName[ULOC_FULLNAME_CAPACITY]; |
|
590 const char *curLocaleName=locale.getName(); |
|
591 uprv_strcpy(parentLocaleName, curLocaleName); |
|
592 |
|
593 while (uloc_getParent(parentLocaleName, parentLocaleName, |
|
594 ULOC_FULLNAME_CAPACITY, &status) > 0) { |
|
595 resLen=0; |
|
596 s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status); |
|
597 if (s != NULL) { |
|
598 errCode = U_ZERO_ERROR; |
|
599 break; |
|
600 } |
|
601 status = U_ZERO_ERROR; |
|
602 } |
|
603 } |
|
604 if (s==NULL) { |
|
605 return emptyStr; |
|
606 } |
|
607 |
|
608 char setKey[256]; |
|
609 u_UCharsToChars(s, setKey, resLen + 1); |
|
610 // printf("\n PluralRule: %s\n", setKey); |
|
611 |
|
612 LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode)); |
|
613 if(U_FAILURE(errCode)) { |
|
614 return emptyStr; |
|
615 } |
|
616 LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode)); |
|
617 if (U_FAILURE(errCode)) { |
|
618 return emptyStr; |
|
619 } |
|
620 |
|
621 int32_t numberKeys = ures_getSize(setRes.getAlias()); |
|
622 UnicodeString result; |
|
623 const char *key=NULL; |
|
624 for(int32_t i=0; i<numberKeys; ++i) { // Keys are zero, one, few, ... |
|
625 UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode); |
|
626 UnicodeString uKey(key, -1, US_INV); |
|
627 result.append(uKey); |
|
628 result.append(COLON); |
|
629 result.append(rules); |
|
630 result.append(SEMI_COLON); |
|
631 } |
|
632 return result; |
|
633 } |
|
634 |
|
635 |
|
636 UnicodeString |
|
637 PluralRules::getRules() const { |
|
638 UnicodeString rules; |
|
639 if (mRules != NULL) { |
|
640 mRules->dumpRules(rules); |
|
641 } |
|
642 return rules; |
|
643 } |
|
644 |
|
645 |
|
646 AndConstraint::AndConstraint() { |
|
647 op = AndConstraint::NONE; |
|
648 opNum=-1; |
|
649 value = -1; |
|
650 rangeList = NULL; |
|
651 negated = FALSE; |
|
652 integerOnly = FALSE; |
|
653 digitsType = none; |
|
654 next=NULL; |
|
655 } |
|
656 |
|
657 |
|
658 AndConstraint::AndConstraint(const AndConstraint& other) { |
|
659 this->op = other.op; |
|
660 this->opNum=other.opNum; |
|
661 this->value=other.value; |
|
662 this->rangeList=NULL; |
|
663 if (other.rangeList != NULL) { |
|
664 UErrorCode status = U_ZERO_ERROR; |
|
665 this->rangeList = new UVector32(status); |
|
666 this->rangeList->assign(*other.rangeList, status); |
|
667 } |
|
668 this->integerOnly=other.integerOnly; |
|
669 this->negated=other.negated; |
|
670 this->digitsType = other.digitsType; |
|
671 if (other.next==NULL) { |
|
672 this->next=NULL; |
|
673 } |
|
674 else { |
|
675 this->next = new AndConstraint(*other.next); |
|
676 } |
|
677 } |
|
678 |
|
679 AndConstraint::~AndConstraint() { |
|
680 delete rangeList; |
|
681 if (next!=NULL) { |
|
682 delete next; |
|
683 } |
|
684 } |
|
685 |
|
686 |
|
687 UBool |
|
688 AndConstraint::isFulfilled(const FixedDecimal &number) { |
|
689 UBool result = TRUE; |
|
690 if (digitsType == none) { |
|
691 // An empty AndConstraint, created by a rule with a keyword but no following expression. |
|
692 return TRUE; |
|
693 } |
|
694 double n = number.get(digitsType); // pulls n | i | v | f value for the number. |
|
695 // Will always be positive. |
|
696 // May be non-integer (n option only) |
|
697 do { |
|
698 if (integerOnly && n != uprv_floor(n)) { |
|
699 result = FALSE; |
|
700 break; |
|
701 } |
|
702 |
|
703 if (op == MOD) { |
|
704 n = fmod(n, opNum); |
|
705 } |
|
706 if (rangeList == NULL) { |
|
707 result = value == -1 || // empty rule |
|
708 n == value; // 'is' rule |
|
709 break; |
|
710 } |
|
711 result = FALSE; // 'in' or 'within' rule |
|
712 for (int32_t r=0; r<rangeList->size(); r+=2) { |
|
713 if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) { |
|
714 result = TRUE; |
|
715 break; |
|
716 } |
|
717 } |
|
718 } while (FALSE); |
|
719 |
|
720 if (negated) { |
|
721 result = !result; |
|
722 } |
|
723 return result; |
|
724 } |
|
725 |
|
726 |
|
727 AndConstraint* |
|
728 AndConstraint::add() |
|
729 { |
|
730 this->next = new AndConstraint(); |
|
731 return this->next; |
|
732 } |
|
733 |
|
734 OrConstraint::OrConstraint() { |
|
735 childNode=NULL; |
|
736 next=NULL; |
|
737 } |
|
738 |
|
739 OrConstraint::OrConstraint(const OrConstraint& other) { |
|
740 if ( other.childNode == NULL ) { |
|
741 this->childNode = NULL; |
|
742 } |
|
743 else { |
|
744 this->childNode = new AndConstraint(*(other.childNode)); |
|
745 } |
|
746 if (other.next == NULL ) { |
|
747 this->next = NULL; |
|
748 } |
|
749 else { |
|
750 this->next = new OrConstraint(*(other.next)); |
|
751 } |
|
752 } |
|
753 |
|
754 OrConstraint::~OrConstraint() { |
|
755 if (childNode!=NULL) { |
|
756 delete childNode; |
|
757 } |
|
758 if (next!=NULL) { |
|
759 delete next; |
|
760 } |
|
761 } |
|
762 |
|
763 AndConstraint* |
|
764 OrConstraint::add() |
|
765 { |
|
766 OrConstraint *curOrConstraint=this; |
|
767 { |
|
768 while (curOrConstraint->next!=NULL) { |
|
769 curOrConstraint = curOrConstraint->next; |
|
770 } |
|
771 U_ASSERT(curOrConstraint->childNode == NULL); |
|
772 curOrConstraint->childNode = new AndConstraint(); |
|
773 } |
|
774 return curOrConstraint->childNode; |
|
775 } |
|
776 |
|
777 UBool |
|
778 OrConstraint::isFulfilled(const FixedDecimal &number) { |
|
779 OrConstraint* orRule=this; |
|
780 UBool result=FALSE; |
|
781 |
|
782 while (orRule!=NULL && !result) { |
|
783 result=TRUE; |
|
784 AndConstraint* andRule = orRule->childNode; |
|
785 while (andRule!=NULL && result) { |
|
786 result = andRule->isFulfilled(number); |
|
787 andRule=andRule->next; |
|
788 } |
|
789 orRule = orRule->next; |
|
790 } |
|
791 |
|
792 return result; |
|
793 } |
|
794 |
|
795 |
|
796 RuleChain::RuleChain(): fKeyword(), fNext(NULL), ruleHeader(NULL), fDecimalSamples(), fIntegerSamples(), |
|
797 fDecimalSamplesUnbounded(FALSE), fIntegerSamplesUnbounded(FALSE) { |
|
798 } |
|
799 |
|
800 RuleChain::RuleChain(const RuleChain& other) : |
|
801 fKeyword(other.fKeyword), fNext(NULL), ruleHeader(NULL), fDecimalSamples(other.fDecimalSamples), |
|
802 fIntegerSamples(other.fIntegerSamples), fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded), |
|
803 fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded) { |
|
804 if (other.ruleHeader != NULL) { |
|
805 this->ruleHeader = new OrConstraint(*(other.ruleHeader)); |
|
806 } |
|
807 if (other.fNext != NULL ) { |
|
808 this->fNext = new RuleChain(*other.fNext); |
|
809 } |
|
810 } |
|
811 |
|
812 RuleChain::~RuleChain() { |
|
813 delete fNext; |
|
814 delete ruleHeader; |
|
815 } |
|
816 |
|
817 |
|
818 UnicodeString |
|
819 RuleChain::select(const FixedDecimal &number) const { |
|
820 if (!number.isNanOrInfinity) { |
|
821 for (const RuleChain *rules = this; rules != NULL; rules = rules->fNext) { |
|
822 if (rules->ruleHeader->isFulfilled(number)) { |
|
823 return rules->fKeyword; |
|
824 } |
|
825 } |
|
826 } |
|
827 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); |
|
828 } |
|
829 |
|
830 static UnicodeString tokenString(tokenType tok) { |
|
831 UnicodeString s; |
|
832 switch (tok) { |
|
833 case tVariableN: |
|
834 s.append(LOW_N); break; |
|
835 case tVariableI: |
|
836 s.append(LOW_I); break; |
|
837 case tVariableF: |
|
838 s.append(LOW_F); break; |
|
839 case tVariableV: |
|
840 s.append(LOW_V); break; |
|
841 case tVariableT: |
|
842 s.append(LOW_T); break; |
|
843 default: |
|
844 s.append(TILDE); |
|
845 } |
|
846 return s; |
|
847 } |
|
848 |
|
849 void |
|
850 RuleChain::dumpRules(UnicodeString& result) { |
|
851 UChar digitString[16]; |
|
852 |
|
853 if ( ruleHeader != NULL ) { |
|
854 result += fKeyword; |
|
855 result += COLON; |
|
856 result += SPACE; |
|
857 OrConstraint* orRule=ruleHeader; |
|
858 while ( orRule != NULL ) { |
|
859 AndConstraint* andRule=orRule->childNode; |
|
860 while ( andRule != NULL ) { |
|
861 if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) && (andRule->value == -1)) { |
|
862 // Empty Rules. |
|
863 } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) ) { |
|
864 result += tokenString(andRule->digitsType); |
|
865 result += UNICODE_STRING_SIMPLE(" is "); |
|
866 if (andRule->negated) { |
|
867 result += UNICODE_STRING_SIMPLE("not "); |
|
868 } |
|
869 uprv_itou(digitString,16, andRule->value,10,0); |
|
870 result += UnicodeString(digitString); |
|
871 } |
|
872 else { |
|
873 result += tokenString(andRule->digitsType); |
|
874 result += SPACE; |
|
875 if (andRule->op==AndConstraint::MOD) { |
|
876 result += UNICODE_STRING_SIMPLE("mod "); |
|
877 uprv_itou(digitString,16, andRule->opNum,10,0); |
|
878 result += UnicodeString(digitString); |
|
879 } |
|
880 if (andRule->rangeList==NULL) { |
|
881 if (andRule->negated) { |
|
882 result += UNICODE_STRING_SIMPLE(" is not "); |
|
883 uprv_itou(digitString,16, andRule->value,10,0); |
|
884 result += UnicodeString(digitString); |
|
885 } |
|
886 else { |
|
887 result += UNICODE_STRING_SIMPLE(" is "); |
|
888 uprv_itou(digitString,16, andRule->value,10,0); |
|
889 result += UnicodeString(digitString); |
|
890 } |
|
891 } |
|
892 else { |
|
893 if (andRule->negated) { |
|
894 if ( andRule->integerOnly ) { |
|
895 result += UNICODE_STRING_SIMPLE(" not in "); |
|
896 } |
|
897 else { |
|
898 result += UNICODE_STRING_SIMPLE(" not within "); |
|
899 } |
|
900 } |
|
901 else { |
|
902 if ( andRule->integerOnly ) { |
|
903 result += UNICODE_STRING_SIMPLE(" in "); |
|
904 } |
|
905 else { |
|
906 result += UNICODE_STRING_SIMPLE(" within "); |
|
907 } |
|
908 } |
|
909 for (int32_t r=0; r<andRule->rangeList->size(); r+=2) { |
|
910 int32_t rangeLo = andRule->rangeList->elementAti(r); |
|
911 int32_t rangeHi = andRule->rangeList->elementAti(r+1); |
|
912 uprv_itou(digitString,16, rangeLo, 10, 0); |
|
913 result += UnicodeString(digitString); |
|
914 result += UNICODE_STRING_SIMPLE(".."); |
|
915 uprv_itou(digitString,16, rangeHi, 10,0); |
|
916 result += UnicodeString(digitString); |
|
917 if (r+2 < andRule->rangeList->size()) { |
|
918 result += UNICODE_STRING_SIMPLE(", "); |
|
919 } |
|
920 } |
|
921 } |
|
922 } |
|
923 if ( (andRule=andRule->next) != NULL) { |
|
924 result += UNICODE_STRING_SIMPLE(" and "); |
|
925 } |
|
926 } |
|
927 if ( (orRule = orRule->next) != NULL ) { |
|
928 result += UNICODE_STRING_SIMPLE(" or "); |
|
929 } |
|
930 } |
|
931 } |
|
932 if ( fNext != NULL ) { |
|
933 result += UNICODE_STRING_SIMPLE("; "); |
|
934 fNext->dumpRules(result); |
|
935 } |
|
936 } |
|
937 |
|
938 |
|
939 UErrorCode |
|
940 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { |
|
941 if ( arraySize < capacityOfKeywords-1 ) { |
|
942 keywords[arraySize++]=fKeyword; |
|
943 } |
|
944 else { |
|
945 return U_BUFFER_OVERFLOW_ERROR; |
|
946 } |
|
947 |
|
948 if ( fNext != NULL ) { |
|
949 return fNext->getKeywords(capacityOfKeywords, keywords, arraySize); |
|
950 } |
|
951 else { |
|
952 return U_ZERO_ERROR; |
|
953 } |
|
954 } |
|
955 |
|
956 UBool |
|
957 RuleChain::isKeyword(const UnicodeString& keywordParam) const { |
|
958 if ( fKeyword == keywordParam ) { |
|
959 return TRUE; |
|
960 } |
|
961 |
|
962 if ( fNext != NULL ) { |
|
963 return fNext->isKeyword(keywordParam); |
|
964 } |
|
965 else { |
|
966 return FALSE; |
|
967 } |
|
968 } |
|
969 |
|
970 |
|
971 PluralRuleParser::PluralRuleParser() : |
|
972 ruleIndex(0), token(), type(none), prevType(none), |
|
973 curAndConstraint(NULL), currentChain(NULL), rangeLowIdx(-1), rangeHiIdx(-1) |
|
974 { |
|
975 } |
|
976 |
|
977 PluralRuleParser::~PluralRuleParser() { |
|
978 } |
|
979 |
|
980 |
|
981 int32_t |
|
982 PluralRuleParser::getNumberValue(const UnicodeString& token) { |
|
983 int32_t i; |
|
984 char digits[128]; |
|
985 |
|
986 i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV); |
|
987 digits[i]='\0'; |
|
988 |
|
989 return((int32_t)atoi(digits)); |
|
990 } |
|
991 |
|
992 |
|
993 void |
|
994 PluralRuleParser::checkSyntax(UErrorCode &status) |
|
995 { |
|
996 if (U_FAILURE(status)) { |
|
997 return; |
|
998 } |
|
999 if (!(prevType==none || prevType==tSemiColon)) { |
|
1000 type = getKeyType(token, type); // Switch token type from tKeyword if we scanned a reserved word, |
|
1001 // and we are not at the start of a rule, where a |
|
1002 // keyword is expected. |
|
1003 } |
|
1004 |
|
1005 switch(prevType) { |
|
1006 case none: |
|
1007 case tSemiColon: |
|
1008 if (type!=tKeyword && type != tEOF) { |
|
1009 status = U_UNEXPECTED_TOKEN; |
|
1010 } |
|
1011 break; |
|
1012 case tVariableN: |
|
1013 case tVariableI: |
|
1014 case tVariableF: |
|
1015 case tVariableT: |
|
1016 case tVariableV: |
|
1017 if (type != tIs && type != tMod && type != tIn && |
|
1018 type != tNot && type != tWithin && type != tEqual && type != tNotEqual) { |
|
1019 status = U_UNEXPECTED_TOKEN; |
|
1020 } |
|
1021 break; |
|
1022 case tKeyword: |
|
1023 if (type != tColon) { |
|
1024 status = U_UNEXPECTED_TOKEN; |
|
1025 } |
|
1026 break; |
|
1027 case tColon: |
|
1028 if (!(type == tVariableN || |
|
1029 type == tVariableI || |
|
1030 type == tVariableF || |
|
1031 type == tVariableT || |
|
1032 type == tVariableV || |
|
1033 type == tAt)) { |
|
1034 status = U_UNEXPECTED_TOKEN; |
|
1035 } |
|
1036 break; |
|
1037 case tIs: |
|
1038 if ( type != tNumber && type != tNot) { |
|
1039 status = U_UNEXPECTED_TOKEN; |
|
1040 } |
|
1041 break; |
|
1042 case tNot: |
|
1043 if (type != tNumber && type != tIn && type != tWithin) { |
|
1044 status = U_UNEXPECTED_TOKEN; |
|
1045 } |
|
1046 break; |
|
1047 case tMod: |
|
1048 case tDot2: |
|
1049 case tIn: |
|
1050 case tWithin: |
|
1051 case tEqual: |
|
1052 case tNotEqual: |
|
1053 if (type != tNumber) { |
|
1054 status = U_UNEXPECTED_TOKEN; |
|
1055 } |
|
1056 break; |
|
1057 case tAnd: |
|
1058 case tOr: |
|
1059 if ( type != tVariableN && |
|
1060 type != tVariableI && |
|
1061 type != tVariableF && |
|
1062 type != tVariableT && |
|
1063 type != tVariableV) { |
|
1064 status = U_UNEXPECTED_TOKEN; |
|
1065 } |
|
1066 break; |
|
1067 case tComma: |
|
1068 if (type != tNumber) { |
|
1069 status = U_UNEXPECTED_TOKEN; |
|
1070 } |
|
1071 break; |
|
1072 case tNumber: |
|
1073 if (type != tDot2 && type != tSemiColon && type != tIs && type != tNot && |
|
1074 type != tIn && type != tEqual && type != tNotEqual && type != tWithin && |
|
1075 type != tAnd && type != tOr && type != tComma && type != tAt && |
|
1076 type != tEOF) |
|
1077 { |
|
1078 status = U_UNEXPECTED_TOKEN; |
|
1079 } |
|
1080 // TODO: a comma following a number that is not part of a range will be allowed. |
|
1081 // It's not the only case of this sort of thing. Parser needs a re-write. |
|
1082 break; |
|
1083 case tAt: |
|
1084 if (type != tDecimal && type != tInteger) { |
|
1085 status = U_UNEXPECTED_TOKEN; |
|
1086 } |
|
1087 break; |
|
1088 default: |
|
1089 status = U_UNEXPECTED_TOKEN; |
|
1090 break; |
|
1091 } |
|
1092 } |
|
1093 |
|
1094 |
|
1095 /* |
|
1096 * Scan the next token from the input rules. |
|
1097 * rules and returned token type are in the parser state variables. |
|
1098 */ |
|
1099 void |
|
1100 PluralRuleParser::getNextToken(UErrorCode &status) |
|
1101 { |
|
1102 if (U_FAILURE(status)) { |
|
1103 return; |
|
1104 } |
|
1105 |
|
1106 UChar ch; |
|
1107 while (ruleIndex < ruleSrc->length()) { |
|
1108 ch = ruleSrc->charAt(ruleIndex); |
|
1109 type = charType(ch); |
|
1110 if (type != tSpace) { |
|
1111 break; |
|
1112 } |
|
1113 ++(ruleIndex); |
|
1114 } |
|
1115 if (ruleIndex >= ruleSrc->length()) { |
|
1116 type = tEOF; |
|
1117 return; |
|
1118 } |
|
1119 int32_t curIndex= ruleIndex; |
|
1120 |
|
1121 switch (type) { |
|
1122 case tColon: |
|
1123 case tSemiColon: |
|
1124 case tComma: |
|
1125 case tEllipsis: |
|
1126 case tTilde: // scanned '~' |
|
1127 case tAt: // scanned '@' |
|
1128 case tEqual: // scanned '=' |
|
1129 case tMod: // scanned '%' |
|
1130 // Single character tokens. |
|
1131 ++curIndex; |
|
1132 break; |
|
1133 |
|
1134 case tNotEqual: // scanned '!' |
|
1135 if (ruleSrc->charAt(curIndex+1) == EQUALS) { |
|
1136 curIndex += 2; |
|
1137 } else { |
|
1138 type = none; |
|
1139 curIndex += 1; |
|
1140 } |
|
1141 break; |
|
1142 |
|
1143 case tKeyword: |
|
1144 while (type == tKeyword && ++curIndex < ruleSrc->length()) { |
|
1145 ch = ruleSrc->charAt(curIndex); |
|
1146 type = charType(ch); |
|
1147 } |
|
1148 type = tKeyword; |
|
1149 break; |
|
1150 |
|
1151 case tNumber: |
|
1152 while (type == tNumber && ++curIndex < ruleSrc->length()) { |
|
1153 ch = ruleSrc->charAt(curIndex); |
|
1154 type = charType(ch); |
|
1155 } |
|
1156 type = tNumber; |
|
1157 break; |
|
1158 |
|
1159 case tDot: |
|
1160 // We could be looking at either ".." in a range, or "..." at the end of a sample. |
|
1161 if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) { |
|
1162 ++curIndex; |
|
1163 break; // Single dot |
|
1164 } |
|
1165 if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) { |
|
1166 curIndex += 2; |
|
1167 type = tDot2; |
|
1168 break; // double dot |
|
1169 } |
|
1170 type = tEllipsis; |
|
1171 curIndex += 3; |
|
1172 break; // triple dot |
|
1173 |
|
1174 default: |
|
1175 status = U_UNEXPECTED_TOKEN; |
|
1176 ++curIndex; |
|
1177 break; |
|
1178 } |
|
1179 |
|
1180 U_ASSERT(ruleIndex <= ruleSrc->length()); |
|
1181 U_ASSERT(curIndex <= ruleSrc->length()); |
|
1182 token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex); |
|
1183 ruleIndex = curIndex; |
|
1184 } |
|
1185 |
|
1186 tokenType |
|
1187 PluralRuleParser::charType(UChar ch) { |
|
1188 if ((ch>=U_ZERO) && (ch<=U_NINE)) { |
|
1189 return tNumber; |
|
1190 } |
|
1191 if (ch>=LOW_A && ch<=LOW_Z) { |
|
1192 return tKeyword; |
|
1193 } |
|
1194 switch (ch) { |
|
1195 case COLON: |
|
1196 return tColon; |
|
1197 case SPACE: |
|
1198 return tSpace; |
|
1199 case SEMI_COLON: |
|
1200 return tSemiColon; |
|
1201 case DOT: |
|
1202 return tDot; |
|
1203 case COMMA: |
|
1204 return tComma; |
|
1205 case EXCLAMATION: |
|
1206 return tNotEqual; |
|
1207 case EQUALS: |
|
1208 return tEqual; |
|
1209 case PERCENT_SIGN: |
|
1210 return tMod; |
|
1211 case AT: |
|
1212 return tAt; |
|
1213 case ELLIPSIS: |
|
1214 return tEllipsis; |
|
1215 case TILDE: |
|
1216 return tTilde; |
|
1217 default : |
|
1218 return none; |
|
1219 } |
|
1220 } |
|
1221 |
|
1222 |
|
1223 // Set token type for reserved words in the Plural Rule syntax. |
|
1224 |
|
1225 tokenType |
|
1226 PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType) |
|
1227 { |
|
1228 if (keyType != tKeyword) { |
|
1229 return keyType; |
|
1230 } |
|
1231 |
|
1232 if (0 == token.compare(PK_VAR_N, 1)) { |
|
1233 keyType = tVariableN; |
|
1234 } else if (0 == token.compare(PK_VAR_I, 1)) { |
|
1235 keyType = tVariableI; |
|
1236 } else if (0 == token.compare(PK_VAR_F, 1)) { |
|
1237 keyType = tVariableF; |
|
1238 } else if (0 == token.compare(PK_VAR_T, 1)) { |
|
1239 keyType = tVariableT; |
|
1240 } else if (0 == token.compare(PK_VAR_V, 1)) { |
|
1241 keyType = tVariableV; |
|
1242 } else if (0 == token.compare(PK_IS, 2)) { |
|
1243 keyType = tIs; |
|
1244 } else if (0 == token.compare(PK_AND, 3)) { |
|
1245 keyType = tAnd; |
|
1246 } else if (0 == token.compare(PK_IN, 2)) { |
|
1247 keyType = tIn; |
|
1248 } else if (0 == token.compare(PK_WITHIN, 6)) { |
|
1249 keyType = tWithin; |
|
1250 } else if (0 == token.compare(PK_NOT, 3)) { |
|
1251 keyType = tNot; |
|
1252 } else if (0 == token.compare(PK_MOD, 3)) { |
|
1253 keyType = tMod; |
|
1254 } else if (0 == token.compare(PK_OR, 2)) { |
|
1255 keyType = tOr; |
|
1256 } else if (0 == token.compare(PK_DECIMAL, 7)) { |
|
1257 keyType = tDecimal; |
|
1258 } else if (0 == token.compare(PK_INTEGER, 7)) { |
|
1259 keyType = tInteger; |
|
1260 } |
|
1261 return keyType; |
|
1262 } |
|
1263 |
|
1264 |
|
1265 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) |
|
1266 : pos(0), fKeywordNames(status) { |
|
1267 if (U_FAILURE(status)) { |
|
1268 return; |
|
1269 } |
|
1270 fKeywordNames.setDeleter(uprv_deleteUObject); |
|
1271 UBool addKeywordOther=TRUE; |
|
1272 RuleChain *node=header; |
|
1273 while(node!=NULL) { |
|
1274 fKeywordNames.addElement(new UnicodeString(node->fKeyword), status); |
|
1275 if (U_FAILURE(status)) { |
|
1276 return; |
|
1277 } |
|
1278 if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) { |
|
1279 addKeywordOther= FALSE; |
|
1280 } |
|
1281 node=node->fNext; |
|
1282 } |
|
1283 |
|
1284 if (addKeywordOther) { |
|
1285 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status); |
|
1286 } |
|
1287 } |
|
1288 |
|
1289 const UnicodeString* |
|
1290 PluralKeywordEnumeration::snext(UErrorCode& status) { |
|
1291 if (U_SUCCESS(status) && pos < fKeywordNames.size()) { |
|
1292 return (const UnicodeString*)fKeywordNames.elementAt(pos++); |
|
1293 } |
|
1294 return NULL; |
|
1295 } |
|
1296 |
|
1297 void |
|
1298 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) { |
|
1299 pos=0; |
|
1300 } |
|
1301 |
|
1302 int32_t |
|
1303 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const { |
|
1304 return fKeywordNames.size(); |
|
1305 } |
|
1306 |
|
1307 PluralKeywordEnumeration::~PluralKeywordEnumeration() { |
|
1308 } |
|
1309 |
|
1310 |
|
1311 |
|
1312 FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) { |
|
1313 init(n, v, f); |
|
1314 // check values. TODO make into unit test. |
|
1315 // |
|
1316 // long visiblePower = (int) Math.pow(10, v); |
|
1317 // if (decimalDigits > visiblePower) { |
|
1318 // throw new IllegalArgumentException(); |
|
1319 // } |
|
1320 // double fraction = intValue + (decimalDigits / (double) visiblePower); |
|
1321 // if (fraction != source) { |
|
1322 // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source)); |
|
1323 // if (diff > 0.00000001d) { |
|
1324 // throw new IllegalArgumentException(); |
|
1325 // } |
|
1326 // } |
|
1327 } |
|
1328 |
|
1329 FixedDecimal::FixedDecimal(double n, int32_t v) { |
|
1330 // Ugly, but for samples we don't care. |
|
1331 init(n, v, getFractionalDigits(n, v)); |
|
1332 } |
|
1333 |
|
1334 FixedDecimal::FixedDecimal(double n) { |
|
1335 init(n); |
|
1336 } |
|
1337 |
|
1338 FixedDecimal::FixedDecimal() { |
|
1339 init(0, 0, 0); |
|
1340 } |
|
1341 |
|
1342 |
|
1343 // Create a FixedDecimal from a UnicodeString containing a number. |
|
1344 // Inefficient, but only used for samples, so simplicity trumps efficiency. |
|
1345 |
|
1346 FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) { |
|
1347 CharString cs; |
|
1348 cs.appendInvariantChars(num, status); |
|
1349 DigitList dl; |
|
1350 dl.set(cs.toStringPiece(), status); |
|
1351 if (U_FAILURE(status)) { |
|
1352 init(0, 0, 0); |
|
1353 return; |
|
1354 } |
|
1355 int32_t decimalPoint = num.indexOf(DOT); |
|
1356 double n = dl.getDouble(); |
|
1357 if (decimalPoint == -1) { |
|
1358 init(n, 0, 0); |
|
1359 } else { |
|
1360 int32_t v = num.length() - decimalPoint - 1; |
|
1361 init(n, v, getFractionalDigits(n, v)); |
|
1362 } |
|
1363 } |
|
1364 |
|
1365 |
|
1366 FixedDecimal::FixedDecimal(const FixedDecimal &other) { |
|
1367 source = other.source; |
|
1368 visibleDecimalDigitCount = other.visibleDecimalDigitCount; |
|
1369 decimalDigits = other.decimalDigits; |
|
1370 decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros; |
|
1371 intValue = other.intValue; |
|
1372 hasIntegerValue = other.hasIntegerValue; |
|
1373 isNegative = other.isNegative; |
|
1374 isNanOrInfinity = other.isNanOrInfinity; |
|
1375 } |
|
1376 |
|
1377 |
|
1378 void FixedDecimal::init(double n) { |
|
1379 int32_t numFractionDigits = decimals(n); |
|
1380 init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits)); |
|
1381 } |
|
1382 |
|
1383 |
|
1384 void FixedDecimal::init(double n, int32_t v, int64_t f) { |
|
1385 isNegative = n < 0.0; |
|
1386 source = fabs(n); |
|
1387 isNanOrInfinity = uprv_isNaN(source) || uprv_isPositiveInfinity(source); |
|
1388 if (isNanOrInfinity) { |
|
1389 v = 0; |
|
1390 f = 0; |
|
1391 intValue = 0; |
|
1392 hasIntegerValue = FALSE; |
|
1393 } else { |
|
1394 intValue = (int64_t)source; |
|
1395 hasIntegerValue = (source == intValue); |
|
1396 } |
|
1397 |
|
1398 visibleDecimalDigitCount = v; |
|
1399 decimalDigits = f; |
|
1400 if (f == 0) { |
|
1401 decimalDigitsWithoutTrailingZeros = 0; |
|
1402 } else { |
|
1403 int64_t fdwtz = f; |
|
1404 while ((fdwtz%10) == 0) { |
|
1405 fdwtz /= 10; |
|
1406 } |
|
1407 decimalDigitsWithoutTrailingZeros = fdwtz; |
|
1408 } |
|
1409 } |
|
1410 |
|
1411 |
|
1412 // Fast path only exact initialization. Return true if successful. |
|
1413 // Note: Do not multiply by 10 each time through loop, rounding cruft can build |
|
1414 // up that makes the check for an integer result fail. |
|
1415 // A single multiply of the original number works more reliably. |
|
1416 static int32_t p10[] = {1, 10, 100, 1000, 10000}; |
|
1417 UBool FixedDecimal::quickInit(double n) { |
|
1418 UBool success = FALSE; |
|
1419 n = fabs(n); |
|
1420 int32_t numFractionDigits; |
|
1421 for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) { |
|
1422 double scaledN = n * p10[numFractionDigits]; |
|
1423 if (scaledN == floor(scaledN)) { |
|
1424 success = TRUE; |
|
1425 break; |
|
1426 } |
|
1427 } |
|
1428 if (success) { |
|
1429 init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits)); |
|
1430 } |
|
1431 return success; |
|
1432 } |
|
1433 |
|
1434 |
|
1435 |
|
1436 int32_t FixedDecimal::decimals(double n) { |
|
1437 // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros. |
|
1438 // fastpath the common cases, integers or fractions with 3 or fewer digits |
|
1439 n = fabs(n); |
|
1440 for (int ndigits=0; ndigits<=3; ndigits++) { |
|
1441 double scaledN = n * p10[ndigits]; |
|
1442 if (scaledN == floor(scaledN)) { |
|
1443 return ndigits; |
|
1444 } |
|
1445 } |
|
1446 |
|
1447 // Slow path, convert with sprintf, parse converted output. |
|
1448 char buf[30] = {0}; |
|
1449 sprintf(buf, "%1.15e", n); |
|
1450 // formatted number looks like this: 1.234567890123457e-01 |
|
1451 int exponent = atoi(buf+18); |
|
1452 int numFractionDigits = 15; |
|
1453 for (int i=16; ; --i) { |
|
1454 if (buf[i] != '0') { |
|
1455 break; |
|
1456 } |
|
1457 --numFractionDigits; |
|
1458 } |
|
1459 numFractionDigits -= exponent; // Fraction part of fixed point representation. |
|
1460 return numFractionDigits; |
|
1461 } |
|
1462 |
|
1463 |
|
1464 // Get the fraction digits of a double, represented as an integer. |
|
1465 // v is the number of visible fraction digits in the displayed form of the number. |
|
1466 // Example: n = 1001.234, v = 6, result = 234000 |
|
1467 // TODO: need to think through how this is used in the plural rule context. |
|
1468 // This function can easily encounter integer overflow, |
|
1469 // and can easily return noise digits when the precision of a double is exceeded. |
|
1470 |
|
1471 int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) { |
|
1472 if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) { |
|
1473 return 0; |
|
1474 } |
|
1475 n = fabs(n); |
|
1476 double fract = n - floor(n); |
|
1477 switch (v) { |
|
1478 case 1: return (int64_t)(fract*10.0 + 0.5); |
|
1479 case 2: return (int64_t)(fract*100.0 + 0.5); |
|
1480 case 3: return (int64_t)(fract*1000.0 + 0.5); |
|
1481 default: |
|
1482 double scaled = floor(fract * pow(10.0, (double)v) + 0.5); |
|
1483 if (scaled > U_INT64_MAX) { |
|
1484 return U_INT64_MAX; |
|
1485 } else { |
|
1486 return (int64_t)scaled; |
|
1487 } |
|
1488 } |
|
1489 } |
|
1490 |
|
1491 |
|
1492 void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) { |
|
1493 int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount; |
|
1494 if (numTrailingFractionZeros > 0) { |
|
1495 for (int32_t i=0; i<numTrailingFractionZeros; i++) { |
|
1496 // Do not let the decimalDigits value overflow if there are many trailing zeros. |
|
1497 // Limit the value to 18 digits, the most that a 64 bit int can fully represent. |
|
1498 if (decimalDigits >= 100000000000000000LL) { |
|
1499 break; |
|
1500 } |
|
1501 decimalDigits *= 10; |
|
1502 } |
|
1503 visibleDecimalDigitCount += numTrailingFractionZeros; |
|
1504 } |
|
1505 } |
|
1506 |
|
1507 |
|
1508 double FixedDecimal::get(tokenType operand) const { |
|
1509 switch(operand) { |
|
1510 case tVariableN: return source; |
|
1511 case tVariableI: return (double)intValue; |
|
1512 case tVariableF: return (double)decimalDigits; |
|
1513 case tVariableT: return (double)decimalDigitsWithoutTrailingZeros; |
|
1514 case tVariableV: return visibleDecimalDigitCount; |
|
1515 default: |
|
1516 U_ASSERT(FALSE); // unexpected. |
|
1517 return source; |
|
1518 } |
|
1519 } |
|
1520 |
|
1521 int32_t FixedDecimal::getVisibleFractionDigitCount() const { |
|
1522 return visibleDecimalDigitCount; |
|
1523 } |
|
1524 |
|
1525 |
|
1526 |
|
1527 PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) { |
|
1528 fLocales = NULL; |
|
1529 fRes = NULL; |
|
1530 fOpenStatus = status; |
|
1531 if (U_FAILURE(status)) { |
|
1532 return; |
|
1533 } |
|
1534 fOpenStatus = U_ZERO_ERROR; |
|
1535 LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &fOpenStatus)); |
|
1536 fLocales = ures_getByKey(rb.getAlias(), "locales", NULL, &fOpenStatus); |
|
1537 } |
|
1538 |
|
1539 PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() { |
|
1540 ures_close(fLocales); |
|
1541 ures_close(fRes); |
|
1542 fLocales = NULL; |
|
1543 fRes = NULL; |
|
1544 } |
|
1545 |
|
1546 const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) { |
|
1547 if (U_FAILURE(status)) { |
|
1548 return NULL; |
|
1549 } |
|
1550 if (U_FAILURE(fOpenStatus)) { |
|
1551 status = fOpenStatus; |
|
1552 return NULL; |
|
1553 } |
|
1554 fRes = ures_getNextResource(fLocales, fRes, &status); |
|
1555 if (fRes == NULL || U_FAILURE(status)) { |
|
1556 if (status == U_INDEX_OUTOFBOUNDS_ERROR) { |
|
1557 status = U_ZERO_ERROR; |
|
1558 } |
|
1559 return NULL; |
|
1560 } |
|
1561 const char *result = ures_getKey(fRes); |
|
1562 if (resultLength != NULL) { |
|
1563 *resultLength = uprv_strlen(result); |
|
1564 } |
|
1565 return result; |
|
1566 } |
|
1567 |
|
1568 |
|
1569 void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) { |
|
1570 if (U_FAILURE(status)) { |
|
1571 return; |
|
1572 } |
|
1573 if (U_FAILURE(fOpenStatus)) { |
|
1574 status = fOpenStatus; |
|
1575 return; |
|
1576 } |
|
1577 ures_resetIterator(fLocales); |
|
1578 } |
|
1579 |
|
1580 int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const { |
|
1581 if (U_FAILURE(status)) { |
|
1582 return 0; |
|
1583 } |
|
1584 if (U_FAILURE(fOpenStatus)) { |
|
1585 status = fOpenStatus; |
|
1586 return 0; |
|
1587 } |
|
1588 return ures_getSize(fLocales); |
|
1589 } |
|
1590 |
|
1591 U_NAMESPACE_END |
|
1592 |
|
1593 |
|
1594 #endif /* #if !UCONFIG_NO_FORMATTING */ |
|
1595 |
|
1596 //eof |