| |
1 /* |
| |
2 ******************************************************************************* |
| |
3 * Copyright (C) 2007-2013, International Business Machines Corporation and |
| |
4 * others. All Rights Reserved. |
| |
5 ******************************************************************************* |
| |
6 * |
| |
7 * File plurrule.cpp |
| |
8 */ |
| |
9 |
| |
10 #include <math.h> |
| |
11 #include <stdio.h> |
| |
12 |
| |
13 #include "unicode/utypes.h" |
| |
14 #include "unicode/localpointer.h" |
| |
15 #include "unicode/plurrule.h" |
| |
16 #include "unicode/upluralrules.h" |
| |
17 #include "unicode/ures.h" |
| |
18 #include "charstr.h" |
| |
19 #include "cmemory.h" |
| |
20 #include "cstring.h" |
| |
21 #include "digitlst.h" |
| |
22 #include "hash.h" |
| |
23 #include "locutil.h" |
| |
24 #include "mutex.h" |
| |
25 #include "patternprops.h" |
| |
26 #include "plurrule_impl.h" |
| |
27 #include "putilimp.h" |
| |
28 #include "ucln_in.h" |
| |
29 #include "ustrfmt.h" |
| |
30 #include "uassert.h" |
| |
31 #include "uvectr32.h" |
| |
32 |
| |
33 #if !UCONFIG_NO_FORMATTING |
| |
34 |
| |
35 U_NAMESPACE_BEGIN |
| |
36 |
| |
37 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0]) |
| |
38 |
| |
39 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0}; |
| |
40 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0}; |
| |
41 static const UChar PK_IN[]={LOW_I,LOW_N,0}; |
| |
42 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0}; |
| |
43 static const UChar PK_IS[]={LOW_I,LOW_S,0}; |
| |
44 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0}; |
| |
45 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0}; |
| |
46 static const UChar PK_OR[]={LOW_O,LOW_R,0}; |
| |
47 static const UChar PK_VAR_N[]={LOW_N,0}; |
| |
48 static const UChar PK_VAR_I[]={LOW_I,0}; |
| |
49 static const UChar PK_VAR_F[]={LOW_F,0}; |
| |
50 static const UChar PK_VAR_T[]={LOW_T,0}; |
| |
51 static const UChar PK_VAR_V[]={LOW_V,0}; |
| |
52 static const UChar PK_VAR_J[]={LOW_J,0}; |
| |
53 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0}; |
| |
54 static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0}; |
| |
55 static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0}; |
| |
56 |
| |
57 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules) |
| |
58 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) |
| |
59 |
| |
60 PluralRules::PluralRules(UErrorCode& /*status*/) |
| |
61 : UObject(), |
| |
62 mRules(NULL) |
| |
63 { |
| |
64 } |
| |
65 |
| |
66 PluralRules::PluralRules(const PluralRules& other) |
| |
67 : UObject(other), |
| |
68 mRules(NULL) |
| |
69 { |
| |
70 *this=other; |
| |
71 } |
| |
72 |
| |
73 PluralRules::~PluralRules() { |
| |
74 delete mRules; |
| |
75 } |
| |
76 |
| |
77 PluralRules* |
| |
78 PluralRules::clone() const { |
| |
79 return new PluralRules(*this); |
| |
80 } |
| |
81 |
| |
82 PluralRules& |
| |
83 PluralRules::operator=(const PluralRules& other) { |
| |
84 if (this != &other) { |
| |
85 delete mRules; |
| |
86 if (other.mRules==NULL) { |
| |
87 mRules = NULL; |
| |
88 } |
| |
89 else { |
| |
90 mRules = new RuleChain(*other.mRules); |
| |
91 } |
| |
92 } |
| |
93 |
| |
94 return *this; |
| |
95 } |
| |
96 |
| |
97 StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) { |
| |
98 StringEnumeration *result = new PluralAvailableLocalesEnumeration(status); |
| |
99 if (result == NULL && U_SUCCESS(status)) { |
| |
100 status = U_MEMORY_ALLOCATION_ERROR; |
| |
101 } |
| |
102 if (U_FAILURE(status)) { |
| |
103 delete result; |
| |
104 result = NULL; |
| |
105 } |
| |
106 return result; |
| |
107 } |
| |
108 |
| |
109 |
| |
110 PluralRules* U_EXPORT2 |
| |
111 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { |
| |
112 if (U_FAILURE(status)) { |
| |
113 return NULL; |
| |
114 } |
| |
115 |
| |
116 PluralRuleParser parser; |
| |
117 PluralRules *newRules = new PluralRules(status); |
| |
118 if (U_SUCCESS(status) && newRules == NULL) { |
| |
119 status = U_MEMORY_ALLOCATION_ERROR; |
| |
120 } |
| |
121 parser.parse(description, newRules, status); |
| |
122 if (U_FAILURE(status)) { |
| |
123 delete newRules; |
| |
124 newRules = NULL; |
| |
125 } |
| |
126 return newRules; |
| |
127 } |
| |
128 |
| |
129 |
| |
130 PluralRules* U_EXPORT2 |
| |
131 PluralRules::createDefaultRules(UErrorCode& status) { |
| |
132 return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status); |
| |
133 } |
| |
134 |
| |
135 PluralRules* U_EXPORT2 |
| |
136 PluralRules::forLocale(const Locale& locale, UErrorCode& status) { |
| |
137 return forLocale(locale, UPLURAL_TYPE_CARDINAL, status); |
| |
138 } |
| |
139 |
| |
140 PluralRules* U_EXPORT2 |
| |
141 PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) { |
| |
142 if (U_FAILURE(status)) { |
| |
143 return NULL; |
| |
144 } |
| |
145 if (type >= UPLURAL_TYPE_COUNT) { |
| |
146 status = U_ILLEGAL_ARGUMENT_ERROR; |
| |
147 return NULL; |
| |
148 } |
| |
149 PluralRules *newObj = new PluralRules(status); |
| |
150 if (newObj==NULL || U_FAILURE(status)) { |
| |
151 delete newObj; |
| |
152 return NULL; |
| |
153 } |
| |
154 UnicodeString locRule = newObj->getRuleFromResource(locale, type, status); |
| |
155 // TODO: which errors, if any, should be returned? |
| |
156 if (locRule.length() == 0) { |
| |
157 // Locales with no specific rules (all numbers have the "other" category |
| |
158 // will return a U_MISSING_RESOURCE_ERROR at this point. This is not |
| |
159 // an error. |
| |
160 locRule = UnicodeString(PLURAL_DEFAULT_RULE); |
| |
161 status = U_ZERO_ERROR; |
| |
162 } |
| |
163 PluralRuleParser parser; |
| |
164 parser.parse(locRule, newObj, status); |
| |
165 // TODO: should rule parse errors be returned, or |
| |
166 // should we silently use default rules? |
| |
167 // Original impl used default rules. |
| |
168 // Ask the question to ICU Core. |
| |
169 |
| |
170 return newObj; |
| |
171 } |
| |
172 |
| |
173 UnicodeString |
| |
174 PluralRules::select(int32_t number) const { |
| |
175 return select(FixedDecimal(number)); |
| |
176 } |
| |
177 |
| |
178 UnicodeString |
| |
179 PluralRules::select(double number) const { |
| |
180 return select(FixedDecimal(number)); |
| |
181 } |
| |
182 |
| |
183 UnicodeString |
| |
184 PluralRules::select(const FixedDecimal &number) const { |
| |
185 if (mRules == NULL) { |
| |
186 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1); |
| |
187 } |
| |
188 else { |
| |
189 return mRules->select(number); |
| |
190 } |
| |
191 } |
| |
192 |
| |
193 StringEnumeration* |
| |
194 PluralRules::getKeywords(UErrorCode& status) const { |
| |
195 if (U_FAILURE(status)) return NULL; |
| |
196 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status); |
| |
197 if (U_FAILURE(status)) { |
| |
198 delete nameEnumerator; |
| |
199 return NULL; |
| |
200 } |
| |
201 |
| |
202 return nameEnumerator; |
| |
203 } |
| |
204 |
| |
205 double |
| |
206 PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) { |
| |
207 // Not Implemented. |
| |
208 return UPLRULES_NO_UNIQUE_VALUE; |
| |
209 } |
| |
210 |
| |
211 int32_t |
| |
212 PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */, |
| |
213 int32_t /* destCapacity */, UErrorCode& error) { |
| |
214 error = U_UNSUPPORTED_ERROR; |
| |
215 return 0; |
| |
216 } |
| |
217 |
| |
218 |
| |
// Returns the smallest power of ten (1, 10, 100, ...) by which d must be multiplied
// so that the product has no fractional part.
//
// NaN has no such scale: "d != floor(d)" stays true forever for NaN, so it is rejected
// up front and 1.0 is returned instead of looping endlessly. Infinity compares equal
// to its own floor and therefore also yields 1.0.
static double scaleForInt(double d) {
    double scale = 1.0;
    if (d != d) {       // true only for NaN
        return scale;
    }
    while (d != floor(d)) {
        d = d * 10.0;
        scale = scale * 10.0;
    }
    return scale;
}
| |
227 |
| |
228 static int32_t |
| |
229 getSamplesFromString(const UnicodeString &samples, double *dest, |
| |
230 int32_t destCapacity, UErrorCode& status) { |
| |
231 int32_t sampleCount = 0; |
| |
232 int32_t sampleStartIdx = 0; |
| |
233 int32_t sampleEndIdx = 0; |
| |
234 |
| |
235 //std::string ss; // TODO: debugging. |
| |
236 // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n"; |
| |
237 for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) { |
| |
238 sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx); |
| |
239 if (sampleEndIdx == -1) { |
| |
240 sampleEndIdx = samples.length(); |
| |
241 } |
| |
242 const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx); |
| |
243 // ss.erase(); |
| |
244 // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n"; |
| |
245 int32_t tildeIndex = sampleRange.indexOf(TILDE); |
| |
246 if (tildeIndex < 0) { |
| |
247 FixedDecimal fixed(sampleRange, status); |
| |
248 double sampleValue = fixed.source; |
| |
249 if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) { |
| |
250 dest[sampleCount++] = sampleValue; |
| |
251 } |
| |
252 } else { |
| |
253 |
| |
254 FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status); |
| |
255 FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status); |
| |
256 double rangeLo = fixedLo.source; |
| |
257 double rangeHi = fixedHi.source; |
| |
258 if (U_FAILURE(status)) { |
| |
259 break; |
| |
260 } |
| |
261 if (rangeHi < rangeLo) { |
| |
262 status = U_INVALID_FORMAT_ERROR; |
| |
263 break; |
| |
264 } |
| |
265 |
| |
266 // For ranges of samples with fraction decimal digits, scale the number up so that we |
| |
267 // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths. |
| |
268 |
| |
269 double scale = scaleForInt(rangeLo); |
| |
270 double t = scaleForInt(rangeHi); |
| |
271 if (t > scale) { |
| |
272 scale = t; |
| |
273 } |
| |
274 rangeLo *= scale; |
| |
275 rangeHi *= scale; |
| |
276 for (double n=rangeLo; n<=rangeHi; n+=1) { |
| |
277 // Hack Alert: don't return any decimal samples with integer values that |
| |
278 // originated from a format with trailing decimals. |
| |
279 // This API is returning doubles, which can't distinguish having displayed |
| |
280 // zeros to the right of the decimal. |
| |
281 // This results in test failures with values mapping back to a different keyword. |
| |
282 double sampleValue = n/scale; |
| |
283 if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) { |
| |
284 dest[sampleCount++] = sampleValue; |
| |
285 } |
| |
286 if (sampleCount >= destCapacity) { |
| |
287 break; |
| |
288 } |
| |
289 } |
| |
290 } |
| |
291 sampleStartIdx = sampleEndIdx + 1; |
| |
292 } |
| |
293 return sampleCount; |
| |
294 } |
| |
295 |
| |
296 |
| |
297 int32_t |
| |
298 PluralRules::getSamples(const UnicodeString &keyword, double *dest, |
| |
299 int32_t destCapacity, UErrorCode& status) { |
| |
300 RuleChain *rc = rulesForKeyword(keyword); |
| |
301 if (rc == NULL || destCapacity == 0 || U_FAILURE(status)) { |
| |
302 return 0; |
| |
303 } |
| |
304 int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status); |
| |
305 if (numSamples == 0) { |
| |
306 numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status); |
| |
307 } |
| |
308 return numSamples; |
| |
309 } |
| |
310 |
| |
311 |
| |
312 RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const { |
| |
313 RuleChain *rc; |
| |
314 for (rc = mRules; rc != NULL; rc = rc->fNext) { |
| |
315 if (rc->fKeyword == keyword) { |
| |
316 break; |
| |
317 } |
| |
318 } |
| |
319 return rc; |
| |
320 } |
| |
321 |
| |
322 |
| |
323 UBool |
| |
324 PluralRules::isKeyword(const UnicodeString& keyword) const { |
| |
325 if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { |
| |
326 return true; |
| |
327 } |
| |
328 return rulesForKeyword(keyword) != NULL; |
| |
329 } |
| |
330 |
| |
331 UnicodeString |
| |
332 PluralRules::getKeywordOther() const { |
| |
333 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); |
| |
334 } |
| |
335 |
| |
336 UBool |
| |
337 PluralRules::operator==(const PluralRules& other) const { |
| |
338 const UnicodeString *ptrKeyword; |
| |
339 UErrorCode status= U_ZERO_ERROR; |
| |
340 |
| |
341 if ( this == &other ) { |
| |
342 return TRUE; |
| |
343 } |
| |
344 LocalPointer<StringEnumeration> myKeywordList(getKeywords(status)); |
| |
345 LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status)); |
| |
346 if (U_FAILURE(status)) { |
| |
347 return FALSE; |
| |
348 } |
| |
349 |
| |
350 if (myKeywordList->count(status)!=otherKeywordList->count(status)) { |
| |
351 return FALSE; |
| |
352 } |
| |
353 myKeywordList->reset(status); |
| |
354 while ((ptrKeyword=myKeywordList->snext(status))!=NULL) { |
| |
355 if (!other.isKeyword(*ptrKeyword)) { |
| |
356 return FALSE; |
| |
357 } |
| |
358 } |
| |
359 otherKeywordList->reset(status); |
| |
360 while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) { |
| |
361 if (!this->isKeyword(*ptrKeyword)) { |
| |
362 return FALSE; |
| |
363 } |
| |
364 } |
| |
365 if (U_FAILURE(status)) { |
| |
366 return FALSE; |
| |
367 } |
| |
368 |
| |
369 return TRUE; |
| |
370 } |
| |
371 |
| |
372 |
| |
373 void |
| |
374 PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status) |
| |
375 { |
| |
376 if (U_FAILURE(status)) { |
| |
377 return; |
| |
378 } |
| |
379 U_ASSERT(ruleIndex == 0); // Parsers are good for a single use only! |
| |
380 ruleSrc = &ruleData; |
| |
381 |
| |
382 while (ruleIndex< ruleSrc->length()) { |
| |
383 getNextToken(status); |
| |
384 if (U_FAILURE(status)) { |
| |
385 return; |
| |
386 } |
| |
387 checkSyntax(status); |
| |
388 if (U_FAILURE(status)) { |
| |
389 return; |
| |
390 } |
| |
391 switch (type) { |
| |
392 case tAnd: |
| |
393 U_ASSERT(curAndConstraint != NULL); |
| |
394 curAndConstraint = curAndConstraint->add(); |
| |
395 break; |
| |
396 case tOr: |
| |
397 { |
| |
398 U_ASSERT(currentChain != NULL); |
| |
399 OrConstraint *orNode=currentChain->ruleHeader; |
| |
400 while (orNode->next != NULL) { |
| |
401 orNode = orNode->next; |
| |
402 } |
| |
403 orNode->next= new OrConstraint(); |
| |
404 orNode=orNode->next; |
| |
405 orNode->next=NULL; |
| |
406 curAndConstraint = orNode->add(); |
| |
407 } |
| |
408 break; |
| |
409 case tIs: |
| |
410 U_ASSERT(curAndConstraint != NULL); |
| |
411 U_ASSERT(curAndConstraint->value == -1); |
| |
412 U_ASSERT(curAndConstraint->rangeList == NULL); |
| |
413 break; |
| |
414 case tNot: |
| |
415 U_ASSERT(curAndConstraint != NULL); |
| |
416 curAndConstraint->negated=TRUE; |
| |
417 break; |
| |
418 |
| |
419 case tNotEqual: |
| |
420 curAndConstraint->negated=TRUE; |
| |
421 case tIn: |
| |
422 case tWithin: |
| |
423 case tEqual: |
| |
424 U_ASSERT(curAndConstraint != NULL); |
| |
425 curAndConstraint->rangeList = new UVector32(status); |
| |
426 curAndConstraint->rangeList->addElement(-1, status); // range Low |
| |
427 curAndConstraint->rangeList->addElement(-1, status); // range Hi |
| |
428 rangeLowIdx = 0; |
| |
429 rangeHiIdx = 1; |
| |
430 curAndConstraint->value=PLURAL_RANGE_HIGH; |
| |
431 curAndConstraint->integerOnly = (type != tWithin); |
| |
432 break; |
| |
433 case tNumber: |
| |
434 U_ASSERT(curAndConstraint != NULL); |
| |
435 if ( (curAndConstraint->op==AndConstraint::MOD)&& |
| |
436 (curAndConstraint->opNum == -1 ) ) { |
| |
437 curAndConstraint->opNum=getNumberValue(token); |
| |
438 } |
| |
439 else { |
| |
440 if (curAndConstraint->rangeList == NULL) { |
| |
441 // this is for an 'is' rule |
| |
442 curAndConstraint->value = getNumberValue(token); |
| |
443 } else { |
| |
444 // this is for an 'in' or 'within' rule |
| |
445 if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) { |
| |
446 curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx); |
| |
447 curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx); |
| |
448 } |
| |
449 else { |
| |
450 curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx); |
| |
451 if (curAndConstraint->rangeList->elementAti(rangeLowIdx) > |
| |
452 curAndConstraint->rangeList->elementAti(rangeHiIdx)) { |
| |
453 // Range Lower bound > Range Upper bound. |
| |
454 // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently |
| |
455 // used for all plural rule parse errors. |
| |
456 status = U_UNEXPECTED_TOKEN; |
| |
457 break; |
| |
458 } |
| |
459 } |
| |
460 } |
| |
461 } |
| |
462 break; |
| |
463 case tComma: |
| |
464 // TODO: rule syntax checking is inadequate, can happen with badly formed rules. |
| |
465 // Catch cases like "n mod 10, is 1" here instead. |
| |
466 if (curAndConstraint == NULL || curAndConstraint->rangeList == NULL) { |
| |
467 status = U_UNEXPECTED_TOKEN; |
| |
468 break; |
| |
469 } |
| |
470 U_ASSERT(curAndConstraint->rangeList->size() >= 2); |
| |
471 rangeLowIdx = curAndConstraint->rangeList->size(); |
| |
472 curAndConstraint->rangeList->addElement(-1, status); // range Low |
| |
473 rangeHiIdx = curAndConstraint->rangeList->size(); |
| |
474 curAndConstraint->rangeList->addElement(-1, status); // range Hi |
| |
475 break; |
| |
476 case tMod: |
| |
477 U_ASSERT(curAndConstraint != NULL); |
| |
478 curAndConstraint->op=AndConstraint::MOD; |
| |
479 break; |
| |
480 case tVariableN: |
| |
481 case tVariableI: |
| |
482 case tVariableF: |
| |
483 case tVariableT: |
| |
484 case tVariableV: |
| |
485 U_ASSERT(curAndConstraint != NULL); |
| |
486 curAndConstraint->digitsType = type; |
| |
487 break; |
| |
488 case tKeyword: |
| |
489 { |
| |
490 RuleChain *newChain = new RuleChain; |
| |
491 if (newChain == NULL) { |
| |
492 status = U_MEMORY_ALLOCATION_ERROR; |
| |
493 break; |
| |
494 } |
| |
495 newChain->fKeyword = token; |
| |
496 if (prules->mRules == NULL) { |
| |
497 prules->mRules = newChain; |
| |
498 } else { |
| |
499 // The new rule chain goes at the end of the linked list of rule chains, |
| |
500 // unless there is an "other" keyword & chain. "other" must remain last. |
| |
501 RuleChain *insertAfter = prules->mRules; |
| |
502 while (insertAfter->fNext!=NULL && |
| |
503 insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){ |
| |
504 insertAfter=insertAfter->fNext; |
| |
505 } |
| |
506 newChain->fNext = insertAfter->fNext; |
| |
507 insertAfter->fNext = newChain; |
| |
508 } |
| |
509 OrConstraint *orNode = new OrConstraint(); |
| |
510 newChain->ruleHeader = orNode; |
| |
511 curAndConstraint = orNode->add(); |
| |
512 currentChain = newChain; |
| |
513 } |
| |
514 break; |
| |
515 |
| |
516 case tInteger: |
| |
517 for (;;) { |
| |
518 getNextToken(status); |
| |
519 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) { |
| |
520 break; |
| |
521 } |
| |
522 if (type == tEllipsis) { |
| |
523 currentChain->fIntegerSamplesUnbounded = TRUE; |
| |
524 continue; |
| |
525 } |
| |
526 currentChain->fIntegerSamples.append(token); |
| |
527 } |
| |
528 break; |
| |
529 |
| |
530 case tDecimal: |
| |
531 for (;;) { |
| |
532 getNextToken(status); |
| |
533 if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) { |
| |
534 break; |
| |
535 } |
| |
536 if (type == tEllipsis) { |
| |
537 currentChain->fDecimalSamplesUnbounded = TRUE; |
| |
538 continue; |
| |
539 } |
| |
540 currentChain->fDecimalSamples.append(token); |
| |
541 } |
| |
542 break; |
| |
543 |
| |
544 default: |
| |
545 break; |
| |
546 } |
| |
547 prevType=type; |
| |
548 if (U_FAILURE(status)) { |
| |
549 break; |
| |
550 } |
| |
551 } |
| |
552 } |
| |
553 |
| |
554 UnicodeString |
| |
555 PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) { |
| |
556 UnicodeString emptyStr; |
| |
557 |
| |
558 if (U_FAILURE(errCode)) { |
| |
559 return emptyStr; |
| |
560 } |
| |
561 LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode)); |
| |
562 if(U_FAILURE(errCode)) { |
| |
563 return emptyStr; |
| |
564 } |
| |
565 const char *typeKey; |
| |
566 switch (type) { |
| |
567 case UPLURAL_TYPE_CARDINAL: |
| |
568 typeKey = "locales"; |
| |
569 break; |
| |
570 case UPLURAL_TYPE_ORDINAL: |
| |
571 typeKey = "locales_ordinals"; |
| |
572 break; |
| |
573 default: |
| |
574 // Must not occur: The caller should have checked for valid types. |
| |
575 errCode = U_ILLEGAL_ARGUMENT_ERROR; |
| |
576 return emptyStr; |
| |
577 } |
| |
578 LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode)); |
| |
579 if(U_FAILURE(errCode)) { |
| |
580 return emptyStr; |
| |
581 } |
| |
582 int32_t resLen=0; |
| |
583 const char *curLocaleName=locale.getName(); |
| |
584 const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode); |
| |
585 |
| |
586 if (s == NULL) { |
| |
587 // Check parent locales. |
| |
588 UErrorCode status = U_ZERO_ERROR; |
| |
589 char parentLocaleName[ULOC_FULLNAME_CAPACITY]; |
| |
590 const char *curLocaleName=locale.getName(); |
| |
591 uprv_strcpy(parentLocaleName, curLocaleName); |
| |
592 |
| |
593 while (uloc_getParent(parentLocaleName, parentLocaleName, |
| |
594 ULOC_FULLNAME_CAPACITY, &status) > 0) { |
| |
595 resLen=0; |
| |
596 s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status); |
| |
597 if (s != NULL) { |
| |
598 errCode = U_ZERO_ERROR; |
| |
599 break; |
| |
600 } |
| |
601 status = U_ZERO_ERROR; |
| |
602 } |
| |
603 } |
| |
604 if (s==NULL) { |
| |
605 return emptyStr; |
| |
606 } |
| |
607 |
| |
608 char setKey[256]; |
| |
609 u_UCharsToChars(s, setKey, resLen + 1); |
| |
610 // printf("\n PluralRule: %s\n", setKey); |
| |
611 |
| |
612 LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode)); |
| |
613 if(U_FAILURE(errCode)) { |
| |
614 return emptyStr; |
| |
615 } |
| |
616 LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode)); |
| |
617 if (U_FAILURE(errCode)) { |
| |
618 return emptyStr; |
| |
619 } |
| |
620 |
| |
621 int32_t numberKeys = ures_getSize(setRes.getAlias()); |
| |
622 UnicodeString result; |
| |
623 const char *key=NULL; |
| |
624 for(int32_t i=0; i<numberKeys; ++i) { // Keys are zero, one, few, ... |
| |
625 UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode); |
| |
626 UnicodeString uKey(key, -1, US_INV); |
| |
627 result.append(uKey); |
| |
628 result.append(COLON); |
| |
629 result.append(rules); |
| |
630 result.append(SEMI_COLON); |
| |
631 } |
| |
632 return result; |
| |
633 } |
| |
634 |
| |
635 |
| |
636 UnicodeString |
| |
637 PluralRules::getRules() const { |
| |
638 UnicodeString rules; |
| |
639 if (mRules != NULL) { |
| |
640 mRules->dumpRules(rules); |
| |
641 } |
| |
642 return rules; |
| |
643 } |
| |
644 |
| |
645 |
| |
646 AndConstraint::AndConstraint() { |
| |
647 op = AndConstraint::NONE; |
| |
648 opNum=-1; |
| |
649 value = -1; |
| |
650 rangeList = NULL; |
| |
651 negated = FALSE; |
| |
652 integerOnly = FALSE; |
| |
653 digitsType = none; |
| |
654 next=NULL; |
| |
655 } |
| |
656 |
| |
657 |
| |
658 AndConstraint::AndConstraint(const AndConstraint& other) { |
| |
659 this->op = other.op; |
| |
660 this->opNum=other.opNum; |
| |
661 this->value=other.value; |
| |
662 this->rangeList=NULL; |
| |
663 if (other.rangeList != NULL) { |
| |
664 UErrorCode status = U_ZERO_ERROR; |
| |
665 this->rangeList = new UVector32(status); |
| |
666 this->rangeList->assign(*other.rangeList, status); |
| |
667 } |
| |
668 this->integerOnly=other.integerOnly; |
| |
669 this->negated=other.negated; |
| |
670 this->digitsType = other.digitsType; |
| |
671 if (other.next==NULL) { |
| |
672 this->next=NULL; |
| |
673 } |
| |
674 else { |
| |
675 this->next = new AndConstraint(*other.next); |
| |
676 } |
| |
677 } |
| |
678 |
| |
679 AndConstraint::~AndConstraint() { |
| |
680 delete rangeList; |
| |
681 if (next!=NULL) { |
| |
682 delete next; |
| |
683 } |
| |
684 } |
| |
685 |
| |
686 |
| |
687 UBool |
| |
688 AndConstraint::isFulfilled(const FixedDecimal &number) { |
| |
689 UBool result = TRUE; |
| |
690 if (digitsType == none) { |
| |
691 // An empty AndConstraint, created by a rule with a keyword but no following expression. |
| |
692 return TRUE; |
| |
693 } |
| |
694 double n = number.get(digitsType); // pulls n | i | v | f value for the number. |
| |
695 // Will always be positive. |
| |
696 // May be non-integer (n option only) |
| |
697 do { |
| |
698 if (integerOnly && n != uprv_floor(n)) { |
| |
699 result = FALSE; |
| |
700 break; |
| |
701 } |
| |
702 |
| |
703 if (op == MOD) { |
| |
704 n = fmod(n, opNum); |
| |
705 } |
| |
706 if (rangeList == NULL) { |
| |
707 result = value == -1 || // empty rule |
| |
708 n == value; // 'is' rule |
| |
709 break; |
| |
710 } |
| |
711 result = FALSE; // 'in' or 'within' rule |
| |
712 for (int32_t r=0; r<rangeList->size(); r+=2) { |
| |
713 if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) { |
| |
714 result = TRUE; |
| |
715 break; |
| |
716 } |
| |
717 } |
| |
718 } while (FALSE); |
| |
719 |
| |
720 if (negated) { |
| |
721 result = !result; |
| |
722 } |
| |
723 return result; |
| |
724 } |
| |
725 |
| |
726 |
| |
727 AndConstraint* |
| |
728 AndConstraint::add() |
| |
729 { |
| |
730 this->next = new AndConstraint(); |
| |
731 return this->next; |
| |
732 } |
| |
733 |
| |
734 OrConstraint::OrConstraint() { |
| |
735 childNode=NULL; |
| |
736 next=NULL; |
| |
737 } |
| |
738 |
| |
739 OrConstraint::OrConstraint(const OrConstraint& other) { |
| |
740 if ( other.childNode == NULL ) { |
| |
741 this->childNode = NULL; |
| |
742 } |
| |
743 else { |
| |
744 this->childNode = new AndConstraint(*(other.childNode)); |
| |
745 } |
| |
746 if (other.next == NULL ) { |
| |
747 this->next = NULL; |
| |
748 } |
| |
749 else { |
| |
750 this->next = new OrConstraint(*(other.next)); |
| |
751 } |
| |
752 } |
| |
753 |
| |
754 OrConstraint::~OrConstraint() { |
| |
755 if (childNode!=NULL) { |
| |
756 delete childNode; |
| |
757 } |
| |
758 if (next!=NULL) { |
| |
759 delete next; |
| |
760 } |
| |
761 } |
| |
762 |
| |
763 AndConstraint* |
| |
764 OrConstraint::add() |
| |
765 { |
| |
766 OrConstraint *curOrConstraint=this; |
| |
767 { |
| |
768 while (curOrConstraint->next!=NULL) { |
| |
769 curOrConstraint = curOrConstraint->next; |
| |
770 } |
| |
771 U_ASSERT(curOrConstraint->childNode == NULL); |
| |
772 curOrConstraint->childNode = new AndConstraint(); |
| |
773 } |
| |
774 return curOrConstraint->childNode; |
| |
775 } |
| |
776 |
| |
777 UBool |
| |
778 OrConstraint::isFulfilled(const FixedDecimal &number) { |
| |
779 OrConstraint* orRule=this; |
| |
780 UBool result=FALSE; |
| |
781 |
| |
782 while (orRule!=NULL && !result) { |
| |
783 result=TRUE; |
| |
784 AndConstraint* andRule = orRule->childNode; |
| |
785 while (andRule!=NULL && result) { |
| |
786 result = andRule->isFulfilled(number); |
| |
787 andRule=andRule->next; |
| |
788 } |
| |
789 orRule = orRule->next; |
| |
790 } |
| |
791 |
| |
792 return result; |
| |
793 } |
| |
794 |
| |
795 |
| |
796 RuleChain::RuleChain(): fKeyword(), fNext(NULL), ruleHeader(NULL), fDecimalSamples(), fIntegerSamples(), |
| |
797 fDecimalSamplesUnbounded(FALSE), fIntegerSamplesUnbounded(FALSE) { |
| |
798 } |
| |
799 |
| |
800 RuleChain::RuleChain(const RuleChain& other) : |
| |
801 fKeyword(other.fKeyword), fNext(NULL), ruleHeader(NULL), fDecimalSamples(other.fDecimalSamples), |
| |
802 fIntegerSamples(other.fIntegerSamples), fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded), |
| |
803 fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded) { |
| |
804 if (other.ruleHeader != NULL) { |
| |
805 this->ruleHeader = new OrConstraint(*(other.ruleHeader)); |
| |
806 } |
| |
807 if (other.fNext != NULL ) { |
| |
808 this->fNext = new RuleChain(*other.fNext); |
| |
809 } |
| |
810 } |
| |
811 |
| |
812 RuleChain::~RuleChain() { |
| |
813 delete fNext; |
| |
814 delete ruleHeader; |
| |
815 } |
| |
816 |
| |
817 |
| |
818 UnicodeString |
| |
819 RuleChain::select(const FixedDecimal &number) const { |
| |
820 if (!number.isNanOrInfinity) { |
| |
821 for (const RuleChain *rules = this; rules != NULL; rules = rules->fNext) { |
| |
822 if (rules->ruleHeader->isFulfilled(number)) { |
| |
823 return rules->fKeyword; |
| |
824 } |
| |
825 } |
| |
826 } |
| |
827 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); |
| |
828 } |
| |
829 |
| |
830 static UnicodeString tokenString(tokenType tok) { |
| |
831 UnicodeString s; |
| |
832 switch (tok) { |
| |
833 case tVariableN: |
| |
834 s.append(LOW_N); break; |
| |
835 case tVariableI: |
| |
836 s.append(LOW_I); break; |
| |
837 case tVariableF: |
| |
838 s.append(LOW_F); break; |
| |
839 case tVariableV: |
| |
840 s.append(LOW_V); break; |
| |
841 case tVariableT: |
| |
842 s.append(LOW_T); break; |
| |
843 default: |
| |
844 s.append(TILDE); |
| |
845 } |
| |
846 return s; |
| |
847 } |
| |
848 |
| |
849 void |
| |
850 RuleChain::dumpRules(UnicodeString& result) { |
| |
851 UChar digitString[16]; |
| |
852 |
| |
853 if ( ruleHeader != NULL ) { |
| |
854 result += fKeyword; |
| |
855 result += COLON; |
| |
856 result += SPACE; |
| |
857 OrConstraint* orRule=ruleHeader; |
| |
858 while ( orRule != NULL ) { |
| |
859 AndConstraint* andRule=orRule->childNode; |
| |
860 while ( andRule != NULL ) { |
| |
861 if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) && (andRule->value == -1)) { |
| |
862 // Empty Rules. |
| |
863 } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) ) { |
| |
864 result += tokenString(andRule->digitsType); |
| |
865 result += UNICODE_STRING_SIMPLE(" is "); |
| |
866 if (andRule->negated) { |
| |
867 result += UNICODE_STRING_SIMPLE("not "); |
| |
868 } |
| |
869 uprv_itou(digitString,16, andRule->value,10,0); |
| |
870 result += UnicodeString(digitString); |
| |
871 } |
| |
872 else { |
| |
873 result += tokenString(andRule->digitsType); |
| |
874 result += SPACE; |
| |
875 if (andRule->op==AndConstraint::MOD) { |
| |
876 result += UNICODE_STRING_SIMPLE("mod "); |
| |
877 uprv_itou(digitString,16, andRule->opNum,10,0); |
| |
878 result += UnicodeString(digitString); |
| |
879 } |
| |
880 if (andRule->rangeList==NULL) { |
| |
881 if (andRule->negated) { |
| |
882 result += UNICODE_STRING_SIMPLE(" is not "); |
| |
883 uprv_itou(digitString,16, andRule->value,10,0); |
| |
884 result += UnicodeString(digitString); |
| |
885 } |
| |
886 else { |
| |
887 result += UNICODE_STRING_SIMPLE(" is "); |
| |
888 uprv_itou(digitString,16, andRule->value,10,0); |
| |
889 result += UnicodeString(digitString); |
| |
890 } |
| |
891 } |
| |
892 else { |
| |
893 if (andRule->negated) { |
| |
894 if ( andRule->integerOnly ) { |
| |
895 result += UNICODE_STRING_SIMPLE(" not in "); |
| |
896 } |
| |
897 else { |
| |
898 result += UNICODE_STRING_SIMPLE(" not within "); |
| |
899 } |
| |
900 } |
| |
901 else { |
| |
902 if ( andRule->integerOnly ) { |
| |
903 result += UNICODE_STRING_SIMPLE(" in "); |
| |
904 } |
| |
905 else { |
| |
906 result += UNICODE_STRING_SIMPLE(" within "); |
| |
907 } |
| |
908 } |
| |
909 for (int32_t r=0; r<andRule->rangeList->size(); r+=2) { |
| |
910 int32_t rangeLo = andRule->rangeList->elementAti(r); |
| |
911 int32_t rangeHi = andRule->rangeList->elementAti(r+1); |
| |
912 uprv_itou(digitString,16, rangeLo, 10, 0); |
| |
913 result += UnicodeString(digitString); |
| |
914 result += UNICODE_STRING_SIMPLE(".."); |
| |
915 uprv_itou(digitString,16, rangeHi, 10,0); |
| |
916 result += UnicodeString(digitString); |
| |
917 if (r+2 < andRule->rangeList->size()) { |
| |
918 result += UNICODE_STRING_SIMPLE(", "); |
| |
919 } |
| |
920 } |
| |
921 } |
| |
922 } |
| |
923 if ( (andRule=andRule->next) != NULL) { |
| |
924 result += UNICODE_STRING_SIMPLE(" and "); |
| |
925 } |
| |
926 } |
| |
927 if ( (orRule = orRule->next) != NULL ) { |
| |
928 result += UNICODE_STRING_SIMPLE(" or "); |
| |
929 } |
| |
930 } |
| |
931 } |
| |
932 if ( fNext != NULL ) { |
| |
933 result += UNICODE_STRING_SIMPLE("; "); |
| |
934 fNext->dumpRules(result); |
| |
935 } |
| |
936 } |
| |
937 |
| |
938 |
| |
939 UErrorCode |
| |
940 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { |
| |
941 if ( arraySize < capacityOfKeywords-1 ) { |
| |
942 keywords[arraySize++]=fKeyword; |
| |
943 } |
| |
944 else { |
| |
945 return U_BUFFER_OVERFLOW_ERROR; |
| |
946 } |
| |
947 |
| |
948 if ( fNext != NULL ) { |
| |
949 return fNext->getKeywords(capacityOfKeywords, keywords, arraySize); |
| |
950 } |
| |
951 else { |
| |
952 return U_ZERO_ERROR; |
| |
953 } |
| |
954 } |
| |
955 |
| |
956 UBool |
| |
957 RuleChain::isKeyword(const UnicodeString& keywordParam) const { |
| |
958 if ( fKeyword == keywordParam ) { |
| |
959 return TRUE; |
| |
960 } |
| |
961 |
| |
962 if ( fNext != NULL ) { |
| |
963 return fNext->isKeyword(keywordParam); |
| |
964 } |
| |
965 else { |
| |
966 return FALSE; |
| |
967 } |
| |
968 } |
| |
969 |
| |
970 |
| |
971 PluralRuleParser::PluralRuleParser() : |
| |
972 ruleIndex(0), token(), type(none), prevType(none), |
| |
973 curAndConstraint(NULL), currentChain(NULL), rangeLowIdx(-1), rangeHiIdx(-1) |
| |
974 { |
| |
975 } |
| |
976 |
| |
977 PluralRuleParser::~PluralRuleParser() { |
| |
978 } |
| |
979 |
| |
980 |
| |
981 int32_t |
| |
982 PluralRuleParser::getNumberValue(const UnicodeString& token) { |
| |
983 int32_t i; |
| |
984 char digits[128]; |
| |
985 |
| |
986 i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV); |
| |
987 digits[i]='\0'; |
| |
988 |
| |
989 return((int32_t)atoi(digits)); |
| |
990 } |
| |
991 |
| |
992 |
| |
993 void |
| |
994 PluralRuleParser::checkSyntax(UErrorCode &status) |
| |
995 { |
| |
996 if (U_FAILURE(status)) { |
| |
997 return; |
| |
998 } |
| |
999 if (!(prevType==none || prevType==tSemiColon)) { |
| |
1000 type = getKeyType(token, type); // Switch token type from tKeyword if we scanned a reserved word, |
| |
1001 // and we are not at the start of a rule, where a |
| |
1002 // keyword is expected. |
| |
1003 } |
| |
1004 |
| |
1005 switch(prevType) { |
| |
1006 case none: |
| |
1007 case tSemiColon: |
| |
1008 if (type!=tKeyword && type != tEOF) { |
| |
1009 status = U_UNEXPECTED_TOKEN; |
| |
1010 } |
| |
1011 break; |
| |
1012 case tVariableN: |
| |
1013 case tVariableI: |
| |
1014 case tVariableF: |
| |
1015 case tVariableT: |
| |
1016 case tVariableV: |
| |
1017 if (type != tIs && type != tMod && type != tIn && |
| |
1018 type != tNot && type != tWithin && type != tEqual && type != tNotEqual) { |
| |
1019 status = U_UNEXPECTED_TOKEN; |
| |
1020 } |
| |
1021 break; |
| |
1022 case tKeyword: |
| |
1023 if (type != tColon) { |
| |
1024 status = U_UNEXPECTED_TOKEN; |
| |
1025 } |
| |
1026 break; |
| |
1027 case tColon: |
| |
1028 if (!(type == tVariableN || |
| |
1029 type == tVariableI || |
| |
1030 type == tVariableF || |
| |
1031 type == tVariableT || |
| |
1032 type == tVariableV || |
| |
1033 type == tAt)) { |
| |
1034 status = U_UNEXPECTED_TOKEN; |
| |
1035 } |
| |
1036 break; |
| |
1037 case tIs: |
| |
1038 if ( type != tNumber && type != tNot) { |
| |
1039 status = U_UNEXPECTED_TOKEN; |
| |
1040 } |
| |
1041 break; |
| |
1042 case tNot: |
| |
1043 if (type != tNumber && type != tIn && type != tWithin) { |
| |
1044 status = U_UNEXPECTED_TOKEN; |
| |
1045 } |
| |
1046 break; |
| |
1047 case tMod: |
| |
1048 case tDot2: |
| |
1049 case tIn: |
| |
1050 case tWithin: |
| |
1051 case tEqual: |
| |
1052 case tNotEqual: |
| |
1053 if (type != tNumber) { |
| |
1054 status = U_UNEXPECTED_TOKEN; |
| |
1055 } |
| |
1056 break; |
| |
1057 case tAnd: |
| |
1058 case tOr: |
| |
1059 if ( type != tVariableN && |
| |
1060 type != tVariableI && |
| |
1061 type != tVariableF && |
| |
1062 type != tVariableT && |
| |
1063 type != tVariableV) { |
| |
1064 status = U_UNEXPECTED_TOKEN; |
| |
1065 } |
| |
1066 break; |
| |
1067 case tComma: |
| |
1068 if (type != tNumber) { |
| |
1069 status = U_UNEXPECTED_TOKEN; |
| |
1070 } |
| |
1071 break; |
| |
1072 case tNumber: |
| |
1073 if (type != tDot2 && type != tSemiColon && type != tIs && type != tNot && |
| |
1074 type != tIn && type != tEqual && type != tNotEqual && type != tWithin && |
| |
1075 type != tAnd && type != tOr && type != tComma && type != tAt && |
| |
1076 type != tEOF) |
| |
1077 { |
| |
1078 status = U_UNEXPECTED_TOKEN; |
| |
1079 } |
| |
1080 // TODO: a comma following a number that is not part of a range will be allowed. |
| |
1081 // It's not the only case of this sort of thing. Parser needs a re-write. |
| |
1082 break; |
| |
1083 case tAt: |
| |
1084 if (type != tDecimal && type != tInteger) { |
| |
1085 status = U_UNEXPECTED_TOKEN; |
| |
1086 } |
| |
1087 break; |
| |
1088 default: |
| |
1089 status = U_UNEXPECTED_TOKEN; |
| |
1090 break; |
| |
1091 } |
| |
1092 } |
| |
1093 |
| |
1094 |
| |
1095 /* |
| |
1096 * Scan the next token from the input rules. |
| |
1097 * rules and returned token type are in the parser state variables. |
| |
1098 */ |
| |
1099 void |
| |
1100 PluralRuleParser::getNextToken(UErrorCode &status) |
| |
1101 { |
| |
1102 if (U_FAILURE(status)) { |
| |
1103 return; |
| |
1104 } |
| |
1105 |
| |
1106 UChar ch; |
| |
1107 while (ruleIndex < ruleSrc->length()) { |
| |
1108 ch = ruleSrc->charAt(ruleIndex); |
| |
1109 type = charType(ch); |
| |
1110 if (type != tSpace) { |
| |
1111 break; |
| |
1112 } |
| |
1113 ++(ruleIndex); |
| |
1114 } |
| |
1115 if (ruleIndex >= ruleSrc->length()) { |
| |
1116 type = tEOF; |
| |
1117 return; |
| |
1118 } |
| |
1119 int32_t curIndex= ruleIndex; |
| |
1120 |
| |
1121 switch (type) { |
| |
1122 case tColon: |
| |
1123 case tSemiColon: |
| |
1124 case tComma: |
| |
1125 case tEllipsis: |
| |
1126 case tTilde: // scanned '~' |
| |
1127 case tAt: // scanned '@' |
| |
1128 case tEqual: // scanned '=' |
| |
1129 case tMod: // scanned '%' |
| |
1130 // Single character tokens. |
| |
1131 ++curIndex; |
| |
1132 break; |
| |
1133 |
| |
1134 case tNotEqual: // scanned '!' |
| |
1135 if (ruleSrc->charAt(curIndex+1) == EQUALS) { |
| |
1136 curIndex += 2; |
| |
1137 } else { |
| |
1138 type = none; |
| |
1139 curIndex += 1; |
| |
1140 } |
| |
1141 break; |
| |
1142 |
| |
1143 case tKeyword: |
| |
1144 while (type == tKeyword && ++curIndex < ruleSrc->length()) { |
| |
1145 ch = ruleSrc->charAt(curIndex); |
| |
1146 type = charType(ch); |
| |
1147 } |
| |
1148 type = tKeyword; |
| |
1149 break; |
| |
1150 |
| |
1151 case tNumber: |
| |
1152 while (type == tNumber && ++curIndex < ruleSrc->length()) { |
| |
1153 ch = ruleSrc->charAt(curIndex); |
| |
1154 type = charType(ch); |
| |
1155 } |
| |
1156 type = tNumber; |
| |
1157 break; |
| |
1158 |
| |
1159 case tDot: |
| |
1160 // We could be looking at either ".." in a range, or "..." at the end of a sample. |
| |
1161 if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) { |
| |
1162 ++curIndex; |
| |
1163 break; // Single dot |
| |
1164 } |
| |
1165 if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) { |
| |
1166 curIndex += 2; |
| |
1167 type = tDot2; |
| |
1168 break; // double dot |
| |
1169 } |
| |
1170 type = tEllipsis; |
| |
1171 curIndex += 3; |
| |
1172 break; // triple dot |
| |
1173 |
| |
1174 default: |
| |
1175 status = U_UNEXPECTED_TOKEN; |
| |
1176 ++curIndex; |
| |
1177 break; |
| |
1178 } |
| |
1179 |
| |
1180 U_ASSERT(ruleIndex <= ruleSrc->length()); |
| |
1181 U_ASSERT(curIndex <= ruleSrc->length()); |
| |
1182 token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex); |
| |
1183 ruleIndex = curIndex; |
| |
1184 } |
| |
1185 |
| |
1186 tokenType |
| |
1187 PluralRuleParser::charType(UChar ch) { |
| |
1188 if ((ch>=U_ZERO) && (ch<=U_NINE)) { |
| |
1189 return tNumber; |
| |
1190 } |
| |
1191 if (ch>=LOW_A && ch<=LOW_Z) { |
| |
1192 return tKeyword; |
| |
1193 } |
| |
1194 switch (ch) { |
| |
1195 case COLON: |
| |
1196 return tColon; |
| |
1197 case SPACE: |
| |
1198 return tSpace; |
| |
1199 case SEMI_COLON: |
| |
1200 return tSemiColon; |
| |
1201 case DOT: |
| |
1202 return tDot; |
| |
1203 case COMMA: |
| |
1204 return tComma; |
| |
1205 case EXCLAMATION: |
| |
1206 return tNotEqual; |
| |
1207 case EQUALS: |
| |
1208 return tEqual; |
| |
1209 case PERCENT_SIGN: |
| |
1210 return tMod; |
| |
1211 case AT: |
| |
1212 return tAt; |
| |
1213 case ELLIPSIS: |
| |
1214 return tEllipsis; |
| |
1215 case TILDE: |
| |
1216 return tTilde; |
| |
1217 default : |
| |
1218 return none; |
| |
1219 } |
| |
1220 } |
| |
1221 |
| |
1222 |
| |
1223 // Set token type for reserved words in the Plural Rule syntax. |
| |
1224 |
| |
1225 tokenType |
| |
1226 PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType) |
| |
1227 { |
| |
1228 if (keyType != tKeyword) { |
| |
1229 return keyType; |
| |
1230 } |
| |
1231 |
| |
1232 if (0 == token.compare(PK_VAR_N, 1)) { |
| |
1233 keyType = tVariableN; |
| |
1234 } else if (0 == token.compare(PK_VAR_I, 1)) { |
| |
1235 keyType = tVariableI; |
| |
1236 } else if (0 == token.compare(PK_VAR_F, 1)) { |
| |
1237 keyType = tVariableF; |
| |
1238 } else if (0 == token.compare(PK_VAR_T, 1)) { |
| |
1239 keyType = tVariableT; |
| |
1240 } else if (0 == token.compare(PK_VAR_V, 1)) { |
| |
1241 keyType = tVariableV; |
| |
1242 } else if (0 == token.compare(PK_IS, 2)) { |
| |
1243 keyType = tIs; |
| |
1244 } else if (0 == token.compare(PK_AND, 3)) { |
| |
1245 keyType = tAnd; |
| |
1246 } else if (0 == token.compare(PK_IN, 2)) { |
| |
1247 keyType = tIn; |
| |
1248 } else if (0 == token.compare(PK_WITHIN, 6)) { |
| |
1249 keyType = tWithin; |
| |
1250 } else if (0 == token.compare(PK_NOT, 3)) { |
| |
1251 keyType = tNot; |
| |
1252 } else if (0 == token.compare(PK_MOD, 3)) { |
| |
1253 keyType = tMod; |
| |
1254 } else if (0 == token.compare(PK_OR, 2)) { |
| |
1255 keyType = tOr; |
| |
1256 } else if (0 == token.compare(PK_DECIMAL, 7)) { |
| |
1257 keyType = tDecimal; |
| |
1258 } else if (0 == token.compare(PK_INTEGER, 7)) { |
| |
1259 keyType = tInteger; |
| |
1260 } |
| |
1261 return keyType; |
| |
1262 } |
| |
1263 |
| |
1264 |
| |
1265 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) |
| |
1266 : pos(0), fKeywordNames(status) { |
| |
1267 if (U_FAILURE(status)) { |
| |
1268 return; |
| |
1269 } |
| |
1270 fKeywordNames.setDeleter(uprv_deleteUObject); |
| |
1271 UBool addKeywordOther=TRUE; |
| |
1272 RuleChain *node=header; |
| |
1273 while(node!=NULL) { |
| |
1274 fKeywordNames.addElement(new UnicodeString(node->fKeyword), status); |
| |
1275 if (U_FAILURE(status)) { |
| |
1276 return; |
| |
1277 } |
| |
1278 if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) { |
| |
1279 addKeywordOther= FALSE; |
| |
1280 } |
| |
1281 node=node->fNext; |
| |
1282 } |
| |
1283 |
| |
1284 if (addKeywordOther) { |
| |
1285 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status); |
| |
1286 } |
| |
1287 } |
| |
1288 |
| |
1289 const UnicodeString* |
| |
1290 PluralKeywordEnumeration::snext(UErrorCode& status) { |
| |
1291 if (U_SUCCESS(status) && pos < fKeywordNames.size()) { |
| |
1292 return (const UnicodeString*)fKeywordNames.elementAt(pos++); |
| |
1293 } |
| |
1294 return NULL; |
| |
1295 } |
| |
1296 |
| |
1297 void |
| |
1298 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) { |
| |
1299 pos=0; |
| |
1300 } |
| |
1301 |
| |
1302 int32_t |
| |
1303 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const { |
| |
1304 return fKeywordNames.size(); |
| |
1305 } |
| |
1306 |
| |
1307 PluralKeywordEnumeration::~PluralKeywordEnumeration() { |
| |
1308 } |
| |
1309 |
| |
1310 |
| |
1311 |
| |
1312 FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) { |
| |
1313 init(n, v, f); |
| |
1314 // check values. TODO make into unit test. |
| |
1315 // |
| |
1316 // long visiblePower = (int) Math.pow(10, v); |
| |
1317 // if (decimalDigits > visiblePower) { |
| |
1318 // throw new IllegalArgumentException(); |
| |
1319 // } |
| |
1320 // double fraction = intValue + (decimalDigits / (double) visiblePower); |
| |
1321 // if (fraction != source) { |
| |
1322 // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source)); |
| |
1323 // if (diff > 0.00000001d) { |
| |
1324 // throw new IllegalArgumentException(); |
| |
1325 // } |
| |
1326 // } |
| |
1327 } |
| |
1328 |
| |
1329 FixedDecimal::FixedDecimal(double n, int32_t v) { |
| |
1330 // Ugly, but for samples we don't care. |
| |
1331 init(n, v, getFractionalDigits(n, v)); |
| |
1332 } |
| |
1333 |
| |
1334 FixedDecimal::FixedDecimal(double n) { |
| |
1335 init(n); |
| |
1336 } |
| |
1337 |
| |
1338 FixedDecimal::FixedDecimal() { |
| |
1339 init(0, 0, 0); |
| |
1340 } |
| |
1341 |
| |
1342 |
| |
1343 // Create a FixedDecimal from a UnicodeString containing a number. |
| |
1344 // Inefficient, but only used for samples, so simplicity trumps efficiency. |
| |
1345 |
| |
1346 FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) { |
| |
1347 CharString cs; |
| |
1348 cs.appendInvariantChars(num, status); |
| |
1349 DigitList dl; |
| |
1350 dl.set(cs.toStringPiece(), status); |
| |
1351 if (U_FAILURE(status)) { |
| |
1352 init(0, 0, 0); |
| |
1353 return; |
| |
1354 } |
| |
1355 int32_t decimalPoint = num.indexOf(DOT); |
| |
1356 double n = dl.getDouble(); |
| |
1357 if (decimalPoint == -1) { |
| |
1358 init(n, 0, 0); |
| |
1359 } else { |
| |
1360 int32_t v = num.length() - decimalPoint - 1; |
| |
1361 init(n, v, getFractionalDigits(n, v)); |
| |
1362 } |
| |
1363 } |
| |
1364 |
| |
1365 |
| |
1366 FixedDecimal::FixedDecimal(const FixedDecimal &other) { |
| |
1367 source = other.source; |
| |
1368 visibleDecimalDigitCount = other.visibleDecimalDigitCount; |
| |
1369 decimalDigits = other.decimalDigits; |
| |
1370 decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros; |
| |
1371 intValue = other.intValue; |
| |
1372 hasIntegerValue = other.hasIntegerValue; |
| |
1373 isNegative = other.isNegative; |
| |
1374 isNanOrInfinity = other.isNanOrInfinity; |
| |
1375 } |
| |
1376 |
| |
1377 |
| |
1378 void FixedDecimal::init(double n) { |
| |
1379 int32_t numFractionDigits = decimals(n); |
| |
1380 init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits)); |
| |
1381 } |
| |
1382 |
| |
1383 |
| |
1384 void FixedDecimal::init(double n, int32_t v, int64_t f) { |
| |
1385 isNegative = n < 0.0; |
| |
1386 source = fabs(n); |
| |
1387 isNanOrInfinity = uprv_isNaN(source) || uprv_isPositiveInfinity(source); |
| |
1388 if (isNanOrInfinity) { |
| |
1389 v = 0; |
| |
1390 f = 0; |
| |
1391 intValue = 0; |
| |
1392 hasIntegerValue = FALSE; |
| |
1393 } else { |
| |
1394 intValue = (int64_t)source; |
| |
1395 hasIntegerValue = (source == intValue); |
| |
1396 } |
| |
1397 |
| |
1398 visibleDecimalDigitCount = v; |
| |
1399 decimalDigits = f; |
| |
1400 if (f == 0) { |
| |
1401 decimalDigitsWithoutTrailingZeros = 0; |
| |
1402 } else { |
| |
1403 int64_t fdwtz = f; |
| |
1404 while ((fdwtz%10) == 0) { |
| |
1405 fdwtz /= 10; |
| |
1406 } |
| |
1407 decimalDigitsWithoutTrailingZeros = fdwtz; |
| |
1408 } |
| |
1409 } |
| |
1410 |
| |
1411 |
| |
1412 // Fast path only exact initialization. Return true if successful. |
| |
1413 // Note: Do not multiply by 10 each time through loop, rounding cruft can build |
| |
1414 // up that makes the check for an integer result fail. |
| |
1415 // A single multiply of the original number works more reliably. |
| |
1416 static int32_t p10[] = {1, 10, 100, 1000, 10000}; |
| |
1417 UBool FixedDecimal::quickInit(double n) { |
| |
1418 UBool success = FALSE; |
| |
1419 n = fabs(n); |
| |
1420 int32_t numFractionDigits; |
| |
1421 for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) { |
| |
1422 double scaledN = n * p10[numFractionDigits]; |
| |
1423 if (scaledN == floor(scaledN)) { |
| |
1424 success = TRUE; |
| |
1425 break; |
| |
1426 } |
| |
1427 } |
| |
1428 if (success) { |
| |
1429 init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits)); |
| |
1430 } |
| |
1431 return success; |
| |
1432 } |
| |
1433 |
| |
1434 |
| |
1435 |
| |
1436 int32_t FixedDecimal::decimals(double n) { |
| |
1437 // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros. |
| |
1438 // fastpath the common cases, integers or fractions with 3 or fewer digits |
| |
1439 n = fabs(n); |
| |
1440 for (int ndigits=0; ndigits<=3; ndigits++) { |
| |
1441 double scaledN = n * p10[ndigits]; |
| |
1442 if (scaledN == floor(scaledN)) { |
| |
1443 return ndigits; |
| |
1444 } |
| |
1445 } |
| |
1446 |
| |
1447 // Slow path, convert with sprintf, parse converted output. |
| |
1448 char buf[30] = {0}; |
| |
1449 sprintf(buf, "%1.15e", n); |
| |
1450 // formatted number looks like this: 1.234567890123457e-01 |
| |
1451 int exponent = atoi(buf+18); |
| |
1452 int numFractionDigits = 15; |
| |
1453 for (int i=16; ; --i) { |
| |
1454 if (buf[i] != '0') { |
| |
1455 break; |
| |
1456 } |
| |
1457 --numFractionDigits; |
| |
1458 } |
| |
1459 numFractionDigits -= exponent; // Fraction part of fixed point representation. |
| |
1460 return numFractionDigits; |
| |
1461 } |
| |
1462 |
| |
1463 |
| |
1464 // Get the fraction digits of a double, represented as an integer. |
| |
1465 // v is the number of visible fraction digits in the displayed form of the number. |
| |
1466 // Example: n = 1001.234, v = 6, result = 234000 |
| |
1467 // TODO: need to think through how this is used in the plural rule context. |
| |
1468 // This function can easily encounter integer overflow, |
| |
1469 // and can easily return noise digits when the precision of a double is exceeded. |
| |
1470 |
| |
1471 int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) { |
| |
1472 if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) { |
| |
1473 return 0; |
| |
1474 } |
| |
1475 n = fabs(n); |
| |
1476 double fract = n - floor(n); |
| |
1477 switch (v) { |
| |
1478 case 1: return (int64_t)(fract*10.0 + 0.5); |
| |
1479 case 2: return (int64_t)(fract*100.0 + 0.5); |
| |
1480 case 3: return (int64_t)(fract*1000.0 + 0.5); |
| |
1481 default: |
| |
1482 double scaled = floor(fract * pow(10.0, (double)v) + 0.5); |
| |
1483 if (scaled > U_INT64_MAX) { |
| |
1484 return U_INT64_MAX; |
| |
1485 } else { |
| |
1486 return (int64_t)scaled; |
| |
1487 } |
| |
1488 } |
| |
1489 } |
| |
1490 |
| |
1491 |
| |
1492 void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) { |
| |
1493 int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount; |
| |
1494 if (numTrailingFractionZeros > 0) { |
| |
1495 for (int32_t i=0; i<numTrailingFractionZeros; i++) { |
| |
1496 // Do not let the decimalDigits value overflow if there are many trailing zeros. |
| |
1497 // Limit the value to 18 digits, the most that a 64 bit int can fully represent. |
| |
1498 if (decimalDigits >= 100000000000000000LL) { |
| |
1499 break; |
| |
1500 } |
| |
1501 decimalDigits *= 10; |
| |
1502 } |
| |
1503 visibleDecimalDigitCount += numTrailingFractionZeros; |
| |
1504 } |
| |
1505 } |
| |
1506 |
| |
1507 |
| |
1508 double FixedDecimal::get(tokenType operand) const { |
| |
1509 switch(operand) { |
| |
1510 case tVariableN: return source; |
| |
1511 case tVariableI: return (double)intValue; |
| |
1512 case tVariableF: return (double)decimalDigits; |
| |
1513 case tVariableT: return (double)decimalDigitsWithoutTrailingZeros; |
| |
1514 case tVariableV: return visibleDecimalDigitCount; |
| |
1515 default: |
| |
1516 U_ASSERT(FALSE); // unexpected. |
| |
1517 return source; |
| |
1518 } |
| |
1519 } |
| |
1520 |
| |
1521 int32_t FixedDecimal::getVisibleFractionDigitCount() const { |
| |
1522 return visibleDecimalDigitCount; |
| |
1523 } |
| |
1524 |
| |
1525 |
| |
1526 |
| |
1527 PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) { |
| |
1528 fLocales = NULL; |
| |
1529 fRes = NULL; |
| |
1530 fOpenStatus = status; |
| |
1531 if (U_FAILURE(status)) { |
| |
1532 return; |
| |
1533 } |
| |
1534 fOpenStatus = U_ZERO_ERROR; |
| |
1535 LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &fOpenStatus)); |
| |
1536 fLocales = ures_getByKey(rb.getAlias(), "locales", NULL, &fOpenStatus); |
| |
1537 } |
| |
1538 |
| |
1539 PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() { |
| |
1540 ures_close(fLocales); |
| |
1541 ures_close(fRes); |
| |
1542 fLocales = NULL; |
| |
1543 fRes = NULL; |
| |
1544 } |
| |
1545 |
| |
1546 const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) { |
| |
1547 if (U_FAILURE(status)) { |
| |
1548 return NULL; |
| |
1549 } |
| |
1550 if (U_FAILURE(fOpenStatus)) { |
| |
1551 status = fOpenStatus; |
| |
1552 return NULL; |
| |
1553 } |
| |
1554 fRes = ures_getNextResource(fLocales, fRes, &status); |
| |
1555 if (fRes == NULL || U_FAILURE(status)) { |
| |
1556 if (status == U_INDEX_OUTOFBOUNDS_ERROR) { |
| |
1557 status = U_ZERO_ERROR; |
| |
1558 } |
| |
1559 return NULL; |
| |
1560 } |
| |
1561 const char *result = ures_getKey(fRes); |
| |
1562 if (resultLength != NULL) { |
| |
1563 *resultLength = uprv_strlen(result); |
| |
1564 } |
| |
1565 return result; |
| |
1566 } |
| |
1567 |
| |
1568 |
| |
1569 void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) { |
| |
1570 if (U_FAILURE(status)) { |
| |
1571 return; |
| |
1572 } |
| |
1573 if (U_FAILURE(fOpenStatus)) { |
| |
1574 status = fOpenStatus; |
| |
1575 return; |
| |
1576 } |
| |
1577 ures_resetIterator(fLocales); |
| |
1578 } |
| |
1579 |
| |
1580 int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const { |
| |
1581 if (U_FAILURE(status)) { |
| |
1582 return 0; |
| |
1583 } |
| |
1584 if (U_FAILURE(fOpenStatus)) { |
| |
1585 status = fOpenStatus; |
| |
1586 return 0; |
| |
1587 } |
| |
1588 return ures_getSize(fLocales); |
| |
1589 } |
| |
1590 |
| |
1591 U_NAMESPACE_END |
| |
1592 |
| |
1593 |
| |
1594 #endif /* #if !UCONFIG_NO_FORMATTING */ |
| |
1595 |
| |
1596 //eof |