|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 1997-2013, International Business Machines Corporation and * |
|
4 * others. All Rights Reserved. * |
|
5 ******************************************************************************* |
|
6 * |
|
7 * File CHOICFMT.CPP |
|
8 * |
|
9 * Modification History: |
|
10 * |
|
11 * Date Name Description |
|
12 * 02/19/97 aliu Converted from java. |
|
13 * 03/20/97 helena Finished first cut of implementation and got rid |
|
14 * of nextDouble/previousDouble and replaced with |
|
15 * boolean array. |
|
16 * 4/10/97 aliu Clean up. Modified to work on AIX. |
|
17 * 06/04/97 helena Fixed applyPattern(), toPattern() and not to include |
|
18 * wchar.h. |
|
19 * 07/09/97 helena Made ParsePosition into a class. |
|
20 * 08/06/97 nos removed overloaded constructor, fixed 'format(array)' |
|
21 * 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags) |
|
22 * 02/22/99 stephen Removed character literals for EBCDIC safety |
|
23 ******************************************************************************** |
|
24 */ |
|
25 |
|
26 #include "unicode/utypes.h" |
|
27 |
|
28 #if !UCONFIG_NO_FORMATTING |
|
29 |
|
30 #include "unicode/choicfmt.h" |
|
31 #include "unicode/numfmt.h" |
|
32 #include "unicode/locid.h" |
|
33 #include "cpputils.h" |
|
34 #include "cstring.h" |
|
35 #include "messageimpl.h" |
|
36 #include "putilimp.h" |
|
37 #include "uassert.h" |
|
38 #include <stdio.h> |
|
39 #include <float.h> |
|
40 |
|
41 // ***************************************************************************** |
|
42 // class ChoiceFormat |
|
43 // ***************************************************************************** |
|
44 |
|
45 U_NAMESPACE_BEGIN |
|
46 |
|
47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat) |
|
48 |
|
49 // Special characters used by ChoiceFormat. There are two characters |
|
50 // used interchangeably to indicate <=. Either is parsed, but only |
|
51 // LESS_EQUAL is generated by toPattern(). |
|
52 #define SINGLE_QUOTE ((UChar)0x0027) /*'*/ |
|
53 #define LESS_THAN ((UChar)0x003C) /*<*/ |
|
54 #define LESS_EQUAL ((UChar)0x0023) /*#*/ |
|
55 #define LESS_EQUAL2 ((UChar)0x2264) |
|
56 #define VERTICAL_BAR ((UChar)0x007C) /*|*/ |
|
57 #define MINUS ((UChar)0x002D) /*-*/ |
|
58 |
|
59 static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/ |
|
60 static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/ |
|
61 |
|
62 #ifdef INFINITY |
|
63 #undef INFINITY |
|
64 #endif |
|
65 #define INFINITY ((UChar)0x221E) |
|
66 |
|
67 //static const UChar gPositiveInfinity[] = {INFINITY, 0}; |
|
68 //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0}; |
|
69 #define POSITIVE_INF_STRLEN 1 |
|
70 #define NEGATIVE_INF_STRLEN 2 |
|
71 |
|
72 // ------------------------------------- |
|
73 // Creates a ChoiceFormat instance based on the pattern. |
|
74 |
|
75 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, |
|
76 UErrorCode& status) |
|
77 : constructorErrorCode(status), |
|
78 msgPattern(status) |
|
79 { |
|
80 applyPattern(newPattern, status); |
|
81 } |
|
82 |
|
83 // ------------------------------------- |
|
84 // Creates a ChoiceFormat instance with the limit array and |
|
85 // format strings for each limit. |
|
86 |
|
87 ChoiceFormat::ChoiceFormat(const double* limits, |
|
88 const UnicodeString* formats, |
|
89 int32_t cnt ) |
|
90 : constructorErrorCode(U_ZERO_ERROR), |
|
91 msgPattern(constructorErrorCode) |
|
92 { |
|
93 setChoices(limits, NULL, formats, cnt, constructorErrorCode); |
|
94 } |
|
95 |
|
96 // ------------------------------------- |
|
97 |
|
98 ChoiceFormat::ChoiceFormat(const double* limits, |
|
99 const UBool* closures, |
|
100 const UnicodeString* formats, |
|
101 int32_t cnt ) |
|
102 : constructorErrorCode(U_ZERO_ERROR), |
|
103 msgPattern(constructorErrorCode) |
|
104 { |
|
105 setChoices(limits, closures, formats, cnt, constructorErrorCode); |
|
106 } |
|
107 |
|
108 // ------------------------------------- |
|
109 // copy constructor |
|
110 |
|
111 ChoiceFormat::ChoiceFormat(const ChoiceFormat& that) |
|
112 : NumberFormat(that), |
|
113 constructorErrorCode(that.constructorErrorCode), |
|
114 msgPattern(that.msgPattern) |
|
115 { |
|
116 } |
|
117 |
|
118 // ------------------------------------- |
|
119 // Private constructor that creates a |
|
120 // ChoiceFormat instance based on the |
|
121 // pattern and populates UParseError |
|
122 |
|
123 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, |
|
124 UParseError& parseError, |
|
125 UErrorCode& status) |
|
126 : constructorErrorCode(status), |
|
127 msgPattern(status) |
|
128 { |
|
129 applyPattern(newPattern,parseError, status); |
|
130 } |
|
131 // ------------------------------------- |
|
132 |
|
133 UBool |
|
134 ChoiceFormat::operator==(const Format& that) const |
|
135 { |
|
136 if (this == &that) return TRUE; |
|
137 if (!NumberFormat::operator==(that)) return FALSE; |
|
138 ChoiceFormat& thatAlias = (ChoiceFormat&)that; |
|
139 return msgPattern == thatAlias.msgPattern; |
|
140 } |
|
141 |
|
142 // ------------------------------------- |
|
// assignment operator
|
144 |
|
145 const ChoiceFormat& |
|
146 ChoiceFormat::operator=(const ChoiceFormat& that) |
|
147 { |
|
148 if (this != &that) { |
|
149 NumberFormat::operator=(that); |
|
150 constructorErrorCode = that.constructorErrorCode; |
|
151 msgPattern = that.msgPattern; |
|
152 } |
|
153 return *this; |
|
154 } |
|
155 |
|
156 // ------------------------------------- |
|
157 |
|
158 ChoiceFormat::~ChoiceFormat() |
|
159 { |
|
160 } |
|
161 |
|
162 // ------------------------------------- |
|
163 |
|
164 /** |
|
165 * Convert a double value to a string without the overhead of NumberFormat. |
|
166 */ |
|
167 UnicodeString& |
|
168 ChoiceFormat::dtos(double value, |
|
169 UnicodeString& string) |
|
170 { |
|
171 /* Buffer to contain the digits and any extra formatting stuff. */ |
|
172 char temp[DBL_DIG + 16]; |
|
173 char *itrPtr = temp; |
|
174 char *expPtr; |
|
175 |
|
176 sprintf(temp, "%.*g", DBL_DIG, value); |
|
177 |
|
178 /* Find and convert the decimal point. |
|
179 Using setlocale on some machines will cause sprintf to use a comma for certain locales. |
|
180 */ |
|
181 while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) { |
|
182 itrPtr++; |
|
183 } |
|
184 if (*itrPtr != 0 && *itrPtr != 'e') { |
|
185 /* We reached something that looks like a decimal point. |
|
186 In case someone used setlocale(), which changes the decimal point. */ |
|
187 *itrPtr = '.'; |
|
188 itrPtr++; |
|
189 } |
|
190 /* Search for the exponent */ |
|
191 while (*itrPtr && *itrPtr != 'e') { |
|
192 itrPtr++; |
|
193 } |
|
194 if (*itrPtr == 'e') { |
|
195 itrPtr++; |
|
196 /* Verify the exponent sign */ |
|
197 if (*itrPtr == '+' || *itrPtr == '-') { |
|
198 itrPtr++; |
|
199 } |
|
200 /* Remove leading zeros. You will see this on Windows machines. */ |
|
201 expPtr = itrPtr; |
|
202 while (*itrPtr == '0') { |
|
203 itrPtr++; |
|
204 } |
|
205 if (*itrPtr && expPtr != itrPtr) { |
|
206 /* Shift the exponent without zeros. */ |
|
207 while (*itrPtr) { |
|
208 *(expPtr++) = *(itrPtr++); |
|
209 } |
|
210 // NULL terminate |
|
211 *expPtr = 0; |
|
212 } |
|
213 } |
|
214 |
|
215 string = UnicodeString(temp, -1, US_INV); /* invariant codepage */ |
|
216 return string; |
|
217 } |
|
218 |
|
219 // ------------------------------------- |
|
// Applies the pattern to this ChoiceFormat instance without reporting parse-error details.
|
221 |
|
222 void |
|
223 ChoiceFormat::applyPattern(const UnicodeString& pattern, |
|
224 UErrorCode& status) |
|
225 { |
|
226 msgPattern.parseChoiceStyle(pattern, NULL, status); |
|
227 constructorErrorCode = status; |
|
228 } |
|
229 |
|
230 // ------------------------------------- |
|
231 // Applies the pattern to this ChoiceFormat instance. |
|
232 |
|
233 void |
|
234 ChoiceFormat::applyPattern(const UnicodeString& pattern, |
|
235 UParseError& parseError, |
|
236 UErrorCode& status) |
|
237 { |
|
238 msgPattern.parseChoiceStyle(pattern, &parseError, status); |
|
239 constructorErrorCode = status; |
|
240 } |
|
241 // ------------------------------------- |
|
242 // Returns the input pattern string. |
|
243 |
|
244 UnicodeString& |
|
245 ChoiceFormat::toPattern(UnicodeString& result) const |
|
246 { |
|
247 return result = msgPattern.getPatternString(); |
|
248 } |
|
249 |
|
250 // ------------------------------------- |
|
251 // Sets the limit and format arrays. |
|
252 void |
|
253 ChoiceFormat::setChoices( const double* limits, |
|
254 const UnicodeString* formats, |
|
255 int32_t cnt ) |
|
256 { |
|
257 UErrorCode errorCode = U_ZERO_ERROR; |
|
258 setChoices(limits, NULL, formats, cnt, errorCode); |
|
259 } |
|
260 |
|
261 // ------------------------------------- |
|
262 // Sets the limit and format arrays. |
|
263 void |
|
264 ChoiceFormat::setChoices( const double* limits, |
|
265 const UBool* closures, |
|
266 const UnicodeString* formats, |
|
267 int32_t cnt ) |
|
268 { |
|
269 UErrorCode errorCode = U_ZERO_ERROR; |
|
270 setChoices(limits, closures, formats, cnt, errorCode); |
|
271 } |
|
272 |
|
273 void |
|
274 ChoiceFormat::setChoices(const double* limits, |
|
275 const UBool* closures, |
|
276 const UnicodeString* formats, |
|
277 int32_t count, |
|
278 UErrorCode &errorCode) { |
|
279 if (U_FAILURE(errorCode)) { |
|
280 return; |
|
281 } |
|
282 if (limits == NULL || formats == NULL) { |
|
283 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
|
284 return; |
|
285 } |
|
286 // Reconstruct the original input pattern. |
|
287 // Modified version of the pre-ICU 4.8 toPattern() implementation. |
|
288 UnicodeString result; |
|
289 for (int32_t i = 0; i < count; ++i) { |
|
290 if (i != 0) { |
|
291 result += VERTICAL_BAR; |
|
292 } |
|
293 UnicodeString buf; |
|
294 if (uprv_isPositiveInfinity(limits[i])) { |
|
295 result += INFINITY; |
|
296 } else if (uprv_isNegativeInfinity(limits[i])) { |
|
297 result += MINUS; |
|
298 result += INFINITY; |
|
299 } else { |
|
300 result += dtos(limits[i], buf); |
|
301 } |
|
302 if (closures != NULL && closures[i]) { |
|
303 result += LESS_THAN; |
|
304 } else { |
|
305 result += LESS_EQUAL; |
|
306 } |
|
307 // Append formats[i], using quotes if there are special |
|
308 // characters. Single quotes themselves must be escaped in |
|
309 // either case. |
|
310 const UnicodeString& text = formats[i]; |
|
311 int32_t textLength = text.length(); |
|
312 int32_t nestingLevel = 0; |
|
313 for (int32_t j = 0; j < textLength; ++j) { |
|
314 UChar c = text[j]; |
|
315 if (c == SINGLE_QUOTE && nestingLevel == 0) { |
|
316 // Double each top-level apostrophe. |
|
317 result.append(c); |
|
318 } else if (c == VERTICAL_BAR && nestingLevel == 0) { |
|
319 // Surround each pipe symbol with apostrophes for quoting. |
|
320 // If the next character is an apostrophe, then that will be doubled, |
|
321 // and although the parser will see the apostrophe pairs beginning |
|
322 // and ending one character earlier than our doubling, the result |
|
323 // is as desired. |
|
324 // | -> '|' |
|
325 // |' -> '|''' |
|
326 // |'' -> '|''''' etc. |
|
327 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE); |
|
328 continue; // Skip the append(c) at the end of the loop body. |
|
329 } else if (c == LEFT_CURLY_BRACE) { |
|
330 ++nestingLevel; |
|
331 } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) { |
|
332 --nestingLevel; |
|
333 } |
|
334 result.append(c); |
|
335 } |
|
336 } |
|
337 // Apply the reconstructed pattern. |
|
338 applyPattern(result, errorCode); |
|
339 } |
|
340 |
|
341 // ------------------------------------- |
|
342 // Gets the limit array. |
|
343 |
|
344 const double* |
|
345 ChoiceFormat::getLimits(int32_t& cnt) const |
|
346 { |
|
347 cnt = 0; |
|
348 return NULL; |
|
349 } |
|
350 |
|
351 // ------------------------------------- |
|
352 // Gets the closures array. |
|
353 |
|
354 const UBool* |
|
355 ChoiceFormat::getClosures(int32_t& cnt) const |
|
356 { |
|
357 cnt = 0; |
|
358 return NULL; |
|
359 } |
|
360 |
|
361 // ------------------------------------- |
|
362 // Gets the format array. |
|
363 |
|
364 const UnicodeString* |
|
365 ChoiceFormat::getFormats(int32_t& cnt) const |
|
366 { |
|
367 cnt = 0; |
|
368 return NULL; |
|
369 } |
|
370 |
|
371 // ------------------------------------- |
|
372 // Formats an int64 number, it's actually formatted as |
|
373 // a double. The returned format string may differ |
|
374 // from the input number because of this. |
|
375 |
|
376 UnicodeString& |
|
377 ChoiceFormat::format(int64_t number, |
|
378 UnicodeString& appendTo, |
|
379 FieldPosition& status) const |
|
380 { |
|
381 return format((double) number, appendTo, status); |
|
382 } |
|
383 |
|
384 // ------------------------------------- |
|
385 // Formats an int32_t number, it's actually formatted as |
|
386 // a double. |
|
387 |
|
388 UnicodeString& |
|
389 ChoiceFormat::format(int32_t number, |
|
390 UnicodeString& appendTo, |
|
391 FieldPosition& status) const |
|
392 { |
|
393 return format((double) number, appendTo, status); |
|
394 } |
|
395 |
|
396 // ------------------------------------- |
|
397 // Formats a double number. |
|
398 |
|
399 UnicodeString& |
|
400 ChoiceFormat::format(double number, |
|
401 UnicodeString& appendTo, |
|
402 FieldPosition& /*pos*/) const |
|
403 { |
|
404 if (msgPattern.countParts() == 0) { |
|
405 // No pattern was applied, or it failed. |
|
406 return appendTo; |
|
407 } |
|
408 // Get the appropriate sub-message. |
|
409 int32_t msgStart = findSubMessage(msgPattern, 0, number); |
|
410 if (!MessageImpl::jdkAposMode(msgPattern)) { |
|
411 int32_t patternStart = msgPattern.getPart(msgStart).getLimit(); |
|
412 int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart); |
|
413 appendTo.append(msgPattern.getPatternString(), |
|
414 patternStart, |
|
415 msgPattern.getPatternIndex(msgLimit) - patternStart); |
|
416 return appendTo; |
|
417 } |
|
418 // JDK compatibility mode: Remove SKIP_SYNTAX. |
|
419 return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo); |
|
420 } |
|
421 |
|
422 int32_t |
|
423 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) { |
|
424 int32_t count = pattern.countParts(); |
|
425 int32_t msgStart; |
|
426 // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples |
|
427 // until ARG_LIMIT or end of choice-only pattern. |
|
428 // Ignore the first number and selector and start the loop on the first message. |
|
429 partIndex += 2; |
|
430 for (;;) { |
|
431 // Skip but remember the current sub-message. |
|
432 msgStart = partIndex; |
|
433 partIndex = pattern.getLimitPartIndex(partIndex); |
|
434 if (++partIndex >= count) { |
|
435 // Reached the end of the choice-only pattern. |
|
436 // Return with the last sub-message. |
|
437 break; |
|
438 } |
|
439 const MessagePattern::Part &part = pattern.getPart(partIndex++); |
|
440 UMessagePatternPartType type = part.getType(); |
|
441 if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) { |
|
442 // Reached the end of the ChoiceFormat style. |
|
443 // Return with the last sub-message. |
|
444 break; |
|
445 } |
|
446 // part is an ARG_INT or ARG_DOUBLE |
|
447 U_ASSERT(MessagePattern::Part::hasNumericValue(type)); |
|
448 double boundary = pattern.getNumericValue(part); |
|
449 // Fetch the ARG_SELECTOR character. |
|
450 int32_t selectorIndex = pattern.getPatternIndex(partIndex++); |
|
451 UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex); |
|
452 if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) { |
|
453 // The number is in the interval between the previous boundary and the current one. |
|
454 // Return with the sub-message between them. |
|
455 // The !(a>b) and !(a>=b) comparisons are equivalent to |
|
456 // (a<=b) and (a<b) except they "catch" NaN. |
|
457 break; |
|
458 } |
|
459 } |
|
460 return msgStart; |
|
461 } |
|
462 |
|
463 // ------------------------------------- |
|
// Formats an array of objects. Each object is converted to a double,
// which is then used to select the sub-message to append.
|
466 |
|
467 UnicodeString& |
|
468 ChoiceFormat::format(const Formattable* objs, |
|
469 int32_t cnt, |
|
470 UnicodeString& appendTo, |
|
471 FieldPosition& pos, |
|
472 UErrorCode& status) const |
|
473 { |
|
474 if(cnt < 0) { |
|
475 status = U_ILLEGAL_ARGUMENT_ERROR; |
|
476 return appendTo; |
|
477 } |
|
478 if (msgPattern.countParts() == 0) { |
|
479 status = U_INVALID_STATE_ERROR; |
|
480 return appendTo; |
|
481 } |
|
482 |
|
483 for (int32_t i = 0; i < cnt; i++) { |
|
484 double objDouble = objs[i].getDouble(status); |
|
485 if (U_SUCCESS(status)) { |
|
486 format(objDouble, appendTo, pos); |
|
487 } |
|
488 } |
|
489 |
|
490 return appendTo; |
|
491 } |
|
492 |
|
493 // ------------------------------------- |
|
494 |
|
495 void |
|
496 ChoiceFormat::parse(const UnicodeString& text, |
|
497 Formattable& result, |
|
498 ParsePosition& pos) const |
|
499 { |
|
500 result.setDouble(parseArgument(msgPattern, 0, text, pos)); |
|
501 } |
|
502 |
|
503 double |
|
504 ChoiceFormat::parseArgument( |
|
505 const MessagePattern &pattern, int32_t partIndex, |
|
506 const UnicodeString &source, ParsePosition &pos) { |
|
507 // find the best number (defined as the one with the longest parse) |
|
508 int32_t start = pos.getIndex(); |
|
509 int32_t furthest = start; |
|
510 double bestNumber = uprv_getNaN(); |
|
511 double tempNumber = 0.0; |
|
512 int32_t count = pattern.countParts(); |
|
513 while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) { |
|
514 tempNumber = pattern.getNumericValue(pattern.getPart(partIndex)); |
|
515 partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR |
|
516 int32_t msgLimit = pattern.getLimitPartIndex(partIndex); |
|
517 int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start); |
|
518 if (len >= 0) { |
|
519 int32_t newIndex = start + len; |
|
520 if (newIndex > furthest) { |
|
521 furthest = newIndex; |
|
522 bestNumber = tempNumber; |
|
523 if (furthest == source.length()) { |
|
524 break; |
|
525 } |
|
526 } |
|
527 } |
|
528 partIndex = msgLimit + 1; |
|
529 } |
|
530 if (furthest == start) { |
|
531 pos.setErrorIndex(start); |
|
532 } else { |
|
533 pos.setIndex(furthest); |
|
534 } |
|
535 return bestNumber; |
|
536 } |
|
537 |
|
538 int32_t |
|
539 ChoiceFormat::matchStringUntilLimitPart( |
|
540 const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex, |
|
541 const UnicodeString &source, int32_t sourceOffset) { |
|
542 int32_t matchingSourceLength = 0; |
|
543 const UnicodeString &msgString = pattern.getPatternString(); |
|
544 int32_t prevIndex = pattern.getPart(partIndex).getLimit(); |
|
545 for (;;) { |
|
546 const MessagePattern::Part &part = pattern.getPart(++partIndex); |
|
547 if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { |
|
548 int32_t index = part.getIndex(); |
|
549 int32_t length = index - prevIndex; |
|
550 if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) { |
|
551 return -1; // mismatch |
|
552 } |
|
553 matchingSourceLength += length; |
|
554 if (partIndex == limitPartIndex) { |
|
555 return matchingSourceLength; |
|
556 } |
|
557 prevIndex = part.getLimit(); // SKIP_SYNTAX |
|
558 } |
|
559 } |
|
560 } |
|
561 |
|
562 // ------------------------------------- |
|
563 |
|
564 Format* |
|
565 ChoiceFormat::clone() const |
|
566 { |
|
567 ChoiceFormat *aCopy = new ChoiceFormat(*this); |
|
568 return aCopy; |
|
569 } |
|
570 |
|
571 U_NAMESPACE_END |
|
572 |
|
573 #endif /* #if !UCONFIG_NO_FORMATTING */ |
|
574 |
|
575 //eof |