1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/choicfmt.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,575 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 1997-2013, International Business Machines Corporation and * 1.7 +* others. All Rights Reserved. * 1.8 +******************************************************************************* 1.9 +* 1.10 +* File CHOICFMT.CPP 1.11 +* 1.12 +* Modification History: 1.13 +* 1.14 +* Date Name Description 1.15 +* 02/19/97 aliu Converted from java. 1.16 +* 03/20/97 helena Finished first cut of implementation and got rid 1.17 +* of nextDouble/previousDouble and replaced with 1.18 +* boolean array. 1.19 +* 4/10/97 aliu Clean up. Modified to work on AIX. 1.20 +* 06/04/97 helena Fixed applyPattern(), toPattern() and not to include 1.21 +* wchar.h. 1.22 +* 07/09/97 helena Made ParsePosition into a class. 1.23 +* 08/06/97 nos removed overloaded constructor, fixed 'format(array)' 1.24 +* 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags) 1.25 +* 02/22/99 stephen Removed character literals for EBCDIC safety 1.26 +******************************************************************************** 1.27 +*/ 1.28 + 1.29 +#include "unicode/utypes.h" 1.30 + 1.31 +#if !UCONFIG_NO_FORMATTING 1.32 + 1.33 +#include "unicode/choicfmt.h" 1.34 +#include "unicode/numfmt.h" 1.35 +#include "unicode/locid.h" 1.36 +#include "cpputils.h" 1.37 +#include "cstring.h" 1.38 +#include "messageimpl.h" 1.39 +#include "putilimp.h" 1.40 +#include "uassert.h" 1.41 +#include <stdio.h> 1.42 +#include <float.h> 1.43 + 1.44 +// ***************************************************************************** 1.45 +// class ChoiceFormat 1.46 +// ***************************************************************************** 1.47 + 1.48 +U_NAMESPACE_BEGIN 1.49 + 1.50 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat) 1.51 + 1.52 +// Special characters used by ChoiceFormat. There are two characters 1.53 +// used interchangeably to indicate <=. Either is parsed, but only 1.54 +// LESS_EQUAL is generated by toPattern(). 1.55 +#define SINGLE_QUOTE ((UChar)0x0027) /*'*/ 1.56 +#define LESS_THAN ((UChar)0x003C) /*<*/ 1.57 +#define LESS_EQUAL ((UChar)0x0023) /*#*/ 1.58 +#define LESS_EQUAL2 ((UChar)0x2264) 1.59 +#define VERTICAL_BAR ((UChar)0x007C) /*|*/ 1.60 +#define MINUS ((UChar)0x002D) /*-*/ 1.61 + 1.62 +static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/ 1.63 +static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/ 1.64 + 1.65 +#ifdef INFINITY 1.66 +#undef INFINITY 1.67 +#endif 1.68 +#define INFINITY ((UChar)0x221E) 1.69 + 1.70 +//static const UChar gPositiveInfinity[] = {INFINITY, 0}; 1.71 +//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0}; 1.72 +#define POSITIVE_INF_STRLEN 1 1.73 +#define NEGATIVE_INF_STRLEN 2 1.74 + 1.75 +// ------------------------------------- 1.76 +// Creates a ChoiceFormat instance based on the pattern. 1.77 + 1.78 +ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, 1.79 + UErrorCode& status) 1.80 +: constructorErrorCode(status), 1.81 + msgPattern(status) 1.82 +{ 1.83 + applyPattern(newPattern, status); 1.84 +} 1.85 + 1.86 +// ------------------------------------- 1.87 +// Creates a ChoiceFormat instance with the limit array and 1.88 +// format strings for each limit. 1.89 + 1.90 +ChoiceFormat::ChoiceFormat(const double* limits, 1.91 + const UnicodeString* formats, 1.92 + int32_t cnt ) 1.93 +: constructorErrorCode(U_ZERO_ERROR), 1.94 + msgPattern(constructorErrorCode) 1.95 +{ 1.96 + setChoices(limits, NULL, formats, cnt, constructorErrorCode); 1.97 +} 1.98 + 1.99 +// ------------------------------------- 1.100 + 1.101 +ChoiceFormat::ChoiceFormat(const double* limits, 1.102 + const UBool* closures, 1.103 + const UnicodeString* formats, 1.104 + int32_t cnt ) 1.105 +: constructorErrorCode(U_ZERO_ERROR), 1.106 + msgPattern(constructorErrorCode) 1.107 +{ 1.108 + setChoices(limits, closures, formats, cnt, constructorErrorCode); 1.109 +} 1.110 + 1.111 +// ------------------------------------- 1.112 +// copy constructor 1.113 + 1.114 +ChoiceFormat::ChoiceFormat(const ChoiceFormat& that) 1.115 +: NumberFormat(that), 1.116 + constructorErrorCode(that.constructorErrorCode), 1.117 + msgPattern(that.msgPattern) 1.118 +{ 1.119 +} 1.120 + 1.121 +// ------------------------------------- 1.122 +// Private constructor that creates a 1.123 +// ChoiceFormat instance based on the 1.124 +// pattern and populates UParseError 1.125 + 1.126 +ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, 1.127 + UParseError& parseError, 1.128 + UErrorCode& status) 1.129 +: constructorErrorCode(status), 1.130 + msgPattern(status) 1.131 +{ 1.132 + applyPattern(newPattern,parseError, status); 1.133 +} 1.134 +// ------------------------------------- 1.135 + 1.136 +UBool 1.137 +ChoiceFormat::operator==(const Format& that) const 1.138 +{ 1.139 + if (this == &that) return TRUE; 1.140 + if (!NumberFormat::operator==(that)) return FALSE; 1.141 + ChoiceFormat& thatAlias = (ChoiceFormat&)that; 1.142 + return msgPattern == thatAlias.msgPattern; 1.143 +} 1.144 + 1.145 +// ------------------------------------- 1.146 +// copy constructor 1.147 + 1.148 +const ChoiceFormat& 1.149 +ChoiceFormat::operator=(const ChoiceFormat& that) 1.150 +{ 1.151 + if (this != &that) { 1.152 + NumberFormat::operator=(that); 1.153 + constructorErrorCode = that.constructorErrorCode; 1.154 + msgPattern = that.msgPattern; 1.155 + } 1.156 + return *this; 1.157 +} 1.158 + 1.159 +// ------------------------------------- 1.160 + 1.161 +ChoiceFormat::~ChoiceFormat() 1.162 +{ 1.163 +} 1.164 + 1.165 +// ------------------------------------- 1.166 + 1.167 +/** 1.168 + * Convert a double value to a string without the overhead of NumberFormat. 1.169 + */ 1.170 +UnicodeString& 1.171 +ChoiceFormat::dtos(double value, 1.172 + UnicodeString& string) 1.173 +{ 1.174 + /* Buffer to contain the digits and any extra formatting stuff. */ 1.175 + char temp[DBL_DIG + 16]; 1.176 + char *itrPtr = temp; 1.177 + char *expPtr; 1.178 + 1.179 + sprintf(temp, "%.*g", DBL_DIG, value); 1.180 + 1.181 + /* Find and convert the decimal point. 1.182 + Using setlocale on some machines will cause sprintf to use a comma for certain locales. 1.183 + */ 1.184 + while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) { 1.185 + itrPtr++; 1.186 + } 1.187 + if (*itrPtr != 0 && *itrPtr != 'e') { 1.188 + /* We reached something that looks like a decimal point. 1.189 + In case someone used setlocale(), which changes the decimal point. */ 1.190 + *itrPtr = '.'; 1.191 + itrPtr++; 1.192 + } 1.193 + /* Search for the exponent */ 1.194 + while (*itrPtr && *itrPtr != 'e') { 1.195 + itrPtr++; 1.196 + } 1.197 + if (*itrPtr == 'e') { 1.198 + itrPtr++; 1.199 + /* Verify the exponent sign */ 1.200 + if (*itrPtr == '+' || *itrPtr == '-') { 1.201 + itrPtr++; 1.202 + } 1.203 + /* Remove leading zeros. You will see this on Windows machines. */ 1.204 + expPtr = itrPtr; 1.205 + while (*itrPtr == '0') { 1.206 + itrPtr++; 1.207 + } 1.208 + if (*itrPtr && expPtr != itrPtr) { 1.209 + /* Shift the exponent without zeros. */ 1.210 + while (*itrPtr) { 1.211 + *(expPtr++) = *(itrPtr++); 1.212 + } 1.213 + // NULL terminate 1.214 + *expPtr = 0; 1.215 + } 1.216 + } 1.217 + 1.218 + string = UnicodeString(temp, -1, US_INV); /* invariant codepage */ 1.219 + return string; 1.220 +} 1.221 + 1.222 +// ------------------------------------- 1.223 +// calls the overloaded applyPattern method. 1.224 + 1.225 +void 1.226 +ChoiceFormat::applyPattern(const UnicodeString& pattern, 1.227 + UErrorCode& status) 1.228 +{ 1.229 + msgPattern.parseChoiceStyle(pattern, NULL, status); 1.230 + constructorErrorCode = status; 1.231 +} 1.232 + 1.233 +// ------------------------------------- 1.234 +// Applies the pattern to this ChoiceFormat instance. 1.235 + 1.236 +void 1.237 +ChoiceFormat::applyPattern(const UnicodeString& pattern, 1.238 + UParseError& parseError, 1.239 + UErrorCode& status) 1.240 +{ 1.241 + msgPattern.parseChoiceStyle(pattern, &parseError, status); 1.242 + constructorErrorCode = status; 1.243 +} 1.244 +// ------------------------------------- 1.245 +// Returns the input pattern string. 1.246 + 1.247 +UnicodeString& 1.248 +ChoiceFormat::toPattern(UnicodeString& result) const 1.249 +{ 1.250 + return result = msgPattern.getPatternString(); 1.251 +} 1.252 + 1.253 +// ------------------------------------- 1.254 +// Sets the limit and format arrays. 1.255 +void 1.256 +ChoiceFormat::setChoices( const double* limits, 1.257 + const UnicodeString* formats, 1.258 + int32_t cnt ) 1.259 +{ 1.260 + UErrorCode errorCode = U_ZERO_ERROR; 1.261 + setChoices(limits, NULL, formats, cnt, errorCode); 1.262 +} 1.263 + 1.264 +// ------------------------------------- 1.265 +// Sets the limit and format arrays. 1.266 +void 1.267 +ChoiceFormat::setChoices( const double* limits, 1.268 + const UBool* closures, 1.269 + const UnicodeString* formats, 1.270 + int32_t cnt ) 1.271 +{ 1.272 + UErrorCode errorCode = U_ZERO_ERROR; 1.273 + setChoices(limits, closures, formats, cnt, errorCode); 1.274 +} 1.275 + 1.276 +void 1.277 +ChoiceFormat::setChoices(const double* limits, 1.278 + const UBool* closures, 1.279 + const UnicodeString* formats, 1.280 + int32_t count, 1.281 + UErrorCode &errorCode) { 1.282 + if (U_FAILURE(errorCode)) { 1.283 + return; 1.284 + } 1.285 + if (limits == NULL || formats == NULL) { 1.286 + errorCode = U_ILLEGAL_ARGUMENT_ERROR; 1.287 + return; 1.288 + } 1.289 + // Reconstruct the original input pattern. 1.290 + // Modified version of the pre-ICU 4.8 toPattern() implementation. 1.291 + UnicodeString result; 1.292 + for (int32_t i = 0; i < count; ++i) { 1.293 + if (i != 0) { 1.294 + result += VERTICAL_BAR; 1.295 + } 1.296 + UnicodeString buf; 1.297 + if (uprv_isPositiveInfinity(limits[i])) { 1.298 + result += INFINITY; 1.299 + } else if (uprv_isNegativeInfinity(limits[i])) { 1.300 + result += MINUS; 1.301 + result += INFINITY; 1.302 + } else { 1.303 + result += dtos(limits[i], buf); 1.304 + } 1.305 + if (closures != NULL && closures[i]) { 1.306 + result += LESS_THAN; 1.307 + } else { 1.308 + result += LESS_EQUAL; 1.309 + } 1.310 + // Append formats[i], using quotes if there are special 1.311 + // characters. Single quotes themselves must be escaped in 1.312 + // either case. 1.313 + const UnicodeString& text = formats[i]; 1.314 + int32_t textLength = text.length(); 1.315 + int32_t nestingLevel = 0; 1.316 + for (int32_t j = 0; j < textLength; ++j) { 1.317 + UChar c = text[j]; 1.318 + if (c == SINGLE_QUOTE && nestingLevel == 0) { 1.319 + // Double each top-level apostrophe. 1.320 + result.append(c); 1.321 + } else if (c == VERTICAL_BAR && nestingLevel == 0) { 1.322 + // Surround each pipe symbol with apostrophes for quoting. 1.323 + // If the next character is an apostrophe, then that will be doubled, 1.324 + // and although the parser will see the apostrophe pairs beginning 1.325 + // and ending one character earlier than our doubling, the result 1.326 + // is as desired. 1.327 + // | -> '|' 1.328 + // |' -> '|''' 1.329 + // |'' -> '|''''' etc. 1.330 + result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE); 1.331 + continue; // Skip the append(c) at the end of the loop body. 1.332 + } else if (c == LEFT_CURLY_BRACE) { 1.333 + ++nestingLevel; 1.334 + } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) { 1.335 + --nestingLevel; 1.336 + } 1.337 + result.append(c); 1.338 + } 1.339 + } 1.340 + // Apply the reconstructed pattern. 1.341 + applyPattern(result, errorCode); 1.342 +} 1.343 + 1.344 +// ------------------------------------- 1.345 +// Gets the limit array. 1.346 + 1.347 +const double* 1.348 +ChoiceFormat::getLimits(int32_t& cnt) const 1.349 +{ 1.350 + cnt = 0; 1.351 + return NULL; 1.352 +} 1.353 + 1.354 +// ------------------------------------- 1.355 +// Gets the closures array. 1.356 + 1.357 +const UBool* 1.358 +ChoiceFormat::getClosures(int32_t& cnt) const 1.359 +{ 1.360 + cnt = 0; 1.361 + return NULL; 1.362 +} 1.363 + 1.364 +// ------------------------------------- 1.365 +// Gets the format array. 1.366 + 1.367 +const UnicodeString* 1.368 +ChoiceFormat::getFormats(int32_t& cnt) const 1.369 +{ 1.370 + cnt = 0; 1.371 + return NULL; 1.372 +} 1.373 + 1.374 +// ------------------------------------- 1.375 +// Formats an int64 number, it's actually formatted as 1.376 +// a double. The returned format string may differ 1.377 +// from the input number because of this. 1.378 + 1.379 +UnicodeString& 1.380 +ChoiceFormat::format(int64_t number, 1.381 + UnicodeString& appendTo, 1.382 + FieldPosition& status) const 1.383 +{ 1.384 + return format((double) number, appendTo, status); 1.385 +} 1.386 + 1.387 +// ------------------------------------- 1.388 +// Formats an int32_t number, it's actually formatted as 1.389 +// a double. 1.390 + 1.391 +UnicodeString& 1.392 +ChoiceFormat::format(int32_t number, 1.393 + UnicodeString& appendTo, 1.394 + FieldPosition& status) const 1.395 +{ 1.396 + return format((double) number, appendTo, status); 1.397 +} 1.398 + 1.399 +// ------------------------------------- 1.400 +// Formats a double number. 1.401 + 1.402 +UnicodeString& 1.403 +ChoiceFormat::format(double number, 1.404 + UnicodeString& appendTo, 1.405 + FieldPosition& /*pos*/) const 1.406 +{ 1.407 + if (msgPattern.countParts() == 0) { 1.408 + // No pattern was applied, or it failed. 1.409 + return appendTo; 1.410 + } 1.411 + // Get the appropriate sub-message. 1.412 + int32_t msgStart = findSubMessage(msgPattern, 0, number); 1.413 + if (!MessageImpl::jdkAposMode(msgPattern)) { 1.414 + int32_t patternStart = msgPattern.getPart(msgStart).getLimit(); 1.415 + int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart); 1.416 + appendTo.append(msgPattern.getPatternString(), 1.417 + patternStart, 1.418 + msgPattern.getPatternIndex(msgLimit) - patternStart); 1.419 + return appendTo; 1.420 + } 1.421 + // JDK compatibility mode: Remove SKIP_SYNTAX. 1.422 + return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo); 1.423 +} 1.424 + 1.425 +int32_t 1.426 +ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) { 1.427 + int32_t count = pattern.countParts(); 1.428 + int32_t msgStart; 1.429 + // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples 1.430 + // until ARG_LIMIT or end of choice-only pattern. 1.431 + // Ignore the first number and selector and start the loop on the first message. 1.432 + partIndex += 2; 1.433 + for (;;) { 1.434 + // Skip but remember the current sub-message. 1.435 + msgStart = partIndex; 1.436 + partIndex = pattern.getLimitPartIndex(partIndex); 1.437 + if (++partIndex >= count) { 1.438 + // Reached the end of the choice-only pattern. 1.439 + // Return with the last sub-message. 1.440 + break; 1.441 + } 1.442 + const MessagePattern::Part &part = pattern.getPart(partIndex++); 1.443 + UMessagePatternPartType type = part.getType(); 1.444 + if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) { 1.445 + // Reached the end of the ChoiceFormat style. 1.446 + // Return with the last sub-message. 1.447 + break; 1.448 + } 1.449 + // part is an ARG_INT or ARG_DOUBLE 1.450 + U_ASSERT(MessagePattern::Part::hasNumericValue(type)); 1.451 + double boundary = pattern.getNumericValue(part); 1.452 + // Fetch the ARG_SELECTOR character. 1.453 + int32_t selectorIndex = pattern.getPatternIndex(partIndex++); 1.454 + UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex); 1.455 + if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) { 1.456 + // The number is in the interval between the previous boundary and the current one. 1.457 + // Return with the sub-message between them. 1.458 + // The !(a>b) and !(a>=b) comparisons are equivalent to 1.459 + // (a<=b) and (a<b) except they "catch" NaN. 1.460 + break; 1.461 + } 1.462 + } 1.463 + return msgStart; 1.464 +} 1.465 + 1.466 +// ------------------------------------- 1.467 +// Formats an array of objects. Checks if the data type of the objects 1.468 +// to get the right value for formatting. 1.469 + 1.470 +UnicodeString& 1.471 +ChoiceFormat::format(const Formattable* objs, 1.472 + int32_t cnt, 1.473 + UnicodeString& appendTo, 1.474 + FieldPosition& pos, 1.475 + UErrorCode& status) const 1.476 +{ 1.477 + if(cnt < 0) { 1.478 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.479 + return appendTo; 1.480 + } 1.481 + if (msgPattern.countParts() == 0) { 1.482 + status = U_INVALID_STATE_ERROR; 1.483 + return appendTo; 1.484 + } 1.485 + 1.486 + for (int32_t i = 0; i < cnt; i++) { 1.487 + double objDouble = objs[i].getDouble(status); 1.488 + if (U_SUCCESS(status)) { 1.489 + format(objDouble, appendTo, pos); 1.490 + } 1.491 + } 1.492 + 1.493 + return appendTo; 1.494 +} 1.495 + 1.496 +// ------------------------------------- 1.497 + 1.498 +void 1.499 +ChoiceFormat::parse(const UnicodeString& text, 1.500 + Formattable& result, 1.501 + ParsePosition& pos) const 1.502 +{ 1.503 + result.setDouble(parseArgument(msgPattern, 0, text, pos)); 1.504 +} 1.505 + 1.506 +double 1.507 +ChoiceFormat::parseArgument( 1.508 + const MessagePattern &pattern, int32_t partIndex, 1.509 + const UnicodeString &source, ParsePosition &pos) { 1.510 + // find the best number (defined as the one with the longest parse) 1.511 + int32_t start = pos.getIndex(); 1.512 + int32_t furthest = start; 1.513 + double bestNumber = uprv_getNaN(); 1.514 + double tempNumber = 0.0; 1.515 + int32_t count = pattern.countParts(); 1.516 + while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) { 1.517 + tempNumber = pattern.getNumericValue(pattern.getPart(partIndex)); 1.518 + partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR 1.519 + int32_t msgLimit = pattern.getLimitPartIndex(partIndex); 1.520 + int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start); 1.521 + if (len >= 0) { 1.522 + int32_t newIndex = start + len; 1.523 + if (newIndex > furthest) { 1.524 + furthest = newIndex; 1.525 + bestNumber = tempNumber; 1.526 + if (furthest == source.length()) { 1.527 + break; 1.528 + } 1.529 + } 1.530 + } 1.531 + partIndex = msgLimit + 1; 1.532 + } 1.533 + if (furthest == start) { 1.534 + pos.setErrorIndex(start); 1.535 + } else { 1.536 + pos.setIndex(furthest); 1.537 + } 1.538 + return bestNumber; 1.539 +} 1.540 + 1.541 +int32_t 1.542 +ChoiceFormat::matchStringUntilLimitPart( 1.543 + const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex, 1.544 + const UnicodeString &source, int32_t sourceOffset) { 1.545 + int32_t matchingSourceLength = 0; 1.546 + const UnicodeString &msgString = pattern.getPatternString(); 1.547 + int32_t prevIndex = pattern.getPart(partIndex).getLimit(); 1.548 + for (;;) { 1.549 + const MessagePattern::Part &part = pattern.getPart(++partIndex); 1.550 + if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { 1.551 + int32_t index = part.getIndex(); 1.552 + int32_t length = index - prevIndex; 1.553 + if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) { 1.554 + return -1; // mismatch 1.555 + } 1.556 + matchingSourceLength += length; 1.557 + if (partIndex == limitPartIndex) { 1.558 + return matchingSourceLength; 1.559 + } 1.560 + prevIndex = part.getLimit(); // SKIP_SYNTAX 1.561 + } 1.562 + } 1.563 +} 1.564 + 1.565 +// ------------------------------------- 1.566 + 1.567 +Format* 1.568 +ChoiceFormat::clone() const 1.569 +{ 1.570 + ChoiceFormat *aCopy = new ChoiceFormat(*this); 1.571 + return aCopy; 1.572 +} 1.573 + 1.574 +U_NAMESPACE_END 1.575 + 1.576 +#endif /* #if !UCONFIG_NO_FORMATTING */ 1.577 + 1.578 +//eof