intl/icu/source/i18n/choicfmt.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2 *******************************************************************************
     3 * Copyright (C) 1997-2013, International Business Machines Corporation and    *
     4 * others. All Rights Reserved.                                                *
     5 *******************************************************************************
     6 *
     7 * File CHOICFMT.CPP
     8 *
     9 * Modification History:
    10 *
    11 *   Date        Name        Description
    12 *   02/19/97    aliu        Converted from java.
    13 *   03/20/97    helena      Finished first cut of implementation and got rid 
    14 *                           of nextDouble/previousDouble and replaced with
    15 *                           boolean array.
    16 *   4/10/97     aliu        Clean up.  Modified to work on AIX.
    17 *   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include 
    18 *                           wchar.h.
    19 *   07/09/97    helena      Made ParsePosition into a class.
    20 *   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
    21 *   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
    22 *   02/22/99    stephen     Removed character literals for EBCDIC safety
    23 ********************************************************************************
    24 */
    26 #include "unicode/utypes.h"
    28 #if !UCONFIG_NO_FORMATTING
    30 #include "unicode/choicfmt.h"
    31 #include "unicode/numfmt.h"
    32 #include "unicode/locid.h"
    33 #include "cpputils.h"
    34 #include "cstring.h"
    35 #include "messageimpl.h"
    36 #include "putilimp.h"
    37 #include "uassert.h"
    38 #include <stdio.h>
    39 #include <float.h>
    41 // *****************************************************************************
    42 // class ChoiceFormat
    43 // *****************************************************************************
    45 U_NAMESPACE_BEGIN
    47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
    49 // Special characters used by ChoiceFormat.  There are two characters
    50 // used interchangeably to indicate <=.  Either is parsed, but only
    51 // LESS_EQUAL is generated by toPattern().
    52 #define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
    53 #define LESS_THAN    ((UChar)0x003C)   /*<*/
    54 #define LESS_EQUAL   ((UChar)0x0023)   /*#*/
    55 #define LESS_EQUAL2  ((UChar)0x2264)
    56 #define VERTICAL_BAR ((UChar)0x007C)   /*|*/
    57 #define MINUS        ((UChar)0x002D)   /*-*/
    59 static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
    60 static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/
    62 #ifdef INFINITY
    63 #undef INFINITY
    64 #endif
    65 #define INFINITY     ((UChar)0x221E)
    67 //static const UChar gPositiveInfinity[] = {INFINITY, 0};
    68 //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
    69 #define POSITIVE_INF_STRLEN 1
    70 #define NEGATIVE_INF_STRLEN 2
    72 // -------------------------------------
    73 // Creates a ChoiceFormat instance based on the pattern.
    75 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
    76                            UErrorCode& status)
    77 : constructorErrorCode(status),
    78   msgPattern(status)
    79 {
    80     applyPattern(newPattern, status);
    81 }
    83 // -------------------------------------
    84 // Creates a ChoiceFormat instance with the limit array and 
    85 // format strings for each limit.
    87 ChoiceFormat::ChoiceFormat(const double* limits, 
    88                            const UnicodeString* formats, 
    89                            int32_t cnt )
    90 : constructorErrorCode(U_ZERO_ERROR),
    91   msgPattern(constructorErrorCode)
    92 {
    93     setChoices(limits, NULL, formats, cnt, constructorErrorCode);
    94 }
    96 // -------------------------------------
    98 ChoiceFormat::ChoiceFormat(const double* limits, 
    99                            const UBool* closures,
   100                            const UnicodeString* formats, 
   101                            int32_t cnt )
   102 : constructorErrorCode(U_ZERO_ERROR),
   103   msgPattern(constructorErrorCode)
   104 {
   105     setChoices(limits, closures, formats, cnt, constructorErrorCode);
   106 }
   108 // -------------------------------------
   109 // copy constructor
   111 ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that) 
   112 : NumberFormat(that),
   113   constructorErrorCode(that.constructorErrorCode),
   114   msgPattern(that.msgPattern)
   115 {
   116 }
   118 // -------------------------------------
   119 // Private constructor that creates a 
   120 // ChoiceFormat instance based on the 
   121 // pattern and populates UParseError
   123 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
   124                            UParseError& parseError,
   125                            UErrorCode& status)
   126 : constructorErrorCode(status),
   127   msgPattern(status)
   128 {
   129     applyPattern(newPattern,parseError, status);
   130 }
   131 // -------------------------------------
   133 UBool
   134 ChoiceFormat::operator==(const Format& that) const
   135 {
   136     if (this == &that) return TRUE;
   137     if (!NumberFormat::operator==(that)) return FALSE;
   138     ChoiceFormat& thatAlias = (ChoiceFormat&)that;
   139     return msgPattern == thatAlias.msgPattern;
   140 }
   142 // -------------------------------------
   143 // copy constructor
   145 const ChoiceFormat&
   146 ChoiceFormat::operator=(const   ChoiceFormat& that)
   147 {
   148     if (this != &that) {
   149         NumberFormat::operator=(that);
   150         constructorErrorCode = that.constructorErrorCode;
   151         msgPattern = that.msgPattern;
   152     }
   153     return *this;
   154 }
   156 // -------------------------------------
   158 ChoiceFormat::~ChoiceFormat()
   159 {
   160 }
   162 // -------------------------------------
   164 /**
   165  * Convert a double value to a string without the overhead of NumberFormat.
   166  */
   167 UnicodeString&
   168 ChoiceFormat::dtos(double value,
   169                    UnicodeString& string)
   170 {
   171     /* Buffer to contain the digits and any extra formatting stuff. */
   172     char temp[DBL_DIG + 16];
   173     char *itrPtr = temp;
   174     char *expPtr;
   176     sprintf(temp, "%.*g", DBL_DIG, value);
   178     /* Find and convert the decimal point.
   179        Using setlocale on some machines will cause sprintf to use a comma for certain locales.
   180     */
   181     while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
   182         itrPtr++;
   183     }
   184     if (*itrPtr != 0 && *itrPtr != 'e') {
   185         /* We reached something that looks like a decimal point.
   186         In case someone used setlocale(), which changes the decimal point. */
   187         *itrPtr = '.';
   188         itrPtr++;
   189     }
   190     /* Search for the exponent */
   191     while (*itrPtr && *itrPtr != 'e') {
   192         itrPtr++;
   193     }
   194     if (*itrPtr == 'e') {
   195         itrPtr++;
   196         /* Verify the exponent sign */
   197         if (*itrPtr == '+' || *itrPtr == '-') {
   198             itrPtr++;
   199         }
   200         /* Remove leading zeros. You will see this on Windows machines. */
   201         expPtr = itrPtr;
   202         while (*itrPtr == '0') {
   203             itrPtr++;
   204         }
   205         if (*itrPtr && expPtr != itrPtr) {
   206             /* Shift the exponent without zeros. */
   207             while (*itrPtr) {
   208                 *(expPtr++)  = *(itrPtr++);
   209             }
   210             // NULL terminate
   211             *expPtr = 0;
   212         }
   213     }
   215     string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
   216     return string;
   217 }
   219 // -------------------------------------
   220 // calls the overloaded applyPattern method.
   222 void
   223 ChoiceFormat::applyPattern(const UnicodeString& pattern,
   224                            UErrorCode& status)
   225 {
   226     msgPattern.parseChoiceStyle(pattern, NULL, status);
   227     constructorErrorCode = status;
   228 }
   230 // -------------------------------------
   231 // Applies the pattern to this ChoiceFormat instance.
   233 void
   234 ChoiceFormat::applyPattern(const UnicodeString& pattern,
   235                            UParseError& parseError,
   236                            UErrorCode& status)
   237 {
   238     msgPattern.parseChoiceStyle(pattern, &parseError, status);
   239     constructorErrorCode = status;
   240 }
   241 // -------------------------------------
   242 // Returns the input pattern string.
   244 UnicodeString&
   245 ChoiceFormat::toPattern(UnicodeString& result) const
   246 {
   247     return result = msgPattern.getPatternString();
   248 }
   250 // -------------------------------------
   251 // Sets the limit and format arrays. 
   252 void
   253 ChoiceFormat::setChoices(  const double* limits, 
   254                            const UnicodeString* formats, 
   255                            int32_t cnt )
   256 {
   257     UErrorCode errorCode = U_ZERO_ERROR;
   258     setChoices(limits, NULL, formats, cnt, errorCode);
   259 }
   261 // -------------------------------------
   262 // Sets the limit and format arrays. 
   263 void
   264 ChoiceFormat::setChoices(  const double* limits, 
   265                            const UBool* closures,
   266                            const UnicodeString* formats, 
   267                            int32_t cnt )
   268 {
   269     UErrorCode errorCode = U_ZERO_ERROR;
   270     setChoices(limits, closures, formats, cnt, errorCode);
   271 }
   273 void
   274 ChoiceFormat::setChoices(const double* limits,
   275                          const UBool* closures,
   276                          const UnicodeString* formats,
   277                          int32_t count,
   278                          UErrorCode &errorCode) {
   279     if (U_FAILURE(errorCode)) {
   280         return;
   281     }
   282     if (limits == NULL || formats == NULL) {
   283         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   284         return;
   285     }
   286     // Reconstruct the original input pattern.
   287     // Modified version of the pre-ICU 4.8 toPattern() implementation.
   288     UnicodeString result;
   289     for (int32_t i = 0; i < count; ++i) {
   290         if (i != 0) {
   291             result += VERTICAL_BAR;
   292         }
   293         UnicodeString buf;
   294         if (uprv_isPositiveInfinity(limits[i])) {
   295             result += INFINITY;
   296         } else if (uprv_isNegativeInfinity(limits[i])) {
   297             result += MINUS;
   298             result += INFINITY;
   299         } else {
   300             result += dtos(limits[i], buf);
   301         }
   302         if (closures != NULL && closures[i]) {
   303             result += LESS_THAN;
   304         } else {
   305             result += LESS_EQUAL;
   306         }
   307         // Append formats[i], using quotes if there are special
   308         // characters.  Single quotes themselves must be escaped in
   309         // either case.
   310         const UnicodeString& text = formats[i];
   311         int32_t textLength = text.length();
   312         int32_t nestingLevel = 0;
   313         for (int32_t j = 0; j < textLength; ++j) {
   314             UChar c = text[j];
   315             if (c == SINGLE_QUOTE && nestingLevel == 0) {
   316                 // Double each top-level apostrophe.
   317                 result.append(c);
   318             } else if (c == VERTICAL_BAR && nestingLevel == 0) {
   319                 // Surround each pipe symbol with apostrophes for quoting.
   320                 // If the next character is an apostrophe, then that will be doubled,
   321                 // and although the parser will see the apostrophe pairs beginning
   322                 // and ending one character earlier than our doubling, the result
   323                 // is as desired.
   324                 //   | -> '|'
   325                 //   |' -> '|'''
   326                 //   |'' -> '|''''' etc.
   327                 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
   328                 continue;  // Skip the append(c) at the end of the loop body.
   329             } else if (c == LEFT_CURLY_BRACE) {
   330                 ++nestingLevel;
   331             } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
   332                 --nestingLevel;
   333             }
   334             result.append(c);
   335         }
   336     }
   337     // Apply the reconstructed pattern.
   338     applyPattern(result, errorCode);
   339 }
   341 // -------------------------------------
   342 // Gets the limit array.
   344 const double*
   345 ChoiceFormat::getLimits(int32_t& cnt) const 
   346 {
   347     cnt = 0;
   348     return NULL;
   349 }
   351 // -------------------------------------
   352 // Gets the closures array.
   354 const UBool*
   355 ChoiceFormat::getClosures(int32_t& cnt) const 
   356 {
   357     cnt = 0;
   358     return NULL;
   359 }
   361 // -------------------------------------
   362 // Gets the format array.
   364 const UnicodeString*
   365 ChoiceFormat::getFormats(int32_t& cnt) const
   366 {
   367     cnt = 0;
   368     return NULL;
   369 }
   371 // -------------------------------------
   372 // Formats an int64 number, it's actually formatted as
   373 // a double.  The returned format string may differ
   374 // from the input number because of this.
   376 UnicodeString&
   377 ChoiceFormat::format(int64_t number, 
   378                      UnicodeString& appendTo, 
   379                      FieldPosition& status) const
   380 {
   381     return format((double) number, appendTo, status);
   382 }
   384 // -------------------------------------
   385 // Formats an int32_t number, it's actually formatted as
   386 // a double.
   388 UnicodeString&
   389 ChoiceFormat::format(int32_t number, 
   390                      UnicodeString& appendTo, 
   391                      FieldPosition& status) const
   392 {
   393     return format((double) number, appendTo, status);
   394 }
   396 // -------------------------------------
   397 // Formats a double number.
   399 UnicodeString&
   400 ChoiceFormat::format(double number, 
   401                      UnicodeString& appendTo, 
   402                      FieldPosition& /*pos*/) const
   403 {
   404     if (msgPattern.countParts() == 0) {
   405         // No pattern was applied, or it failed.
   406         return appendTo;
   407     }
   408     // Get the appropriate sub-message.
   409     int32_t msgStart = findSubMessage(msgPattern, 0, number);
   410     if (!MessageImpl::jdkAposMode(msgPattern)) {
   411         int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
   412         int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
   413         appendTo.append(msgPattern.getPatternString(),
   414                         patternStart,
   415                         msgPattern.getPatternIndex(msgLimit) - patternStart);
   416         return appendTo;
   417     }
   418     // JDK compatibility mode: Remove SKIP_SYNTAX.
   419     return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
   420 }
   422 int32_t
   423 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
   424     int32_t count = pattern.countParts();
   425     int32_t msgStart;
   426     // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
   427     // until ARG_LIMIT or end of choice-only pattern.
   428     // Ignore the first number and selector and start the loop on the first message.
   429     partIndex += 2;
   430     for (;;) {
   431         // Skip but remember the current sub-message.
   432         msgStart = partIndex;
   433         partIndex = pattern.getLimitPartIndex(partIndex);
   434         if (++partIndex >= count) {
   435             // Reached the end of the choice-only pattern.
   436             // Return with the last sub-message.
   437             break;
   438         }
   439         const MessagePattern::Part &part = pattern.getPart(partIndex++);
   440         UMessagePatternPartType type = part.getType();
   441         if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
   442             // Reached the end of the ChoiceFormat style.
   443             // Return with the last sub-message.
   444             break;
   445         }
   446         // part is an ARG_INT or ARG_DOUBLE
   447         U_ASSERT(MessagePattern::Part::hasNumericValue(type));
   448         double boundary = pattern.getNumericValue(part);
   449         // Fetch the ARG_SELECTOR character.
   450         int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
   451         UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
   452         if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
   453             // The number is in the interval between the previous boundary and the current one.
   454             // Return with the sub-message between them.
   455             // The !(a>b) and !(a>=b) comparisons are equivalent to
   456             // (a<=b) and (a<b) except they "catch" NaN.
   457             break;
   458         }
   459     }
   460     return msgStart;
   461 }
   463 // -------------------------------------
   464 // Formats an array of objects. Checks if the data type of the objects
   465 // to get the right value for formatting.  
   467 UnicodeString&
   468 ChoiceFormat::format(const Formattable* objs,
   469                      int32_t cnt,
   470                      UnicodeString& appendTo,
   471                      FieldPosition& pos,
   472                      UErrorCode& status) const
   473 {
   474     if(cnt < 0) {
   475         status = U_ILLEGAL_ARGUMENT_ERROR;
   476         return appendTo;
   477     }
   478     if (msgPattern.countParts() == 0) {
   479         status = U_INVALID_STATE_ERROR;
   480         return appendTo;
   481     }
   483     for (int32_t i = 0; i < cnt; i++) {
   484         double objDouble = objs[i].getDouble(status);
   485         if (U_SUCCESS(status)) {
   486             format(objDouble, appendTo, pos);
   487         }
   488     }
   490     return appendTo;
   491 }
   493 // -------------------------------------
   495 void
   496 ChoiceFormat::parse(const UnicodeString& text, 
   497                     Formattable& result,
   498                     ParsePosition& pos) const
   499 {
   500     result.setDouble(parseArgument(msgPattern, 0, text, pos));
   501 }
   503 double
   504 ChoiceFormat::parseArgument(
   505         const MessagePattern &pattern, int32_t partIndex,
   506         const UnicodeString &source, ParsePosition &pos) {
   507     // find the best number (defined as the one with the longest parse)
   508     int32_t start = pos.getIndex();
   509     int32_t furthest = start;
   510     double bestNumber = uprv_getNaN();
   511     double tempNumber = 0.0;
   512     int32_t count = pattern.countParts();
   513     while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
   514         tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
   515         partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
   516         int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
   517         int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
   518         if (len >= 0) {
   519             int32_t newIndex = start + len;
   520             if (newIndex > furthest) {
   521                 furthest = newIndex;
   522                 bestNumber = tempNumber;
   523                 if (furthest == source.length()) {
   524                     break;
   525                 }
   526             }
   527         }
   528         partIndex = msgLimit + 1;
   529     }
   530     if (furthest == start) {
   531         pos.setErrorIndex(start);
   532     } else {
   533         pos.setIndex(furthest);
   534     }
   535     return bestNumber;
   536 }
   538 int32_t
   539 ChoiceFormat::matchStringUntilLimitPart(
   540         const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
   541         const UnicodeString &source, int32_t sourceOffset) {
   542     int32_t matchingSourceLength = 0;
   543     const UnicodeString &msgString = pattern.getPatternString();
   544     int32_t prevIndex = pattern.getPart(partIndex).getLimit();
   545     for (;;) {
   546         const MessagePattern::Part &part = pattern.getPart(++partIndex);
   547         if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
   548             int32_t index = part.getIndex();
   549             int32_t length = index - prevIndex;
   550             if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
   551                 return -1;  // mismatch
   552             }
   553             matchingSourceLength += length;
   554             if (partIndex == limitPartIndex) {
   555                 return matchingSourceLength;
   556             }
   557             prevIndex = part.getLimit();  // SKIP_SYNTAX
   558         }
   559     }
   560 }
   562 // -------------------------------------
   564 Format*
   565 ChoiceFormat::clone() const
   566 {
   567     ChoiceFormat *aCopy = new ChoiceFormat(*this);
   568     return aCopy;
   569 }
   571 U_NAMESPACE_END
   573 #endif /* #if !UCONFIG_NO_FORMATTING */
   575 //eof

mercurial