The Tor Browser: intl/icu/source/i18n/choicfmt.cpp@b8a032363ba2

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*

     2 *******************************************************************************

     3 * Copyright (C) 1997-2013, International Business Machines Corporation and    *

     4 * others. All Rights Reserved.                                                *

     5 *******************************************************************************

6 *

     7 * File CHOICFMT.CPP

8 *

     9 * Modification History:

    10 *

    11 *   Date        Name        Description

    12 *   02/19/97    aliu        Converted from java.

    13 *   03/20/97    helena      Finished first cut of implementation and got rid

    14 *                           of nextDouble/previousDouble and replaced with

    15 *                           boolean array.

    16 *   4/10/97     aliu        Clean up.  Modified to work on AIX.

    17 *   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include

    18 *                           wchar.h.

    19 *   07/09/97    helena      Made ParsePosition into a class.

    20 *   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'

    21 *   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)

    22 *   02/22/99    stephen     Removed character literals for EBCDIC safety

    23 ********************************************************************************

    24 */

    26 #include "unicode/utypes.h"

    28 #if !UCONFIG_NO_FORMATTING

    30 #include "unicode/choicfmt.h"

    31 #include "unicode/numfmt.h"

    32 #include "unicode/locid.h"

    33 #include "cpputils.h"

    34 #include "cstring.h"

    35 #include "messageimpl.h"

    36 #include "putilimp.h"

    37 #include "uassert.h"

    38 #include <stdio.h>

    39 #include <float.h>

    41 // *****************************************************************************

    42 // class ChoiceFormat

    43 // *****************************************************************************

    45 U_NAMESPACE_BEGIN

    47 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)

    49 // Special characters used by ChoiceFormat.  There are two characters

    50 // used interchangeably to indicate <=.  Either is parsed, but only

    51 // LESS_EQUAL is generated by setChoices(); toPattern() returns the applied pattern as-is.

    52 #define SINGLE_QUOTE ((UChar)0x0027)   /*'*/

    53 #define LESS_THAN    ((UChar)0x003C)   /*<*/

    54 #define LESS_EQUAL   ((UChar)0x0023)   /*#*/

    55 #define LESS_EQUAL2  ((UChar)0x2264)   /* U+2264, the alternative "<=" character */

    56 #define VERTICAL_BAR ((UChar)0x007C)   /*|*/

    57 #define MINUS        ((UChar)0x002D)   /*-*/

    59 static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/

    60 static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/

    62 #ifdef INFINITY   /* drop any INFINITY macro that is already defined so it can be redefined below */

    63 #undef INFINITY

    64 #endif

    65 #define INFINITY     ((UChar)0x221E)   /* U+221E, the infinity sign used for infinite limits in patterns */

    67 //static const UChar gPositiveInfinity[] = {INFINITY, 0};

    68 //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};

    69 #define POSITIVE_INF_STRLEN 1   /* length of the positive-infinity text: INFINITY */

    70 #define NEGATIVE_INF_STRLEN 2   /* length of the negative-infinity text: MINUS, INFINITY */

    72 // -------------------------------------

    73 // Creates a ChoiceFormat instance based on the pattern.

    75 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,

    76                            UErrorCode& status)

    77 : constructorErrorCode(status),

    78   msgPattern(status)

    79 {

    80     applyPattern(newPattern, status);

    81 }

    83 // -------------------------------------

    84 // Creates a ChoiceFormat instance with the limit array and

    85 // format strings for each limit.

    87 ChoiceFormat::ChoiceFormat(const double* limits,

    88                            const UnicodeString* formats,

    89                            int32_t cnt )

    90 : constructorErrorCode(U_ZERO_ERROR),

    91   msgPattern(constructorErrorCode)

    92 {

    93     setChoices(limits, NULL, formats, cnt, constructorErrorCode);

    94 }

    96 // -------------------------------------

    98 ChoiceFormat::ChoiceFormat(const double* limits,

    99                            const UBool* closures,

   100                            const UnicodeString* formats,

   101                            int32_t cnt )

   102 : constructorErrorCode(U_ZERO_ERROR),

   103   msgPattern(constructorErrorCode)

   104 {

   105     setChoices(limits, closures, formats, cnt, constructorErrorCode);

   106 }

   108 // -------------------------------------

   109 // copy constructor

   111 ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that)

   112 : NumberFormat(that),

   113   constructorErrorCode(that.constructorErrorCode),

   114   msgPattern(that.msgPattern)

   115 {

   116 }

   118 // -------------------------------------

   119 // Private constructor that creates a

   120 // ChoiceFormat instance based on the

   121 // pattern and populates UParseError

   123 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,

   124                            UParseError& parseError,

   125                            UErrorCode& status)

   126 : constructorErrorCode(status),

   127   msgPattern(status)

   128 {

   129     applyPattern(newPattern,parseError, status);

   130 }

   131 // -------------------------------------

   133 UBool

   134 ChoiceFormat::operator==(const Format& that) const

   135 {

   136     if (this == &that) return TRUE;

   137     if (!NumberFormat::operator==(that)) return FALSE;

   138     ChoiceFormat& thatAlias = (ChoiceFormat&)that;

   139     return msgPattern == thatAlias.msgPattern;

   140 }

   142 // -------------------------------------

   143 // copy constructor

   145 const ChoiceFormat&

   146 ChoiceFormat::operator=(const   ChoiceFormat& that)

   147 {

   148     if (this != &that) {

   149         NumberFormat::operator=(that);

   150         constructorErrorCode = that.constructorErrorCode;

   151         msgPattern = that.msgPattern;

   152     }

   153     return *this;

   154 }

   156 // -------------------------------------

   158 ChoiceFormat::~ChoiceFormat()

   159 {

   160 }

   162 // -------------------------------------

   164 /**

   165  * Convert a double value to a string without the overhead of NumberFormat.

   166  */

   167 UnicodeString&

   168 ChoiceFormat::dtos(double value,

   169                    UnicodeString& string)

   170 {

   171     /* Buffer to contain the digits and any extra formatting stuff. */

   172     char temp[DBL_DIG + 16];

   173     char *itrPtr = temp;

   174     char *expPtr;

   176     sprintf(temp, "%.*g", DBL_DIG, value);

   178     /* Find and convert the decimal point.

   179        Using setlocale on some machines will cause sprintf to use a comma for certain locales.

   180     */

   181     while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {

   182         itrPtr++;

   183     }

   184     if (*itrPtr != 0 && *itrPtr != 'e') {

   185         /* We reached something that looks like a decimal point.

   186         In case someone used setlocale(), which changes the decimal point. */

   187         *itrPtr = '.';

   188         itrPtr++;

   189     }

   190     /* Search for the exponent */

   191     while (*itrPtr && *itrPtr != 'e') {

   192         itrPtr++;

   193     }

   194     if (*itrPtr == 'e') {

   195         itrPtr++;

   196         /* Verify the exponent sign */

   197         if (*itrPtr == '+' || *itrPtr == '-') {

   198             itrPtr++;

   199         }

   200         /* Remove leading zeros. You will see this on Windows machines. */

   201         expPtr = itrPtr;

   202         while (*itrPtr == '0') {

   203             itrPtr++;

   204         }

   205         if (*itrPtr && expPtr != itrPtr) {

   206             /* Shift the exponent without zeros. */

   207             while (*itrPtr) {

   208                 *(expPtr++)  = *(itrPtr++);

   209             }

   210             // NULL terminate

   211             *expPtr = 0;

   212         }

   213     }

   215     string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */

   216     return string;

   217 }

   219 // -------------------------------------

   220 // calls the overloaded applyPattern method.

   222 void

   223 ChoiceFormat::applyPattern(const UnicodeString& pattern,

   224                            UErrorCode& status)

   225 {

   226     msgPattern.parseChoiceStyle(pattern, NULL, status);

   227     constructorErrorCode = status;

   228 }

   230 // -------------------------------------

   231 // Applies the pattern to this ChoiceFormat instance.

   233 void

   234 ChoiceFormat::applyPattern(const UnicodeString& pattern,

   235                            UParseError& parseError,

   236                            UErrorCode& status)

   237 {

   238     msgPattern.parseChoiceStyle(pattern, &parseError, status);

   239     constructorErrorCode = status;

   240 }

   241 // -------------------------------------

   242 // Returns the input pattern string.

   244 UnicodeString&

   245 ChoiceFormat::toPattern(UnicodeString& result) const

   246 {

   247     return result = msgPattern.getPatternString();

   248 }

   250 // -------------------------------------

   251 // Sets the limit and format arrays.

   252 void

   253 ChoiceFormat::setChoices(  const double* limits,

   254                            const UnicodeString* formats,

   255                            int32_t cnt )

   256 {

   257     UErrorCode errorCode = U_ZERO_ERROR;

   258     setChoices(limits, NULL, formats, cnt, errorCode);

   259 }

   261 // -------------------------------------

   262 // Sets the limit and format arrays.

   263 void

   264 ChoiceFormat::setChoices(  const double* limits,

   265                            const UBool* closures,

   266                            const UnicodeString* formats,

   267                            int32_t cnt )

   268 {

   269     UErrorCode errorCode = U_ZERO_ERROR;

   270     setChoices(limits, closures, formats, cnt, errorCode);

   271 }

   273 void

   274 ChoiceFormat::setChoices(const double* limits,

   275                          const UBool* closures,

   276                          const UnicodeString* formats,

   277                          int32_t count,

   278                          UErrorCode &errorCode) {

   279     if (U_FAILURE(errorCode)) {

   280         return;

   281     }

   282     if (limits == NULL || formats == NULL) {

   283         errorCode = U_ILLEGAL_ARGUMENT_ERROR;

   284         return;

   285     }

   286     // Reconstruct the original input pattern.

   287     // Modified version of the pre-ICU 4.8 toPattern() implementation.

   288     UnicodeString result;

   289     for (int32_t i = 0; i < count; ++i) {

   290         if (i != 0) {

   291             result += VERTICAL_BAR;

   292         }

   293         UnicodeString buf;

   294         if (uprv_isPositiveInfinity(limits[i])) {

   295             result += INFINITY;

   296         } else if (uprv_isNegativeInfinity(limits[i])) {

   297             result += MINUS;

   298             result += INFINITY;

   299         } else {

   300             result += dtos(limits[i], buf);

   301         }

   302         if (closures != NULL && closures[i]) {

   303             result += LESS_THAN;

   304         } else {

   305             result += LESS_EQUAL;

   306         }

   307         // Append formats[i], using quotes if there are special

   308         // characters.  Single quotes themselves must be escaped in

   309         // either case.

   310         const UnicodeString& text = formats[i];

   311         int32_t textLength = text.length();

   312         int32_t nestingLevel = 0;

   313         for (int32_t j = 0; j < textLength; ++j) {

   314             UChar c = text[j];

   315             if (c == SINGLE_QUOTE && nestingLevel == 0) {

   316                 // Double each top-level apostrophe.

   317                 result.append(c);

   318             } else if (c == VERTICAL_BAR && nestingLevel == 0) {

   319                 // Surround each pipe symbol with apostrophes for quoting.

   320                 // If the next character is an apostrophe, then that will be doubled,

   321                 // and although the parser will see the apostrophe pairs beginning

   322                 // and ending one character earlier than our doubling, the result

   323                 // is as desired.

   324                 //   | -> '|'

   325                 //   |' -> '|'''

   326                 //   |'' -> '|''''' etc.

   327                 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);

   328                 continue;  // Skip the append(c) at the end of the loop body.

   329             } else if (c == LEFT_CURLY_BRACE) {

   330                 ++nestingLevel;

   331             } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {

   332                 --nestingLevel;

   333             }

   334             result.append(c);

   335         }

   336     }

   337     // Apply the reconstructed pattern.

   338     applyPattern(result, errorCode);

   339 }

   341 // -------------------------------------

   342 // Gets the limit array.

   344 const double*

   345 ChoiceFormat::getLimits(int32_t& cnt) const

   346 {

   347     cnt = 0;

   348     return NULL;

   349 }

   351 // -------------------------------------

   352 // Gets the closures array.

   354 const UBool*

   355 ChoiceFormat::getClosures(int32_t& cnt) const

   356 {

   357     cnt = 0;

   358     return NULL;

   359 }

   361 // -------------------------------------

   362 // Gets the format array.

   364 const UnicodeString*

   365 ChoiceFormat::getFormats(int32_t& cnt) const

   366 {

   367     cnt = 0;

   368     return NULL;

   369 }

   371 // -------------------------------------

   372 // Formats an int64 number, it's actually formatted as

   373 // a double.  The returned format string may differ

   374 // from the input number because of this.

   376 UnicodeString&

   377 ChoiceFormat::format(int64_t number,

   378                      UnicodeString& appendTo,

   379                      FieldPosition& status) const

   380 {

   381     return format((double) number, appendTo, status);

   382 }

   384 // -------------------------------------

   385 // Formats an int32_t number, it's actually formatted as

   386 // a double.

   388 UnicodeString&

   389 ChoiceFormat::format(int32_t number,

   390                      UnicodeString& appendTo,

   391                      FieldPosition& status) const

   392 {

   393     return format((double) number, appendTo, status);

   394 }

   396 // -------------------------------------

   397 // Formats a double number.

   399 UnicodeString&

   400 ChoiceFormat::format(double number,

   401                      UnicodeString& appendTo,

   402                      FieldPosition& /*pos*/) const

   403 {

   404     if (msgPattern.countParts() == 0) {

   405         // No pattern was applied, or it failed.

   406         return appendTo;

   407     }

   408     // Get the appropriate sub-message.

   409     int32_t msgStart = findSubMessage(msgPattern, 0, number);

   410     if (!MessageImpl::jdkAposMode(msgPattern)) {

   411         int32_t patternStart = msgPattern.getPart(msgStart).getLimit();

   412         int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);

   413         appendTo.append(msgPattern.getPatternString(),

   414                         patternStart,

   415                         msgPattern.getPatternIndex(msgLimit) - patternStart);

   416         return appendTo;

   417     }

   418     // JDK compatibility mode: Remove SKIP_SYNTAX.

   419     return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);

   420 }

   422 int32_t

   423 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {

   424     int32_t count = pattern.countParts();

   425     int32_t msgStart;

   426     // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples

   427     // until ARG_LIMIT or end of choice-only pattern.

   428     // Ignore the first number and selector and start the loop on the first message.

   429     partIndex += 2;

   430     for (;;) {

   431         // Skip but remember the current sub-message.

   432         msgStart = partIndex;

   433         partIndex = pattern.getLimitPartIndex(partIndex);

   434         if (++partIndex >= count) {

   435             // Reached the end of the choice-only pattern.

   436             // Return with the last sub-message.

   437             break;

   438         }

   439         const MessagePattern::Part &part = pattern.getPart(partIndex++);

   440         UMessagePatternPartType type = part.getType();

   441         if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {

   442             // Reached the end of the ChoiceFormat style.

   443             // Return with the last sub-message.

   444             break;

   445         }

   446         // part is an ARG_INT or ARG_DOUBLE

   447         U_ASSERT(MessagePattern::Part::hasNumericValue(type));

   448         double boundary = pattern.getNumericValue(part);

   449         // Fetch the ARG_SELECTOR character.

   450         int32_t selectorIndex = pattern.getPatternIndex(partIndex++);

   451         UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);

   452         if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {

   453             // The number is in the interval between the previous boundary and the current one.

   454             // Return with the sub-message between them.

   455             // The !(a>b) and !(a>=b) comparisons are equivalent to

   456             // (a<=b) and (a<b) except they "catch" NaN.

   457             break;

   458         }

   459     }

   460     return msgStart;

   461 }

   463 // -------------------------------------

   464 // Formats an array of objects. Checks if the data type of the objects

   465 // to get the right value for formatting.

   467 UnicodeString&

   468 ChoiceFormat::format(const Formattable* objs,

   469                      int32_t cnt,

   470                      UnicodeString& appendTo,

   471                      FieldPosition& pos,

   472                      UErrorCode& status) const

   473 {

   474     if(cnt < 0) {

   475         status = U_ILLEGAL_ARGUMENT_ERROR;

   476         return appendTo;

   477     }

   478     if (msgPattern.countParts() == 0) {

   479         status = U_INVALID_STATE_ERROR;

   480         return appendTo;

   481     }

   483     for (int32_t i = 0; i < cnt; i++) {

   484         double objDouble = objs[i].getDouble(status);

   485         if (U_SUCCESS(status)) {

   486             format(objDouble, appendTo, pos);

   487         }

   488     }

   490     return appendTo;

   491 }

   493 // -------------------------------------

   495 void

   496 ChoiceFormat::parse(const UnicodeString& text,

   497                     Formattable& result,

   498                     ParsePosition& pos) const

   499 {

   500     result.setDouble(parseArgument(msgPattern, 0, text, pos));

   501 }

   503 double

   504 ChoiceFormat::parseArgument(

   505         const MessagePattern &pattern, int32_t partIndex,

   506         const UnicodeString &source, ParsePosition &pos) {

   507     // find the best number (defined as the one with the longest parse)

   508     int32_t start = pos.getIndex();

   509     int32_t furthest = start;

   510     double bestNumber = uprv_getNaN();

   511     double tempNumber = 0.0;

   512     int32_t count = pattern.countParts();

   513     while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {

   514         tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));

   515         partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR

   516         int32_t msgLimit = pattern.getLimitPartIndex(partIndex);

   517         int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);

   518         if (len >= 0) {

   519             int32_t newIndex = start + len;

   520             if (newIndex > furthest) {

   521                 furthest = newIndex;

   522                 bestNumber = tempNumber;

   523                 if (furthest == source.length()) {

   524                     break;

   525                 }

   526             }

   527         }

   528         partIndex = msgLimit + 1;

   529     }

   530     if (furthest == start) {

   531         pos.setErrorIndex(start);

   532     } else {

   533         pos.setIndex(furthest);

   534     }

   535     return bestNumber;

   536 }

   538 int32_t

   539 ChoiceFormat::matchStringUntilLimitPart(

   540         const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,

   541         const UnicodeString &source, int32_t sourceOffset) {

   542     int32_t matchingSourceLength = 0;

   543     const UnicodeString &msgString = pattern.getPatternString();

   544     int32_t prevIndex = pattern.getPart(partIndex).getLimit();

   545     for (;;) {

   546         const MessagePattern::Part &part = pattern.getPart(++partIndex);

   547         if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {

   548             int32_t index = part.getIndex();

   549             int32_t length = index - prevIndex;

   550             if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {

   551                 return -1;  // mismatch

   552             }

   553             matchingSourceLength += length;

   554             if (partIndex == limitPartIndex) {

   555                 return matchingSourceLength;

   556             }

   557             prevIndex = part.getLimit();  // SKIP_SYNTAX

   558         }

   559     }

   560 }

   562 // -------------------------------------

   564 Format*

   565 ChoiceFormat::clone() const

   566 {

   567     ChoiceFormat *aCopy = new ChoiceFormat(*this);

   568     return aCopy;

   569 }

   571 U_NAMESPACE_END

   573 #endif /* #if !UCONFIG_NO_FORMATTING */

   575 //eof

The Tor Browser / file revision

intl/icu/source/i18n/choicfmt.cpp@b8a032363ba2

intl/icu/source/i18n/choicfmt.cpp