intl/icu/source/i18n/plurfmt.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2 *******************************************************************************
     3 * Copyright (C) 2009-2013, International Business Machines Corporation and
     4 * others. All Rights Reserved.
     5 *******************************************************************************
     6 *
     7 * File PLURFMT.CPP
     8 *******************************************************************************
     9 */
    11 #include "unicode/decimfmt.h"
    12 #include "unicode/messagepattern.h"
    13 #include "unicode/plurfmt.h"
    14 #include "unicode/plurrule.h"
    15 #include "unicode/utypes.h"
    16 #include "cmemory.h"
    17 #include "messageimpl.h"
    18 #include "plurrule_impl.h"
    19 #include "uassert.h"
    20 #include "uhash.h"
    22 #if !UCONFIG_NO_FORMATTING
    24 U_NAMESPACE_BEGIN
    26 static const UChar OTHER_STRING[] = {
    27     0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
    28 };
    30 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)  // ICU macro; presumably emits the class-ID (RTTI) boilerplate for PluralFormat -- confirm against uobject.h
    32 PluralFormat::PluralFormat(UErrorCode& status)
    33         : locale(Locale::getDefault()),
    34           msgPattern(status),
    35           numberFormat(NULL),
    36           offset(0) {
    37     init(NULL, UPLURAL_TYPE_CARDINAL, status);
    38 }
    40 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
    41         : locale(loc),
    42           msgPattern(status),
    43           numberFormat(NULL),
    44           offset(0) {
    45     init(NULL, UPLURAL_TYPE_CARDINAL, status);
    46 }
    48 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
    49         : locale(Locale::getDefault()),
    50           msgPattern(status),
    51           numberFormat(NULL),
    52           offset(0) {
    53     init(&rules, UPLURAL_TYPE_COUNT, status);
    54 }
    56 PluralFormat::PluralFormat(const Locale& loc,
    57                            const PluralRules& rules,
    58                            UErrorCode& status)
    59         : locale(loc),
    60           msgPattern(status),
    61           numberFormat(NULL),
    62           offset(0) {
    63     init(&rules, UPLURAL_TYPE_COUNT, status);
    64 }
    66 PluralFormat::PluralFormat(const Locale& loc,
    67                            UPluralType type,
    68                            UErrorCode& status)
    69         : locale(loc),
    70           msgPattern(status),
    71           numberFormat(NULL),
    72           offset(0) {
    73     init(NULL, type, status);
    74 }
    76 PluralFormat::PluralFormat(const UnicodeString& pat,
    77                            UErrorCode& status)
    78         : locale(Locale::getDefault()),
    79           msgPattern(status),
    80           numberFormat(NULL),
    81           offset(0) {
    82     init(NULL, UPLURAL_TYPE_CARDINAL, status);
    83     applyPattern(pat, status);
    84 }
    86 PluralFormat::PluralFormat(const Locale& loc,
    87                            const UnicodeString& pat,
    88                            UErrorCode& status)
    89         : locale(loc),
    90           msgPattern(status),
    91           numberFormat(NULL),
    92           offset(0) {
    93     init(NULL, UPLURAL_TYPE_CARDINAL, status);
    94     applyPattern(pat, status);
    95 }
    97 PluralFormat::PluralFormat(const PluralRules& rules,
    98                            const UnicodeString& pat,
    99                            UErrorCode& status)
   100         : locale(Locale::getDefault()),
   101           msgPattern(status),
   102           numberFormat(NULL),
   103           offset(0) {
   104     init(&rules, UPLURAL_TYPE_COUNT, status);
   105     applyPattern(pat, status);
   106 }
   108 PluralFormat::PluralFormat(const Locale& loc,
   109                            const PluralRules& rules,
   110                            const UnicodeString& pat,
   111                            UErrorCode& status)
   112         : locale(loc),
   113           msgPattern(status),
   114           numberFormat(NULL),
   115           offset(0) {
   116     init(&rules, UPLURAL_TYPE_COUNT, status);
   117     applyPattern(pat, status);
   118 }
   120 PluralFormat::PluralFormat(const Locale& loc,
   121                            UPluralType type,
   122                            const UnicodeString& pat,
   123                            UErrorCode& status)
   124         : locale(loc),
   125           msgPattern(status),
   126           numberFormat(NULL),
   127           offset(0) {
   128     init(NULL, type, status);
   129     applyPattern(pat, status);
   130 }
   132 PluralFormat::PluralFormat(const PluralFormat& other)
   133         : Format(other),
   134           locale(other.locale),
   135           msgPattern(other.msgPattern),
   136           numberFormat(NULL),
   137           offset(other.offset) {
   138     copyObjects(other);
   139 }
   141 void
   142 PluralFormat::copyObjects(const PluralFormat& other) {
   143     UErrorCode status = U_ZERO_ERROR;
   144     if (numberFormat != NULL) {
   145         delete numberFormat;
   146     }
   147     if (pluralRulesWrapper.pluralRules != NULL) {
   148         delete pluralRulesWrapper.pluralRules;
   149     }
   151     if (other.numberFormat == NULL) {
   152         numberFormat = NumberFormat::createInstance(locale, status);
   153     } else {
   154         numberFormat = (NumberFormat*)other.numberFormat->clone();
   155     }
   156     if (other.pluralRulesWrapper.pluralRules == NULL) {
   157         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
   158     } else {
   159         pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
   160     }
   161 }
   164 PluralFormat::~PluralFormat() {
   165     delete numberFormat;
   166 }
   168 void
   169 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
   170     if (U_FAILURE(status)) {
   171         return;
   172     }
   174     if (rules==NULL) {
   175         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
   176     } else {
   177         pluralRulesWrapper.pluralRules = rules->clone();
   178         if (pluralRulesWrapper.pluralRules == NULL) {
   179             status = U_MEMORY_ALLOCATION_ERROR;
   180             return;
   181         }
   182     }
   184     numberFormat= NumberFormat::createInstance(locale, status);
   185 }
   187 void
   188 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
   189     msgPattern.parsePluralStyle(newPattern, NULL, status);
   190     if (U_FAILURE(status)) {
   191         msgPattern.clear();
   192         offset = 0;
   193         return;
   194     }
   195     offset = msgPattern.getPluralOffset(0);
   196 }
   198 UnicodeString&
   199 PluralFormat::format(const Formattable& obj,
   200                    UnicodeString& appendTo,
   201                    FieldPosition& pos,
   202                    UErrorCode& status) const
   203 {
   204     if (U_FAILURE(status)) return appendTo;
   206     if (obj.isNumeric()) {
   207         return format(obj, obj.getDouble(), appendTo, pos, status);
   208     } else {
   209         status = U_ILLEGAL_ARGUMENT_ERROR;
   210         return appendTo;
   211     }
   212 }
   214 UnicodeString
   215 PluralFormat::format(int32_t number, UErrorCode& status) const {
   216     FieldPosition fpos(0);
   217     UnicodeString result;
   218     return format(Formattable(number), number, result, fpos, status);
   219 }
   221 UnicodeString
   222 PluralFormat::format(double number, UErrorCode& status) const {
   223     FieldPosition fpos(0);
   224     UnicodeString result;
   225     return format(Formattable(number), number, result, fpos, status);
   226 }
   229 UnicodeString&
   230 PluralFormat::format(int32_t number,
   231                      UnicodeString& appendTo,
   232                      FieldPosition& pos,
   233                      UErrorCode& status) const {
   234     return format(Formattable(number), (double)number, appendTo, pos, status);
   235 }
   237 UnicodeString&
   238 PluralFormat::format(double number,
   239                      UnicodeString& appendTo,
   240                      FieldPosition& pos,
   241                      UErrorCode& status) const {
   242     return format(Formattable(number), (double)number, appendTo, pos, status);
   243 }
   245 UnicodeString&
   246 PluralFormat::format(const Formattable& numberObject, double number,
   247                      UnicodeString& appendTo,
   248                      FieldPosition& pos,
   249                      UErrorCode& status) const {
           // Core formatting routine used by the other format() overloads.
           //   numberObject - the value as a Formattable (used when offset == 0)
           //   number       - the same value as a double (used for the offset
           //                  arithmetic and for explicit-value matching)
           //   appendTo     - receives the selected sub-message text
           //   pos          - only forwarded to numberFormat when no pattern is set
           //   status       - nothing is appended if this is already a failure
           // Returns appendTo.
   250     if (U_FAILURE(status)) {
   251         return appendTo;
   252     }
           // Without a parsed pattern there is nothing to select: just format the number.
   253     if (msgPattern.countParts() == 0) {
   254         return numberFormat->format(numberObject, appendTo, pos, status);
   255     }
   256     // Get the appropriate sub-message.
   257     // Select it based on the formatted number-offset.
   258     double numberMinusOffset = number - offset;
   259     UnicodeString numberString;
   260     FieldPosition ignorePos;
           // Default operands for rule selection; replaced below when numberFormat
           // is a DecimalFormat.
   261     FixedDecimal dec(numberMinusOffset);
   262     if (offset == 0) {
   263         numberFormat->format(numberObject, numberString, ignorePos, status);  // could be BigDecimal etc.
   264         DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
   265         if(decFmt != NULL) {
   266             dec = decFmt->getFixedDecimal(numberObject, status);
   267         }
   268     } else {
   269         numberFormat->format(numberMinusOffset, numberString, ignorePos, status);
   270         DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
   271         if(decFmt != NULL) {
   272             dec = decFmt->getFixedDecimal(numberMinusOffset, status);
   273         }
   274     }
           // Note: the un-offset number is passed here; findSubMessage reads the
           // offset from the pattern itself.
   275     int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
   276     if (U_FAILURE(status)) { return appendTo; }
   277     // Replace syntactic # signs in the top level of this sub-message
   278     // (not in nested arguments) with the formatted number-offset.
   279     const UnicodeString& pattern = msgPattern.getPatternString();
           // prevIndex tracks the start of pattern text that has not been copied yet.
   280     int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
   281     for (;;) {
   282         const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
   283         const UMessagePatternPartType type = part.getType();
   284         int32_t index = part.getIndex();
   285         if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
                   // End of the selected sub-message: copy the remaining text and finish.
   286             return appendTo.append(pattern, prevIndex, index - prevIndex);
   287         } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
   288             (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
                   // Copy the text before this part; a REPLACE_NUMBER part is then
                   // substituted by the formatted number, while a SKIP_SYNTAX part
                   // is simply dropped.
   289             appendTo.append(pattern, prevIndex, index - prevIndex);
   290             if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
   291                 appendTo.append(numberString);
   292             }
   293             prevIndex = part.getLimit();
   294         } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
                   // Nested argument: copy the text before it, jump to its matching
                   // limit part, and append the argument's own pattern text through
                   // appendReducedApostrophes().
   295             appendTo.append(pattern, prevIndex, index - prevIndex);
   296             prevIndex = index;
   297             partIndex = msgPattern.getLimitPartIndex(partIndex);
   298             index = msgPattern.getPart(partIndex).getLimit();
   299             MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
   300             prevIndex = index;
   301         }
   302     }
   303 }
   305 UnicodeString&
   306 PluralFormat::toPattern(UnicodeString& appendTo) {
   307     if (0 == msgPattern.countParts()) {
   308         appendTo.setToBogus();
   309     } else {
   310         appendTo.append(msgPattern.getPatternString());
   311     }
   312     return appendTo;
   313 }
   315 void
   316 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
   317     if (U_FAILURE(status)) {
   318         return;
   319     }
   320     locale = loc;
   321     msgPattern.clear();
   322     delete numberFormat;
   323     offset = 0;
   324     numberFormat = NULL;
   325     pluralRulesWrapper.reset();
   326     init(NULL, UPLURAL_TYPE_CARDINAL, status);
   327 }
   329 void
   330 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
   331     if (U_FAILURE(status)) {
   332         return;
   333     }
   334     NumberFormat* nf = (NumberFormat*)format->clone();
   335     if (nf != NULL) {
   336         delete numberFormat;
   337         numberFormat = nf;
   338     } else {
   339         status = U_MEMORY_ALLOCATION_ERROR;
   340     }
   341 }
   343 Format*
   344 PluralFormat::clone() const
   345 {
   346     return new PluralFormat(*this);
   347 }
   350 PluralFormat&
   351 PluralFormat::operator=(const PluralFormat& other) {
   352     if (this != &other) {
   353         locale = other.locale;
   354         msgPattern = other.msgPattern;
   355         offset = other.offset;
   356         copyObjects(other);
   357     }
   359     return *this;
   360 }
   362 UBool
   363 PluralFormat::operator==(const Format& other) const {
   364     if (this == &other) {
   365         return TRUE;
   366     }
   367     if (!Format::operator==(other)) {
   368         return FALSE;
   369     }
   370     const PluralFormat& o = (const PluralFormat&)other;
   371     return
   372         locale == o.locale &&
   373         msgPattern == o.msgPattern &&  // implies same offset
   374         (numberFormat == NULL) == (o.numberFormat == NULL) &&
   375         (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
   376         (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
   377         (pluralRulesWrapper.pluralRules == NULL ||
   378             *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
   379 }
   381 UBool
   382 PluralFormat::operator!=(const Format& other) const {
   383     return  !operator==(other);
   384 }
   386 void
   387 PluralFormat::parseObject(const UnicodeString& /*source*/,
   388                         Formattable& /*result*/,
   389                         ParsePosition& pos) const
   390 {
   391     // Parsing not supported.
   392     pos.setErrorIndex(pos.getIndex());
   393 }
   395 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
   396                                      const PluralSelector& selector, void *context,
   397                                      double number, UErrorCode& ec) {
           // Finds the sub-message of a plural-style pattern that applies to `number`.
           //   pattern   - parsed pattern to search
           //   partIndex - index of the first part to examine; if that part carries
           //               a numeric value it is taken as the plural offset
           //   selector  - asked for the plural keyword, lazily and at most once
           //   context   - opaque pointer forwarded unchanged to selector.select()
           //   number    - value compared against explicit "=N" selectors; the
           //               selector receives number minus the offset
           //   ec        - 0 is returned immediately if this is already a failure
           // Returns the part index recorded for the chosen sub-message: an
           // explicit-value match wins immediately, otherwise the first keyword
           // match or the first "other" entry; 0 if nothing was selected.
   398     if (U_FAILURE(ec)) {
   399         return 0;
   400     }
   401     int32_t count=pattern.countParts();
   402     double offset;
   403     const MessagePattern::Part* part=&pattern.getPart(partIndex);
   404     if (MessagePattern::Part::hasNumericValue(part->getType())) {
   405         offset=pattern.getNumericValue(*part);
   406         ++partIndex;
   407     } else {
   408         offset=0;
   409     }
   410     // The keyword is empty until we need to match against a non-explicit, not-"other" value.
   411     // Then we get the keyword from the selector.
   412     // (In other words, we never call the selector if we match against an explicit value,
   413     // or if the only non-explicit keyword is "other".)
   414     UnicodeString keyword;
   415     UnicodeString other(FALSE, OTHER_STRING, 5);
   416     // When we find a match, we set msgStart>0 and also set this boolean to true
   417     // to avoid matching the keyword again (duplicates are allowed)
   418     // while we continue to look for an explicit-value match.
   419     UBool haveKeywordMatch=FALSE;
   420     // msgStart is 0 until we find any appropriate sub-message.
   421     // We remember the first "other" sub-message if we have not seen any
   422     // appropriate sub-message before.
   423     // We remember the first matching-keyword sub-message if we have not seen
   424     // one of those before.
   425     // (The parser allows [does not check for] duplicate keywords.
   426     // We just have to make sure to take the first one.)
   427     // We avoid matching the keyword twice by also setting haveKeywordMatch=true
   428     // at the first keyword match.
   429     // We keep going until we find an explicit-value match or reach the end of the plural style.
   430     int32_t msgStart=0;
   431     // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
   432     // until ARG_LIMIT or end of plural-only pattern.
   433     do {
   434         part=&pattern.getPart(partIndex++);
   435         const UMessagePatternPartType type = part->getType();
   436         if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
   437             break;
   438         }
   439         U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
   440         // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
   441         if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
   442             // explicit value like "=2"
   443             part=&pattern.getPart(partIndex++);
   444             if(number==pattern.getNumericValue(*part)) {
   445                 // matches explicit value
   446                 return partIndex;
   447             }
   448         } else if(!haveKeywordMatch) {
   449             // plural keyword like "few" or "other"
   450             // Compare "other" first and call the selector if this is not "other".
   451             if(pattern.partSubstringMatches(*part, other)) {
   452                 if(msgStart==0) {
   453                     msgStart=partIndex;
   454                     if(0 == keyword.compare(other)) {
   455                         // This is the first "other" sub-message,
   456                         // and the selected keyword is also "other".
   457                         // Do not match "other" again.
   458                         haveKeywordMatch=TRUE;
   459                     }
   460                 }
   461             } else {
   462                 if(keyword.isEmpty()) {
   463                     keyword=selector.select(context, number-offset, ec);
   464                     if(msgStart!=0 && (0 == keyword.compare(other))) {
   465                         // We have already seen an "other" sub-message.
   466                         // Do not match "other" again.
   467                         haveKeywordMatch=TRUE;
   468                         // Skip keyword matching but do getLimitPartIndex().
   469                     }
   470                 }
   471                 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
   472                     // keyword matches
   473                     msgStart=partIndex;
   474                     // Do not match this keyword again.
   475                     haveKeywordMatch=TRUE;
   476                 }
   477             }
   478         }
               // Skip over this tuple's message: jump to its limit part; the loop
               // condition then steps past it.
   479         partIndex=pattern.getLimitPartIndex(partIndex);
   480     } while(++partIndex<count);
   481     return msgStart;
   482 }
   484 PluralFormat::PluralSelector::~PluralSelector() {}
   486 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
   487     delete pluralRules;
   488 }
   490 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
   491                                                           UErrorCode& /*ec*/) const {
   492     (void)number;  // unused except in the assertion
   493     FixedDecimal *dec=static_cast<FixedDecimal *>(context);
   494     U_ASSERT(dec->source==number);
   495     return pluralRules->select(*dec);
   496 }
   498 void PluralFormat::PluralSelectorAdapter::reset() {
   499     delete pluralRules;
   500     pluralRules = NULL;
   501 }
   504 U_NAMESPACE_END
   507 #endif /* #if !UCONFIG_NO_FORMATTING */
   509 //eof

mercurial