intl/icu/source/i18n/plurfmt.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/plurfmt.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,509 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +* Copyright (C) 2009-2013, International Business Machines Corporation and
     1.7 +* others. All Rights Reserved.
     1.8 +*******************************************************************************
     1.9 +*
    1.10 +* File PLURFMT.CPP
    1.11 +*******************************************************************************
    1.12 +*/
    1.13 +
    1.14 +#include "unicode/decimfmt.h"
    1.15 +#include "unicode/messagepattern.h"
    1.16 +#include "unicode/plurfmt.h"
    1.17 +#include "unicode/plurrule.h"
    1.18 +#include "unicode/utypes.h"
    1.19 +#include "cmemory.h"
    1.20 +#include "messageimpl.h"
    1.21 +#include "plurrule_impl.h"
    1.22 +#include "uassert.h"
    1.23 +#include "uhash.h"
    1.24 +
    1.25 +#if !UCONFIG_NO_FORMATTING
    1.26 +
    1.27 +U_NAMESPACE_BEGIN
    1.28 +
// UTF-16 code units spelling the plural keyword "other", followed by a
// terminating 0. The keyword is written as numeric code units rather than
// as a character literal. findSubMessage() wraps the first 5 units in a
// UnicodeString to recognize "other" sub-messages.
static const UChar OTHER_STRING[] = {
    0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
};
    1.32 +
    1.33 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
    1.34 +
    1.35 +PluralFormat::PluralFormat(UErrorCode& status)
    1.36 +        : locale(Locale::getDefault()),
    1.37 +          msgPattern(status),
    1.38 +          numberFormat(NULL),
    1.39 +          offset(0) {
    1.40 +    init(NULL, UPLURAL_TYPE_CARDINAL, status);
    1.41 +}
    1.42 +
    1.43 +PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
    1.44 +        : locale(loc),
    1.45 +          msgPattern(status),
    1.46 +          numberFormat(NULL),
    1.47 +          offset(0) {
    1.48 +    init(NULL, UPLURAL_TYPE_CARDINAL, status);
    1.49 +}
    1.50 +
    1.51 +PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
    1.52 +        : locale(Locale::getDefault()),
    1.53 +          msgPattern(status),
    1.54 +          numberFormat(NULL),
    1.55 +          offset(0) {
    1.56 +    init(&rules, UPLURAL_TYPE_COUNT, status);
    1.57 +}
    1.58 +
    1.59 +PluralFormat::PluralFormat(const Locale& loc,
    1.60 +                           const PluralRules& rules,
    1.61 +                           UErrorCode& status)
    1.62 +        : locale(loc),
    1.63 +          msgPattern(status),
    1.64 +          numberFormat(NULL),
    1.65 +          offset(0) {
    1.66 +    init(&rules, UPLURAL_TYPE_COUNT, status);
    1.67 +}
    1.68 +
    1.69 +PluralFormat::PluralFormat(const Locale& loc,
    1.70 +                           UPluralType type,
    1.71 +                           UErrorCode& status)
    1.72 +        : locale(loc),
    1.73 +          msgPattern(status),
    1.74 +          numberFormat(NULL),
    1.75 +          offset(0) {
    1.76 +    init(NULL, type, status);
    1.77 +}
    1.78 +
    1.79 +PluralFormat::PluralFormat(const UnicodeString& pat,
    1.80 +                           UErrorCode& status)
    1.81 +        : locale(Locale::getDefault()),
    1.82 +          msgPattern(status),
    1.83 +          numberFormat(NULL),
    1.84 +          offset(0) {
    1.85 +    init(NULL, UPLURAL_TYPE_CARDINAL, status);
    1.86 +    applyPattern(pat, status);
    1.87 +}
    1.88 +
    1.89 +PluralFormat::PluralFormat(const Locale& loc,
    1.90 +                           const UnicodeString& pat,
    1.91 +                           UErrorCode& status)
    1.92 +        : locale(loc),
    1.93 +          msgPattern(status),
    1.94 +          numberFormat(NULL),
    1.95 +          offset(0) {
    1.96 +    init(NULL, UPLURAL_TYPE_CARDINAL, status);
    1.97 +    applyPattern(pat, status);
    1.98 +}
    1.99 +
   1.100 +PluralFormat::PluralFormat(const PluralRules& rules,
   1.101 +                           const UnicodeString& pat,
   1.102 +                           UErrorCode& status)
   1.103 +        : locale(Locale::getDefault()),
   1.104 +          msgPattern(status),
   1.105 +          numberFormat(NULL),
   1.106 +          offset(0) {
   1.107 +    init(&rules, UPLURAL_TYPE_COUNT, status);
   1.108 +    applyPattern(pat, status);
   1.109 +}
   1.110 +
   1.111 +PluralFormat::PluralFormat(const Locale& loc,
   1.112 +                           const PluralRules& rules,
   1.113 +                           const UnicodeString& pat,
   1.114 +                           UErrorCode& status)
   1.115 +        : locale(loc),
   1.116 +          msgPattern(status),
   1.117 +          numberFormat(NULL),
   1.118 +          offset(0) {
   1.119 +    init(&rules, UPLURAL_TYPE_COUNT, status);
   1.120 +    applyPattern(pat, status);
   1.121 +}
   1.122 +
   1.123 +PluralFormat::PluralFormat(const Locale& loc,
   1.124 +                           UPluralType type,
   1.125 +                           const UnicodeString& pat,
   1.126 +                           UErrorCode& status)
   1.127 +        : locale(loc),
   1.128 +          msgPattern(status),
   1.129 +          numberFormat(NULL),
   1.130 +          offset(0) {
   1.131 +    init(NULL, type, status);
   1.132 +    applyPattern(pat, status);
   1.133 +}
   1.134 +
   1.135 +PluralFormat::PluralFormat(const PluralFormat& other)
   1.136 +        : Format(other),
   1.137 +          locale(other.locale),
   1.138 +          msgPattern(other.msgPattern),
   1.139 +          numberFormat(NULL),
   1.140 +          offset(other.offset) {
   1.141 +    copyObjects(other);
   1.142 +}
   1.143 +
   1.144 +void
   1.145 +PluralFormat::copyObjects(const PluralFormat& other) {
   1.146 +    UErrorCode status = U_ZERO_ERROR;
   1.147 +    if (numberFormat != NULL) {
   1.148 +        delete numberFormat;
   1.149 +    }
   1.150 +    if (pluralRulesWrapper.pluralRules != NULL) {
   1.151 +        delete pluralRulesWrapper.pluralRules;
   1.152 +    }
   1.153 +
   1.154 +    if (other.numberFormat == NULL) {
   1.155 +        numberFormat = NumberFormat::createInstance(locale, status);
   1.156 +    } else {
   1.157 +        numberFormat = (NumberFormat*)other.numberFormat->clone();
   1.158 +    }
   1.159 +    if (other.pluralRulesWrapper.pluralRules == NULL) {
   1.160 +        pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
   1.161 +    } else {
   1.162 +        pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
   1.163 +    }
   1.164 +}
   1.165 +
   1.166 +
   1.167 +PluralFormat::~PluralFormat() {
   1.168 +    delete numberFormat;
   1.169 +}
   1.170 +
   1.171 +void
   1.172 +PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
   1.173 +    if (U_FAILURE(status)) {
   1.174 +        return;
   1.175 +    }
   1.176 +
   1.177 +    if (rules==NULL) {
   1.178 +        pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
   1.179 +    } else {
   1.180 +        pluralRulesWrapper.pluralRules = rules->clone();
   1.181 +        if (pluralRulesWrapper.pluralRules == NULL) {
   1.182 +            status = U_MEMORY_ALLOCATION_ERROR;
   1.183 +            return;
   1.184 +        }
   1.185 +    }
   1.186 +
   1.187 +    numberFormat= NumberFormat::createInstance(locale, status);
   1.188 +}
   1.189 +
   1.190 +void
   1.191 +PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
   1.192 +    msgPattern.parsePluralStyle(newPattern, NULL, status);
   1.193 +    if (U_FAILURE(status)) {
   1.194 +        msgPattern.clear();
   1.195 +        offset = 0;
   1.196 +        return;
   1.197 +    }
   1.198 +    offset = msgPattern.getPluralOffset(0);
   1.199 +}
   1.200 +
   1.201 +UnicodeString&
   1.202 +PluralFormat::format(const Formattable& obj,
   1.203 +                   UnicodeString& appendTo,
   1.204 +                   FieldPosition& pos,
   1.205 +                   UErrorCode& status) const
   1.206 +{
   1.207 +    if (U_FAILURE(status)) return appendTo;
   1.208 +
   1.209 +    if (obj.isNumeric()) {
   1.210 +        return format(obj, obj.getDouble(), appendTo, pos, status);
   1.211 +    } else {
   1.212 +        status = U_ILLEGAL_ARGUMENT_ERROR;
   1.213 +        return appendTo;
   1.214 +    }
   1.215 +}
   1.216 +
   1.217 +UnicodeString
   1.218 +PluralFormat::format(int32_t number, UErrorCode& status) const {
   1.219 +    FieldPosition fpos(0);
   1.220 +    UnicodeString result;
   1.221 +    return format(Formattable(number), number, result, fpos, status);
   1.222 +}
   1.223 +
   1.224 +UnicodeString
   1.225 +PluralFormat::format(double number, UErrorCode& status) const {
   1.226 +    FieldPosition fpos(0);
   1.227 +    UnicodeString result;
   1.228 +    return format(Formattable(number), number, result, fpos, status);
   1.229 +}
   1.230 +
   1.231 +
   1.232 +UnicodeString&
   1.233 +PluralFormat::format(int32_t number,
   1.234 +                     UnicodeString& appendTo,
   1.235 +                     FieldPosition& pos,
   1.236 +                     UErrorCode& status) const {
   1.237 +    return format(Formattable(number), (double)number, appendTo, pos, status);
   1.238 +}
   1.239 +
   1.240 +UnicodeString&
   1.241 +PluralFormat::format(double number,
   1.242 +                     UnicodeString& appendTo,
   1.243 +                     FieldPosition& pos,
   1.244 +                     UErrorCode& status) const {
   1.245 +    return format(Formattable(number), (double)number, appendTo, pos, status);
   1.246 +}
   1.247 +
   1.248 +UnicodeString&
   1.249 +PluralFormat::format(const Formattable& numberObject, double number,
   1.250 +                     UnicodeString& appendTo,
   1.251 +                     FieldPosition& pos,
   1.252 +                     UErrorCode& status) const {
   1.253 +    if (U_FAILURE(status)) {
   1.254 +        return appendTo;
   1.255 +    }
   1.256 +    if (msgPattern.countParts() == 0) {
   1.257 +        return numberFormat->format(numberObject, appendTo, pos, status);
   1.258 +    }
   1.259 +    // Get the appropriate sub-message.
   1.260 +    // Select it based on the formatted number-offset.
   1.261 +    double numberMinusOffset = number - offset;
   1.262 +    UnicodeString numberString;
   1.263 +    FieldPosition ignorePos;
   1.264 +    FixedDecimal dec(numberMinusOffset);
   1.265 +    if (offset == 0) {
   1.266 +        numberFormat->format(numberObject, numberString, ignorePos, status);  // could be BigDecimal etc.
   1.267 +        DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
   1.268 +        if(decFmt != NULL) {
   1.269 +            dec = decFmt->getFixedDecimal(numberObject, status);
   1.270 +        }
   1.271 +    } else {
   1.272 +        numberFormat->format(numberMinusOffset, numberString, ignorePos, status);
   1.273 +        DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
   1.274 +        if(decFmt != NULL) {
   1.275 +            dec = decFmt->getFixedDecimal(numberMinusOffset, status);
   1.276 +        }
   1.277 +    }
   1.278 +    int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
   1.279 +    if (U_FAILURE(status)) { return appendTo; }
   1.280 +    // Replace syntactic # signs in the top level of this sub-message
   1.281 +    // (not in nested arguments) with the formatted number-offset.
   1.282 +    const UnicodeString& pattern = msgPattern.getPatternString();
   1.283 +    int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
   1.284 +    for (;;) {
   1.285 +        const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
   1.286 +        const UMessagePatternPartType type = part.getType();
   1.287 +        int32_t index = part.getIndex();
   1.288 +        if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
   1.289 +            return appendTo.append(pattern, prevIndex, index - prevIndex);
   1.290 +        } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
   1.291 +            (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
   1.292 +            appendTo.append(pattern, prevIndex, index - prevIndex);
   1.293 +            if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
   1.294 +                appendTo.append(numberString);
   1.295 +            }
   1.296 +            prevIndex = part.getLimit();
   1.297 +        } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
   1.298 +            appendTo.append(pattern, prevIndex, index - prevIndex);
   1.299 +            prevIndex = index;
   1.300 +            partIndex = msgPattern.getLimitPartIndex(partIndex);
   1.301 +            index = msgPattern.getPart(partIndex).getLimit();
   1.302 +            MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
   1.303 +            prevIndex = index;
   1.304 +        }
   1.305 +    }
   1.306 +}
   1.307 +
   1.308 +UnicodeString&
   1.309 +PluralFormat::toPattern(UnicodeString& appendTo) {
   1.310 +    if (0 == msgPattern.countParts()) {
   1.311 +        appendTo.setToBogus();
   1.312 +    } else {
   1.313 +        appendTo.append(msgPattern.getPatternString());
   1.314 +    }
   1.315 +    return appendTo;
   1.316 +}
   1.317 +
   1.318 +void
   1.319 +PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
   1.320 +    if (U_FAILURE(status)) {
   1.321 +        return;
   1.322 +    }
   1.323 +    locale = loc;
   1.324 +    msgPattern.clear();
   1.325 +    delete numberFormat;
   1.326 +    offset = 0;
   1.327 +    numberFormat = NULL;
   1.328 +    pluralRulesWrapper.reset();
   1.329 +    init(NULL, UPLURAL_TYPE_CARDINAL, status);
   1.330 +}
   1.331 +
   1.332 +void
   1.333 +PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
   1.334 +    if (U_FAILURE(status)) {
   1.335 +        return;
   1.336 +    }
   1.337 +    NumberFormat* nf = (NumberFormat*)format->clone();
   1.338 +    if (nf != NULL) {
   1.339 +        delete numberFormat;
   1.340 +        numberFormat = nf;
   1.341 +    } else {
   1.342 +        status = U_MEMORY_ALLOCATION_ERROR;
   1.343 +    }
   1.344 +}
   1.345 +
   1.346 +Format*
   1.347 +PluralFormat::clone() const
   1.348 +{
   1.349 +    return new PluralFormat(*this);
   1.350 +}
   1.351 +
   1.352 +
   1.353 +PluralFormat&
   1.354 +PluralFormat::operator=(const PluralFormat& other) {
   1.355 +    if (this != &other) {
   1.356 +        locale = other.locale;
   1.357 +        msgPattern = other.msgPattern;
   1.358 +        offset = other.offset;
   1.359 +        copyObjects(other);
   1.360 +    }
   1.361 +
   1.362 +    return *this;
   1.363 +}
   1.364 +
   1.365 +UBool
   1.366 +PluralFormat::operator==(const Format& other) const {
   1.367 +    if (this == &other) {
   1.368 +        return TRUE;
   1.369 +    }
   1.370 +    if (!Format::operator==(other)) {
   1.371 +        return FALSE;
   1.372 +    }
   1.373 +    const PluralFormat& o = (const PluralFormat&)other;
   1.374 +    return
   1.375 +        locale == o.locale &&
   1.376 +        msgPattern == o.msgPattern &&  // implies same offset
   1.377 +        (numberFormat == NULL) == (o.numberFormat == NULL) &&
   1.378 +        (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
   1.379 +        (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
   1.380 +        (pluralRulesWrapper.pluralRules == NULL ||
   1.381 +            *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
   1.382 +}
   1.383 +
   1.384 +UBool
   1.385 +PluralFormat::operator!=(const Format& other) const {
   1.386 +    return  !operator==(other);
   1.387 +}
   1.388 +
   1.389 +void
   1.390 +PluralFormat::parseObject(const UnicodeString& /*source*/,
   1.391 +                        Formattable& /*result*/,
   1.392 +                        ParsePosition& pos) const
   1.393 +{
   1.394 +    // Parsing not supported.
   1.395 +    pos.setErrorIndex(pos.getIndex());
   1.396 +}
   1.397 +
// Finds the sub-message of a plural-style pattern that applies to `number`.
//
// pattern:   parsed message pattern to search.
// partIndex: index of the first part of the plural style; if that part
//            carries a numeric value it is taken as the offset and skipped.
// selector:  asked for the plural keyword, and only when a keyword other
//            than "other" has to be compared.
// context:   opaque pointer handed through to selector.select().
// number:    value to match; explicit values are compared against it
//            directly, the selector receives number-offset.
// ec:        in/out error code; 0 is returned right away if it already
//            signals failure.
//
// Returns the part index of the selected sub-message's start: immediately
// for an explicit-value match, otherwise the first sub-message matching the
// selected keyword, otherwise the first "other" sub-message, or 0 if none
// was found.
int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
                                     const PluralSelector& selector, void *context,
                                     double number, UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return 0;
    }
    int32_t count=pattern.countParts();
    double offset;
    const MessagePattern::Part* part=&pattern.getPart(partIndex);
    if (MessagePattern::Part::hasNumericValue(part->getType())) {
        offset=pattern.getNumericValue(*part);
        ++partIndex;
    } else {
        offset=0;
    }
    // The keyword is empty until we need to match against a non-explicit, not-"other" value.
    // Then we get the keyword from the selector.
    // (In other words, we never call the selector if we match against an explicit value,
    // or if the only non-explicit keyword is "other".)
    UnicodeString keyword;
    UnicodeString other(FALSE, OTHER_STRING, 5);
    // When we find a match, we set msgStart>0 and also set this boolean to true
    // to avoid matching the keyword again (duplicates are allowed)
    // while we continue to look for an explicit-value match.
    UBool haveKeywordMatch=FALSE;
    // msgStart is 0 until we find any appropriate sub-message.
    // We remember the first "other" sub-message if we have not seen any
    // appropriate sub-message before.
    // We remember the first matching-keyword sub-message if we have not seen
    // one of those before.
    // (The parser allows [does not check for] duplicate keywords.
    // We just have to make sure to take the first one.)
    // We avoid matching the keyword twice by also setting haveKeywordMatch=true
    // at the first keyword match.
    // We keep going until we find an explicit-value match or reach the end of the plural style.
    int32_t msgStart=0;
    // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
    // until ARG_LIMIT or end of plural-only pattern.
    do {
        part=&pattern.getPart(partIndex++);
        const UMessagePatternPartType type = part->getType();
        if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
            break;
        }
        U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
        // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
        if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
            // explicit value like "=2"
            part=&pattern.getPart(partIndex++);
            if(number==pattern.getNumericValue(*part)) {
                // matches explicit value
                return partIndex;
            }
        } else if(!haveKeywordMatch) {
            // plural keyword like "few" or "other"
            // Compare "other" first and call the selector if this is not "other".
            if(pattern.partSubstringMatches(*part, other)) {
                if(msgStart==0) {
                    msgStart=partIndex;
                    if(0 == keyword.compare(other)) {
                        // This is the first "other" sub-message,
                        // and the selected keyword is also "other".
                        // Do not match "other" again.
                        haveKeywordMatch=TRUE;
                    }
                }
            } else {
                if(keyword.isEmpty()) {
                    keyword=selector.select(context, number-offset, ec);
                    if(msgStart!=0 && (0 == keyword.compare(other))) {
                        // We have already seen an "other" sub-message.
                        // Do not match "other" again.
                        haveKeywordMatch=TRUE;
                        // Skip keyword matching but do getLimitPartIndex().
                    }
                }
                if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
                    // keyword matches
                    msgStart=partIndex;
                    // Do not match this keyword again.
                    haveKeywordMatch=TRUE;
                }
            }
        }
        // Jump to the end of this sub-message; the loop increment then
        // moves on to the next selector.
        partIndex=pattern.getLimitPartIndex(partIndex);
    } while(++partIndex<count);
    return msgStart;
}
   1.486 +
// Out-of-line definition of the PluralSelector destructor; it has nothing
// to release.
PluralFormat::PluralSelector::~PluralSelector() {}
   1.488 +
// Destructor: releases the PluralRules object held by this adapter.
PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
    delete pluralRules;
}
   1.492 +
   1.493 +UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
   1.494 +                                                          UErrorCode& /*ec*/) const {
   1.495 +    (void)number;  // unused except in the assertion
   1.496 +    FixedDecimal *dec=static_cast<FixedDecimal *>(context);
   1.497 +    U_ASSERT(dec->source==number);
   1.498 +    return pluralRules->select(*dec);
   1.499 +}
   1.500 +
   1.501 +void PluralFormat::PluralSelectorAdapter::reset() {
   1.502 +    delete pluralRules;
   1.503 +    pluralRules = NULL;
   1.504 +}
   1.505 +
   1.506 +
   1.507 +U_NAMESPACE_END
   1.508 +
   1.509 +
   1.510 +#endif /* #if !UCONFIG_NO_FORMATTING */
   1.511 +
   1.512 +//eof

mercurial