1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/plurfmt.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,509 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 2009-2013, International Business Machines Corporation and 1.7 +* others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* 1.10 +* File PLURFMT.CPP 1.11 +******************************************************************************* 1.12 +*/ 1.13 + 1.14 +#include "unicode/decimfmt.h" 1.15 +#include "unicode/messagepattern.h" 1.16 +#include "unicode/plurfmt.h" 1.17 +#include "unicode/plurrule.h" 1.18 +#include "unicode/utypes.h" 1.19 +#include "cmemory.h" 1.20 +#include "messageimpl.h" 1.21 +#include "plurrule_impl.h" 1.22 +#include "uassert.h" 1.23 +#include "uhash.h" 1.24 + 1.25 +#if !UCONFIG_NO_FORMATTING 1.26 + 1.27 +U_NAMESPACE_BEGIN 1.28 + 1.29 +static const UChar OTHER_STRING[] = { 1.30 + 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" 1.31 +}; 1.32 + 1.33 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat) 1.34 + 1.35 +PluralFormat::PluralFormat(UErrorCode& status) 1.36 + : locale(Locale::getDefault()), 1.37 + msgPattern(status), 1.38 + numberFormat(NULL), 1.39 + offset(0) { 1.40 + init(NULL, UPLURAL_TYPE_CARDINAL, status); 1.41 +} 1.42 + 1.43 +PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status) 1.44 + : locale(loc), 1.45 + msgPattern(status), 1.46 + numberFormat(NULL), 1.47 + offset(0) { 1.48 + init(NULL, UPLURAL_TYPE_CARDINAL, status); 1.49 +} 1.50 + 1.51 +PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) 1.52 + : locale(Locale::getDefault()), 1.53 + msgPattern(status), 1.54 + numberFormat(NULL), 1.55 + offset(0) { 1.56 + init(&rules, UPLURAL_TYPE_COUNT, status); 1.57 +} 1.58 + 1.59 +PluralFormat::PluralFormat(const Locale& loc, 1.60 + const PluralRules& rules, 1.61 + UErrorCode& status) 1.62 + : locale(loc), 1.63 + msgPattern(status), 1.64 + numberFormat(NULL), 1.65 + offset(0) { 1.66 + init(&rules, UPLURAL_TYPE_COUNT, status); 1.67 +} 1.68 + 1.69 +PluralFormat::PluralFormat(const Locale& loc, 1.70 + UPluralType type, 1.71 + UErrorCode& status) 1.72 + : locale(loc), 1.73 + msgPattern(status), 1.74 + numberFormat(NULL), 1.75 + offset(0) { 1.76 + init(NULL, type, status); 1.77 +} 1.78 + 1.79 +PluralFormat::PluralFormat(const UnicodeString& pat, 1.80 + UErrorCode& status) 1.81 + : locale(Locale::getDefault()), 1.82 + msgPattern(status), 1.83 + numberFormat(NULL), 1.84 + offset(0) { 1.85 + init(NULL, UPLURAL_TYPE_CARDINAL, status); 1.86 + applyPattern(pat, status); 1.87 +} 1.88 + 1.89 +PluralFormat::PluralFormat(const Locale& loc, 1.90 + const UnicodeString& pat, 1.91 + UErrorCode& status) 1.92 + : locale(loc), 1.93 + msgPattern(status), 1.94 + numberFormat(NULL), 1.95 + offset(0) { 1.96 + init(NULL, UPLURAL_TYPE_CARDINAL, status); 1.97 + applyPattern(pat, status); 1.98 +} 1.99 + 1.100 +PluralFormat::PluralFormat(const PluralRules& rules, 1.101 + const UnicodeString& pat, 1.102 + UErrorCode& status) 1.103 + : locale(Locale::getDefault()), 1.104 + msgPattern(status), 1.105 + numberFormat(NULL), 1.106 + offset(0) { 1.107 + init(&rules, UPLURAL_TYPE_COUNT, status); 1.108 + applyPattern(pat, status); 1.109 +} 1.110 + 1.111 +PluralFormat::PluralFormat(const Locale& loc, 1.112 + const PluralRules& rules, 1.113 + const UnicodeString& pat, 1.114 + UErrorCode& status) 1.115 + : locale(loc), 1.116 + msgPattern(status), 1.117 + numberFormat(NULL), 1.118 + offset(0) { 1.119 + init(&rules, UPLURAL_TYPE_COUNT, status); 1.120 + applyPattern(pat, status); 1.121 +} 1.122 + 1.123 +PluralFormat::PluralFormat(const Locale& loc, 1.124 + UPluralType type, 1.125 + const UnicodeString& pat, 1.126 + UErrorCode& status) 1.127 + : locale(loc), 1.128 + msgPattern(status), 1.129 + numberFormat(NULL), 1.130 + offset(0) { 1.131 + init(NULL, type, status); 1.132 + applyPattern(pat, status); 1.133 +} 1.134 + 1.135 +PluralFormat::PluralFormat(const PluralFormat& other) 1.136 + : Format(other), 1.137 + locale(other.locale), 1.138 + msgPattern(other.msgPattern), 1.139 + numberFormat(NULL), 1.140 + offset(other.offset) { 1.141 + copyObjects(other); 1.142 +} 1.143 + 1.144 +void 1.145 +PluralFormat::copyObjects(const PluralFormat& other) { 1.146 + UErrorCode status = U_ZERO_ERROR; 1.147 + if (numberFormat != NULL) { 1.148 + delete numberFormat; 1.149 + } 1.150 + if (pluralRulesWrapper.pluralRules != NULL) { 1.151 + delete pluralRulesWrapper.pluralRules; 1.152 + } 1.153 + 1.154 + if (other.numberFormat == NULL) { 1.155 + numberFormat = NumberFormat::createInstance(locale, status); 1.156 + } else { 1.157 + numberFormat = (NumberFormat*)other.numberFormat->clone(); 1.158 + } 1.159 + if (other.pluralRulesWrapper.pluralRules == NULL) { 1.160 + pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status); 1.161 + } else { 1.162 + pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone(); 1.163 + } 1.164 +} 1.165 + 1.166 + 1.167 +PluralFormat::~PluralFormat() { 1.168 + delete numberFormat; 1.169 +} 1.170 + 1.171 +void 1.172 +PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) { 1.173 + if (U_FAILURE(status)) { 1.174 + return; 1.175 + } 1.176 + 1.177 + if (rules==NULL) { 1.178 + pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status); 1.179 + } else { 1.180 + pluralRulesWrapper.pluralRules = rules->clone(); 1.181 + if (pluralRulesWrapper.pluralRules == NULL) { 1.182 + status = U_MEMORY_ALLOCATION_ERROR; 1.183 + return; 1.184 + } 1.185 + } 1.186 + 1.187 + numberFormat= NumberFormat::createInstance(locale, status); 1.188 +} 1.189 + 1.190 +void 1.191 +PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { 1.192 + msgPattern.parsePluralStyle(newPattern, NULL, status); 1.193 + if (U_FAILURE(status)) { 1.194 + msgPattern.clear(); 1.195 + offset = 0; 1.196 + return; 1.197 + } 1.198 + offset = msgPattern.getPluralOffset(0); 1.199 +} 1.200 + 1.201 +UnicodeString& 1.202 +PluralFormat::format(const Formattable& obj, 1.203 + UnicodeString& appendTo, 1.204 + FieldPosition& pos, 1.205 + UErrorCode& status) const 1.206 +{ 1.207 + if (U_FAILURE(status)) return appendTo; 1.208 + 1.209 + if (obj.isNumeric()) { 1.210 + return format(obj, obj.getDouble(), appendTo, pos, status); 1.211 + } else { 1.212 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.213 + return appendTo; 1.214 + } 1.215 +} 1.216 + 1.217 +UnicodeString 1.218 +PluralFormat::format(int32_t number, UErrorCode& status) const { 1.219 + FieldPosition fpos(0); 1.220 + UnicodeString result; 1.221 + return format(Formattable(number), number, result, fpos, status); 1.222 +} 1.223 + 1.224 +UnicodeString 1.225 +PluralFormat::format(double number, UErrorCode& status) const { 1.226 + FieldPosition fpos(0); 1.227 + UnicodeString result; 1.228 + return format(Formattable(number), number, result, fpos, status); 1.229 +} 1.230 + 1.231 + 1.232 +UnicodeString& 1.233 +PluralFormat::format(int32_t number, 1.234 + UnicodeString& appendTo, 1.235 + FieldPosition& pos, 1.236 + UErrorCode& status) const { 1.237 + return format(Formattable(number), (double)number, appendTo, pos, status); 1.238 +} 1.239 + 1.240 +UnicodeString& 1.241 +PluralFormat::format(double number, 1.242 + UnicodeString& appendTo, 1.243 + FieldPosition& pos, 1.244 + UErrorCode& status) const { 1.245 + return format(Formattable(number), (double)number, appendTo, pos, status); 1.246 +} 1.247 + 1.248 +UnicodeString& 1.249 +PluralFormat::format(const Formattable& numberObject, double number, 1.250 + UnicodeString& appendTo, 1.251 + FieldPosition& pos, 1.252 + UErrorCode& status) const { 1.253 + if (U_FAILURE(status)) { 1.254 + return appendTo; 1.255 + } 1.256 + if (msgPattern.countParts() == 0) { 1.257 + return numberFormat->format(numberObject, appendTo, pos, status); 1.258 + } 1.259 + // Get the appropriate sub-message. 1.260 + // Select it based on the formatted number-offset. 1.261 + double numberMinusOffset = number - offset; 1.262 + UnicodeString numberString; 1.263 + FieldPosition ignorePos; 1.264 + FixedDecimal dec(numberMinusOffset); 1.265 + if (offset == 0) { 1.266 + numberFormat->format(numberObject, numberString, ignorePos, status); // could be BigDecimal etc. 1.267 + DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); 1.268 + if(decFmt != NULL) { 1.269 + dec = decFmt->getFixedDecimal(numberObject, status); 1.270 + } 1.271 + } else { 1.272 + numberFormat->format(numberMinusOffset, numberString, ignorePos, status); 1.273 + DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); 1.274 + if(decFmt != NULL) { 1.275 + dec = decFmt->getFixedDecimal(numberMinusOffset, status); 1.276 + } 1.277 + } 1.278 + int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status); 1.279 + if (U_FAILURE(status)) { return appendTo; } 1.280 + // Replace syntactic # signs in the top level of this sub-message 1.281 + // (not in nested arguments) with the formatted number-offset. 1.282 + const UnicodeString& pattern = msgPattern.getPatternString(); 1.283 + int32_t prevIndex = msgPattern.getPart(partIndex).getLimit(); 1.284 + for (;;) { 1.285 + const MessagePattern::Part& part = msgPattern.getPart(++partIndex); 1.286 + const UMessagePatternPartType type = part.getType(); 1.287 + int32_t index = part.getIndex(); 1.288 + if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 1.289 + return appendTo.append(pattern, prevIndex, index - prevIndex); 1.290 + } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) || 1.291 + (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) { 1.292 + appendTo.append(pattern, prevIndex, index - prevIndex); 1.293 + if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 1.294 + appendTo.append(numberString); 1.295 + } 1.296 + prevIndex = part.getLimit(); 1.297 + } else if (type == UMSGPAT_PART_TYPE_ARG_START) { 1.298 + appendTo.append(pattern, prevIndex, index - prevIndex); 1.299 + prevIndex = index; 1.300 + partIndex = msgPattern.getLimitPartIndex(partIndex); 1.301 + index = msgPattern.getPart(partIndex).getLimit(); 1.302 + MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo); 1.303 + prevIndex = index; 1.304 + } 1.305 + } 1.306 +} 1.307 + 1.308 +UnicodeString& 1.309 +PluralFormat::toPattern(UnicodeString& appendTo) { 1.310 + if (0 == msgPattern.countParts()) { 1.311 + appendTo.setToBogus(); 1.312 + } else { 1.313 + appendTo.append(msgPattern.getPatternString()); 1.314 + } 1.315 + return appendTo; 1.316 +} 1.317 + 1.318 +void 1.319 +PluralFormat::setLocale(const Locale& loc, UErrorCode& status) { 1.320 + if (U_FAILURE(status)) { 1.321 + return; 1.322 + } 1.323 + locale = loc; 1.324 + msgPattern.clear(); 1.325 + delete numberFormat; 1.326 + offset = 0; 1.327 + numberFormat = NULL; 1.328 + pluralRulesWrapper.reset(); 1.329 + init(NULL, UPLURAL_TYPE_CARDINAL, status); 1.330 +} 1.331 + 1.332 +void 1.333 +PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) { 1.334 + if (U_FAILURE(status)) { 1.335 + return; 1.336 + } 1.337 + NumberFormat* nf = (NumberFormat*)format->clone(); 1.338 + if (nf != NULL) { 1.339 + delete numberFormat; 1.340 + numberFormat = nf; 1.341 + } else { 1.342 + status = U_MEMORY_ALLOCATION_ERROR; 1.343 + } 1.344 +} 1.345 + 1.346 +Format* 1.347 +PluralFormat::clone() const 1.348 +{ 1.349 + return new PluralFormat(*this); 1.350 +} 1.351 + 1.352 + 1.353 +PluralFormat& 1.354 +PluralFormat::operator=(const PluralFormat& other) { 1.355 + if (this != &other) { 1.356 + locale = other.locale; 1.357 + msgPattern = other.msgPattern; 1.358 + offset = other.offset; 1.359 + copyObjects(other); 1.360 + } 1.361 + 1.362 + return *this; 1.363 +} 1.364 + 1.365 +UBool 1.366 +PluralFormat::operator==(const Format& other) const { 1.367 + if (this == &other) { 1.368 + return TRUE; 1.369 + } 1.370 + if (!Format::operator==(other)) { 1.371 + return FALSE; 1.372 + } 1.373 + const PluralFormat& o = (const PluralFormat&)other; 1.374 + return 1.375 + locale == o.locale && 1.376 + msgPattern == o.msgPattern && // implies same offset 1.377 + (numberFormat == NULL) == (o.numberFormat == NULL) && 1.378 + (numberFormat == NULL || *numberFormat == *o.numberFormat) && 1.379 + (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) && 1.380 + (pluralRulesWrapper.pluralRules == NULL || 1.381 + *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules); 1.382 +} 1.383 + 1.384 +UBool 1.385 +PluralFormat::operator!=(const Format& other) const { 1.386 + return !operator==(other); 1.387 +} 1.388 + 1.389 +void 1.390 +PluralFormat::parseObject(const UnicodeString& /*source*/, 1.391 + Formattable& /*result*/, 1.392 + ParsePosition& pos) const 1.393 +{ 1.394 + // Parsing not supported. 1.395 + pos.setErrorIndex(pos.getIndex()); 1.396 +} 1.397 + 1.398 +int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex, 1.399 + const PluralSelector& selector, void *context, 1.400 + double number, UErrorCode& ec) { 1.401 + if (U_FAILURE(ec)) { 1.402 + return 0; 1.403 + } 1.404 + int32_t count=pattern.countParts(); 1.405 + double offset; 1.406 + const MessagePattern::Part* part=&pattern.getPart(partIndex); 1.407 + if (MessagePattern::Part::hasNumericValue(part->getType())) { 1.408 + offset=pattern.getNumericValue(*part); 1.409 + ++partIndex; 1.410 + } else { 1.411 + offset=0; 1.412 + } 1.413 + // The keyword is empty until we need to match against a non-explicit, not-"other" value. 1.414 + // Then we get the keyword from the selector. 1.415 + // (In other words, we never call the selector if we match against an explicit value, 1.416 + // or if the only non-explicit keyword is "other".) 1.417 + UnicodeString keyword; 1.418 + UnicodeString other(FALSE, OTHER_STRING, 5); 1.419 + // When we find a match, we set msgStart>0 and also set this boolean to true 1.420 + // to avoid matching the keyword again (duplicates are allowed) 1.421 + // while we continue to look for an explicit-value match. 1.422 + UBool haveKeywordMatch=FALSE; 1.423 + // msgStart is 0 until we find any appropriate sub-message. 1.424 + // We remember the first "other" sub-message if we have not seen any 1.425 + // appropriate sub-message before. 1.426 + // We remember the first matching-keyword sub-message if we have not seen 1.427 + // one of those before. 1.428 + // (The parser allows [does not check for] duplicate keywords. 1.429 + // We just have to make sure to take the first one.) 1.430 + // We avoid matching the keyword twice by also setting haveKeywordMatch=true 1.431 + // at the first keyword match. 1.432 + // We keep going until we find an explicit-value match or reach the end of the plural style. 1.433 + int32_t msgStart=0; 1.434 + // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples 1.435 + // until ARG_LIMIT or end of plural-only pattern. 1.436 + do { 1.437 + part=&pattern.getPart(partIndex++); 1.438 + const UMessagePatternPartType type = part->getType(); 1.439 + if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 1.440 + break; 1.441 + } 1.442 + U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR); 1.443 + // part is an ARG_SELECTOR followed by an optional explicit value, and then a message 1.444 + if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) { 1.445 + // explicit value like "=2" 1.446 + part=&pattern.getPart(partIndex++); 1.447 + if(number==pattern.getNumericValue(*part)) { 1.448 + // matches explicit value 1.449 + return partIndex; 1.450 + } 1.451 + } else if(!haveKeywordMatch) { 1.452 + // plural keyword like "few" or "other" 1.453 + // Compare "other" first and call the selector if this is not "other". 1.454 + if(pattern.partSubstringMatches(*part, other)) { 1.455 + if(msgStart==0) { 1.456 + msgStart=partIndex; 1.457 + if(0 == keyword.compare(other)) { 1.458 + // This is the first "other" sub-message, 1.459 + // and the selected keyword is also "other". 1.460 + // Do not match "other" again. 1.461 + haveKeywordMatch=TRUE; 1.462 + } 1.463 + } 1.464 + } else { 1.465 + if(keyword.isEmpty()) { 1.466 + keyword=selector.select(context, number-offset, ec); 1.467 + if(msgStart!=0 && (0 == keyword.compare(other))) { 1.468 + // We have already seen an "other" sub-message. 1.469 + // Do not match "other" again. 1.470 + haveKeywordMatch=TRUE; 1.471 + // Skip keyword matching but do getLimitPartIndex(). 1.472 + } 1.473 + } 1.474 + if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) { 1.475 + // keyword matches 1.476 + msgStart=partIndex; 1.477 + // Do not match this keyword again. 1.478 + haveKeywordMatch=TRUE; 1.479 + } 1.480 + } 1.481 + } 1.482 + partIndex=pattern.getLimitPartIndex(partIndex); 1.483 + } while(++partIndex<count); 1.484 + return msgStart; 1.485 +} 1.486 + 1.487 +PluralFormat::PluralSelector::~PluralSelector() {} 1.488 + 1.489 +PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() { 1.490 + delete pluralRules; 1.491 +} 1.492 + 1.493 +UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number, 1.494 + UErrorCode& /*ec*/) const { 1.495 + (void)number; // unused except in the assertion 1.496 + FixedDecimal *dec=static_cast<FixedDecimal *>(context); 1.497 + U_ASSERT(dec->source==number); 1.498 + return pluralRules->select(*dec); 1.499 +} 1.500 + 1.501 +void PluralFormat::PluralSelectorAdapter::reset() { 1.502 + delete pluralRules; 1.503 + pluralRules = NULL; 1.504 +} 1.505 + 1.506 + 1.507 +U_NAMESPACE_END 1.508 + 1.509 + 1.510 +#endif /* #if !UCONFIG_NO_FORMATTING */ 1.511 + 1.512 +//eof