intl/icu/source/i18n/plurfmt.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2009-2013, International Business Machines Corporation and
michael@0 4 * others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 *
michael@0 7 * File PLURFMT.CPP
michael@0 8 *******************************************************************************
michael@0 9 */
michael@0 10
michael@0 11 #include "unicode/decimfmt.h"
michael@0 12 #include "unicode/messagepattern.h"
michael@0 13 #include "unicode/plurfmt.h"
michael@0 14 #include "unicode/plurrule.h"
michael@0 15 #include "unicode/utypes.h"
michael@0 16 #include "cmemory.h"
michael@0 17 #include "messageimpl.h"
michael@0 18 #include "plurrule_impl.h"
michael@0 19 #include "uassert.h"
michael@0 20 #include "uhash.h"
michael@0 21
michael@0 22 #if !UCONFIG_NO_FORMATTING
michael@0 23
michael@0 24 U_NAMESPACE_BEGIN
michael@0 25
michael@0 26 static const UChar OTHER_STRING[] = {
michael@0 27 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
michael@0 28 };
michael@0 29
michael@0 30 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
michael@0 31
michael@0 32 PluralFormat::PluralFormat(UErrorCode& status)
michael@0 33 : locale(Locale::getDefault()),
michael@0 34 msgPattern(status),
michael@0 35 numberFormat(NULL),
michael@0 36 offset(0) {
michael@0 37 init(NULL, UPLURAL_TYPE_CARDINAL, status);
michael@0 38 }
michael@0 39
michael@0 40 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
michael@0 41 : locale(loc),
michael@0 42 msgPattern(status),
michael@0 43 numberFormat(NULL),
michael@0 44 offset(0) {
michael@0 45 init(NULL, UPLURAL_TYPE_CARDINAL, status);
michael@0 46 }
michael@0 47
michael@0 48 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
michael@0 49 : locale(Locale::getDefault()),
michael@0 50 msgPattern(status),
michael@0 51 numberFormat(NULL),
michael@0 52 offset(0) {
michael@0 53 init(&rules, UPLURAL_TYPE_COUNT, status);
michael@0 54 }
michael@0 55
michael@0 56 PluralFormat::PluralFormat(const Locale& loc,
michael@0 57 const PluralRules& rules,
michael@0 58 UErrorCode& status)
michael@0 59 : locale(loc),
michael@0 60 msgPattern(status),
michael@0 61 numberFormat(NULL),
michael@0 62 offset(0) {
michael@0 63 init(&rules, UPLURAL_TYPE_COUNT, status);
michael@0 64 }
michael@0 65
michael@0 66 PluralFormat::PluralFormat(const Locale& loc,
michael@0 67 UPluralType type,
michael@0 68 UErrorCode& status)
michael@0 69 : locale(loc),
michael@0 70 msgPattern(status),
michael@0 71 numberFormat(NULL),
michael@0 72 offset(0) {
michael@0 73 init(NULL, type, status);
michael@0 74 }
michael@0 75
michael@0 76 PluralFormat::PluralFormat(const UnicodeString& pat,
michael@0 77 UErrorCode& status)
michael@0 78 : locale(Locale::getDefault()),
michael@0 79 msgPattern(status),
michael@0 80 numberFormat(NULL),
michael@0 81 offset(0) {
michael@0 82 init(NULL, UPLURAL_TYPE_CARDINAL, status);
michael@0 83 applyPattern(pat, status);
michael@0 84 }
michael@0 85
michael@0 86 PluralFormat::PluralFormat(const Locale& loc,
michael@0 87 const UnicodeString& pat,
michael@0 88 UErrorCode& status)
michael@0 89 : locale(loc),
michael@0 90 msgPattern(status),
michael@0 91 numberFormat(NULL),
michael@0 92 offset(0) {
michael@0 93 init(NULL, UPLURAL_TYPE_CARDINAL, status);
michael@0 94 applyPattern(pat, status);
michael@0 95 }
michael@0 96
michael@0 97 PluralFormat::PluralFormat(const PluralRules& rules,
michael@0 98 const UnicodeString& pat,
michael@0 99 UErrorCode& status)
michael@0 100 : locale(Locale::getDefault()),
michael@0 101 msgPattern(status),
michael@0 102 numberFormat(NULL),
michael@0 103 offset(0) {
michael@0 104 init(&rules, UPLURAL_TYPE_COUNT, status);
michael@0 105 applyPattern(pat, status);
michael@0 106 }
michael@0 107
michael@0 108 PluralFormat::PluralFormat(const Locale& loc,
michael@0 109 const PluralRules& rules,
michael@0 110 const UnicodeString& pat,
michael@0 111 UErrorCode& status)
michael@0 112 : locale(loc),
michael@0 113 msgPattern(status),
michael@0 114 numberFormat(NULL),
michael@0 115 offset(0) {
michael@0 116 init(&rules, UPLURAL_TYPE_COUNT, status);
michael@0 117 applyPattern(pat, status);
michael@0 118 }
michael@0 119
michael@0 120 PluralFormat::PluralFormat(const Locale& loc,
michael@0 121 UPluralType type,
michael@0 122 const UnicodeString& pat,
michael@0 123 UErrorCode& status)
michael@0 124 : locale(loc),
michael@0 125 msgPattern(status),
michael@0 126 numberFormat(NULL),
michael@0 127 offset(0) {
michael@0 128 init(NULL, type, status);
michael@0 129 applyPattern(pat, status);
michael@0 130 }
michael@0 131
michael@0 132 PluralFormat::PluralFormat(const PluralFormat& other)
michael@0 133 : Format(other),
michael@0 134 locale(other.locale),
michael@0 135 msgPattern(other.msgPattern),
michael@0 136 numberFormat(NULL),
michael@0 137 offset(other.offset) {
michael@0 138 copyObjects(other);
michael@0 139 }
michael@0 140
michael@0 141 void
michael@0 142 PluralFormat::copyObjects(const PluralFormat& other) {
michael@0 143 UErrorCode status = U_ZERO_ERROR;
michael@0 144 if (numberFormat != NULL) {
michael@0 145 delete numberFormat;
michael@0 146 }
michael@0 147 if (pluralRulesWrapper.pluralRules != NULL) {
michael@0 148 delete pluralRulesWrapper.pluralRules;
michael@0 149 }
michael@0 150
michael@0 151 if (other.numberFormat == NULL) {
michael@0 152 numberFormat = NumberFormat::createInstance(locale, status);
michael@0 153 } else {
michael@0 154 numberFormat = (NumberFormat*)other.numberFormat->clone();
michael@0 155 }
michael@0 156 if (other.pluralRulesWrapper.pluralRules == NULL) {
michael@0 157 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
michael@0 158 } else {
michael@0 159 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
michael@0 160 }
michael@0 161 }
michael@0 162
michael@0 163
michael@0 164 PluralFormat::~PluralFormat() {
michael@0 165 delete numberFormat;
michael@0 166 }
michael@0 167
michael@0 168 void
michael@0 169 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
michael@0 170 if (U_FAILURE(status)) {
michael@0 171 return;
michael@0 172 }
michael@0 173
michael@0 174 if (rules==NULL) {
michael@0 175 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
michael@0 176 } else {
michael@0 177 pluralRulesWrapper.pluralRules = rules->clone();
michael@0 178 if (pluralRulesWrapper.pluralRules == NULL) {
michael@0 179 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 180 return;
michael@0 181 }
michael@0 182 }
michael@0 183
michael@0 184 numberFormat= NumberFormat::createInstance(locale, status);
michael@0 185 }
michael@0 186
michael@0 187 void
michael@0 188 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
michael@0 189 msgPattern.parsePluralStyle(newPattern, NULL, status);
michael@0 190 if (U_FAILURE(status)) {
michael@0 191 msgPattern.clear();
michael@0 192 offset = 0;
michael@0 193 return;
michael@0 194 }
michael@0 195 offset = msgPattern.getPluralOffset(0);
michael@0 196 }
michael@0 197
michael@0 198 UnicodeString&
michael@0 199 PluralFormat::format(const Formattable& obj,
michael@0 200 UnicodeString& appendTo,
michael@0 201 FieldPosition& pos,
michael@0 202 UErrorCode& status) const
michael@0 203 {
michael@0 204 if (U_FAILURE(status)) return appendTo;
michael@0 205
michael@0 206 if (obj.isNumeric()) {
michael@0 207 return format(obj, obj.getDouble(), appendTo, pos, status);
michael@0 208 } else {
michael@0 209 status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 210 return appendTo;
michael@0 211 }
michael@0 212 }
michael@0 213
michael@0 214 UnicodeString
michael@0 215 PluralFormat::format(int32_t number, UErrorCode& status) const {
michael@0 216 FieldPosition fpos(0);
michael@0 217 UnicodeString result;
michael@0 218 return format(Formattable(number), number, result, fpos, status);
michael@0 219 }
michael@0 220
michael@0 221 UnicodeString
michael@0 222 PluralFormat::format(double number, UErrorCode& status) const {
michael@0 223 FieldPosition fpos(0);
michael@0 224 UnicodeString result;
michael@0 225 return format(Formattable(number), number, result, fpos, status);
michael@0 226 }
michael@0 227
michael@0 228
michael@0 229 UnicodeString&
michael@0 230 PluralFormat::format(int32_t number,
michael@0 231 UnicodeString& appendTo,
michael@0 232 FieldPosition& pos,
michael@0 233 UErrorCode& status) const {
michael@0 234 return format(Formattable(number), (double)number, appendTo, pos, status);
michael@0 235 }
michael@0 236
michael@0 237 UnicodeString&
michael@0 238 PluralFormat::format(double number,
michael@0 239 UnicodeString& appendTo,
michael@0 240 FieldPosition& pos,
michael@0 241 UErrorCode& status) const {
michael@0 242 return format(Formattable(number), (double)number, appendTo, pos, status);
michael@0 243 }
michael@0 244
michael@0 245 UnicodeString&
michael@0 246 PluralFormat::format(const Formattable& numberObject, double number,
michael@0 247 UnicodeString& appendTo,
michael@0 248 FieldPosition& pos,
michael@0 249 UErrorCode& status) const {
michael@0 250 if (U_FAILURE(status)) {
michael@0 251 return appendTo;
michael@0 252 }
michael@0 253 if (msgPattern.countParts() == 0) {
michael@0 254 return numberFormat->format(numberObject, appendTo, pos, status);
michael@0 255 }
michael@0 256 // Get the appropriate sub-message.
michael@0 257 // Select it based on the formatted number-offset.
michael@0 258 double numberMinusOffset = number - offset;
michael@0 259 UnicodeString numberString;
michael@0 260 FieldPosition ignorePos;
michael@0 261 FixedDecimal dec(numberMinusOffset);
michael@0 262 if (offset == 0) {
michael@0 263 numberFormat->format(numberObject, numberString, ignorePos, status); // could be BigDecimal etc.
michael@0 264 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
michael@0 265 if(decFmt != NULL) {
michael@0 266 dec = decFmt->getFixedDecimal(numberObject, status);
michael@0 267 }
michael@0 268 } else {
michael@0 269 numberFormat->format(numberMinusOffset, numberString, ignorePos, status);
michael@0 270 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
michael@0 271 if(decFmt != NULL) {
michael@0 272 dec = decFmt->getFixedDecimal(numberMinusOffset, status);
michael@0 273 }
michael@0 274 }
michael@0 275 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
michael@0 276 if (U_FAILURE(status)) { return appendTo; }
michael@0 277 // Replace syntactic # signs in the top level of this sub-message
michael@0 278 // (not in nested arguments) with the formatted number-offset.
michael@0 279 const UnicodeString& pattern = msgPattern.getPatternString();
michael@0 280 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
michael@0 281 for (;;) {
michael@0 282 const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
michael@0 283 const UMessagePatternPartType type = part.getType();
michael@0 284 int32_t index = part.getIndex();
michael@0 285 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
michael@0 286 return appendTo.append(pattern, prevIndex, index - prevIndex);
michael@0 287 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
michael@0 288 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
michael@0 289 appendTo.append(pattern, prevIndex, index - prevIndex);
michael@0 290 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
michael@0 291 appendTo.append(numberString);
michael@0 292 }
michael@0 293 prevIndex = part.getLimit();
michael@0 294 } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
michael@0 295 appendTo.append(pattern, prevIndex, index - prevIndex);
michael@0 296 prevIndex = index;
michael@0 297 partIndex = msgPattern.getLimitPartIndex(partIndex);
michael@0 298 index = msgPattern.getPart(partIndex).getLimit();
michael@0 299 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
michael@0 300 prevIndex = index;
michael@0 301 }
michael@0 302 }
michael@0 303 }
michael@0 304
michael@0 305 UnicodeString&
michael@0 306 PluralFormat::toPattern(UnicodeString& appendTo) {
michael@0 307 if (0 == msgPattern.countParts()) {
michael@0 308 appendTo.setToBogus();
michael@0 309 } else {
michael@0 310 appendTo.append(msgPattern.getPatternString());
michael@0 311 }
michael@0 312 return appendTo;
michael@0 313 }
michael@0 314
michael@0 315 void
michael@0 316 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
michael@0 317 if (U_FAILURE(status)) {
michael@0 318 return;
michael@0 319 }
michael@0 320 locale = loc;
michael@0 321 msgPattern.clear();
michael@0 322 delete numberFormat;
michael@0 323 offset = 0;
michael@0 324 numberFormat = NULL;
michael@0 325 pluralRulesWrapper.reset();
michael@0 326 init(NULL, UPLURAL_TYPE_CARDINAL, status);
michael@0 327 }
michael@0 328
michael@0 329 void
michael@0 330 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
michael@0 331 if (U_FAILURE(status)) {
michael@0 332 return;
michael@0 333 }
michael@0 334 NumberFormat* nf = (NumberFormat*)format->clone();
michael@0 335 if (nf != NULL) {
michael@0 336 delete numberFormat;
michael@0 337 numberFormat = nf;
michael@0 338 } else {
michael@0 339 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 340 }
michael@0 341 }
michael@0 342
michael@0 343 Format*
michael@0 344 PluralFormat::clone() const
michael@0 345 {
michael@0 346 return new PluralFormat(*this);
michael@0 347 }
michael@0 348
michael@0 349
michael@0 350 PluralFormat&
michael@0 351 PluralFormat::operator=(const PluralFormat& other) {
michael@0 352 if (this != &other) {
michael@0 353 locale = other.locale;
michael@0 354 msgPattern = other.msgPattern;
michael@0 355 offset = other.offset;
michael@0 356 copyObjects(other);
michael@0 357 }
michael@0 358
michael@0 359 return *this;
michael@0 360 }
michael@0 361
michael@0 362 UBool
michael@0 363 PluralFormat::operator==(const Format& other) const {
michael@0 364 if (this == &other) {
michael@0 365 return TRUE;
michael@0 366 }
michael@0 367 if (!Format::operator==(other)) {
michael@0 368 return FALSE;
michael@0 369 }
michael@0 370 const PluralFormat& o = (const PluralFormat&)other;
michael@0 371 return
michael@0 372 locale == o.locale &&
michael@0 373 msgPattern == o.msgPattern && // implies same offset
michael@0 374 (numberFormat == NULL) == (o.numberFormat == NULL) &&
michael@0 375 (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
michael@0 376 (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
michael@0 377 (pluralRulesWrapper.pluralRules == NULL ||
michael@0 378 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
michael@0 379 }
michael@0 380
michael@0 381 UBool
michael@0 382 PluralFormat::operator!=(const Format& other) const {
michael@0 383 return !operator==(other);
michael@0 384 }
michael@0 385
michael@0 386 void
michael@0 387 PluralFormat::parseObject(const UnicodeString& /*source*/,
michael@0 388 Formattable& /*result*/,
michael@0 389 ParsePosition& pos) const
michael@0 390 {
michael@0 391 // Parsing not supported.
michael@0 392 pos.setErrorIndex(pos.getIndex());
michael@0 393 }
michael@0 394
michael@0 395 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
michael@0 396 const PluralSelector& selector, void *context,
michael@0 397 double number, UErrorCode& ec) {
michael@0 398 if (U_FAILURE(ec)) {
michael@0 399 return 0;
michael@0 400 }
michael@0 401 int32_t count=pattern.countParts();
michael@0 402 double offset;
michael@0 403 const MessagePattern::Part* part=&pattern.getPart(partIndex);
michael@0 404 if (MessagePattern::Part::hasNumericValue(part->getType())) {
michael@0 405 offset=pattern.getNumericValue(*part);
michael@0 406 ++partIndex;
michael@0 407 } else {
michael@0 408 offset=0;
michael@0 409 }
michael@0 410 // The keyword is empty until we need to match against a non-explicit, not-"other" value.
michael@0 411 // Then we get the keyword from the selector.
michael@0 412 // (In other words, we never call the selector if we match against an explicit value,
michael@0 413 // or if the only non-explicit keyword is "other".)
michael@0 414 UnicodeString keyword;
michael@0 415 UnicodeString other(FALSE, OTHER_STRING, 5);
michael@0 416 // When we find a match, we set msgStart>0 and also set this boolean to true
michael@0 417 // to avoid matching the keyword again (duplicates are allowed)
michael@0 418 // while we continue to look for an explicit-value match.
michael@0 419 UBool haveKeywordMatch=FALSE;
michael@0 420 // msgStart is 0 until we find any appropriate sub-message.
michael@0 421 // We remember the first "other" sub-message if we have not seen any
michael@0 422 // appropriate sub-message before.
michael@0 423 // We remember the first matching-keyword sub-message if we have not seen
michael@0 424 // one of those before.
michael@0 425 // (The parser allows [does not check for] duplicate keywords.
michael@0 426 // We just have to make sure to take the first one.)
michael@0 427 // We avoid matching the keyword twice by also setting haveKeywordMatch=true
michael@0 428 // at the first keyword match.
michael@0 429 // We keep going until we find an explicit-value match or reach the end of the plural style.
michael@0 430 int32_t msgStart=0;
michael@0 431 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
michael@0 432 // until ARG_LIMIT or end of plural-only pattern.
michael@0 433 do {
michael@0 434 part=&pattern.getPart(partIndex++);
michael@0 435 const UMessagePatternPartType type = part->getType();
michael@0 436 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
michael@0 437 break;
michael@0 438 }
michael@0 439 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
michael@0 440 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
michael@0 441 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
michael@0 442 // explicit value like "=2"
michael@0 443 part=&pattern.getPart(partIndex++);
michael@0 444 if(number==pattern.getNumericValue(*part)) {
michael@0 445 // matches explicit value
michael@0 446 return partIndex;
michael@0 447 }
michael@0 448 } else if(!haveKeywordMatch) {
michael@0 449 // plural keyword like "few" or "other"
michael@0 450 // Compare "other" first and call the selector if this is not "other".
michael@0 451 if(pattern.partSubstringMatches(*part, other)) {
michael@0 452 if(msgStart==0) {
michael@0 453 msgStart=partIndex;
michael@0 454 if(0 == keyword.compare(other)) {
michael@0 455 // This is the first "other" sub-message,
michael@0 456 // and the selected keyword is also "other".
michael@0 457 // Do not match "other" again.
michael@0 458 haveKeywordMatch=TRUE;
michael@0 459 }
michael@0 460 }
michael@0 461 } else {
michael@0 462 if(keyword.isEmpty()) {
michael@0 463 keyword=selector.select(context, number-offset, ec);
michael@0 464 if(msgStart!=0 && (0 == keyword.compare(other))) {
michael@0 465 // We have already seen an "other" sub-message.
michael@0 466 // Do not match "other" again.
michael@0 467 haveKeywordMatch=TRUE;
michael@0 468 // Skip keyword matching but do getLimitPartIndex().
michael@0 469 }
michael@0 470 }
michael@0 471 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
michael@0 472 // keyword matches
michael@0 473 msgStart=partIndex;
michael@0 474 // Do not match this keyword again.
michael@0 475 haveKeywordMatch=TRUE;
michael@0 476 }
michael@0 477 }
michael@0 478 }
michael@0 479 partIndex=pattern.getLimitPartIndex(partIndex);
michael@0 480 } while(++partIndex<count);
michael@0 481 return msgStart;
michael@0 482 }
michael@0 483
michael@0 484 PluralFormat::PluralSelector::~PluralSelector() {}
michael@0 485
michael@0 486 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
michael@0 487 delete pluralRules;
michael@0 488 }
michael@0 489
michael@0 490 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
michael@0 491 UErrorCode& /*ec*/) const {
michael@0 492 (void)number; // unused except in the assertion
michael@0 493 FixedDecimal *dec=static_cast<FixedDecimal *>(context);
michael@0 494 U_ASSERT(dec->source==number);
michael@0 495 return pluralRules->select(*dec);
michael@0 496 }
michael@0 497
michael@0 498 void PluralFormat::PluralSelectorAdapter::reset() {
michael@0 499 delete pluralRules;
michael@0 500 pluralRules = NULL;
michael@0 501 }
michael@0 502
michael@0 503
michael@0 504 U_NAMESPACE_END
michael@0 505
michael@0 506
michael@0 507 #endif /* #if !UCONFIG_NO_FORMATTING */
michael@0 508
michael@0 509 //eof

mercurial