intl/icu/source/i18n/compactdecimalformat.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/compactdecimalformat.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,953 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +* Copyright (C) 1997-2012, International Business Machines Corporation and    *
     1.7 +* others. All Rights Reserved.                                                *
     1.8 +*******************************************************************************
     1.9 +*
    1.10 +* File COMPACTDECIMALFORMAT.CPP
    1.11 +*
    1.12 +********************************************************************************
    1.13 +*/
    1.14 +#include "unicode/utypes.h"
    1.15 +
    1.16 +#if !UCONFIG_NO_FORMATTING
    1.17 +
    1.18 +#include "charstr.h"
    1.19 +#include "cstring.h"
    1.20 +#include "digitlst.h"
    1.21 +#include "mutex.h"
    1.22 +#include "unicode/compactdecimalformat.h"
    1.23 +#include "unicode/numsys.h"
    1.24 +#include "unicode/plurrule.h"
    1.25 +#include "unicode/ures.h"
    1.26 +#include "ucln_in.h"
    1.27 +#include "uhash.h"
    1.28 +#include "umutex.h"
    1.29 +#include "unicode/ures.h"
    1.30 +#include "uresimp.h"
    1.31 +
    1.32 +#define LENGTHOF(array) (int32_t)(sizeof(array) / sizeof((array)[0]))
    1.33 +
    1.34 +// Maps locale name to CDFLocaleData struct. Created on first use by getCDFLocaleStyleData and closed by cdf_cleanup.
    1.35 +static UHashtable* gCompactDecimalData = NULL;
    1.36 +static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER;  // held by getCDFLocaleStyleData while it reads or updates the map
    1.37 +
    1.38 +U_NAMESPACE_BEGIN
    1.39 +
    1.40 +static const int32_t MAX_DIGITS = 15;  // length of the per-log10 arrays (CDFLocaleStyleData::divisors, CDFUnit arrays)
    1.41 +static const char gOther[] = "other";  // plural variant that populatePower10 requires for every power of 10
    1.42 +static const char gLatnTag[] = "latn";  // numbering system that initCDFLocaleData falls back to
    1.43 +static const char gNumberElementsTag[] = "NumberElements";  // resource key opened first by initCDFLocaleData
    1.44 +static const char gDecimalFormatTag[] = "decimalFormat";  // sub-resource key fetched by tryGetDecimalFallback
    1.45 +static const char gPatternsShort[] = "patternsShort";  // resource key of the UNUM_SHORT data
    1.46 +static const char gPatternsLong[] = "patternsLong";  // resource key of the UNUM_LONG data
    1.47 +static const char gRoot[] = "root";  // locale name that isRoot compares against
    1.48 +
    1.49 +static const UChar u_0 = 0x30;  // U+0030 DIGIT ZERO
    1.50 +static const UChar u_apos = 0x27;  // U+0027 APOSTROPHE
    1.51 +
    1.52 +static const UChar kZero[] = {u_0};  // one-character pattern used to locate zero digits in a format string
    1.53 +
    1.54 +// Used to unescape single quotes. NOTE(review): presumably the scanner states of fixQuotes(), whose body is not visible here - confirm.
    1.55 +enum QuoteState {
    1.56 +  OUTSIDE,
    1.57 +  INSIDE_EMPTY,
    1.58 +  INSIDE_FULL
    1.59 +};
    1.60 +
    1.61 +enum FallbackFlags {  // flags argument of tryGetByKeyWithFallback and tryGetDecimalFallback
    1.62 +  ANY = 0,  // no restriction on the locale the sub-resource comes from
    1.63 +  MUST = 1,  // bit flag ORed with ANY or NOT_ROOT: a missing sub-resource is reported as U_MISSING_RESOURCE_ERROR
    1.64 +  NOT_ROOT = 2  // a sub-resource whose actual locale is root counts as not found
    1.65 +  // Values are bit flags, so the next one will be 4 then 8 etc.
    1.66 +};
    1.67 +
    1.68 +
    1.69 +// CDFUnit holds the prefix-suffix pair used for one plural variant at one
    1.70 +// log10 value. A bogus prefix doubles as the "not populated yet" marker.
    1.71 +struct CDFUnit : public UMemory {
    1.72 +  UnicodeString prefix;
    1.73 +  UnicodeString suffix;
    1.74 +
    1.75 +  // A new unit starts out unset: its prefix is made bogus.
    1.76 +  inline CDFUnit() { prefix.setToBogus(); }
    1.77 +  inline ~CDFUnit() {}
    1.78 +
    1.79 +  // Flags the unit as set by calling remove() on the prefix, which ends its bogus state.
    1.80 +  inline void markAsSet() { prefix.remove(); }
    1.81 +
    1.82 +  // TRUE once the prefix is no longer bogus.
    1.83 +  inline UBool isSet() const { return !prefix.isBogus(); }
    1.84 +};
    1.85 +
    1.86 +// CDFLocaleStyleData holds the compact-format data of one locale for one
    1.87 +// style.
    1.88 +class CDFLocaleStyleData : public UMemory {
    1.89 + public:
    1.90 +  // divisors[k] is the divisor applied when log10 of the number being
    1.91 +  // formatted is k; every entry is a power of 10. For English this is:
    1.92 +  // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
    1.93 +  double divisors[MAX_DIGITS];
    1.94 +  // Keyed by plural variant; each value is a CDFUnit[MAX_DIGITS] array.
    1.95 +  // Formatting a number x goes as follows:
    1.96 +  //   1. compute log10(x), then displayNum = x / divisors[log10(x)];
    1.97 +  //   2. determine the plural variant of displayNum
    1.98 +  //      (e.g zero, one, two, few, many, other);
    1.99 +  //   3. look up cdfUnits = unitsByVariant[pluralVariant];
   1.100 +  //   4. take the prefix and suffix found at cdfUnits[log10(x)].
   1.101 +  UHashtable* unitsByVariant;
   1.102 +  inline CDFLocaleStyleData() : unitsByVariant(NULL) {}
   1.103 +  ~CDFLocaleStyleData();
   1.104 +  // Creates the unitsByVariant table unless it already exists.
   1.105 +  void Init(UErrorCode& status);
   1.106 +  // Closes the unitsByVariant table, which leaves this object bogus.
   1.107 +  void setToBogus();
   1.108 +  // An object without a unitsByVariant table is bogus.
   1.109 +  inline UBool isBogus() const { return unitsByVariant == NULL; }
   1.110 + private:
   1.111 +  CDFLocaleStyleData(const CDFLocaleStyleData&);  // copy construction is private
   1.112 +  CDFLocaleStyleData& operator=(const CDFLocaleStyleData&);  // so is assignment
   1.113 +};
   1.114 +
   1.115 +// CDFLocaleData bundles the short-style and long-style data of one locale.
   1.116 +struct CDFLocaleData : public UMemory {
   1.117 +  CDFLocaleStyleData shortData;
   1.118 +  CDFLocaleStyleData longData;
   1.119 +  inline CDFLocaleData() {}
   1.120 +  inline ~CDFLocaleData() {}
   1.121 +  // Initializes shortData, then longData; stops at the first failure.
   1.122 +  void Init(UErrorCode& status);
   1.123 +};
   1.124 +
   1.125 +U_NAMESPACE_END
   1.126 +
   1.127 +U_CDECL_BEGIN
   1.128 +
   1.129 +static UBool U_CALLCONV cdf_cleanup(void) {
   1.130 +  // Cleanup hook registered by getCDFLocaleStyleData: closes the locale cache, if any, and clears the pointer.
   1.131 +  UHashtable* cache = gCompactDecimalData;
   1.132 +  gCompactDecimalData = NULL;
   1.133 +  if (cache != NULL) { uhash_close(cache); }
   1.134 +  return TRUE;
   1.135 +}
   1.136 +
   1.137 +static void U_CALLCONV deleteCDFUnits(void* ptr) {
   1.138 +  delete [] static_cast<icu::CDFUnit*>(ptr);  // value deleter of unitsByVariant; ptr is treated as a CDFUnit array
   1.139 +}
   1.140 +
   1.141 +static void U_CALLCONV deleteCDFLocaleData(void* ptr) {
   1.142 +  delete static_cast<icu::CDFLocaleData*>(ptr);  // value deleter of gCompactDecimalData; ptr is treated as one CDFLocaleData
   1.143 +}
   1.144 +
   1.145 +U_CDECL_END
   1.146 +
   1.147 +U_NAMESPACE_BEGIN
   1.148 +
   1.149 +static UBool divisors_equal(const double* lhs, const double* rhs);
   1.150 +static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status);
   1.151 +// Forward declarations of the file-local helpers that load and look up the compact-format data.
   1.152 +static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status);
   1.153 +static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status);
   1.154 +static void initCDFLocaleData(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status);
   1.155 +static UResourceBundle* tryGetDecimalFallback(const UResourceBundle* numberSystemResource, const char* style, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status);
   1.156 +static UResourceBundle* tryGetByKeyWithFallback(const UResourceBundle* rb, const char* path, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status);
   1.157 +static UBool isRoot(const UResourceBundle* rb, UErrorCode& status);
   1.158 +static void initCDFLocaleStyleData(const UResourceBundle* decimalFormatBundle, CDFLocaleStyleData* result, UErrorCode& status);
   1.159 +static void populatePower10(const UResourceBundle* power10Bundle, CDFLocaleStyleData* result, UErrorCode& status);
   1.160 +static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UErrorCode& status);
   1.161 +static UBool onlySpaces(UnicodeString u);
   1.162 +static void fixQuotes(UnicodeString& s);
   1.163 +static void fillInMissing(CDFLocaleStyleData* result);
   1.164 +static int32_t computeLog10(double x, UBool inRange);
   1.165 +static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status);
   1.166 +static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value);
   1.167 +
   1.168 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)
   1.169 +
   1.170 +// Wraps decimalFormat as the underlying number formatter. The unitsByVariant and divisors
   1.171 +// pointers are stored as given (nothing is copied); pluralRules is deleted by the destructor.
   1.172 +CompactDecimalFormat::CompactDecimalFormat(
   1.173 +    const DecimalFormat& decimalFormat, const UHashtable* unitsByVariant, const double* divisors, PluralRules* pluralRules)
   1.174 +  : DecimalFormat(decimalFormat),
   1.175 +    _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) {
   1.176 +}
   1.177 +
   1.178 +CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source)
   1.179 +    : DecimalFormat(source), _unitsByVariant(source._unitsByVariant), _divisors(source._divisors),
   1.180 +      _pluralRules(source._pluralRules->clone()) {}  // data pointers are copied as is; only the plural rules are cloned
   1.181 +
   1.182 +// Factory: returns a new formatter for inLocale and style, or NULL with status set on failure.
   1.183 +CompactDecimalFormat* U_EXPORT2
   1.184 +CompactDecimalFormat::createInstance(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
   1.185 +  // The locale's plain decimal format is the base that the compact formatter is copy-constructed from.
   1.186 +  LocalPointer<DecimalFormat> baseFormat(
   1.187 +      (DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status));
   1.188 +  if (U_FAILURE(status)) { return NULL; }
   1.189 +  LocalPointer<PluralRules> rules(PluralRules::forLocale(inLocale, status));
   1.190 +  if (U_FAILURE(status)) { return NULL; }
   1.191 +  // styleData points into the global cache; it is not owned here.
   1.192 +  const CDFLocaleStyleData* styleData = getCDFLocaleStyleData(inLocale, style, status);
   1.193 +  if (U_FAILURE(status)) { return NULL; }
   1.194 +  // rules stays owned by the LocalPointer until the allocation is known to have succeeded.
   1.195 +  CompactDecimalFormat* fmt = new CompactDecimalFormat(
   1.196 +      *baseFormat, styleData->unitsByVariant, styleData->divisors, rules.getAlias());
   1.197 +  if (fmt == NULL) {
   1.198 +    status = U_MEMORY_ALLOCATION_ERROR;
   1.199 +    return NULL;
   1.200 +  }
   1.201 +  rules.orphan();
   1.202 +
   1.203 +  // Compact output: at most three significant digits and no grouping.
   1.204 +  fmt->setMaximumSignificantDigits(3);
   1.205 +  fmt->setSignificantDigitsUsed(TRUE);
   1.206 +  fmt->setGroupingUsed(FALSE);
   1.207 +  return fmt;
   1.208 +}
   1.209 +
   1.210 +CompactDecimalFormat&
   1.211 +CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) {
   1.212 +  if (this == &rhs) { return *this; }  // self-assignment: nothing to do
   1.213 +  DecimalFormat::operator=(rhs);
   1.214 +  // The data pointers are copied as is; the plural rules are replaced by a clone of rhs's.
   1.215 +  _unitsByVariant = rhs._unitsByVariant;
   1.216 +  _divisors = rhs._divisors;
   1.217 +  delete _pluralRules;
   1.218 +  _pluralRules = rhs._pluralRules->clone();
   1.219 +  return *this;
   1.220 +}
   1.221 +
   1.222 +CompactDecimalFormat::~CompactDecimalFormat() {
   1.223 +  delete _pluralRules;  // the only member released here; _unitsByVariant and _divisors are left alone
   1.224 +}
   1.225 +
   1.226 +
   1.227 +Format*
   1.228 +CompactDecimalFormat::clone(void) const {
   1.229 +  return new CompactDecimalFormat(*this);  // heap copy made with the copy constructor; the result of new is returned as is
   1.230 +}
   1.231 +
   1.232 +UBool
   1.233 +CompactDecimalFormat::operator==(const Format& that) const {
   1.234 +  if (this == &that) { return TRUE; }
   1.235 +  // that is only cast to CompactDecimalFormat once DecimalFormat::operator== has accepted it.
   1.236 +  if (!DecimalFormat::operator==(that)) { return FALSE; }
   1.237 +  return eqHelper((const CompactDecimalFormat&) that);
   1.238 +}
   1.239 +
   1.240 +UBool
   1.241 +CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const {
   1.242 +  return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules);  // compares the three members this class adds to DecimalFormat
   1.243 +}
   1.244 +
   1.245 +// Formats number compactly: rounds it, divides it by the divisor stored for its log10
   1.246 +// index, and wraps the formatted quotient in the prefix and suffix of the matching
   1.247 +// plural variant. Returns appendTo unchanged when rounding fails.
   1.248 +UnicodeString&
   1.249 +CompactDecimalFormat::format(
   1.250 +    double number, UnicodeString& appendTo, FieldPosition& pos) const {
   1.251 +  DigitList original, rounded;
   1.252 +  original.set(number);
   1.253 +  UBool isNegative;
   1.254 +  UErrorCode status = U_ZERO_ERROR;
   1.255 +  _round(original, rounded, isNegative, status);
   1.256 +  if (U_FAILURE(status)) {
   1.257 +    return appendTo;
   1.258 +  }
   1.259 +
   1.260 +  // For negative input the rounded value is negated before the log10 index is computed.
   1.261 +  const double roundedValue = rounded.getDouble();
   1.262 +  const double roundedDouble = isNegative ? -roundedValue : roundedValue;
   1.263 +  const int32_t baseIdx = computeLog10(roundedDouble, TRUE);
   1.264 +  const double scaled = roundedDouble / _divisors[baseIdx];
   1.265 +  // The plural variant is selected from the quotient before its sign is flipped back.
   1.266 +  const UnicodeString variant = _pluralRules->select(scaled);
   1.267 +  const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx);
   1.268 +  // NOTE(review): unit is dereferenced unchecked; getCDFUnitFallback is defined outside this view - confirm it never returns NULL.
   1.269 +  appendTo += unit->prefix;
   1.270 +  DecimalFormat::format(isNegative ? -scaled : scaled, appendTo, pos);
   1.271 +  appendTo += unit->suffix;
   1.272 +  return appendTo;
   1.273 +}
   1.274 +
   1.275 +UnicodeString&
   1.276 +CompactDecimalFormat::format(
   1.277 +    double /* number */,
   1.278 +    UnicodeString& appendTo,
   1.279 +    FieldPositionIterator* /* posIter */,
   1.280 +    UErrorCode& status) const {
   1.281 +  status = U_UNSUPPORTED_ERROR;  // this overload is not implemented: report that and leave appendTo untouched
   1.282 +  return appendTo;
   1.283 +}
   1.284 +
   1.285 +UnicodeString&
   1.286 +CompactDecimalFormat::format(
   1.287 +    int64_t number,
   1.288 +    UnicodeString& appendTo,
   1.289 +    FieldPosition& pos) const {
   1.290 +  return format((double) number, appendTo, pos);  // delegates to the double overload; the cast cannot represent every int64_t value exactly
   1.291 +}
   1.292 +
   1.293 +UnicodeString&
   1.294 +CompactDecimalFormat::format(
   1.295 +    int64_t /* number */,
   1.296 +    UnicodeString& appendTo,
   1.297 +    FieldPositionIterator* /* posIter */,
   1.298 +    UErrorCode& status) const {
   1.299 +  status = U_UNSUPPORTED_ERROR;  // this overload is not implemented: report that and leave appendTo untouched
   1.300 +  return appendTo;
   1.301 +}
   1.302 +
   1.303 +UnicodeString&
   1.304 +CompactDecimalFormat::format(
   1.305 +    const StringPiece& /* number */,
   1.306 +    UnicodeString& appendTo,
   1.307 +    FieldPositionIterator* /* posIter */,
   1.308 +    UErrorCode& status) const {
   1.309 +  status = U_UNSUPPORTED_ERROR;  // this overload is not implemented: report that and leave appendTo untouched
   1.310 +  return appendTo;
   1.311 +}
   1.312 +
   1.313 +UnicodeString&
   1.314 +CompactDecimalFormat::format(
   1.315 +    const DigitList& /* number */,
   1.316 +    UnicodeString& appendTo,
   1.317 +    FieldPositionIterator* /* posIter */,
   1.318 +    UErrorCode& status) const {
   1.319 +  status = U_UNSUPPORTED_ERROR;  // this overload is not implemented: report that and leave appendTo untouched
   1.320 +  return appendTo;
   1.321 +}
   1.322 +
   1.323 +UnicodeString&
   1.324 +CompactDecimalFormat::format(const DigitList& /* number */,
   1.325 +                             UnicodeString& appendTo,
   1.326 +                             FieldPosition& /* pos */,
   1.327 +                             UErrorCode& status) const {
   1.328 +  status = U_UNSUPPORTED_ERROR;  // this overload is not implemented: report that and leave appendTo untouched
   1.329 +  return appendTo;
   1.330 +}
   1.331 +
   1.332 +void
   1.333 +CompactDecimalFormat::parse(
   1.334 +    const UnicodeString& /* text */,
   1.335 +    Formattable& /* result */,
   1.336 +    ParsePosition& /* parsePosition */) const {  // no-op: nothing is parsed; result and parsePosition are left untouched (there is no status parameter to report through)
   1.337 +}
   1.338 +
   1.339 +void
   1.340 +CompactDecimalFormat::parse(
   1.341 +    const UnicodeString& /* text */,
   1.342 +    Formattable& /* result */,
   1.343 +    UErrorCode& status) const {
   1.344 +  status = U_UNSUPPORTED_ERROR;  // nothing is parsed; result is left untouched
   1.345 +}
   1.346 +
   1.347 +CurrencyAmount*
   1.348 +CompactDecimalFormat::parseCurrency(
   1.349 +    const UnicodeString& /* text */,
   1.350 +    ParsePosition& /* pos */) const {
   1.351 +  return NULL;  // nothing is parsed: always NULL, and pos is left untouched
   1.352 +}
   1.353 +
   1.354 +void CDFLocaleStyleData::Init(UErrorCode& status) {
   1.355 +  // An existing table is left alone, so calling Init again changes nothing.
   1.356 +  if (unitsByVariant == NULL) {
   1.357 +    unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
   1.358 +    if (U_SUCCESS(status)) {
   1.359 +      // Keys are released with uprv_free, values (CDFUnit arrays) with deleteCDFUnits.
   1.360 +      uhash_setKeyDeleter(unitsByVariant, uprv_free);
   1.361 +      uhash_setValueDeleter(unitsByVariant, deleteCDFUnits);
   1.362 +    }
   1.363 +  }
   1.364 +}
   1.365 +
   1.366 +CDFLocaleStyleData::~CDFLocaleStyleData() {
   1.367 +  setToBogus();  // closes unitsByVariant if it is still open
   1.368 +}
   1.369 +
   1.370 +void CDFLocaleStyleData::setToBogus() {
   1.371 +  // Closing the table and clearing the pointer is what makes isBogus() return TRUE.
   1.372 +  if (unitsByVariant == NULL) { return; }
   1.373 +  uhash_close(unitsByVariant);
   1.374 +  unitsByVariant = NULL;
   1.375 +}
   1.376 +
   1.377 +void CDFLocaleData::Init(UErrorCode& status) {
   1.378 +  // longData is only initialized once shortData has been initialized successfully.
   1.379 +  shortData.Init(status);
   1.380 +  if (U_SUCCESS(status)) {
   1.381 +    longData.Init(status);
   1.382 +  }
   1.383 +}
   1.384 +
   1.385 +// Helper for operator== (via eqHelper): TRUE iff the two MAX_DIGITS-long divisor arrays match element by element.
   1.386 +static UBool divisors_equal(const double* lhs, const double* rhs) {
   1.387 +  int32_t idx = 0;
   1.388 +  while (idx < MAX_DIGITS && lhs[idx] == rhs[idx]) {
   1.389 +    ++idx;
   1.390 +  }
   1.391 +  // Reaching MAX_DIGITS means that no differing element was found.
   1.392 +  return idx == MAX_DIGITS;
   1.393 +}
   1.394 +
   1.395 +// getCDFLocaleStyleData returns pointer to formatting data for given locale and
   1.396 +// style within the global cache. On cache miss, getCDFLocaleStyleData loads
   1.397 +// the data from CLDR into the global cache before returning the pointer. If
   1.398 +// UNUM_LONG data is requested for a locale, and that locale does not have
   1.399 +// UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for
   1.400 +// that locale. Returns NULL when status is, or becomes, a failure.
   1.401 +static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
   1.402 +  if (U_FAILURE(status)) {
   1.403 +    return NULL;
   1.404 +  }
   1.405 +  CDFLocaleData* result = NULL;
   1.406 +  const char* key = inLocale.getName();
   1.407 +  {
   1.408 +    Mutex lock(&gCompactDecimalMetaLock);  // first critical section: create the cache if needed, else look key up
   1.409 +    if (gCompactDecimalData == NULL) {
   1.410 +      gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
   1.411 +      if (U_FAILURE(status)) {
   1.412 +        return NULL;
   1.413 +      }
   1.414 +      uhash_setKeyDeleter(gCompactDecimalData, uprv_free);
   1.415 +      uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData);
   1.416 +      ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup);
   1.417 +    } else {
   1.418 +      result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
   1.419 +    }
   1.420 +  }
   1.421 +  if (result != NULL) {
   1.422 +    return extractDataByStyleEnum(*result, style, status);
   1.423 +  }
   1.424 +  // Cache miss: the data is loaded outside of the lock's scope.
   1.425 +  result = loadCDFLocaleData(inLocale, status);
   1.426 +  if (U_FAILURE(status)) {
   1.427 +    return NULL;
   1.428 +  }
   1.429 +  // Look key up again under the lock: if an entry exists by now, it is kept and the copy loaded here is deleted.
   1.430 +  {
   1.431 +    Mutex lock(&gCompactDecimalMetaLock);
   1.432 +    CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
   1.433 +    if (temp != NULL) {
   1.434 +      delete result;
   1.435 +      result = temp;
   1.436 +    } else {
   1.437 +      uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status);  // NOTE(review): the result of uprv_strdup is not checked for NULL before being used as the key
   1.438 +      if (U_FAILURE(status)) {
   1.439 +        return NULL;
   1.440 +      }
   1.441 +    }
   1.442 +  }
   1.443 +  return extractDataByStyleEnum(*result, style, status);
   1.444 +}
   1.445 +
   1.446 +// Returns the member of data that serves style. Any style other than UNUM_SHORT or
   1.447 +// UNUM_LONG sets status to U_ILLEGAL_ARGUMENT_ERROR and yields NULL.
   1.448 +static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) {
   1.449 +  if (style == UNUM_SHORT) {
   1.450 +    return &data.shortData;
   1.451 +  }
   1.452 +  if (style == UNUM_LONG) {
   1.453 +    // Bogus long data makes UNUM_LONG fall back to the short data.
   1.454 +    return data.longData.isBogus() ? &data.shortData : &data.longData;
   1.455 +  }
   1.456 +  // Neither of the two supported styles.
   1.457 +  status = U_ILLEGAL_ARGUMENT_ERROR;
   1.458 +  return NULL;
   1.459 +}
   1.460 +
   1.461 +// loadCDFLocaleData allocates a CDFLocaleData and fills it with the CLDR formatting
   1.462 +// data of inLocale. The caller owns the returned pointer; NULL is returned on failure.
   1.463 +static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) {
   1.464 +  if (U_FAILURE(status)) {
   1.465 +    return NULL;
   1.466 +  }
   1.467 +  CDFLocaleData* loaded = new CDFLocaleData;
   1.468 +  if (loaded == NULL) {
   1.469 +    status = U_MEMORY_ALLOCATION_ERROR;
   1.470 +    return NULL;
   1.471 +  }
   1.472 +  // The CLDR data is only read once the empty tables have been set up.
   1.473 +  loaded->Init(status);
   1.474 +  if (U_SUCCESS(status)) {
   1.475 +    initCDFLocaleData(inLocale, loaded, status);
   1.476 +  }
   1.477 +  // A failure in either step discards the partially built object.
   1.478 +  if (U_FAILURE(status)) {
   1.479 +    delete loaded;
   1.480 +    return NULL;
   1.481 +  }
   1.482 +  // Success: ownership passes to the caller.
   1.483 +  return loaded;
   1.484 +}
   1.485 +
   1.486 +// initCDFLocaleData initializes result with data from CLDR.
   1.487 +// inLocale is the locale, the CLDR data is stored in result.
   1.488 +// We load the UNUM_SHORT and UNUM_LONG data looking first in local numbering
   1.489 +// system and not including root locale in fallback. Next we try in the latn
   1.490 +// numbering system where we fallback all the way to root. If we don't find
   1.491 +// UNUM_SHORT data in these places, we report an error. If we find
   1.492 +// UNUM_SHORT data before finding UNUM_LONG data we make UNUM_LONG data fall
   1.493 +// back to UNUM_SHORT data. Each bundle opened here is closed exactly once on every path.
   1.494 +static void initCDFLocaleData(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) {
   1.495 +  LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status));
   1.496 +  if (U_FAILURE(status)) {
   1.497 +    return;
   1.498 +  }
   1.499 +  const char* numberingSystemName = ns->getName();
   1.500 +  UResourceBundle* rb = ures_open(NULL, inLocale.getName(), &status);
   1.501 +  rb = ures_getByKeyWithFallback(rb, gNumberElementsTag, rb, &status);
   1.502 +  if (U_FAILURE(status)) {
   1.503 +    ures_close(rb);
   1.504 +    return;
   1.505 +  }
   1.506 +  UResourceBundle* shortDataFillIn = NULL;  // the fill-ins are what gets closed;
   1.507 +  UResourceBundle* longDataFillIn = NULL;
   1.508 +  UResourceBundle* shortData = NULL;  // shortData and longData are the lookup results and are never closed directly
   1.509 +  UResourceBundle* longData = NULL;
   1.510 +
   1.511 +  if (uprv_strcmp(numberingSystemName, gLatnTag) != 0) {
   1.512 +    LocalUResourceBundlePointer localResource(
   1.513 +        tryGetByKeyWithFallback(rb, numberingSystemName, NULL, NOT_ROOT, status));
   1.514 +    shortData = tryGetDecimalFallback(
   1.515 +        localResource.getAlias(), gPatternsShort, &shortDataFillIn, NOT_ROOT, status);
   1.516 +    longData = tryGetDecimalFallback(
   1.517 +        localResource.getAlias(), gPatternsLong, &longDataFillIn, NOT_ROOT, status);
   1.518 +  }
   1.519 +  if (U_FAILURE(status)) {
   1.520 +    ures_close(shortDataFillIn);
   1.521 +    ures_close(longDataFillIn);
   1.522 +    ures_close(rb);
   1.523 +    return;
   1.524 +  }
   1.525 +
   1.526 +  // If we haven't found UNUM_SHORT look in latn numbering system. We must
   1.527 +  // succeed at finding UNUM_SHORT here.
   1.528 +  if (shortData == NULL) {
   1.529 +    LocalUResourceBundlePointer latnResource(tryGetByKeyWithFallback(rb, gLatnTag, NULL, MUST, status));
   1.530 +    shortData = tryGetDecimalFallback(latnResource.getAlias(), gPatternsShort, &shortDataFillIn, MUST, status);
   1.531 +    if (longData == NULL) {
   1.532 +      longData = tryGetDecimalFallback(latnResource.getAlias(), gPatternsLong, &longDataFillIn, ANY, status);
   1.533 +      if (longData != NULL && isRoot(longData, status) && !isRoot(shortData, status)) {
   1.534 +        longData = NULL;  // long data that comes from root is dropped when the short data does not come from root
   1.535 +      }
   1.536 +    }
   1.537 +  }
   1.538 +  initCDFLocaleStyleData(shortData, &result->shortData, status);
   1.539 +  ures_close(shortDataFillIn);
   1.540 +  if (U_FAILURE(status)) {
   1.541 +    ures_close(longDataFillIn);
   1.542 +    ures_close(rb);
   1.543 +    return;  // both bundles are closed now; falling through would close them a second time below
   1.544 +  }
   1.545 +  if (longData == NULL) {
   1.546 +    result->longData.setToBogus();
   1.547 +  } else {
   1.548 +    initCDFLocaleStyleData(longData, &result->longData, status);
   1.549 +  }
   1.550 +  ures_close(longDataFillIn);
   1.551 +  ures_close(rb);
   1.552 +}
   1.553 +
   1.554 +/**
   1.555 + * tryGetDecimalFallback looks up numberSystemResource/<style>/decimalFormat, where style is
   1.556 + * "patternsShort" or "patternsLong". fillIn, flags and status behave as in tryGetByKeyWithFallback.
   1.557 + */
   1.558 +static UResourceBundle* tryGetDecimalFallback(const UResourceBundle* numberSystemResource, const char* style, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status) {
   1.559 +  UResourceBundle* styleBundle = tryGetByKeyWithFallback(numberSystemResource, style, fillIn, flags, status);
   1.560 +  UResourceBundle* decimalFormatBundle = tryGetByKeyWithFallback(styleBundle, gDecimalFormatTag, fillIn, flags, status);
   1.561 +  if (fillIn == NULL) {
   1.562 +    // Without a fillIn the intermediate style bundle is a separate object that is closed here.
   1.563 +    ures_close(styleBundle);
   1.564 +  }
   1.565 +  return decimalFormatBundle;
   1.566 +}
   1.567 +
   1.568 +// tryGetByKeyWithFallback returns a sub-resource bundle that matches given
   1.569 +// criteria or NULL if none found. rb is the resource bundle that we are
   1.570 +// searching. If rb == NULL then this function behaves as if no sub-resource
   1.571 +// is found; path is the key of the sub-resource,
   1.572 +// (i.e "foo" but not "foo/bar"); If fillIn is NULL, caller must always call
   1.573 +// ures_close() on returned resource. See below for example when fillIn is
   1.574 +// not NULL. flags is ANY or NOT_ROOT. Optionally, these values
   1.575 +// can be ored with MUST. MUST by itself is the same as ANY | MUST.
   1.576 +// The locale of the returned sub-resource will either match the
   1.577 +// flags or the returned sub-resource will be NULL. If MUST is included in
   1.578 +// flags, and no suitable sub-resource is found then in addition to returning
   1.579 +// NULL, this function also sets status to U_MISSING_RESOURCE_ERROR. If MUST
   1.580 +// is not included in flags, then this function just returns NULL if no
   1.581 +// such sub-resource is found and will never set status to
   1.582 +// U_MISSING_RESOURCE_ERROR.
   1.583 +//
   1.584 +// Example: This code first searches for "foo/bar" sub-resource without falling
   1.585 +// back to ROOT. Then searches for "baz" sub-resource as last resort.
   1.586 +//
   1.587 +// UResourceBundle* fillIn = NULL;
   1.588 +// UResourceBundle* data = tryGetByKeyWithFallback(rb, "foo", &fillIn, NOT_ROOT, status);
   1.589 +// data = tryGetByKeyWithFallback(data, "bar", &fillIn, NOT_ROOT, status);
   1.590 +// if (!data) {
   1.591 +//   data = tryGetByKeyWithFallback(rb, "baz", &fillIn, MUST,  status);
   1.592 +// }
   1.593 +// if (U_FAILURE(status)) {
   1.594 +//   ures_close(fillIn);
   1.595 +//   return;
   1.596 +// }
   1.597 +// doStuffWithNonNullSubresource(data);
   1.598 +//
   1.599 +// /* Wrong! don't do the following as it can leak memory if fillIn gets set
   1.600 +// to NULL. */
   1.601 +// fillIn = tryGetByKeyWithFallback(rb, "wrong", &fillIn, ANY, status);
   1.602 +//
   1.603 +// ures_close(fillIn);
   1.604 +// 
   1.605 +static UResourceBundle* tryGetByKeyWithFallback(const UResourceBundle* rb, const char* path, UResourceBundle** fillIn, FallbackFlags flags, UErrorCode& status) {
   1.606 +  if (U_FAILURE(status)) {
   1.607 +    return NULL;
   1.608 +  }
   1.609 +  UBool must = (flags & MUST);  // remember the MUST bit; it is stripped from flags further down
   1.610 +  if (rb == NULL) {  // a NULL parent is treated like a sub-resource that was not found
   1.611 +    if (must) {
   1.612 +      status = U_MISSING_RESOURCE_ERROR;
   1.613 +    }
   1.614 +    return NULL;
   1.615 +  }
   1.616 +  UResourceBundle* result = NULL;
   1.617 +  UResourceBundle* ownedByUs = NULL;  // set only when no fillIn was supplied; closed on every path that returns NULL
   1.618 +  if (fillIn == NULL) {
   1.619 +    ownedByUs = ures_getByKeyWithFallback(rb, path, NULL, &status);
   1.620 +    result = ownedByUs;
   1.621 +  } else {
   1.622 +    *fillIn = ures_getByKeyWithFallback(rb, path, *fillIn, &status);  // the caller's fillIn is reused and stays the caller's to close
   1.623 +    result = *fillIn;
   1.624 +  }
   1.625 +  if (U_FAILURE(status)) {
   1.626 +    ures_close(ownedByUs);
   1.627 +    if (status == U_MISSING_RESOURCE_ERROR && !must) {
   1.628 +      status = U_ZERO_ERROR;  // a missing resource is only an error when MUST was requested
   1.629 +    }
   1.630 +    return NULL;
   1.631 +  }
   1.632 +  flags = (FallbackFlags) (flags & ~MUST);  // what remains is the locale restriction: ANY or NOT_ROOT
   1.633 +  switch (flags) {
   1.634 +    case NOT_ROOT:
   1.635 +      {
   1.636 +        UBool bRoot = isRoot(result, status);
   1.637 +        if (bRoot || U_FAILURE(status)) {  // a result from root (or a failed locale lookup) counts as not found
   1.638 +          ures_close(ownedByUs);
   1.639 +          if (must && (status == U_ZERO_ERROR)) {
   1.640 +            status = U_MISSING_RESOURCE_ERROR;
   1.641 +          }
   1.642 +          return NULL;
   1.643 +        }
   1.644 +        return result;
   1.645 +      }
   1.646 +    case ANY:
   1.647 +      return result;
   1.648 +    default:  // any other bit combination is rejected
   1.649 +      ures_close(ownedByUs);
   1.650 +      status = U_ILLEGAL_ARGUMENT_ERROR;
   1.651 +      return NULL;
   1.652 +  }
   1.653 +}
   1.654 +
   1.655 +static UBool isRoot(const UResourceBundle* rb, UErrorCode& status) {
   1.656 +  // TRUE iff the actual locale of rb is "root"; FALSE whenever the locale lookup fails.
   1.657 +  const char* actual = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &status);
   1.658 +  if (U_SUCCESS(status) && uprv_strcmp(actual, gRoot) == 0) {
   1.659 +    return TRUE;
   1.660 +  }
   1.661 +  return FALSE;
   1.662 +}
   1.663 +
   1.664 +
   1.665 +// initCDFLocaleStyleData loads the formatting data of one style into result.
   1.666 +// decimalFormatBundle is the "decimalFormat" resource bundle in CLDR; each of
   1.667 +// its children holds the data for one power of 10.
   1.668 +static void initCDFLocaleStyleData(const UResourceBundle* decimalFormatBundle, CDFLocaleStyleData* result, UErrorCode& status) {
   1.669 +  if (U_FAILURE(status)) {
   1.670 +    return;
   1.671 +  }
   1.672 +  const int32_t count = ures_getSize(decimalFormatBundle);
   1.673 +  UResourceBundle* power10 = NULL;
   1.674 +  // power10 is reused as the fill-in for every child and closed once after the loop.
   1.675 +  for (int32_t i = 0; i < count; ++i) {
   1.676 +    power10 = ures_getByIndex(decimalFormatBundle, i, power10, &status);
   1.677 +    if (U_SUCCESS(status)) {
   1.678 +      populatePower10(power10, result, status);
   1.679 +    }
   1.680 +    if (U_FAILURE(status)) {
   1.681 +      break;
   1.682 +    }
   1.683 +  }
   1.684 +  ures_close(power10);
   1.685 +  // Missing entries are only filled in when every power of 10 loaded cleanly.
   1.686 +  if (U_SUCCESS(status)) {
   1.687 +    fillInMissing(result);
   1.688 +  }
   1.689 +}
   1.690 +
   1.691 +// populatePower10 grabs data for a particular power of 10 from CLDR.
   1.692 +// The loaded data is stored in result: one prefix/suffix pair per plural variant, plus the divisor for that power of 10.
   1.693 +static void populatePower10(const UResourceBundle* power10Bundle, CDFLocaleStyleData* result, UErrorCode& status) {
   1.694 +  if (U_FAILURE(status)) {
   1.695 +    return;
   1.696 +  }
   1.697 +  char* endPtr = NULL;
   1.698 +  double power10 = uprv_strtod(ures_getKey(power10Bundle), &endPtr);  // the bundle's key is parsed as the power of 10 itself
   1.699 +  if (*endPtr != 0) {  // trailing characters after the number: the key is not a plain number
   1.700 +    status = U_INTERNAL_PROGRAM_ERROR;
   1.701 +    return;
   1.702 +  }
   1.703 +  int32_t log10Value = computeLog10(power10, FALSE);
   1.704 +  // Silently ignore divisors that are too big.
   1.705 +  if (log10Value == MAX_DIGITS) {
   1.706 +    return;
   1.707 +  }
   1.708 +  int32_t size = ures_getSize(power10Bundle);
   1.709 +  int32_t numZeros = 0;  // zero count shared by all variants; 0 until a variant reports a non-zero count
   1.710 +  UBool otherVariantDefined = FALSE;
   1.711 +  UResourceBundle* variantBundle = NULL;  // reused as fill-in for every variant; closed on every exit path below
   1.712 +  // Iterate over all the plural variants for the power of 10
   1.713 +  for (int32_t i = 0; i < size; ++i) {
   1.714 +    variantBundle = ures_getByIndex(power10Bundle, i, variantBundle, &status);
   1.715 +    if (U_FAILURE(status)) {
   1.716 +      ures_close(variantBundle);
   1.717 +      return;
   1.718 +    }
   1.719 +    const char* variant = ures_getKey(variantBundle);
   1.720 +    int32_t resLen;
   1.721 +    const UChar* formatStrP = ures_getString(variantBundle, &resLen, &status);
   1.722 +    if (U_FAILURE(status)) {
   1.723 +      ures_close(variantBundle);
   1.724 +      return;
   1.725 +    }
   1.726 +    UnicodeString formatStr(false, formatStrP, resLen);
   1.727 +    if (uprv_strcmp(variant, gOther) == 0) {
   1.728 +      otherVariantDefined = TRUE;
   1.729 +    }
   1.730 +    int32_t nz = populatePrefixSuffix(
   1.731 +        variant, log10Value, formatStr, result->unitsByVariant, status);
   1.732 +    if (U_FAILURE(status)) {
   1.733 +      ures_close(variantBundle);
   1.734 +      return;
   1.735 +    }
   1.736 +    if (nz != numZeros) {
   1.737 +      // We expect all format strings to have the same number of 0's
   1.738 +      // left of the decimal point.
   1.739 +      if (numZeros != 0) {  // a count was already recorded and this variant disagrees with it
   1.740 +        status = U_INTERNAL_PROGRAM_ERROR;
   1.741 +        ures_close(variantBundle);
   1.742 +        return;
   1.743 +      }
   1.744 +      numZeros = nz;
   1.745 +    }
   1.746 +  }
   1.747 +  ures_close(variantBundle);
   1.748 +  // We expect to find an OTHER variant for each power of 10.
   1.749 +  if (!otherVariantDefined) {
   1.750 +    status = U_INTERNAL_PROGRAM_ERROR;
   1.751 +    return;
   1.752 +  }
   1.753 +  double divisor = power10;
   1.754 +  for (int32_t i = 1; i < numZeros; ++i) {  // one division by 10 per zero beyond the first: divisor = power10 / 10^(numZeros - 1)
   1.755 +    divisor /= 10.0;
   1.756 +  }
   1.757 +  result->divisors[log10Value] = divisor;
   1.758 +}
   1.759 +
   1.760 +// populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
   1.761 +// given variant and log10 value.
   1.762 +// variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
   1.763 +// formatStr is the format string from which the prefix and suffix are
   1.764 +// extracted. It is usually of form 'Pefix 000 suffix'.
   1.765 +// populatePrefixSuffix returns the number of 0's found in formatStr
   1.766 +// before the decimal point.
   1.767 +// In the special case that formatStr contains only spaces for prefix
   1.768 +// and suffix, populatePrefixSuffix returns log10Value + 1.
   1.769 +static int32_t populatePrefixSuffix(
   1.770 +    const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UErrorCode& status) {
   1.771 +  if (U_FAILURE(status)) {
   1.772 +    return 0;
   1.773 +  }
   1.774 +  int32_t firstIdx = formatStr.indexOf(kZero, LENGTHOF(kZero), 0);
   1.775 +  // We must have 0's in format string.
   1.776 +  if (firstIdx == -1) {
   1.777 +    status = U_INTERNAL_PROGRAM_ERROR;
   1.778 +    return 0;
   1.779 +  }
   1.780 +  int32_t lastIdx = formatStr.lastIndexOf(kZero, LENGTHOF(kZero), firstIdx);
   1.781 +  CDFUnit* unit = createCDFUnit(variant, log10Value, result, status);
   1.782 +  if (U_FAILURE(status)) {
   1.783 +    return 0;
   1.784 +  }
   1.785 +  // Everything up to first 0 is the prefix
   1.786 +  unit->prefix = formatStr.tempSubString(0, firstIdx);
   1.787 +  fixQuotes(unit->prefix);
   1.788 +  // Everything beyond the last 0 is the suffix
   1.789 +  unit->suffix = formatStr.tempSubString(lastIdx + 1);
   1.790 +  fixQuotes(unit->suffix);
   1.791 +
   1.792 +  // If there is effectively no prefix or suffix, ignore the actual number of
   1.793 +  // 0's and act as if the number of 0's matches the size of the number.
   1.794 +  if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) {
   1.795 +    return log10Value + 1;
   1.796 +  }
   1.797 +
   1.798 +  // Calculate number of zeros before decimal point
   1.799 +  int32_t idx = firstIdx + 1;
   1.800 +  while (idx <= lastIdx && formatStr.charAt(idx) == u_0) {
   1.801 +    ++idx;
   1.802 +  }
   1.803 +  return (idx - firstIdx);
   1.804 +}
   1.805 +
   1.806 +static UBool onlySpaces(UnicodeString u) {
   1.807 +  return u.trim().length() == 0;
   1.808 +}
   1.809 +
   1.810 +// fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
   1.811 +// Modifies s in place.
   1.812 +static void fixQuotes(UnicodeString& s) {
   1.813 +  QuoteState state = OUTSIDE;
   1.814 +  int32_t len = s.length();
   1.815 +  int32_t dest = 0;
   1.816 +  for (int32_t i = 0; i < len; ++i) {
   1.817 +    UChar ch = s.charAt(i);
   1.818 +    if (ch == u_apos) {
   1.819 +      if (state == INSIDE_EMPTY) {
   1.820 +        s.setCharAt(dest, ch);
   1.821 +        ++dest;
   1.822 +      }
   1.823 +    } else {
   1.824 +      s.setCharAt(dest, ch);
   1.825 +      ++dest;
   1.826 +    }
   1.827 +
   1.828 +    // Update state
   1.829 +    switch (state) {
   1.830 +      case OUTSIDE:
   1.831 +        state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
   1.832 +        break;
   1.833 +      case INSIDE_EMPTY:
   1.834 +      case INSIDE_FULL:
   1.835 +        state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
   1.836 +        break;
   1.837 +      default:
   1.838 +        break;
   1.839 +    }
   1.840 +  }
   1.841 +  s.truncate(dest);
   1.842 +}
   1.843 +
   1.844 +// fillInMissing ensures that the data in result is complete.
   1.845 +// result data is complete if for each variant in result, there exists
   1.846 +// a prefix-suffix pair for each log10 value and there also exists
   1.847 +// a divisor for each log10 value.
   1.848 +//
   1.849 +// First this function figures out for which log10 values, the other
   1.850 +// variant already had data. These are the same log10 values defined
   1.851 +// in CLDR. 
   1.852 +//
   1.853 +// For each log10 value not defined in CLDR, it uses the divisor for
   1.854 +// the last defined log10 value or 1.
   1.855 +//
   1.856 +// Then for each variant, it does the following. For each log10
   1.857 +// value not defined in CLDR, copy the prefix-suffix pair from the
   1.858 +// previous log10 value. If log10 value is defined in CLDR but is
   1.859 +// missing from given variant, copy the prefix-suffix pair for that
   1.860 +// log10 value from the 'other' variant.
   1.861 +static void fillInMissing(CDFLocaleStyleData* result) {
   1.862 +  const CDFUnit* otherUnits =
   1.863 +      (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
   1.864 +  UBool definedInCLDR[MAX_DIGITS];
   1.865 +  double lastDivisor = 1.0;
   1.866 +  for (int32_t i = 0; i < MAX_DIGITS; ++i) {
   1.867 +    if (!otherUnits[i].isSet()) {
   1.868 +      result->divisors[i] = lastDivisor;
   1.869 +      definedInCLDR[i] = FALSE;
   1.870 +    } else {
   1.871 +      lastDivisor = result->divisors[i];
   1.872 +      definedInCLDR[i] = TRUE;
   1.873 +    }
   1.874 +  }
   1.875 +  // Iterate over each variant.
   1.876 +  int32_t pos = -1;
   1.877 +  const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos);
   1.878 +  for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) {
   1.879 +    CDFUnit* units = (CDFUnit*) element->value.pointer;
   1.880 +    for (int32_t i = 0; i < MAX_DIGITS; ++i) {
   1.881 +      if (definedInCLDR[i]) {
   1.882 +        if (!units[i].isSet()) {
   1.883 +          units[i] = otherUnits[i];
   1.884 +        }
   1.885 +      } else {
   1.886 +        if (i == 0) {
   1.887 +          units[0].markAsSet();
   1.888 +        } else {
   1.889 +          units[i] = units[i - 1];
   1.890 +        }
   1.891 +      }
   1.892 +    }
   1.893 +  }
   1.894 +}
   1.895 +
   1.896 +// computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest
   1.897 +// value computeLog10 will return MAX_DIGITS -1 even for
   1.898 +// numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return
   1.899 +// up to MAX_DIGITS.
   1.900 +static int32_t computeLog10(double x, UBool inRange) {
   1.901 +  int32_t result = 0;
   1.902 +  int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS;
   1.903 +  while (x >= 10.0) {
   1.904 +    x /= 10.0;
   1.905 +    ++result;
   1.906 +    if (result == max) {
   1.907 +      break;
   1.908 +    }
   1.909 +  }
   1.910 +  return result;
   1.911 +}
   1.912 +
   1.913 +// createCDFUnit returns a pointer to the prefix-suffix pair for a given
   1.914 +// variant and log10 value within table. If no such prefix-suffix pair is
   1.915 +// stored in table, one is created within table before returning pointer.
   1.916 +static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) {
   1.917 +  if (U_FAILURE(status)) {
   1.918 +    return NULL;
   1.919 +  }
   1.920 +  CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant);
   1.921 +  if (cdfUnit == NULL) {
   1.922 +    cdfUnit = new CDFUnit[MAX_DIGITS];
   1.923 +    if (cdfUnit == NULL) {
   1.924 +      status = U_MEMORY_ALLOCATION_ERROR;
   1.925 +      return NULL;
   1.926 +    }
   1.927 +    uhash_put(table, uprv_strdup(variant), cdfUnit, &status);
   1.928 +    if (U_FAILURE(status)) {
   1.929 +      return NULL;
   1.930 +    }
   1.931 +  }
   1.932 +  CDFUnit* result = &cdfUnit[log10Value];
   1.933 +  result->markAsSet();
   1.934 +  return result;
   1.935 +}
   1.936 +
   1.937 +// getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given
   1.938 +// variant and log10 value within table. If the given variant doesn't exist, it
   1.939 +// falls back to the OTHER variant. Therefore, this method will always return
   1.940 +// some non-NULL value.
   1.941 +static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) {
   1.942 +  CharString cvariant;
   1.943 +  UErrorCode status = U_ZERO_ERROR;
   1.944 +  const CDFUnit *cdfUnit = NULL;
   1.945 +  cvariant.appendInvariantChars(variant, status);
   1.946 +  if (!U_FAILURE(status)) {
   1.947 +    cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data());
   1.948 +  }
   1.949 +  if (cdfUnit == NULL) {
   1.950 +    cdfUnit = (const CDFUnit*) uhash_get(table, gOther);
   1.951 +  }
   1.952 +  return &cdfUnit[log10Value];
   1.953 +}
   1.954 +
   1.955 +U_NAMESPACE_END
   1.956 +#endif

mercurial