Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * Copyright (C) 1997-2013, International Business Machines Corporation |
michael@0 | 4 | * and others. All Rights Reserved. |
michael@0 | 5 | ******************************************************************************* |
michael@0 | 6 | */ |
michael@0 | 7 | |
michael@0 | 8 | #include "utypeinfo.h" // for 'typeid' to work |
michael@0 | 9 | |
michael@0 | 10 | #include "unicode/rbnf.h" |
michael@0 | 11 | |
michael@0 | 12 | #if U_HAVE_RBNF |
michael@0 | 13 | |
michael@0 | 14 | #include "unicode/normlzr.h" |
michael@0 | 15 | #include "unicode/tblcoll.h" |
michael@0 | 16 | #include "unicode/uchar.h" |
michael@0 | 17 | #include "unicode/ucol.h" |
michael@0 | 18 | #include "unicode/uloc.h" |
michael@0 | 19 | #include "unicode/unum.h" |
michael@0 | 20 | #include "unicode/ures.h" |
michael@0 | 21 | #include "unicode/ustring.h" |
michael@0 | 22 | #include "unicode/utf16.h" |
michael@0 | 23 | #include "unicode/udata.h" |
michael@0 | 24 | #include "nfrs.h" |
michael@0 | 25 | |
michael@0 | 26 | #include "cmemory.h" |
michael@0 | 27 | #include "cstring.h" |
michael@0 | 28 | #include "patternprops.h" |
michael@0 | 29 | #include "uresimp.h" |
michael@0 | 30 | |
michael@0 | 31 | // debugging |
michael@0 | 32 | // #define DEBUG |
michael@0 | 33 | |
michael@0 | 34 | #ifdef DEBUG |
michael@0 | 35 | #include "stdio.h" |
michael@0 | 36 | #endif |
michael@0 | 37 | |
michael@0 | 38 | #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" |
michael@0 | 39 | |
michael@0 | 40 | static const UChar gPercentPercent[] = |
michael@0 | 41 | { |
michael@0 | 42 | 0x25, 0x25, 0 |
michael@0 | 43 | }; /* "%%" */ |
michael@0 | 44 | |
michael@0 | 45 | // All urbnf objects are created through openRules, so we init all of the |
michael@0 | 46 | // Unicode string constants required by rbnf, nfrs, or nfr here. |
michael@0 | 47 | static const UChar gLenientParse[] = |
michael@0 | 48 | { |
michael@0 | 49 | 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 |
michael@0 | 50 | }; /* "%%lenient-parse:" */ |
michael@0 | 51 | static const UChar gSemiColon = 0x003B; |
michael@0 | 52 | static const UChar gSemiPercent[] = |
michael@0 | 53 | { |
michael@0 | 54 | 0x3B, 0x25, 0 |
michael@0 | 55 | }; /* ";%" */ |
michael@0 | 56 | |
michael@0 | 57 | #define kSomeNumberOfBitsDiv2 22 |
michael@0 | 58 | #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) |
michael@0 | 59 | #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) |
michael@0 | 60 | |
michael@0 | 61 | U_NAMESPACE_BEGIN |
michael@0 | 62 | |
michael@0 | 63 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) |
michael@0 | 64 | |
michael@0 | 65 | /* |
michael@0 | 66 | This is a utility class. It does not use ICU's RTTI. |
michael@0 | 67 | If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. |
michael@0 | 68 | Please make sure that intltest passes on Windows in Release mode, |
michael@0 | 69 | since the string pooling per compilation unit will mess up how RTTI works. |
michael@0 | 70 | The RTTI code was also removed due to lack of code coverage. |
michael@0 | 71 | */ |
michael@0 | 72 | class LocalizationInfo : public UMemory { |
michael@0 | 73 | protected: |
michael@0 | 74 | virtual ~LocalizationInfo(); |
michael@0 | 75 | uint32_t refcount; |
michael@0 | 76 | |
michael@0 | 77 | public: |
michael@0 | 78 | LocalizationInfo() : refcount(0) {} |
michael@0 | 79 | |
michael@0 | 80 | LocalizationInfo* ref(void) { |
michael@0 | 81 | ++refcount; |
michael@0 | 82 | return this; |
michael@0 | 83 | } |
michael@0 | 84 | |
michael@0 | 85 | LocalizationInfo* unref(void) { |
michael@0 | 86 | if (refcount && --refcount == 0) { |
michael@0 | 87 | delete this; |
michael@0 | 88 | } |
michael@0 | 89 | return NULL; |
michael@0 | 90 | } |
michael@0 | 91 | |
michael@0 | 92 | virtual UBool operator==(const LocalizationInfo* rhs) const; |
michael@0 | 93 | inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } |
michael@0 | 94 | |
michael@0 | 95 | virtual int32_t getNumberOfRuleSets(void) const = 0; |
michael@0 | 96 | virtual const UChar* getRuleSetName(int32_t index) const = 0; |
michael@0 | 97 | virtual int32_t getNumberOfDisplayLocales(void) const = 0; |
michael@0 | 98 | virtual const UChar* getLocaleName(int32_t index) const = 0; |
michael@0 | 99 | virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; |
michael@0 | 100 | |
michael@0 | 101 | virtual int32_t indexForLocale(const UChar* locale) const; |
michael@0 | 102 | virtual int32_t indexForRuleSet(const UChar* ruleset) const; |
michael@0 | 103 | |
michael@0 | 104 | // virtual UClassID getDynamicClassID() const = 0; |
michael@0 | 105 | // static UClassID getStaticClassID(void); |
michael@0 | 106 | }; |
michael@0 | 107 | |
michael@0 | 108 | LocalizationInfo::~LocalizationInfo() {} |
michael@0 | 109 | |
michael@0 | 110 | //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) |
michael@0 | 111 | |
michael@0 | 112 | // if both strings are NULL, this returns TRUE |
michael@0 | 113 | static UBool |
michael@0 | 114 | streq(const UChar* lhs, const UChar* rhs) { |
michael@0 | 115 | if (rhs == lhs) { |
michael@0 | 116 | return TRUE; |
michael@0 | 117 | } |
michael@0 | 118 | if (lhs && rhs) { |
michael@0 | 119 | return u_strcmp(lhs, rhs) == 0; |
michael@0 | 120 | } |
michael@0 | 121 | return FALSE; |
michael@0 | 122 | } |
michael@0 | 123 | |
michael@0 | 124 | UBool |
michael@0 | 125 | LocalizationInfo::operator==(const LocalizationInfo* rhs) const { |
michael@0 | 126 | if (rhs) { |
michael@0 | 127 | if (this == rhs) { |
michael@0 | 128 | return TRUE; |
michael@0 | 129 | } |
michael@0 | 130 | |
michael@0 | 131 | int32_t rsc = getNumberOfRuleSets(); |
michael@0 | 132 | if (rsc == rhs->getNumberOfRuleSets()) { |
michael@0 | 133 | for (int i = 0; i < rsc; ++i) { |
michael@0 | 134 | if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { |
michael@0 | 135 | return FALSE; |
michael@0 | 136 | } |
michael@0 | 137 | } |
michael@0 | 138 | int32_t dlc = getNumberOfDisplayLocales(); |
michael@0 | 139 | if (dlc == rhs->getNumberOfDisplayLocales()) { |
michael@0 | 140 | for (int i = 0; i < dlc; ++i) { |
michael@0 | 141 | const UChar* locale = getLocaleName(i); |
michael@0 | 142 | int32_t ix = rhs->indexForLocale(locale); |
michael@0 | 143 | // if no locale, ix is -1, getLocaleName returns null, so streq returns false |
michael@0 | 144 | if (!streq(locale, rhs->getLocaleName(ix))) { |
michael@0 | 145 | return FALSE; |
michael@0 | 146 | } |
michael@0 | 147 | for (int j = 0; j < rsc; ++j) { |
michael@0 | 148 | if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { |
michael@0 | 149 | return FALSE; |
michael@0 | 150 | } |
michael@0 | 151 | } |
michael@0 | 152 | } |
michael@0 | 153 | return TRUE; |
michael@0 | 154 | } |
michael@0 | 155 | } |
michael@0 | 156 | } |
michael@0 | 157 | return FALSE; |
michael@0 | 158 | } |
michael@0 | 159 | |
michael@0 | 160 | int32_t |
michael@0 | 161 | LocalizationInfo::indexForLocale(const UChar* locale) const { |
michael@0 | 162 | for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { |
michael@0 | 163 | if (streq(locale, getLocaleName(i))) { |
michael@0 | 164 | return i; |
michael@0 | 165 | } |
michael@0 | 166 | } |
michael@0 | 167 | return -1; |
michael@0 | 168 | } |
michael@0 | 169 | |
michael@0 | 170 | int32_t |
michael@0 | 171 | LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { |
michael@0 | 172 | if (ruleset) { |
michael@0 | 173 | for (int i = 0; i < getNumberOfRuleSets(); ++i) { |
michael@0 | 174 | if (streq(ruleset, getRuleSetName(i))) { |
michael@0 | 175 | return i; |
michael@0 | 176 | } |
michael@0 | 177 | } |
michael@0 | 178 | } |
michael@0 | 179 | return -1; |
michael@0 | 180 | } |
michael@0 | 181 | |
michael@0 | 182 | |
michael@0 | 183 | typedef void (*Fn_Deleter)(void*); |
michael@0 | 184 | |
michael@0 | 185 | class VArray { |
michael@0 | 186 | void** buf; |
michael@0 | 187 | int32_t cap; |
michael@0 | 188 | int32_t size; |
michael@0 | 189 | Fn_Deleter deleter; |
michael@0 | 190 | public: |
michael@0 | 191 | VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {} |
michael@0 | 192 | |
michael@0 | 193 | VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} |
michael@0 | 194 | |
michael@0 | 195 | ~VArray() { |
michael@0 | 196 | if (deleter) { |
michael@0 | 197 | for (int i = 0; i < size; ++i) { |
michael@0 | 198 | (*deleter)(buf[i]); |
michael@0 | 199 | } |
michael@0 | 200 | } |
michael@0 | 201 | uprv_free(buf); |
michael@0 | 202 | } |
michael@0 | 203 | |
michael@0 | 204 | int32_t length() { |
michael@0 | 205 | return size; |
michael@0 | 206 | } |
michael@0 | 207 | |
michael@0 | 208 | void add(void* elem, UErrorCode& status) { |
michael@0 | 209 | if (U_SUCCESS(status)) { |
michael@0 | 210 | if (size == cap) { |
michael@0 | 211 | if (cap == 0) { |
michael@0 | 212 | cap = 1; |
michael@0 | 213 | } else if (cap < 256) { |
michael@0 | 214 | cap *= 2; |
michael@0 | 215 | } else { |
michael@0 | 216 | cap += 256; |
michael@0 | 217 | } |
michael@0 | 218 | if (buf == NULL) { |
michael@0 | 219 | buf = (void**)uprv_malloc(cap * sizeof(void*)); |
michael@0 | 220 | } else { |
michael@0 | 221 | buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); |
michael@0 | 222 | } |
michael@0 | 223 | if (buf == NULL) { |
michael@0 | 224 | // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway |
michael@0 | 225 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 226 | return; |
michael@0 | 227 | } |
michael@0 | 228 | void* start = &buf[size]; |
michael@0 | 229 | size_t count = (cap - size) * sizeof(void*); |
michael@0 | 230 | uprv_memset(start, 0, count); // fill with nulls, just because |
michael@0 | 231 | } |
michael@0 | 232 | buf[size++] = elem; |
michael@0 | 233 | } |
michael@0 | 234 | } |
michael@0 | 235 | |
michael@0 | 236 | void** release(void) { |
michael@0 | 237 | void** result = buf; |
michael@0 | 238 | buf = NULL; |
michael@0 | 239 | cap = 0; |
michael@0 | 240 | size = 0; |
michael@0 | 241 | return result; |
michael@0 | 242 | } |
michael@0 | 243 | }; |
michael@0 | 244 | |
michael@0 | 245 | class LocDataParser; |
michael@0 | 246 | |
michael@0 | 247 | class StringLocalizationInfo : public LocalizationInfo { |
michael@0 | 248 | UChar* info; |
michael@0 | 249 | UChar*** data; |
michael@0 | 250 | int32_t numRuleSets; |
michael@0 | 251 | int32_t numLocales; |
michael@0 | 252 | |
michael@0 | 253 | friend class LocDataParser; |
michael@0 | 254 | |
michael@0 | 255 | StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) |
michael@0 | 256 | : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) |
michael@0 | 257 | { |
michael@0 | 258 | } |
michael@0 | 259 | |
michael@0 | 260 | public: |
michael@0 | 261 | static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); |
michael@0 | 262 | |
michael@0 | 263 | virtual ~StringLocalizationInfo(); |
michael@0 | 264 | virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } |
michael@0 | 265 | virtual const UChar* getRuleSetName(int32_t index) const; |
michael@0 | 266 | virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } |
michael@0 | 267 | virtual const UChar* getLocaleName(int32_t index) const; |
michael@0 | 268 | virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; |
michael@0 | 269 | |
michael@0 | 270 | // virtual UClassID getDynamicClassID() const; |
michael@0 | 271 | // static UClassID getStaticClassID(void); |
michael@0 | 272 | |
michael@0 | 273 | private: |
michael@0 | 274 | void init(UErrorCode& status) const; |
michael@0 | 275 | }; |
michael@0 | 276 | |
michael@0 | 277 | |
// Code points of the punctuation recognized by LocDataParser.
// They are spelled as numeric values rather than character literals.
enum {
    OPEN_ANGLE  = 0x003C, /* '<'  */
    CLOSE_ANGLE = 0x003E, /* '>'  */
    COMMA       = 0x002C, /* ','  */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"'  */
    SPACE       = 0x0020  /* ' '  */
};
michael@0 | 286 | |
michael@0 | 287 | /** |
michael@0 | 288 | * Utility for parsing a localization string and returning a StringLocalizationInfo*. |
michael@0 | 289 | */ |
michael@0 | 290 | class LocDataParser { |
michael@0 | 291 | UChar* data; |
michael@0 | 292 | const UChar* e; |
michael@0 | 293 | UChar* p; |
michael@0 | 294 | UChar ch; |
michael@0 | 295 | UParseError& pe; |
michael@0 | 296 | UErrorCode& ec; |
michael@0 | 297 | |
michael@0 | 298 | public: |
michael@0 | 299 | LocDataParser(UParseError& parseError, UErrorCode& status) |
michael@0 | 300 | : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {} |
michael@0 | 301 | ~LocDataParser() {} |
michael@0 | 302 | |
michael@0 | 303 | /* |
michael@0 | 304 | * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, |
michael@0 | 305 | * and return NULL. The StringLocalizationInfo will adopt locData if it is created. |
michael@0 | 306 | */ |
michael@0 | 307 | StringLocalizationInfo* parse(UChar* data, int32_t len); |
michael@0 | 308 | |
michael@0 | 309 | private: |
michael@0 | 310 | |
michael@0 | 311 | void inc(void) { ++p; ch = 0xffff; } |
michael@0 | 312 | UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; } |
michael@0 | 313 | UBool check(UChar c) { return p < e && (ch == c || *p == c); } |
michael@0 | 314 | void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();} |
michael@0 | 315 | UBool inList(UChar c, const UChar* list) const { |
michael@0 | 316 | if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE; |
michael@0 | 317 | while (*list && *list != c) ++list; return *list == c; |
michael@0 | 318 | } |
michael@0 | 319 | void parseError(const char* msg); |
michael@0 | 320 | |
michael@0 | 321 | StringLocalizationInfo* doParse(void); |
michael@0 | 322 | |
michael@0 | 323 | UChar** nextArray(int32_t& requiredLength); |
michael@0 | 324 | UChar* nextString(void); |
michael@0 | 325 | }; |
michael@0 | 326 | |
michael@0 | 327 | #ifdef DEBUG |
michael@0 | 328 | #define ERROR(msg) parseError(msg); return NULL; |
michael@0 | 329 | #else |
michael@0 | 330 | #define ERROR(msg) parseError(NULL); return NULL; |
michael@0 | 331 | #endif |
michael@0 | 332 | |
michael@0 | 333 | |
michael@0 | 334 | static const UChar DQUOTE_STOPLIST[] = { |
michael@0 | 335 | QUOTE, 0 |
michael@0 | 336 | }; |
michael@0 | 337 | |
michael@0 | 338 | static const UChar SQUOTE_STOPLIST[] = { |
michael@0 | 339 | TICK, 0 |
michael@0 | 340 | }; |
michael@0 | 341 | |
michael@0 | 342 | static const UChar NOQUOTE_STOPLIST[] = { |
michael@0 | 343 | SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 |
michael@0 | 344 | }; |
michael@0 | 345 | |
michael@0 | 346 | static void |
michael@0 | 347 | DeleteFn(void* p) { |
michael@0 | 348 | uprv_free(p); |
michael@0 | 349 | } |
michael@0 | 350 | |
michael@0 | 351 | StringLocalizationInfo* |
michael@0 | 352 | LocDataParser::parse(UChar* _data, int32_t len) { |
michael@0 | 353 | if (U_FAILURE(ec)) { |
michael@0 | 354 | if (_data) uprv_free(_data); |
michael@0 | 355 | return NULL; |
michael@0 | 356 | } |
michael@0 | 357 | |
michael@0 | 358 | pe.line = 0; |
michael@0 | 359 | pe.offset = -1; |
michael@0 | 360 | pe.postContext[0] = 0; |
michael@0 | 361 | pe.preContext[0] = 0; |
michael@0 | 362 | |
michael@0 | 363 | if (_data == NULL) { |
michael@0 | 364 | ec = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 365 | return NULL; |
michael@0 | 366 | } |
michael@0 | 367 | |
michael@0 | 368 | if (len <= 0) { |
michael@0 | 369 | ec = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 370 | uprv_free(_data); |
michael@0 | 371 | return NULL; |
michael@0 | 372 | } |
michael@0 | 373 | |
michael@0 | 374 | data = _data; |
michael@0 | 375 | e = data + len; |
michael@0 | 376 | p = _data; |
michael@0 | 377 | ch = 0xffff; |
michael@0 | 378 | |
michael@0 | 379 | return doParse(); |
michael@0 | 380 | } |
michael@0 | 381 | |
michael@0 | 382 | |
michael@0 | 383 | StringLocalizationInfo* |
michael@0 | 384 | LocDataParser::doParse(void) { |
michael@0 | 385 | skipWhitespace(); |
michael@0 | 386 | if (!checkInc(OPEN_ANGLE)) { |
michael@0 | 387 | ERROR("Missing open angle"); |
michael@0 | 388 | } else { |
michael@0 | 389 | VArray array(DeleteFn); |
michael@0 | 390 | UBool mightHaveNext = TRUE; |
michael@0 | 391 | int32_t requiredLength = -1; |
michael@0 | 392 | while (mightHaveNext) { |
michael@0 | 393 | mightHaveNext = FALSE; |
michael@0 | 394 | UChar** elem = nextArray(requiredLength); |
michael@0 | 395 | skipWhitespace(); |
michael@0 | 396 | UBool haveComma = check(COMMA); |
michael@0 | 397 | if (elem) { |
michael@0 | 398 | array.add(elem, ec); |
michael@0 | 399 | if (haveComma) { |
michael@0 | 400 | inc(); |
michael@0 | 401 | mightHaveNext = TRUE; |
michael@0 | 402 | } |
michael@0 | 403 | } else if (haveComma) { |
michael@0 | 404 | ERROR("Unexpected character"); |
michael@0 | 405 | } |
michael@0 | 406 | } |
michael@0 | 407 | |
michael@0 | 408 | skipWhitespace(); |
michael@0 | 409 | if (!checkInc(CLOSE_ANGLE)) { |
michael@0 | 410 | if (check(OPEN_ANGLE)) { |
michael@0 | 411 | ERROR("Missing comma in outer array"); |
michael@0 | 412 | } else { |
michael@0 | 413 | ERROR("Missing close angle bracket in outer array"); |
michael@0 | 414 | } |
michael@0 | 415 | } |
michael@0 | 416 | |
michael@0 | 417 | skipWhitespace(); |
michael@0 | 418 | if (p != e) { |
michael@0 | 419 | ERROR("Extra text after close of localization data"); |
michael@0 | 420 | } |
michael@0 | 421 | |
michael@0 | 422 | array.add(NULL, ec); |
michael@0 | 423 | if (U_SUCCESS(ec)) { |
michael@0 | 424 | int32_t numLocs = array.length() - 2; // subtract first, NULL |
michael@0 | 425 | UChar*** result = (UChar***)array.release(); |
michael@0 | 426 | |
michael@0 | 427 | return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL |
michael@0 | 428 | } |
michael@0 | 429 | } |
michael@0 | 430 | |
michael@0 | 431 | ERROR("Unknown error"); |
michael@0 | 432 | } |
michael@0 | 433 | |
michael@0 | 434 | UChar** |
michael@0 | 435 | LocDataParser::nextArray(int32_t& requiredLength) { |
michael@0 | 436 | if (U_FAILURE(ec)) { |
michael@0 | 437 | return NULL; |
michael@0 | 438 | } |
michael@0 | 439 | |
michael@0 | 440 | skipWhitespace(); |
michael@0 | 441 | if (!checkInc(OPEN_ANGLE)) { |
michael@0 | 442 | ERROR("Missing open angle"); |
michael@0 | 443 | } |
michael@0 | 444 | |
michael@0 | 445 | VArray array; |
michael@0 | 446 | UBool mightHaveNext = TRUE; |
michael@0 | 447 | while (mightHaveNext) { |
michael@0 | 448 | mightHaveNext = FALSE; |
michael@0 | 449 | UChar* elem = nextString(); |
michael@0 | 450 | skipWhitespace(); |
michael@0 | 451 | UBool haveComma = check(COMMA); |
michael@0 | 452 | if (elem) { |
michael@0 | 453 | array.add(elem, ec); |
michael@0 | 454 | if (haveComma) { |
michael@0 | 455 | inc(); |
michael@0 | 456 | mightHaveNext = TRUE; |
michael@0 | 457 | } |
michael@0 | 458 | } else if (haveComma) { |
michael@0 | 459 | ERROR("Unexpected comma"); |
michael@0 | 460 | } |
michael@0 | 461 | } |
michael@0 | 462 | skipWhitespace(); |
michael@0 | 463 | if (!checkInc(CLOSE_ANGLE)) { |
michael@0 | 464 | if (check(OPEN_ANGLE)) { |
michael@0 | 465 | ERROR("Missing close angle bracket in inner array"); |
michael@0 | 466 | } else { |
michael@0 | 467 | ERROR("Missing comma in inner array"); |
michael@0 | 468 | } |
michael@0 | 469 | } |
michael@0 | 470 | |
michael@0 | 471 | array.add(NULL, ec); |
michael@0 | 472 | if (U_SUCCESS(ec)) { |
michael@0 | 473 | if (requiredLength == -1) { |
michael@0 | 474 | requiredLength = array.length() + 1; |
michael@0 | 475 | } else if (array.length() != requiredLength) { |
michael@0 | 476 | ec = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 477 | ERROR("Array not of required length"); |
michael@0 | 478 | } |
michael@0 | 479 | |
michael@0 | 480 | return (UChar**)array.release(); |
michael@0 | 481 | } |
michael@0 | 482 | ERROR("Unknown Error"); |
michael@0 | 483 | } |
michael@0 | 484 | |
michael@0 | 485 | UChar* |
michael@0 | 486 | LocDataParser::nextString() { |
michael@0 | 487 | UChar* result = NULL; |
michael@0 | 488 | |
michael@0 | 489 | skipWhitespace(); |
michael@0 | 490 | if (p < e) { |
michael@0 | 491 | const UChar* terminators; |
michael@0 | 492 | UChar c = *p; |
michael@0 | 493 | UBool haveQuote = c == QUOTE || c == TICK; |
michael@0 | 494 | if (haveQuote) { |
michael@0 | 495 | inc(); |
michael@0 | 496 | terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; |
michael@0 | 497 | } else { |
michael@0 | 498 | terminators = NOQUOTE_STOPLIST; |
michael@0 | 499 | } |
michael@0 | 500 | UChar* start = p; |
michael@0 | 501 | while (p < e && !inList(*p, terminators)) ++p; |
michael@0 | 502 | if (p == e) { |
michael@0 | 503 | ERROR("Unexpected end of data"); |
michael@0 | 504 | } |
michael@0 | 505 | |
michael@0 | 506 | UChar x = *p; |
michael@0 | 507 | if (p > start) { |
michael@0 | 508 | ch = x; |
michael@0 | 509 | *p = 0x0; // terminate by writing to data |
michael@0 | 510 | result = start; // just point into data |
michael@0 | 511 | } |
michael@0 | 512 | if (haveQuote) { |
michael@0 | 513 | if (x != c) { |
michael@0 | 514 | ERROR("Missing matching quote"); |
michael@0 | 515 | } else if (p == start) { |
michael@0 | 516 | ERROR("Empty string"); |
michael@0 | 517 | } |
michael@0 | 518 | inc(); |
michael@0 | 519 | } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { |
michael@0 | 520 | ERROR("Unexpected character in string"); |
michael@0 | 521 | } |
michael@0 | 522 | } |
michael@0 | 523 | |
michael@0 | 524 | // ok for there to be no next string |
michael@0 | 525 | return result; |
michael@0 | 526 | } |
michael@0 | 527 | |
michael@0 | 528 | void |
michael@0 | 529 | LocDataParser::parseError(const char* /*str*/) { |
michael@0 | 530 | if (!data) { |
michael@0 | 531 | return; |
michael@0 | 532 | } |
michael@0 | 533 | |
michael@0 | 534 | const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; |
michael@0 | 535 | if (start < data) { |
michael@0 | 536 | start = data; |
michael@0 | 537 | } |
michael@0 | 538 | for (UChar* x = p; --x >= start;) { |
michael@0 | 539 | if (!*x) { |
michael@0 | 540 | start = x+1; |
michael@0 | 541 | break; |
michael@0 | 542 | } |
michael@0 | 543 | } |
michael@0 | 544 | const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; |
michael@0 | 545 | if (limit > e) { |
michael@0 | 546 | limit = e; |
michael@0 | 547 | } |
michael@0 | 548 | u_strncpy(pe.preContext, start, (int32_t)(p-start)); |
michael@0 | 549 | pe.preContext[p-start] = 0; |
michael@0 | 550 | u_strncpy(pe.postContext, p, (int32_t)(limit-p)); |
michael@0 | 551 | pe.postContext[limit-p] = 0; |
michael@0 | 552 | pe.offset = (int32_t)(p - data); |
michael@0 | 553 | |
michael@0 | 554 | #ifdef DEBUG |
michael@0 | 555 | fprintf(stderr, "%s at or near character %d: ", str, p-data); |
michael@0 | 556 | |
michael@0 | 557 | UnicodeString msg; |
michael@0 | 558 | msg.append(start, p - start); |
michael@0 | 559 | msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ |
michael@0 | 560 | msg.append(p, limit-p); |
michael@0 | 561 | msg.append("'"); |
michael@0 | 562 | |
michael@0 | 563 | char buf[128]; |
michael@0 | 564 | int32_t len = msg.extract(0, msg.length(), buf, 128); |
michael@0 | 565 | if (len >= 128) { |
michael@0 | 566 | buf[127] = 0; |
michael@0 | 567 | } else { |
michael@0 | 568 | buf[len] = 0; |
michael@0 | 569 | } |
michael@0 | 570 | fprintf(stderr, "%s\n", buf); |
michael@0 | 571 | fflush(stderr); |
michael@0 | 572 | #endif |
michael@0 | 573 | |
michael@0 | 574 | uprv_free(data); |
michael@0 | 575 | data = NULL; |
michael@0 | 576 | p = NULL; |
michael@0 | 577 | e = NULL; |
michael@0 | 578 | |
michael@0 | 579 | if (U_SUCCESS(ec)) { |
michael@0 | 580 | ec = U_PARSE_ERROR; |
michael@0 | 581 | } |
michael@0 | 582 | } |
michael@0 | 583 | |
michael@0 | 584 | //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) |
michael@0 | 585 | |
michael@0 | 586 | StringLocalizationInfo* |
michael@0 | 587 | StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { |
michael@0 | 588 | if (U_FAILURE(status)) { |
michael@0 | 589 | return NULL; |
michael@0 | 590 | } |
michael@0 | 591 | |
michael@0 | 592 | int32_t len = info.length(); |
michael@0 | 593 | if (len == 0) { |
michael@0 | 594 | return NULL; // no error; |
michael@0 | 595 | } |
michael@0 | 596 | |
michael@0 | 597 | UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); |
michael@0 | 598 | if (!p) { |
michael@0 | 599 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 600 | return NULL; |
michael@0 | 601 | } |
michael@0 | 602 | info.extract(p, len, status); |
michael@0 | 603 | if (!U_FAILURE(status)) { |
michael@0 | 604 | status = U_ZERO_ERROR; // clear warning about non-termination |
michael@0 | 605 | } |
michael@0 | 606 | |
michael@0 | 607 | LocDataParser parser(perror, status); |
michael@0 | 608 | return parser.parse(p, len); |
michael@0 | 609 | } |
michael@0 | 610 | |
michael@0 | 611 | StringLocalizationInfo::~StringLocalizationInfo() { |
michael@0 | 612 | for (UChar*** p = (UChar***)data; *p; ++p) { |
michael@0 | 613 | // remaining data is simply pointer into our unicode string data. |
michael@0 | 614 | if (*p) uprv_free(*p); |
michael@0 | 615 | } |
michael@0 | 616 | if (data) uprv_free(data); |
michael@0 | 617 | if (info) uprv_free(info); |
michael@0 | 618 | } |
michael@0 | 619 | |
michael@0 | 620 | |
michael@0 | 621 | const UChar* |
michael@0 | 622 | StringLocalizationInfo::getRuleSetName(int32_t index) const { |
michael@0 | 623 | if (index >= 0 && index < getNumberOfRuleSets()) { |
michael@0 | 624 | return data[0][index]; |
michael@0 | 625 | } |
michael@0 | 626 | return NULL; |
michael@0 | 627 | } |
michael@0 | 628 | |
michael@0 | 629 | const UChar* |
michael@0 | 630 | StringLocalizationInfo::getLocaleName(int32_t index) const { |
michael@0 | 631 | if (index >= 0 && index < getNumberOfDisplayLocales()) { |
michael@0 | 632 | return data[index+1][0]; |
michael@0 | 633 | } |
michael@0 | 634 | return NULL; |
michael@0 | 635 | } |
michael@0 | 636 | |
michael@0 | 637 | const UChar* |
michael@0 | 638 | StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { |
michael@0 | 639 | if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && |
michael@0 | 640 | ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { |
michael@0 | 641 | return data[localeIndex+1][ruleIndex+1]; |
michael@0 | 642 | } |
michael@0 | 643 | return NULL; |
michael@0 | 644 | } |
michael@0 | 645 | |
michael@0 | 646 | // ---------- |
michael@0 | 647 | |
michael@0 | 648 | RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, |
michael@0 | 649 | const UnicodeString& locs, |
michael@0 | 650 | const Locale& alocale, UParseError& perror, UErrorCode& status) |
michael@0 | 651 | : ruleSets(NULL) |
michael@0 | 652 | , ruleSetDescriptions(NULL) |
michael@0 | 653 | , numRuleSets(0) |
michael@0 | 654 | , defaultRuleSet(NULL) |
michael@0 | 655 | , locale(alocale) |
michael@0 | 656 | , collator(NULL) |
michael@0 | 657 | , decimalFormatSymbols(NULL) |
michael@0 | 658 | , lenient(FALSE) |
michael@0 | 659 | , lenientParseRules(NULL) |
michael@0 | 660 | , localizations(NULL) |
michael@0 | 661 | { |
michael@0 | 662 | LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); |
michael@0 | 663 | init(description, locinfo, perror, status); |
michael@0 | 664 | } |
michael@0 | 665 | |
michael@0 | 666 | RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, |
michael@0 | 667 | const UnicodeString& locs, |
michael@0 | 668 | UParseError& perror, UErrorCode& status) |
michael@0 | 669 | : ruleSets(NULL) |
michael@0 | 670 | , ruleSetDescriptions(NULL) |
michael@0 | 671 | , numRuleSets(0) |
michael@0 | 672 | , defaultRuleSet(NULL) |
michael@0 | 673 | , locale(Locale::getDefault()) |
michael@0 | 674 | , collator(NULL) |
michael@0 | 675 | , decimalFormatSymbols(NULL) |
michael@0 | 676 | , lenient(FALSE) |
michael@0 | 677 | , lenientParseRules(NULL) |
michael@0 | 678 | , localizations(NULL) |
michael@0 | 679 | { |
michael@0 | 680 | LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); |
michael@0 | 681 | init(description, locinfo, perror, status); |
michael@0 | 682 | } |
michael@0 | 683 | |
michael@0 | 684 | RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, |
michael@0 | 685 | LocalizationInfo* info, |
michael@0 | 686 | const Locale& alocale, UParseError& perror, UErrorCode& status) |
michael@0 | 687 | : ruleSets(NULL) |
michael@0 | 688 | , ruleSetDescriptions(NULL) |
michael@0 | 689 | , numRuleSets(0) |
michael@0 | 690 | , defaultRuleSet(NULL) |
michael@0 | 691 | , locale(alocale) |
michael@0 | 692 | , collator(NULL) |
michael@0 | 693 | , decimalFormatSymbols(NULL) |
michael@0 | 694 | , lenient(FALSE) |
michael@0 | 695 | , lenientParseRules(NULL) |
michael@0 | 696 | , localizations(NULL) |
michael@0 | 697 | { |
michael@0 | 698 | init(description, info, perror, status); |
michael@0 | 699 | } |
michael@0 | 700 | |
michael@0 | 701 | RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, |
michael@0 | 702 | UParseError& perror, |
michael@0 | 703 | UErrorCode& status) |
michael@0 | 704 | : ruleSets(NULL) |
michael@0 | 705 | , ruleSetDescriptions(NULL) |
michael@0 | 706 | , numRuleSets(0) |
michael@0 | 707 | , defaultRuleSet(NULL) |
michael@0 | 708 | , locale(Locale::getDefault()) |
michael@0 | 709 | , collator(NULL) |
michael@0 | 710 | , decimalFormatSymbols(NULL) |
michael@0 | 711 | , lenient(FALSE) |
michael@0 | 712 | , lenientParseRules(NULL) |
michael@0 | 713 | , localizations(NULL) |
michael@0 | 714 | { |
michael@0 | 715 | init(description, NULL, perror, status); |
michael@0 | 716 | } |
michael@0 | 717 | |
michael@0 | 718 | RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, |
michael@0 | 719 | const Locale& aLocale, |
michael@0 | 720 | UParseError& perror, |
michael@0 | 721 | UErrorCode& status) |
michael@0 | 722 | : ruleSets(NULL) |
michael@0 | 723 | , ruleSetDescriptions(NULL) |
michael@0 | 724 | , numRuleSets(0) |
michael@0 | 725 | , defaultRuleSet(NULL) |
michael@0 | 726 | , locale(aLocale) |
michael@0 | 727 | , collator(NULL) |
michael@0 | 728 | , decimalFormatSymbols(NULL) |
michael@0 | 729 | , lenient(FALSE) |
michael@0 | 730 | , lenientParseRules(NULL) |
michael@0 | 731 | , localizations(NULL) |
michael@0 | 732 | { |
michael@0 | 733 | init(description, NULL, perror, status); |
michael@0 | 734 | } |
michael@0 | 735 | |
michael@0 | 736 | RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) |
michael@0 | 737 | : ruleSets(NULL) |
michael@0 | 738 | , ruleSetDescriptions(NULL) |
michael@0 | 739 | , numRuleSets(0) |
michael@0 | 740 | , defaultRuleSet(NULL) |
michael@0 | 741 | , locale(alocale) |
michael@0 | 742 | , collator(NULL) |
michael@0 | 743 | , decimalFormatSymbols(NULL) |
michael@0 | 744 | , lenient(FALSE) |
michael@0 | 745 | , lenientParseRules(NULL) |
michael@0 | 746 | , localizations(NULL) |
michael@0 | 747 | { |
michael@0 | 748 | if (U_FAILURE(status)) { |
michael@0 | 749 | return; |
michael@0 | 750 | } |
michael@0 | 751 | |
michael@0 | 752 | const char* rules_tag = "RBNFRules"; |
michael@0 | 753 | const char* fmt_tag = ""; |
michael@0 | 754 | switch (tag) { |
michael@0 | 755 | case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break; |
michael@0 | 756 | case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break; |
michael@0 | 757 | case URBNF_DURATION: fmt_tag = "DurationRules"; break; |
michael@0 | 758 | case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break; |
michael@0 | 759 | default: status = U_ILLEGAL_ARGUMENT_ERROR; return; |
michael@0 | 760 | } |
michael@0 | 761 | |
michael@0 | 762 | // TODO: read localization info from resource |
michael@0 | 763 | LocalizationInfo* locinfo = NULL; |
michael@0 | 764 | |
michael@0 | 765 | UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); |
michael@0 | 766 | if (U_SUCCESS(status)) { |
michael@0 | 767 | setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), |
michael@0 | 768 | ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); |
michael@0 | 769 | |
michael@0 | 770 | UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status); |
michael@0 | 771 | if (U_FAILURE(status)) { |
michael@0 | 772 | ures_close(nfrb); |
michael@0 | 773 | } |
michael@0 | 774 | UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status); |
michael@0 | 775 | if (U_FAILURE(status)) { |
michael@0 | 776 | ures_close(rbnfRules); |
michael@0 | 777 | ures_close(nfrb); |
michael@0 | 778 | return; |
michael@0 | 779 | } |
michael@0 | 780 | |
michael@0 | 781 | UnicodeString desc; |
michael@0 | 782 | while (ures_hasNext(ruleSets)) { |
michael@0 | 783 | desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status)); |
michael@0 | 784 | } |
michael@0 | 785 | UParseError perror; |
michael@0 | 786 | |
michael@0 | 787 | init (desc, locinfo, perror, status); |
michael@0 | 788 | |
michael@0 | 789 | ures_close(ruleSets); |
michael@0 | 790 | ures_close(rbnfRules); |
michael@0 | 791 | } |
michael@0 | 792 | ures_close(nfrb); |
michael@0 | 793 | } |
michael@0 | 794 | |
michael@0 | 795 | RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) |
michael@0 | 796 | : NumberFormat(rhs) |
michael@0 | 797 | , ruleSets(NULL) |
michael@0 | 798 | , ruleSetDescriptions(NULL) |
michael@0 | 799 | , numRuleSets(0) |
michael@0 | 800 | , defaultRuleSet(NULL) |
michael@0 | 801 | , locale(rhs.locale) |
michael@0 | 802 | , collator(NULL) |
michael@0 | 803 | , decimalFormatSymbols(NULL) |
michael@0 | 804 | , lenient(FALSE) |
michael@0 | 805 | , lenientParseRules(NULL) |
michael@0 | 806 | , localizations(NULL) |
michael@0 | 807 | { |
michael@0 | 808 | this->operator=(rhs); |
michael@0 | 809 | } |
michael@0 | 810 | |
michael@0 | 811 | // -------- |
michael@0 | 812 | |
michael@0 | 813 | RuleBasedNumberFormat& |
michael@0 | 814 | RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) |
michael@0 | 815 | { |
michael@0 | 816 | UErrorCode status = U_ZERO_ERROR; |
michael@0 | 817 | dispose(); |
michael@0 | 818 | locale = rhs.locale; |
michael@0 | 819 | lenient = rhs.lenient; |
michael@0 | 820 | |
michael@0 | 821 | UnicodeString rules = rhs.getRules(); |
michael@0 | 822 | UParseError perror; |
michael@0 | 823 | init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); |
michael@0 | 824 | |
michael@0 | 825 | return *this; |
michael@0 | 826 | } |
michael@0 | 827 | |
michael@0 | 828 | RuleBasedNumberFormat::~RuleBasedNumberFormat() |
michael@0 | 829 | { |
michael@0 | 830 | dispose(); |
michael@0 | 831 | } |
michael@0 | 832 | |
michael@0 | 833 | Format* |
michael@0 | 834 | RuleBasedNumberFormat::clone(void) const |
michael@0 | 835 | { |
michael@0 | 836 | RuleBasedNumberFormat * result = NULL; |
michael@0 | 837 | UnicodeString rules = getRules(); |
michael@0 | 838 | UErrorCode status = U_ZERO_ERROR; |
michael@0 | 839 | UParseError perror; |
michael@0 | 840 | result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status); |
michael@0 | 841 | /* test for NULL */ |
michael@0 | 842 | if (result == 0) { |
michael@0 | 843 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 844 | return 0; |
michael@0 | 845 | } |
michael@0 | 846 | if (U_FAILURE(status)) { |
michael@0 | 847 | delete result; |
michael@0 | 848 | result = 0; |
michael@0 | 849 | } else { |
michael@0 | 850 | result->lenient = lenient; |
michael@0 | 851 | } |
michael@0 | 852 | return result; |
michael@0 | 853 | } |
michael@0 | 854 | |
michael@0 | 855 | UBool |
michael@0 | 856 | RuleBasedNumberFormat::operator==(const Format& other) const |
michael@0 | 857 | { |
michael@0 | 858 | if (this == &other) { |
michael@0 | 859 | return TRUE; |
michael@0 | 860 | } |
michael@0 | 861 | |
michael@0 | 862 | if (typeid(*this) == typeid(other)) { |
michael@0 | 863 | const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; |
michael@0 | 864 | if (locale == rhs.locale && |
michael@0 | 865 | lenient == rhs.lenient && |
michael@0 | 866 | (localizations == NULL |
michael@0 | 867 | ? rhs.localizations == NULL |
michael@0 | 868 | : (rhs.localizations == NULL |
michael@0 | 869 | ? FALSE |
michael@0 | 870 | : *localizations == rhs.localizations))) { |
michael@0 | 871 | |
michael@0 | 872 | NFRuleSet** p = ruleSets; |
michael@0 | 873 | NFRuleSet** q = rhs.ruleSets; |
michael@0 | 874 | if (p == NULL) { |
michael@0 | 875 | return q == NULL; |
michael@0 | 876 | } else if (q == NULL) { |
michael@0 | 877 | return FALSE; |
michael@0 | 878 | } |
michael@0 | 879 | while (*p && *q && (**p == **q)) { |
michael@0 | 880 | ++p; |
michael@0 | 881 | ++q; |
michael@0 | 882 | } |
michael@0 | 883 | return *q == NULL && *p == NULL; |
michael@0 | 884 | } |
michael@0 | 885 | } |
michael@0 | 886 | |
michael@0 | 887 | return FALSE; |
michael@0 | 888 | } |
michael@0 | 889 | |
michael@0 | 890 | UnicodeString |
michael@0 | 891 | RuleBasedNumberFormat::getRules() const |
michael@0 | 892 | { |
michael@0 | 893 | UnicodeString result; |
michael@0 | 894 | if (ruleSets != NULL) { |
michael@0 | 895 | for (NFRuleSet** p = ruleSets; *p; ++p) { |
michael@0 | 896 | (*p)->appendRules(result); |
michael@0 | 897 | } |
michael@0 | 898 | } |
michael@0 | 899 | return result; |
michael@0 | 900 | } |
michael@0 | 901 | |
michael@0 | 902 | UnicodeString |
michael@0 | 903 | RuleBasedNumberFormat::getRuleSetName(int32_t index) const |
michael@0 | 904 | { |
michael@0 | 905 | if (localizations) { |
michael@0 | 906 | UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); |
michael@0 | 907 | return string; |
michael@0 | 908 | } else if (ruleSets) { |
michael@0 | 909 | UnicodeString result; |
michael@0 | 910 | for (NFRuleSet** p = ruleSets; *p; ++p) { |
michael@0 | 911 | NFRuleSet* rs = *p; |
michael@0 | 912 | if (rs->isPublic()) { |
michael@0 | 913 | if (--index == -1) { |
michael@0 | 914 | rs->getName(result); |
michael@0 | 915 | return result; |
michael@0 | 916 | } |
michael@0 | 917 | } |
michael@0 | 918 | } |
michael@0 | 919 | } |
michael@0 | 920 | UnicodeString empty; |
michael@0 | 921 | return empty; |
michael@0 | 922 | } |
michael@0 | 923 | |
michael@0 | 924 | int32_t |
michael@0 | 925 | RuleBasedNumberFormat::getNumberOfRuleSetNames() const |
michael@0 | 926 | { |
michael@0 | 927 | int32_t result = 0; |
michael@0 | 928 | if (localizations) { |
michael@0 | 929 | result = localizations->getNumberOfRuleSets(); |
michael@0 | 930 | } else if (ruleSets) { |
michael@0 | 931 | for (NFRuleSet** p = ruleSets; *p; ++p) { |
michael@0 | 932 | if ((**p).isPublic()) { |
michael@0 | 933 | ++result; |
michael@0 | 934 | } |
michael@0 | 935 | } |
michael@0 | 936 | } |
michael@0 | 937 | return result; |
michael@0 | 938 | } |
michael@0 | 939 | |
michael@0 | 940 | int32_t |
michael@0 | 941 | RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const { |
michael@0 | 942 | if (localizations) { |
michael@0 | 943 | return localizations->getNumberOfDisplayLocales(); |
michael@0 | 944 | } |
michael@0 | 945 | return 0; |
michael@0 | 946 | } |
michael@0 | 947 | |
michael@0 | 948 | Locale |
michael@0 | 949 | RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { |
michael@0 | 950 | if (U_FAILURE(status)) { |
michael@0 | 951 | return Locale(""); |
michael@0 | 952 | } |
michael@0 | 953 | if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { |
michael@0 | 954 | UnicodeString name(TRUE, localizations->getLocaleName(index), -1); |
michael@0 | 955 | char buffer[64]; |
michael@0 | 956 | int32_t cap = name.length() + 1; |
michael@0 | 957 | char* bp = buffer; |
michael@0 | 958 | if (cap > 64) { |
michael@0 | 959 | bp = (char *)uprv_malloc(cap); |
michael@0 | 960 | if (bp == NULL) { |
michael@0 | 961 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 962 | return Locale(""); |
michael@0 | 963 | } |
michael@0 | 964 | } |
michael@0 | 965 | name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); |
michael@0 | 966 | Locale retLocale(bp); |
michael@0 | 967 | if (bp != buffer) { |
michael@0 | 968 | uprv_free(bp); |
michael@0 | 969 | } |
michael@0 | 970 | return retLocale; |
michael@0 | 971 | } |
michael@0 | 972 | status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 973 | Locale retLocale; |
michael@0 | 974 | return retLocale; |
michael@0 | 975 | } |
michael@0 | 976 | |
michael@0 | 977 | UnicodeString |
michael@0 | 978 | RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) { |
michael@0 | 979 | if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { |
michael@0 | 980 | UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant); |
michael@0 | 981 | int32_t len = localeName.length(); |
michael@0 | 982 | UChar* localeStr = localeName.getBuffer(len + 1); |
michael@0 | 983 | while (len >= 0) { |
michael@0 | 984 | localeStr[len] = 0; |
michael@0 | 985 | int32_t ix = localizations->indexForLocale(localeStr); |
michael@0 | 986 | if (ix >= 0) { |
michael@0 | 987 | UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1); |
michael@0 | 988 | return name; |
michael@0 | 989 | } |
michael@0 | 990 | |
michael@0 | 991 | // trim trailing portion, skipping over ommitted sections |
michael@0 | 992 | do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore |
michael@0 | 993 | while (len > 0 && localeStr[len-1] == 0x005F) --len; |
michael@0 | 994 | } |
michael@0 | 995 | UnicodeString name(TRUE, localizations->getRuleSetName(index), -1); |
michael@0 | 996 | return name; |
michael@0 | 997 | } |
michael@0 | 998 | UnicodeString bogus; |
michael@0 | 999 | bogus.setToBogus(); |
michael@0 | 1000 | return bogus; |
michael@0 | 1001 | } |
michael@0 | 1002 | |
michael@0 | 1003 | UnicodeString |
michael@0 | 1004 | RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) { |
michael@0 | 1005 | if (localizations) { |
michael@0 | 1006 | UnicodeString rsn(ruleSetName); |
michael@0 | 1007 | int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); |
michael@0 | 1008 | return getRuleSetDisplayName(ix, localeParam); |
michael@0 | 1009 | } |
michael@0 | 1010 | UnicodeString bogus; |
michael@0 | 1011 | bogus.setToBogus(); |
michael@0 | 1012 | return bogus; |
michael@0 | 1013 | } |
michael@0 | 1014 | |
michael@0 | 1015 | NFRuleSet* |
michael@0 | 1016 | RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const |
michael@0 | 1017 | { |
michael@0 | 1018 | if (U_SUCCESS(status) && ruleSets) { |
michael@0 | 1019 | for (NFRuleSet** p = ruleSets; *p; ++p) { |
michael@0 | 1020 | NFRuleSet* rs = *p; |
michael@0 | 1021 | if (rs->isNamed(name)) { |
michael@0 | 1022 | return rs; |
michael@0 | 1023 | } |
michael@0 | 1024 | } |
michael@0 | 1025 | status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1026 | } |
michael@0 | 1027 | return NULL; |
michael@0 | 1028 | } |
michael@0 | 1029 | |
michael@0 | 1030 | UnicodeString& |
michael@0 | 1031 | RuleBasedNumberFormat::format(int32_t number, |
michael@0 | 1032 | UnicodeString& toAppendTo, |
michael@0 | 1033 | FieldPosition& /* pos */) const |
michael@0 | 1034 | { |
michael@0 | 1035 | if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length()); |
michael@0 | 1036 | return toAppendTo; |
michael@0 | 1037 | } |
michael@0 | 1038 | |
michael@0 | 1039 | |
michael@0 | 1040 | UnicodeString& |
michael@0 | 1041 | RuleBasedNumberFormat::format(int64_t number, |
michael@0 | 1042 | UnicodeString& toAppendTo, |
michael@0 | 1043 | FieldPosition& /* pos */) const |
michael@0 | 1044 | { |
michael@0 | 1045 | if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); |
michael@0 | 1046 | return toAppendTo; |
michael@0 | 1047 | } |
michael@0 | 1048 | |
michael@0 | 1049 | |
michael@0 | 1050 | UnicodeString& |
michael@0 | 1051 | RuleBasedNumberFormat::format(double number, |
michael@0 | 1052 | UnicodeString& toAppendTo, |
michael@0 | 1053 | FieldPosition& /* pos */) const |
michael@0 | 1054 | { |
michael@0 | 1055 | // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does. |
michael@0 | 1056 | if (uprv_isNaN(number)) { |
michael@0 | 1057 | DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal |
michael@0 | 1058 | if (decFmtSyms) { |
michael@0 | 1059 | toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol); |
michael@0 | 1060 | } |
michael@0 | 1061 | } else if (defaultRuleSet) { |
michael@0 | 1062 | defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); |
michael@0 | 1063 | } |
michael@0 | 1064 | return toAppendTo; |
michael@0 | 1065 | } |
michael@0 | 1066 | |
michael@0 | 1067 | |
michael@0 | 1068 | UnicodeString& |
michael@0 | 1069 | RuleBasedNumberFormat::format(int32_t number, |
michael@0 | 1070 | const UnicodeString& ruleSetName, |
michael@0 | 1071 | UnicodeString& toAppendTo, |
michael@0 | 1072 | FieldPosition& /* pos */, |
michael@0 | 1073 | UErrorCode& status) const |
michael@0 | 1074 | { |
michael@0 | 1075 | // return format((int64_t)number, ruleSetName, toAppendTo, pos, status); |
michael@0 | 1076 | if (U_SUCCESS(status)) { |
michael@0 | 1077 | if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { |
michael@0 | 1078 | // throw new IllegalArgumentException("Can't use internal rule set"); |
michael@0 | 1079 | status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1080 | } else { |
michael@0 | 1081 | NFRuleSet *rs = findRuleSet(ruleSetName, status); |
michael@0 | 1082 | if (rs) { |
michael@0 | 1083 | rs->format((int64_t)number, toAppendTo, toAppendTo.length()); |
michael@0 | 1084 | } |
michael@0 | 1085 | } |
michael@0 | 1086 | } |
michael@0 | 1087 | return toAppendTo; |
michael@0 | 1088 | } |
michael@0 | 1089 | |
michael@0 | 1090 | |
michael@0 | 1091 | UnicodeString& |
michael@0 | 1092 | RuleBasedNumberFormat::format(int64_t number, |
michael@0 | 1093 | const UnicodeString& ruleSetName, |
michael@0 | 1094 | UnicodeString& toAppendTo, |
michael@0 | 1095 | FieldPosition& /* pos */, |
michael@0 | 1096 | UErrorCode& status) const |
michael@0 | 1097 | { |
michael@0 | 1098 | if (U_SUCCESS(status)) { |
michael@0 | 1099 | if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { |
michael@0 | 1100 | // throw new IllegalArgumentException("Can't use internal rule set"); |
michael@0 | 1101 | status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1102 | } else { |
michael@0 | 1103 | NFRuleSet *rs = findRuleSet(ruleSetName, status); |
michael@0 | 1104 | if (rs) { |
michael@0 | 1105 | rs->format(number, toAppendTo, toAppendTo.length()); |
michael@0 | 1106 | } |
michael@0 | 1107 | } |
michael@0 | 1108 | } |
michael@0 | 1109 | return toAppendTo; |
michael@0 | 1110 | } |
michael@0 | 1111 | |
michael@0 | 1112 | |
michael@0 | 1113 | UnicodeString& |
michael@0 | 1114 | RuleBasedNumberFormat::format(double number, |
michael@0 | 1115 | const UnicodeString& ruleSetName, |
michael@0 | 1116 | UnicodeString& toAppendTo, |
michael@0 | 1117 | FieldPosition& /* pos */, |
michael@0 | 1118 | UErrorCode& status) const |
michael@0 | 1119 | { |
michael@0 | 1120 | if (U_SUCCESS(status)) { |
michael@0 | 1121 | if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { |
michael@0 | 1122 | // throw new IllegalArgumentException("Can't use internal rule set"); |
michael@0 | 1123 | status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1124 | } else { |
michael@0 | 1125 | NFRuleSet *rs = findRuleSet(ruleSetName, status); |
michael@0 | 1126 | if (rs) { |
michael@0 | 1127 | rs->format(number, toAppendTo, toAppendTo.length()); |
michael@0 | 1128 | } |
michael@0 | 1129 | } |
michael@0 | 1130 | } |
michael@0 | 1131 | return toAppendTo; |
michael@0 | 1132 | } |
michael@0 | 1133 | |
michael@0 | 1134 | void |
michael@0 | 1135 | RuleBasedNumberFormat::parse(const UnicodeString& text, |
michael@0 | 1136 | Formattable& result, |
michael@0 | 1137 | ParsePosition& parsePosition) const |
michael@0 | 1138 | { |
michael@0 | 1139 | if (!ruleSets) { |
michael@0 | 1140 | parsePosition.setErrorIndex(0); |
michael@0 | 1141 | return; |
michael@0 | 1142 | } |
michael@0 | 1143 | |
michael@0 | 1144 | UnicodeString workingText(text, parsePosition.getIndex()); |
michael@0 | 1145 | ParsePosition workingPos(0); |
michael@0 | 1146 | |
michael@0 | 1147 | ParsePosition high_pp(0); |
michael@0 | 1148 | Formattable high_result; |
michael@0 | 1149 | |
michael@0 | 1150 | for (NFRuleSet** p = ruleSets; *p; ++p) { |
michael@0 | 1151 | NFRuleSet *rp = *p; |
michael@0 | 1152 | if (rp->isPublic() && rp->isParseable()) { |
michael@0 | 1153 | ParsePosition working_pp(0); |
michael@0 | 1154 | Formattable working_result; |
michael@0 | 1155 | |
michael@0 | 1156 | rp->parse(workingText, working_pp, kMaxDouble, working_result); |
michael@0 | 1157 | if (working_pp.getIndex() > high_pp.getIndex()) { |
michael@0 | 1158 | high_pp = working_pp; |
michael@0 | 1159 | high_result = working_result; |
michael@0 | 1160 | |
michael@0 | 1161 | if (high_pp.getIndex() == workingText.length()) { |
michael@0 | 1162 | break; |
michael@0 | 1163 | } |
michael@0 | 1164 | } |
michael@0 | 1165 | } |
michael@0 | 1166 | } |
michael@0 | 1167 | |
michael@0 | 1168 | int32_t startIndex = parsePosition.getIndex(); |
michael@0 | 1169 | parsePosition.setIndex(startIndex + high_pp.getIndex()); |
michael@0 | 1170 | if (high_pp.getIndex() > 0) { |
michael@0 | 1171 | parsePosition.setErrorIndex(-1); |
michael@0 | 1172 | } else { |
michael@0 | 1173 | int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; |
michael@0 | 1174 | parsePosition.setErrorIndex(startIndex + errorIndex); |
michael@0 | 1175 | } |
michael@0 | 1176 | result = high_result; |
michael@0 | 1177 | if (result.getType() == Formattable::kDouble) { |
michael@0 | 1178 | int32_t r = (int32_t)result.getDouble(); |
michael@0 | 1179 | if ((double)r == result.getDouble()) { |
michael@0 | 1180 | result.setLong(r); |
michael@0 | 1181 | } |
michael@0 | 1182 | } |
michael@0 | 1183 | } |
michael@0 | 1184 | |
michael@0 | 1185 | #if !UCONFIG_NO_COLLATION |
michael@0 | 1186 | |
michael@0 | 1187 | void |
michael@0 | 1188 | RuleBasedNumberFormat::setLenient(UBool enabled) |
michael@0 | 1189 | { |
michael@0 | 1190 | lenient = enabled; |
michael@0 | 1191 | if (!enabled && collator) { |
michael@0 | 1192 | delete collator; |
michael@0 | 1193 | collator = NULL; |
michael@0 | 1194 | } |
michael@0 | 1195 | } |
michael@0 | 1196 | |
michael@0 | 1197 | #endif |
michael@0 | 1198 | |
michael@0 | 1199 | void |
michael@0 | 1200 | RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { |
michael@0 | 1201 | if (U_SUCCESS(status)) { |
michael@0 | 1202 | if (ruleSetName.isEmpty()) { |
michael@0 | 1203 | if (localizations) { |
michael@0 | 1204 | UnicodeString name(TRUE, localizations->getRuleSetName(0), -1); |
michael@0 | 1205 | defaultRuleSet = findRuleSet(name, status); |
michael@0 | 1206 | } else { |
michael@0 | 1207 | initDefaultRuleSet(); |
michael@0 | 1208 | } |
michael@0 | 1209 | } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { |
michael@0 | 1210 | status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1211 | } else { |
michael@0 | 1212 | NFRuleSet* result = findRuleSet(ruleSetName, status); |
michael@0 | 1213 | if (result != NULL) { |
michael@0 | 1214 | defaultRuleSet = result; |
michael@0 | 1215 | } |
michael@0 | 1216 | } |
michael@0 | 1217 | } |
michael@0 | 1218 | } |
michael@0 | 1219 | |
michael@0 | 1220 | UnicodeString |
michael@0 | 1221 | RuleBasedNumberFormat::getDefaultRuleSetName() const { |
michael@0 | 1222 | UnicodeString result; |
michael@0 | 1223 | if (defaultRuleSet && defaultRuleSet->isPublic()) { |
michael@0 | 1224 | defaultRuleSet->getName(result); |
michael@0 | 1225 | } else { |
michael@0 | 1226 | result.setToBogus(); |
michael@0 | 1227 | } |
michael@0 | 1228 | return result; |
michael@0 | 1229 | } |
michael@0 | 1230 | |
michael@0 | 1231 | void |
michael@0 | 1232 | RuleBasedNumberFormat::initDefaultRuleSet() |
michael@0 | 1233 | { |
michael@0 | 1234 | defaultRuleSet = NULL; |
michael@0 | 1235 | if (!ruleSets) { |
michael@0 | 1236 | return; |
michael@0 | 1237 | } |
michael@0 | 1238 | |
michael@0 | 1239 | const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering"); |
michael@0 | 1240 | const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal"); |
michael@0 | 1241 | const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration"); |
michael@0 | 1242 | |
michael@0 | 1243 | NFRuleSet**p = &ruleSets[0]; |
michael@0 | 1244 | while (*p) { |
michael@0 | 1245 | if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { |
michael@0 | 1246 | defaultRuleSet = *p; |
michael@0 | 1247 | return; |
michael@0 | 1248 | } else { |
michael@0 | 1249 | ++p; |
michael@0 | 1250 | } |
michael@0 | 1251 | } |
michael@0 | 1252 | |
michael@0 | 1253 | defaultRuleSet = *--p; |
michael@0 | 1254 | if (!defaultRuleSet->isPublic()) { |
michael@0 | 1255 | while (p != ruleSets) { |
michael@0 | 1256 | if ((*--p)->isPublic()) { |
michael@0 | 1257 | defaultRuleSet = *p; |
michael@0 | 1258 | break; |
michael@0 | 1259 | } |
michael@0 | 1260 | } |
michael@0 | 1261 | } |
michael@0 | 1262 | } |
michael@0 | 1263 | |
michael@0 | 1264 | |
michael@0 | 1265 | void |
michael@0 | 1266 | RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, |
michael@0 | 1267 | UParseError& pErr, UErrorCode& status) |
michael@0 | 1268 | { |
michael@0 | 1269 | // TODO: implement UParseError |
michael@0 | 1270 | uprv_memset(&pErr, 0, sizeof(UParseError)); |
michael@0 | 1271 | // Note: this can leave ruleSets == NULL, so remaining code should check |
michael@0 | 1272 | if (U_FAILURE(status)) { |
michael@0 | 1273 | return; |
michael@0 | 1274 | } |
michael@0 | 1275 | |
michael@0 | 1276 | this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref(); |
michael@0 | 1277 | |
michael@0 | 1278 | UnicodeString description(rules); |
michael@0 | 1279 | if (!description.length()) { |
michael@0 | 1280 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1281 | return; |
michael@0 | 1282 | } |
michael@0 | 1283 | |
michael@0 | 1284 | // start by stripping the trailing whitespace from all the rules |
michael@0 | 1285 | // (this is all the whitespace follwing each semicolon in the |
michael@0 | 1286 | // description). This allows us to look for rule-set boundaries |
michael@0 | 1287 | // by searching for ";%" without having to worry about whitespace |
michael@0 | 1288 | // between the ; and the % |
michael@0 | 1289 | stripWhitespace(description); |
michael@0 | 1290 | |
michael@0 | 1291 | // check to see if there's a set of lenient-parse rules. If there |
michael@0 | 1292 | // is, pull them out into our temporary holding place for them, |
michael@0 | 1293 | // and delete them from the description before the real desciption- |
michael@0 | 1294 | // parsing code sees them |
michael@0 | 1295 | int32_t lp = description.indexOf(gLenientParse, -1, 0); |
michael@0 | 1296 | if (lp != -1) { |
michael@0 | 1297 | // we've got to make sure we're not in the middle of a rule |
michael@0 | 1298 | // (where "%%lenient-parse" would actually get treated as |
michael@0 | 1299 | // rule text) |
michael@0 | 1300 | if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { |
michael@0 | 1301 | // locate the beginning and end of the actual collation |
michael@0 | 1302 | // rules (there may be whitespace between the name and |
michael@0 | 1303 | // the first token in the description) |
michael@0 | 1304 | int lpEnd = description.indexOf(gSemiPercent, 2, lp); |
michael@0 | 1305 | |
michael@0 | 1306 | if (lpEnd == -1) { |
michael@0 | 1307 | lpEnd = description.length() - 1; |
michael@0 | 1308 | } |
michael@0 | 1309 | int lpStart = lp + u_strlen(gLenientParse); |
michael@0 | 1310 | while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { |
michael@0 | 1311 | ++lpStart; |
michael@0 | 1312 | } |
michael@0 | 1313 | |
michael@0 | 1314 | // copy out the lenient-parse rules and delete them |
michael@0 | 1315 | // from the description |
michael@0 | 1316 | lenientParseRules = new UnicodeString(); |
michael@0 | 1317 | /* test for NULL */ |
michael@0 | 1318 | if (lenientParseRules == 0) { |
michael@0 | 1319 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1320 | return; |
michael@0 | 1321 | } |
michael@0 | 1322 | lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); |
michael@0 | 1323 | |
michael@0 | 1324 | description.remove(lp, lpEnd + 1 - lp); |
michael@0 | 1325 | } |
michael@0 | 1326 | } |
michael@0 | 1327 | |
michael@0 | 1328 | // pre-flight parsing the description and count the number of |
michael@0 | 1329 | // rule sets (";%" marks the end of one rule set and the beginning |
michael@0 | 1330 | // of the next) |
michael@0 | 1331 | numRuleSets = 0; |
michael@0 | 1332 | for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) { |
michael@0 | 1333 | ++numRuleSets; |
michael@0 | 1334 | ++p; |
michael@0 | 1335 | } |
michael@0 | 1336 | ++numRuleSets; |
michael@0 | 1337 | |
michael@0 | 1338 | // our rule list is an array of the appropriate size |
michael@0 | 1339 | ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); |
michael@0 | 1340 | /* test for NULL */ |
michael@0 | 1341 | if (ruleSets == 0) { |
michael@0 | 1342 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1343 | return; |
michael@0 | 1344 | } |
michael@0 | 1345 | |
michael@0 | 1346 | for (int i = 0; i <= numRuleSets; ++i) { |
michael@0 | 1347 | ruleSets[i] = NULL; |
michael@0 | 1348 | } |
michael@0 | 1349 | |
michael@0 | 1350 | // divide up the descriptions into individual rule-set descriptions |
michael@0 | 1351 | // and store them in a temporary array. At each step, we also |
michael@0 | 1352 | // new up a rule set, but all this does is initialize its name |
michael@0 | 1353 | // and remove it from its description. We can't actually parse |
michael@0 | 1354 | // the rest of the descriptions and finish initializing everything |
michael@0 | 1355 | // because we have to know the names and locations of all the rule |
michael@0 | 1356 | // sets before we can actually set everything up |
michael@0 | 1357 | if(!numRuleSets) { |
michael@0 | 1358 | status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1359 | return; |
michael@0 | 1360 | } |
michael@0 | 1361 | |
michael@0 | 1362 | ruleSetDescriptions = new UnicodeString[numRuleSets]; |
michael@0 | 1363 | if (ruleSetDescriptions == 0) { |
michael@0 | 1364 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1365 | return; |
michael@0 | 1366 | } |
michael@0 | 1367 | |
michael@0 | 1368 | { |
michael@0 | 1369 | int curRuleSet = 0; |
michael@0 | 1370 | int32_t start = 0; |
michael@0 | 1371 | for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { |
michael@0 | 1372 | ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); |
michael@0 | 1373 | ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); |
michael@0 | 1374 | if (ruleSets[curRuleSet] == 0) { |
michael@0 | 1375 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1376 | return; |
michael@0 | 1377 | } |
michael@0 | 1378 | ++curRuleSet; |
michael@0 | 1379 | start = p + 1; |
michael@0 | 1380 | } |
michael@0 | 1381 | ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); |
michael@0 | 1382 | ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); |
michael@0 | 1383 | if (ruleSets[curRuleSet] == 0) { |
michael@0 | 1384 | status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 1385 | return; |
michael@0 | 1386 | } |
michael@0 | 1387 | } |
michael@0 | 1388 | |
michael@0 | 1389 | // now we can take note of the formatter's default rule set, which |
michael@0 | 1390 | // is the last public rule set in the description (it's the last |
michael@0 | 1391 | // rather than the first so that a user can create a new formatter |
michael@0 | 1392 | // from an existing formatter and change its default behavior just |
michael@0 | 1393 | // by appending more rule sets to the end) |
michael@0 | 1394 | |
michael@0 | 1395 | // {dlf} Initialization of a fraction rule set requires the default rule |
michael@0 | 1396 | // set to be known. For purposes of initialization, this is always the |
michael@0 | 1397 | // last public rule set, no matter what the localization data says. |
michael@0 | 1398 | initDefaultRuleSet(); |
michael@0 | 1399 | |
michael@0 | 1400 | // finally, we can go back through the temporary descriptions |
michael@0 | 1401 | // list and finish seting up the substructure (and we throw |
michael@0 | 1402 | // away the temporary descriptions as we go) |
michael@0 | 1403 | { |
michael@0 | 1404 | for (int i = 0; i < numRuleSets; i++) { |
michael@0 | 1405 | ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); |
michael@0 | 1406 | } |
michael@0 | 1407 | } |
michael@0 | 1408 | |
michael@0 | 1409 | // Now that the rules are initialized, the 'real' default rule |
michael@0 | 1410 | // set can be adjusted by the localization data. |
michael@0 | 1411 | |
michael@0 | 1412 | // The C code keeps the localization array as is, rather than building |
michael@0 | 1413 | // a separate array of the public rule set names, so we have less work |
michael@0 | 1414 | // to do here-- but we still need to check the names. |
michael@0 | 1415 | |
michael@0 | 1416 | if (localizationInfos) { |
michael@0 | 1417 | // confirm the names, if any aren't in the rules, that's an error |
michael@0 | 1418 | // it is ok if the rules contain public rule sets that are not in this list |
michael@0 | 1419 | for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { |
michael@0 | 1420 | UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1); |
michael@0 | 1421 | NFRuleSet* rs = findRuleSet(name, status); |
michael@0 | 1422 | if (rs == NULL) { |
michael@0 | 1423 | break; // error |
michael@0 | 1424 | } |
michael@0 | 1425 | if (i == 0) { |
michael@0 | 1426 | defaultRuleSet = rs; |
michael@0 | 1427 | } |
michael@0 | 1428 | } |
michael@0 | 1429 | } else { |
michael@0 | 1430 | defaultRuleSet = getDefaultRuleSet(); |
michael@0 | 1431 | } |
michael@0 | 1432 | } |
michael@0 | 1433 | |
michael@0 | 1434 | void |
michael@0 | 1435 | RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) |
michael@0 | 1436 | { |
michael@0 | 1437 | // iterate through the characters... |
michael@0 | 1438 | UnicodeString result; |
michael@0 | 1439 | |
michael@0 | 1440 | int start = 0; |
michael@0 | 1441 | while (start != -1 && start < description.length()) { |
michael@0 | 1442 | // seek to the first non-whitespace character... |
michael@0 | 1443 | while (start < description.length() |
michael@0 | 1444 | && PatternProps::isWhiteSpace(description.charAt(start))) { |
michael@0 | 1445 | ++start; |
michael@0 | 1446 | } |
michael@0 | 1447 | |
michael@0 | 1448 | // locate the next semicolon in the text and copy the text from |
michael@0 | 1449 | // our current position up to that semicolon into the result |
michael@0 | 1450 | int32_t p = description.indexOf(gSemiColon, start); |
michael@0 | 1451 | if (p == -1) { |
michael@0 | 1452 | // or if we don't find a semicolon, just copy the rest of |
michael@0 | 1453 | // the string into the result |
michael@0 | 1454 | result.append(description, start, description.length() - start); |
michael@0 | 1455 | start = -1; |
michael@0 | 1456 | } |
michael@0 | 1457 | else if (p < description.length()) { |
michael@0 | 1458 | result.append(description, start, p + 1 - start); |
michael@0 | 1459 | start = p + 1; |
michael@0 | 1460 | } |
michael@0 | 1461 | |
michael@0 | 1462 | // when we get here, we've seeked off the end of the sring, and |
michael@0 | 1463 | // we terminate the loop (we continue until *start* is -1 rather |
michael@0 | 1464 | // than until *p* is -1, because otherwise we'd miss the last |
michael@0 | 1465 | // rule in the description) |
michael@0 | 1466 | else { |
michael@0 | 1467 | start = -1; |
michael@0 | 1468 | } |
michael@0 | 1469 | } |
michael@0 | 1470 | |
michael@0 | 1471 | description.setTo(result); |
michael@0 | 1472 | } |
michael@0 | 1473 | |
michael@0 | 1474 | |
michael@0 | 1475 | void |
michael@0 | 1476 | RuleBasedNumberFormat::dispose() |
michael@0 | 1477 | { |
michael@0 | 1478 | if (ruleSets) { |
michael@0 | 1479 | for (NFRuleSet** p = ruleSets; *p; ++p) { |
michael@0 | 1480 | delete *p; |
michael@0 | 1481 | } |
michael@0 | 1482 | uprv_free(ruleSets); |
michael@0 | 1483 | ruleSets = NULL; |
michael@0 | 1484 | } |
michael@0 | 1485 | |
michael@0 | 1486 | if (ruleSetDescriptions) { |
michael@0 | 1487 | delete [] ruleSetDescriptions; |
michael@0 | 1488 | } |
michael@0 | 1489 | |
michael@0 | 1490 | #if !UCONFIG_NO_COLLATION |
michael@0 | 1491 | delete collator; |
michael@0 | 1492 | #endif |
michael@0 | 1493 | collator = NULL; |
michael@0 | 1494 | |
michael@0 | 1495 | delete decimalFormatSymbols; |
michael@0 | 1496 | decimalFormatSymbols = NULL; |
michael@0 | 1497 | |
michael@0 | 1498 | delete lenientParseRules; |
michael@0 | 1499 | lenientParseRules = NULL; |
michael@0 | 1500 | |
michael@0 | 1501 | if (localizations) localizations = localizations->unref(); |
michael@0 | 1502 | } |
michael@0 | 1503 | |
michael@0 | 1504 | |
michael@0 | 1505 | //----------------------------------------------------------------------- |
michael@0 | 1506 | // package-internal API |
michael@0 | 1507 | //----------------------------------------------------------------------- |
michael@0 | 1508 | |
michael@0 | 1509 | /** |
michael@0 | 1510 | * Returns the collator to use for lenient parsing. The collator is lazily created: |
michael@0 | 1511 | * this function creates it the first time it's called. |
michael@0 | 1512 | * @return The collator to use for lenient parsing, or null if lenient parsing |
michael@0 | 1513 | * is turned off. |
michael@0 | 1514 | */ |
michael@0 | 1515 | Collator* |
michael@0 | 1516 | RuleBasedNumberFormat::getCollator() const |
michael@0 | 1517 | { |
michael@0 | 1518 | #if !UCONFIG_NO_COLLATION |
michael@0 | 1519 | if (!ruleSets) { |
michael@0 | 1520 | return NULL; |
michael@0 | 1521 | } |
michael@0 | 1522 | |
michael@0 | 1523 | // lazy-evaulate the collator |
michael@0 | 1524 | if (collator == NULL && lenient) { |
michael@0 | 1525 | // create a default collator based on the formatter's locale, |
michael@0 | 1526 | // then pull out that collator's rules, append any additional |
michael@0 | 1527 | // rules specified in the description, and create a _new_ |
michael@0 | 1528 | // collator based on the combinaiton of those rules |
michael@0 | 1529 | |
michael@0 | 1530 | UErrorCode status = U_ZERO_ERROR; |
michael@0 | 1531 | |
michael@0 | 1532 | Collator* temp = Collator::createInstance(locale, status); |
michael@0 | 1533 | RuleBasedCollator* newCollator; |
michael@0 | 1534 | if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) { |
michael@0 | 1535 | if (lenientParseRules) { |
michael@0 | 1536 | UnicodeString rules(newCollator->getRules()); |
michael@0 | 1537 | rules.append(*lenientParseRules); |
michael@0 | 1538 | |
michael@0 | 1539 | newCollator = new RuleBasedCollator(rules, status); |
michael@0 | 1540 | // Exit if newCollator could not be created. |
michael@0 | 1541 | if (newCollator == NULL) { |
michael@0 | 1542 | return NULL; |
michael@0 | 1543 | } |
michael@0 | 1544 | } else { |
michael@0 | 1545 | temp = NULL; |
michael@0 | 1546 | } |
michael@0 | 1547 | if (U_SUCCESS(status)) { |
michael@0 | 1548 | newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); |
michael@0 | 1549 | // cast away const |
michael@0 | 1550 | ((RuleBasedNumberFormat*)this)->collator = newCollator; |
michael@0 | 1551 | } else { |
michael@0 | 1552 | delete newCollator; |
michael@0 | 1553 | } |
michael@0 | 1554 | } |
michael@0 | 1555 | delete temp; |
michael@0 | 1556 | } |
michael@0 | 1557 | #endif |
michael@0 | 1558 | |
michael@0 | 1559 | // if lenient-parse mode is off, this will be null |
michael@0 | 1560 | // (see setLenientParseMode()) |
michael@0 | 1561 | return collator; |
michael@0 | 1562 | } |
michael@0 | 1563 | |
michael@0 | 1564 | |
michael@0 | 1565 | /** |
michael@0 | 1566 | * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat |
michael@0 | 1567 | * instances owned by this formatter. This object is lazily created: this function |
michael@0 | 1568 | * creates it the first time it's called. |
michael@0 | 1569 | * @return The DecimalFormatSymbols object that should be used by all DecimalFormat |
michael@0 | 1570 | * instances owned by this formatter. |
michael@0 | 1571 | */ |
michael@0 | 1572 | DecimalFormatSymbols* |
michael@0 | 1573 | RuleBasedNumberFormat::getDecimalFormatSymbols() const |
michael@0 | 1574 | { |
michael@0 | 1575 | // lazy-evaluate the DecimalFormatSymbols object. This object |
michael@0 | 1576 | // is shared by all DecimalFormat instances belonging to this |
michael@0 | 1577 | // formatter |
michael@0 | 1578 | if (decimalFormatSymbols == NULL) { |
michael@0 | 1579 | UErrorCode status = U_ZERO_ERROR; |
michael@0 | 1580 | DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status); |
michael@0 | 1581 | if (U_SUCCESS(status)) { |
michael@0 | 1582 | ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp; |
michael@0 | 1583 | } else { |
michael@0 | 1584 | delete temp; |
michael@0 | 1585 | } |
michael@0 | 1586 | } |
michael@0 | 1587 | return decimalFormatSymbols; |
michael@0 | 1588 | } |
michael@0 | 1589 | |
michael@0 | 1590 | // De-owning the current localized symbols and adopt the new symbols. |
michael@0 | 1591 | void |
michael@0 | 1592 | RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) |
michael@0 | 1593 | { |
michael@0 | 1594 | if (symbolsToAdopt == NULL) { |
michael@0 | 1595 | return; // do not allow caller to set decimalFormatSymbols to NULL |
michael@0 | 1596 | } |
michael@0 | 1597 | |
michael@0 | 1598 | if (decimalFormatSymbols != NULL) { |
michael@0 | 1599 | delete decimalFormatSymbols; |
michael@0 | 1600 | } |
michael@0 | 1601 | |
michael@0 | 1602 | decimalFormatSymbols = symbolsToAdopt; |
michael@0 | 1603 | |
michael@0 | 1604 | { |
michael@0 | 1605 | // Apply the new decimalFormatSymbols by reparsing the rulesets |
michael@0 | 1606 | UErrorCode status = U_ZERO_ERROR; |
michael@0 | 1607 | |
michael@0 | 1608 | for (int32_t i = 0; i < numRuleSets; i++) { |
michael@0 | 1609 | ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); |
michael@0 | 1610 | } |
michael@0 | 1611 | } |
michael@0 | 1612 | } |
michael@0 | 1613 | |
michael@0 | 1614 | // Setting the symbols is equlivalent to adopting a newly created localized symbols. |
michael@0 | 1615 | void |
michael@0 | 1616 | RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) |
michael@0 | 1617 | { |
michael@0 | 1618 | adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); |
michael@0 | 1619 | } |
michael@0 | 1620 | |
michael@0 | 1621 | U_NAMESPACE_END |
michael@0 | 1622 | |
michael@0 | 1623 | /* U_HAVE_RBNF */ |
michael@0 | 1624 | #endif |