michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 1997-2013, International Business Machines Corporation michael@0: * and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: */ michael@0: michael@0: #include "utypeinfo.h" // for 'typeid' to work michael@0: michael@0: #include "unicode/rbnf.h" michael@0: michael@0: #if U_HAVE_RBNF michael@0: michael@0: #include "unicode/normlzr.h" michael@0: #include "unicode/tblcoll.h" michael@0: #include "unicode/uchar.h" michael@0: #include "unicode/ucol.h" michael@0: #include "unicode/uloc.h" michael@0: #include "unicode/unum.h" michael@0: #include "unicode/ures.h" michael@0: #include "unicode/ustring.h" michael@0: #include "unicode/utf16.h" michael@0: #include "unicode/udata.h" michael@0: #include "nfrs.h" michael@0: michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "patternprops.h" michael@0: #include "uresimp.h" michael@0: michael@0: // debugging michael@0: // #define DEBUG michael@0: michael@0: #ifdef DEBUG michael@0: #include "stdio.h" michael@0: #endif michael@0: michael@0: #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" michael@0: michael@0: static const UChar gPercentPercent[] = michael@0: { michael@0: 0x25, 0x25, 0 michael@0: }; /* "%%" */ michael@0: michael@0: // All urbnf objects are created through openRules, so we init all of the michael@0: // Unicode string constants required by rbnf, nfrs, or nfr here. michael@0: static const UChar gLenientParse[] = michael@0: { michael@0: 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 michael@0: }; /* "%%lenient-parse:" */ michael@0: static const UChar gSemiColon = 0x003B; michael@0: static const UChar gSemiPercent[] = michael@0: { michael@0: 0x3B, 0x25, 0 michael@0: }; /* ";%" */ michael@0: michael@0: #define kSomeNumberOfBitsDiv2 22 michael@0: #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) michael@0: #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) michael@0: michael@0: /* michael@0: This is a utility class. It does not use ICU's RTTI. michael@0: If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. michael@0: Please make sure that intltest passes on Windows in Release mode, michael@0: since the string pooling per compilation unit will mess up how RTTI works. michael@0: The RTTI code was also removed due to lack of code coverage. michael@0: */ michael@0: class LocalizationInfo : public UMemory { michael@0: protected: michael@0: virtual ~LocalizationInfo(); michael@0: uint32_t refcount; michael@0: michael@0: public: michael@0: LocalizationInfo() : refcount(0) {} michael@0: michael@0: LocalizationInfo* ref(void) { michael@0: ++refcount; michael@0: return this; michael@0: } michael@0: michael@0: LocalizationInfo* unref(void) { michael@0: if (refcount && --refcount == 0) { michael@0: delete this; michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: virtual UBool operator==(const LocalizationInfo* rhs) const; michael@0: inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } michael@0: michael@0: virtual int32_t getNumberOfRuleSets(void) const = 0; michael@0: virtual const UChar* getRuleSetName(int32_t index) const = 0; michael@0: virtual int32_t getNumberOfDisplayLocales(void) const = 0; michael@0: virtual const UChar* getLocaleName(int32_t index) const = 0; michael@0: virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; michael@0: michael@0: virtual int32_t indexForLocale(const UChar* locale) const; michael@0: virtual int32_t indexForRuleSet(const UChar* ruleset) const; michael@0: michael@0: // virtual UClassID getDynamicClassID() const = 0; michael@0: // static UClassID getStaticClassID(void); michael@0: }; michael@0: michael@0: LocalizationInfo::~LocalizationInfo() {} michael@0: michael@0: //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) michael@0: michael@0: // if both strings are NULL, this returns TRUE michael@0: static UBool michael@0: streq(const UChar* lhs, const UChar* rhs) { michael@0: if (rhs == lhs) { michael@0: return TRUE; michael@0: } michael@0: if (lhs && rhs) { michael@0: return u_strcmp(lhs, rhs) == 0; michael@0: } michael@0: return FALSE; michael@0: } michael@0: michael@0: UBool michael@0: LocalizationInfo::operator==(const LocalizationInfo* rhs) const { michael@0: if (rhs) { michael@0: if (this == rhs) { michael@0: return TRUE; michael@0: } michael@0: michael@0: int32_t rsc = getNumberOfRuleSets(); michael@0: if (rsc == rhs->getNumberOfRuleSets()) { michael@0: for (int i = 0; i < rsc; ++i) { michael@0: if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { michael@0: return FALSE; michael@0: } michael@0: } michael@0: int32_t dlc = getNumberOfDisplayLocales(); michael@0: if (dlc == rhs->getNumberOfDisplayLocales()) { michael@0: for (int i = 0; i < dlc; ++i) { michael@0: const UChar* locale = getLocaleName(i); michael@0: int32_t ix = rhs->indexForLocale(locale); michael@0: // if no locale, ix is -1, getLocaleName returns null, so streq returns false michael@0: if (!streq(locale, rhs->getLocaleName(ix))) { michael@0: return FALSE; michael@0: } michael@0: for (int j = 0; j < rsc; ++j) { michael@0: if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { michael@0: return FALSE; michael@0: } michael@0: } michael@0: } michael@0: return TRUE; michael@0: } michael@0: } michael@0: } michael@0: return FALSE; michael@0: } michael@0: michael@0: int32_t michael@0: LocalizationInfo::indexForLocale(const UChar* locale) const { michael@0: for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { michael@0: if (streq(locale, getLocaleName(i))) { michael@0: return i; michael@0: } michael@0: } michael@0: return -1; michael@0: } michael@0: michael@0: int32_t michael@0: LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { michael@0: if (ruleset) { michael@0: for (int i = 0; i < getNumberOfRuleSets(); ++i) { michael@0: if (streq(ruleset, getRuleSetName(i))) { michael@0: return i; michael@0: } michael@0: } michael@0: } michael@0: return -1; michael@0: } michael@0: michael@0: michael@0: typedef void (*Fn_Deleter)(void*); michael@0: michael@0: class VArray { michael@0: void** buf; michael@0: int32_t cap; michael@0: int32_t size; michael@0: Fn_Deleter deleter; michael@0: public: michael@0: VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {} michael@0: michael@0: VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} michael@0: michael@0: ~VArray() { michael@0: if (deleter) { michael@0: for (int i = 0; i < size; ++i) { michael@0: (*deleter)(buf[i]); michael@0: } michael@0: } michael@0: uprv_free(buf); michael@0: } michael@0: michael@0: int32_t length() { michael@0: return size; michael@0: } michael@0: michael@0: void add(void* elem, UErrorCode& status) { michael@0: if (U_SUCCESS(status)) { michael@0: if (size == cap) { michael@0: if (cap == 0) { michael@0: cap = 1; michael@0: } else if (cap < 256) { michael@0: cap *= 2; michael@0: } else { michael@0: cap += 256; michael@0: } michael@0: if (buf == NULL) { michael@0: buf = (void**)uprv_malloc(cap * sizeof(void*)); michael@0: } else { michael@0: buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); michael@0: } michael@0: if (buf == NULL) { michael@0: // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: void* start = &buf[size]; michael@0: size_t count = (cap - size) * sizeof(void*); michael@0: uprv_memset(start, 0, count); // fill with nulls, just because michael@0: } michael@0: buf[size++] = elem; michael@0: } michael@0: } michael@0: michael@0: void** release(void) { michael@0: void** result = buf; michael@0: buf = NULL; michael@0: cap = 0; michael@0: size = 0; michael@0: return result; michael@0: } michael@0: }; michael@0: michael@0: class LocDataParser; michael@0: michael@0: class StringLocalizationInfo : public LocalizationInfo { michael@0: UChar* info; michael@0: UChar*** data; michael@0: int32_t numRuleSets; michael@0: int32_t numLocales; michael@0: michael@0: friend class LocDataParser; michael@0: michael@0: StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) michael@0: : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) michael@0: { michael@0: } michael@0: michael@0: public: michael@0: static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); michael@0: michael@0: virtual ~StringLocalizationInfo(); michael@0: virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } michael@0: virtual const UChar* getRuleSetName(int32_t index) const; michael@0: virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } michael@0: virtual const UChar* getLocaleName(int32_t index) const; michael@0: virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; michael@0: michael@0: // virtual UClassID getDynamicClassID() const; michael@0: // static UClassID getStaticClassID(void); michael@0: michael@0: private: michael@0: void init(UErrorCode& status) const; michael@0: }; michael@0: michael@0: michael@0: enum { michael@0: OPEN_ANGLE = 0x003c, /* '<' */ michael@0: CLOSE_ANGLE = 0x003e, /* '>' */ michael@0: COMMA = 0x002c, michael@0: TICK = 0x0027, michael@0: QUOTE = 0x0022, michael@0: SPACE = 0x0020 michael@0: }; michael@0: michael@0: /** michael@0: * Utility for parsing a localization string and returning a StringLocalizationInfo*. michael@0: */ michael@0: class LocDataParser { michael@0: UChar* data; michael@0: const UChar* e; michael@0: UChar* p; michael@0: UChar ch; michael@0: UParseError& pe; michael@0: UErrorCode& ec; michael@0: michael@0: public: michael@0: LocDataParser(UParseError& parseError, UErrorCode& status) michael@0: : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {} michael@0: ~LocDataParser() {} michael@0: michael@0: /* michael@0: * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, michael@0: * and return NULL. The StringLocalizationInfo will adopt locData if it is created. michael@0: */ michael@0: StringLocalizationInfo* parse(UChar* data, int32_t len); michael@0: michael@0: private: michael@0: michael@0: void inc(void) { ++p; ch = 0xffff; } michael@0: UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; } michael@0: UBool check(UChar c) { return p < e && (ch == c || *p == c); } michael@0: void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();} michael@0: UBool inList(UChar c, const UChar* list) const { michael@0: if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE; michael@0: while (*list && *list != c) ++list; return *list == c; michael@0: } michael@0: void parseError(const char* msg); michael@0: michael@0: StringLocalizationInfo* doParse(void); michael@0: michael@0: UChar** nextArray(int32_t& requiredLength); michael@0: UChar* nextString(void); michael@0: }; michael@0: michael@0: #ifdef DEBUG michael@0: #define ERROR(msg) parseError(msg); return NULL; michael@0: #else michael@0: #define ERROR(msg) parseError(NULL); return NULL; michael@0: #endif michael@0: michael@0: michael@0: static const UChar DQUOTE_STOPLIST[] = { michael@0: QUOTE, 0 michael@0: }; michael@0: michael@0: static const UChar SQUOTE_STOPLIST[] = { michael@0: TICK, 0 michael@0: }; michael@0: michael@0: static const UChar NOQUOTE_STOPLIST[] = { michael@0: SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 michael@0: }; michael@0: michael@0: static void michael@0: DeleteFn(void* p) { michael@0: uprv_free(p); michael@0: } michael@0: michael@0: StringLocalizationInfo* michael@0: LocDataParser::parse(UChar* _data, int32_t len) { michael@0: if (U_FAILURE(ec)) { michael@0: if (_data) uprv_free(_data); michael@0: return NULL; michael@0: } michael@0: michael@0: pe.line = 0; michael@0: pe.offset = -1; michael@0: pe.postContext[0] = 0; michael@0: pe.preContext[0] = 0; michael@0: michael@0: if (_data == NULL) { michael@0: ec = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: if (len <= 0) { michael@0: ec = U_ILLEGAL_ARGUMENT_ERROR; michael@0: uprv_free(_data); michael@0: return NULL; michael@0: } michael@0: michael@0: data = _data; michael@0: e = data + len; michael@0: p = _data; michael@0: ch = 0xffff; michael@0: michael@0: return doParse(); michael@0: } michael@0: michael@0: michael@0: StringLocalizationInfo* michael@0: LocDataParser::doParse(void) { michael@0: skipWhitespace(); michael@0: if (!checkInc(OPEN_ANGLE)) { michael@0: ERROR("Missing open angle"); michael@0: } else { michael@0: VArray array(DeleteFn); michael@0: UBool mightHaveNext = TRUE; michael@0: int32_t requiredLength = -1; michael@0: while (mightHaveNext) { michael@0: mightHaveNext = FALSE; michael@0: UChar** elem = nextArray(requiredLength); michael@0: skipWhitespace(); michael@0: UBool haveComma = check(COMMA); michael@0: if (elem) { michael@0: array.add(elem, ec); michael@0: if (haveComma) { michael@0: inc(); michael@0: mightHaveNext = TRUE; michael@0: } michael@0: } else if (haveComma) { michael@0: ERROR("Unexpected character"); michael@0: } michael@0: } michael@0: michael@0: skipWhitespace(); michael@0: if (!checkInc(CLOSE_ANGLE)) { michael@0: if (check(OPEN_ANGLE)) { michael@0: ERROR("Missing comma in outer array"); michael@0: } else { michael@0: ERROR("Missing close angle bracket in outer array"); michael@0: } michael@0: } michael@0: michael@0: skipWhitespace(); michael@0: if (p != e) { michael@0: ERROR("Extra text after close of localization data"); michael@0: } michael@0: michael@0: array.add(NULL, ec); michael@0: if (U_SUCCESS(ec)) { michael@0: int32_t numLocs = array.length() - 2; // subtract first, NULL michael@0: UChar*** result = (UChar***)array.release(); michael@0: michael@0: return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL michael@0: } michael@0: } michael@0: michael@0: ERROR("Unknown error"); michael@0: } michael@0: michael@0: UChar** michael@0: LocDataParser::nextArray(int32_t& requiredLength) { michael@0: if (U_FAILURE(ec)) { michael@0: return NULL; michael@0: } michael@0: michael@0: skipWhitespace(); michael@0: if (!checkInc(OPEN_ANGLE)) { michael@0: ERROR("Missing open angle"); michael@0: } michael@0: michael@0: VArray array; michael@0: UBool mightHaveNext = TRUE; michael@0: while (mightHaveNext) { michael@0: mightHaveNext = FALSE; michael@0: UChar* elem = nextString(); michael@0: skipWhitespace(); michael@0: UBool haveComma = check(COMMA); michael@0: if (elem) { michael@0: array.add(elem, ec); michael@0: if (haveComma) { michael@0: inc(); michael@0: mightHaveNext = TRUE; michael@0: } michael@0: } else if (haveComma) { michael@0: ERROR("Unexpected comma"); michael@0: } michael@0: } michael@0: skipWhitespace(); michael@0: if (!checkInc(CLOSE_ANGLE)) { michael@0: if (check(OPEN_ANGLE)) { michael@0: ERROR("Missing close angle bracket in inner array"); michael@0: } else { michael@0: ERROR("Missing comma in inner array"); michael@0: } michael@0: } michael@0: michael@0: array.add(NULL, ec); michael@0: if (U_SUCCESS(ec)) { michael@0: if (requiredLength == -1) { michael@0: requiredLength = array.length() + 1; michael@0: } else if (array.length() != requiredLength) { michael@0: ec = U_ILLEGAL_ARGUMENT_ERROR; michael@0: ERROR("Array not of required length"); michael@0: } michael@0: michael@0: return (UChar**)array.release(); michael@0: } michael@0: ERROR("Unknown Error"); michael@0: } michael@0: michael@0: UChar* michael@0: LocDataParser::nextString() { michael@0: UChar* result = NULL; michael@0: michael@0: skipWhitespace(); michael@0: if (p < e) { michael@0: const UChar* terminators; michael@0: UChar c = *p; michael@0: UBool haveQuote = c == QUOTE || c == TICK; michael@0: if (haveQuote) { michael@0: inc(); michael@0: terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; michael@0: } else { michael@0: terminators = NOQUOTE_STOPLIST; michael@0: } michael@0: UChar* start = p; michael@0: while (p < e && !inList(*p, terminators)) ++p; michael@0: if (p == e) { michael@0: ERROR("Unexpected end of data"); michael@0: } michael@0: michael@0: UChar x = *p; michael@0: if (p > start) { michael@0: ch = x; michael@0: *p = 0x0; // terminate by writing to data michael@0: result = start; // just point into data michael@0: } michael@0: if (haveQuote) { michael@0: if (x != c) { michael@0: ERROR("Missing matching quote"); michael@0: } else if (p == start) { michael@0: ERROR("Empty string"); michael@0: } michael@0: inc(); michael@0: } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { michael@0: ERROR("Unexpected character in string"); michael@0: } michael@0: } michael@0: michael@0: // ok for there to be no next string michael@0: return result; michael@0: } michael@0: michael@0: void michael@0: LocDataParser::parseError(const char* /*str*/) { michael@0: if (!data) { michael@0: return; michael@0: } michael@0: michael@0: const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; michael@0: if (start < data) { michael@0: start = data; michael@0: } michael@0: for (UChar* x = p; --x >= start;) { michael@0: if (!*x) { michael@0: start = x+1; michael@0: break; michael@0: } michael@0: } michael@0: const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; michael@0: if (limit > e) { michael@0: limit = e; michael@0: } michael@0: u_strncpy(pe.preContext, start, (int32_t)(p-start)); michael@0: pe.preContext[p-start] = 0; michael@0: u_strncpy(pe.postContext, p, (int32_t)(limit-p)); michael@0: pe.postContext[limit-p] = 0; michael@0: pe.offset = (int32_t)(p - data); michael@0: michael@0: #ifdef DEBUG michael@0: fprintf(stderr, "%s at or near character %d: ", str, p-data); michael@0: michael@0: UnicodeString msg; michael@0: msg.append(start, p - start); michael@0: msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ michael@0: msg.append(p, limit-p); michael@0: msg.append("'"); michael@0: michael@0: char buf[128]; michael@0: int32_t len = msg.extract(0, msg.length(), buf, 128); michael@0: if (len >= 128) { michael@0: buf[127] = 0; michael@0: } else { michael@0: buf[len] = 0; michael@0: } michael@0: fprintf(stderr, "%s\n", buf); michael@0: fflush(stderr); michael@0: #endif michael@0: michael@0: uprv_free(data); michael@0: data = NULL; michael@0: p = NULL; michael@0: e = NULL; michael@0: michael@0: if (U_SUCCESS(ec)) { michael@0: ec = U_PARSE_ERROR; michael@0: } michael@0: } michael@0: michael@0: //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) michael@0: michael@0: StringLocalizationInfo* michael@0: StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { michael@0: if (U_FAILURE(status)) { michael@0: return NULL; michael@0: } michael@0: michael@0: int32_t len = info.length(); michael@0: if (len == 0) { michael@0: return NULL; // no error; michael@0: } michael@0: michael@0: UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); michael@0: if (!p) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: info.extract(p, len, status); michael@0: if (!U_FAILURE(status)) { michael@0: status = U_ZERO_ERROR; // clear warning about non-termination michael@0: } michael@0: michael@0: LocDataParser parser(perror, status); michael@0: return parser.parse(p, len); michael@0: } michael@0: michael@0: StringLocalizationInfo::~StringLocalizationInfo() { michael@0: for (UChar*** p = (UChar***)data; *p; ++p) { michael@0: // remaining data is simply pointer into our unicode string data. michael@0: if (*p) uprv_free(*p); michael@0: } michael@0: if (data) uprv_free(data); michael@0: if (info) uprv_free(info); michael@0: } michael@0: michael@0: michael@0: const UChar* michael@0: StringLocalizationInfo::getRuleSetName(int32_t index) const { michael@0: if (index >= 0 && index < getNumberOfRuleSets()) { michael@0: return data[0][index]; michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: const UChar* michael@0: StringLocalizationInfo::getLocaleName(int32_t index) const { michael@0: if (index >= 0 && index < getNumberOfDisplayLocales()) { michael@0: return data[index+1][0]; michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: const UChar* michael@0: StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { michael@0: if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && michael@0: ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { michael@0: return data[localeIndex+1][ruleIndex+1]; michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: // ---------- michael@0: michael@0: RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, michael@0: const UnicodeString& locs, michael@0: const Locale& alocale, UParseError& perror, UErrorCode& status) michael@0: : ruleSets(NULL) michael@0: , ruleSetDescriptions(NULL) michael@0: , numRuleSets(0) michael@0: , defaultRuleSet(NULL) michael@0: , locale(alocale) michael@0: , collator(NULL) michael@0: , decimalFormatSymbols(NULL) michael@0: , lenient(FALSE) michael@0: , lenientParseRules(NULL) michael@0: , localizations(NULL) michael@0: { michael@0: LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); michael@0: init(description, locinfo, perror, status); michael@0: } michael@0: michael@0: RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, michael@0: const UnicodeString& locs, michael@0: UParseError& perror, UErrorCode& status) michael@0: : ruleSets(NULL) michael@0: , ruleSetDescriptions(NULL) michael@0: , numRuleSets(0) michael@0: , defaultRuleSet(NULL) michael@0: , locale(Locale::getDefault()) michael@0: , collator(NULL) michael@0: , decimalFormatSymbols(NULL) michael@0: , lenient(FALSE) michael@0: , lenientParseRules(NULL) michael@0: , localizations(NULL) michael@0: { michael@0: LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); michael@0: init(description, locinfo, perror, status); michael@0: } michael@0: michael@0: RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, michael@0: LocalizationInfo* info, michael@0: const Locale& alocale, UParseError& perror, UErrorCode& status) michael@0: : ruleSets(NULL) michael@0: , ruleSetDescriptions(NULL) michael@0: , numRuleSets(0) michael@0: , defaultRuleSet(NULL) michael@0: , locale(alocale) michael@0: , collator(NULL) michael@0: , decimalFormatSymbols(NULL) michael@0: , lenient(FALSE) michael@0: , lenientParseRules(NULL) michael@0: , localizations(NULL) michael@0: { michael@0: init(description, info, perror, status); michael@0: } michael@0: michael@0: RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, michael@0: UParseError& perror, michael@0: UErrorCode& status) michael@0: : ruleSets(NULL) michael@0: , ruleSetDescriptions(NULL) michael@0: , numRuleSets(0) michael@0: , defaultRuleSet(NULL) michael@0: , locale(Locale::getDefault()) michael@0: , collator(NULL) michael@0: , decimalFormatSymbols(NULL) michael@0: , lenient(FALSE) michael@0: , lenientParseRules(NULL) michael@0: , localizations(NULL) michael@0: { michael@0: init(description, NULL, perror, status); michael@0: } michael@0: michael@0: RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, michael@0: const Locale& aLocale, michael@0: UParseError& perror, michael@0: UErrorCode& status) michael@0: : ruleSets(NULL) michael@0: , ruleSetDescriptions(NULL) michael@0: , numRuleSets(0) michael@0: , defaultRuleSet(NULL) michael@0: , locale(aLocale) michael@0: , collator(NULL) michael@0: , decimalFormatSymbols(NULL) michael@0: , lenient(FALSE) michael@0: , lenientParseRules(NULL) michael@0: , localizations(NULL) michael@0: { michael@0: init(description, NULL, perror, status); michael@0: } michael@0: michael@0: RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) michael@0: : ruleSets(NULL) michael@0: , ruleSetDescriptions(NULL) michael@0: , numRuleSets(0) michael@0: , defaultRuleSet(NULL) michael@0: , locale(alocale) michael@0: , collator(NULL) michael@0: , decimalFormatSymbols(NULL) michael@0: , lenient(FALSE) michael@0: , lenientParseRules(NULL) michael@0: , localizations(NULL) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: michael@0: const char* rules_tag = "RBNFRules"; michael@0: const char* fmt_tag = ""; michael@0: switch (tag) { michael@0: case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break; michael@0: case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break; michael@0: case URBNF_DURATION: fmt_tag = "DurationRules"; break; michael@0: case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break; michael@0: default: status = U_ILLEGAL_ARGUMENT_ERROR; return; michael@0: } michael@0: michael@0: // TODO: read localization info from resource michael@0: LocalizationInfo* locinfo = NULL; michael@0: michael@0: UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); michael@0: if (U_SUCCESS(status)) { michael@0: setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), michael@0: ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); michael@0: michael@0: UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status); michael@0: if (U_FAILURE(status)) { michael@0: ures_close(nfrb); michael@0: } michael@0: UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status); michael@0: if (U_FAILURE(status)) { michael@0: ures_close(rbnfRules); michael@0: ures_close(nfrb); michael@0: return; michael@0: } michael@0: michael@0: UnicodeString desc; michael@0: while (ures_hasNext(ruleSets)) { michael@0: desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status)); michael@0: } michael@0: UParseError perror; michael@0: michael@0: init (desc, locinfo, perror, status); michael@0: michael@0: ures_close(ruleSets); michael@0: ures_close(rbnfRules); michael@0: } michael@0: ures_close(nfrb); michael@0: } michael@0: michael@0: RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) michael@0: : NumberFormat(rhs) michael@0: , ruleSets(NULL) michael@0: , ruleSetDescriptions(NULL) michael@0: , numRuleSets(0) michael@0: , defaultRuleSet(NULL) michael@0: , locale(rhs.locale) michael@0: , collator(NULL) michael@0: , decimalFormatSymbols(NULL) michael@0: , lenient(FALSE) michael@0: , lenientParseRules(NULL) michael@0: , localizations(NULL) michael@0: { michael@0: this->operator=(rhs); michael@0: } michael@0: michael@0: // -------- michael@0: michael@0: RuleBasedNumberFormat& michael@0: RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) michael@0: { michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: dispose(); michael@0: locale = rhs.locale; michael@0: lenient = rhs.lenient; michael@0: michael@0: UnicodeString rules = rhs.getRules(); michael@0: UParseError perror; michael@0: init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); michael@0: michael@0: return *this; michael@0: } michael@0: michael@0: RuleBasedNumberFormat::~RuleBasedNumberFormat() michael@0: { michael@0: dispose(); michael@0: } michael@0: michael@0: Format* michael@0: RuleBasedNumberFormat::clone(void) const michael@0: { michael@0: RuleBasedNumberFormat * result = NULL; michael@0: UnicodeString rules = getRules(); michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: UParseError perror; michael@0: result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status); michael@0: /* test for NULL */ michael@0: if (result == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return 0; michael@0: } michael@0: if (U_FAILURE(status)) { michael@0: delete result; michael@0: result = 0; michael@0: } else { michael@0: result->lenient = lenient; michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: UBool michael@0: RuleBasedNumberFormat::operator==(const Format& other) const michael@0: { michael@0: if (this == &other) { michael@0: return TRUE; michael@0: } michael@0: michael@0: if (typeid(*this) == typeid(other)) { michael@0: const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; michael@0: if (locale == rhs.locale && michael@0: lenient == rhs.lenient && michael@0: (localizations == NULL michael@0: ? rhs.localizations == NULL michael@0: : (rhs.localizations == NULL michael@0: ? FALSE michael@0: : *localizations == rhs.localizations))) { michael@0: michael@0: NFRuleSet** p = ruleSets; michael@0: NFRuleSet** q = rhs.ruleSets; michael@0: if (p == NULL) { michael@0: return q == NULL; michael@0: } else if (q == NULL) { michael@0: return FALSE; michael@0: } michael@0: while (*p && *q && (**p == **q)) { michael@0: ++p; michael@0: ++q; michael@0: } michael@0: return *q == NULL && *p == NULL; michael@0: } michael@0: } michael@0: michael@0: return FALSE; michael@0: } michael@0: michael@0: UnicodeString michael@0: RuleBasedNumberFormat::getRules() const michael@0: { michael@0: UnicodeString result; michael@0: if (ruleSets != NULL) { michael@0: for (NFRuleSet** p = ruleSets; *p; ++p) { michael@0: (*p)->appendRules(result); michael@0: } michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: UnicodeString michael@0: RuleBasedNumberFormat::getRuleSetName(int32_t index) const michael@0: { michael@0: if (localizations) { michael@0: UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); michael@0: return string; michael@0: } else if (ruleSets) { michael@0: UnicodeString result; michael@0: for (NFRuleSet** p = ruleSets; *p; ++p) { michael@0: NFRuleSet* rs = *p; michael@0: if (rs->isPublic()) { michael@0: if (--index == -1) { michael@0: rs->getName(result); michael@0: return result; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: UnicodeString empty; michael@0: return empty; michael@0: } michael@0: michael@0: int32_t michael@0: RuleBasedNumberFormat::getNumberOfRuleSetNames() const michael@0: { michael@0: int32_t result = 0; michael@0: if (localizations) { michael@0: result = localizations->getNumberOfRuleSets(); michael@0: } else if (ruleSets) { michael@0: for (NFRuleSet** p = ruleSets; *p; ++p) { michael@0: if ((**p).isPublic()) { michael@0: ++result; michael@0: } michael@0: } michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: int32_t michael@0: RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const { michael@0: if (localizations) { michael@0: return localizations->getNumberOfDisplayLocales(); michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: Locale michael@0: RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { michael@0: if (U_FAILURE(status)) { michael@0: return Locale(""); michael@0: } michael@0: if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { michael@0: UnicodeString name(TRUE, localizations->getLocaleName(index), -1); michael@0: char buffer[64]; michael@0: int32_t cap = name.length() + 1; michael@0: char* bp = buffer; michael@0: if (cap > 64) { michael@0: bp = (char *)uprv_malloc(cap); michael@0: if (bp == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return Locale(""); michael@0: } michael@0: } michael@0: name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); michael@0: Locale retLocale(bp); michael@0: if (bp != buffer) { michael@0: uprv_free(bp); michael@0: } michael@0: return retLocale; michael@0: } michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: Locale retLocale; michael@0: return retLocale; michael@0: } michael@0: michael@0: UnicodeString michael@0: RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) { michael@0: if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { michael@0: UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant); michael@0: int32_t len = localeName.length(); michael@0: UChar* localeStr = localeName.getBuffer(len + 1); michael@0: while (len >= 0) { michael@0: localeStr[len] = 0; michael@0: int32_t ix = localizations->indexForLocale(localeStr); michael@0: if (ix >= 0) { michael@0: UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1); michael@0: return name; michael@0: } michael@0: michael@0: // trim trailing portion, skipping over ommitted sections michael@0: do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore michael@0: while (len > 0 && localeStr[len-1] == 0x005F) --len; michael@0: } michael@0: UnicodeString name(TRUE, localizations->getRuleSetName(index), -1); michael@0: return name; michael@0: } michael@0: UnicodeString bogus; michael@0: bogus.setToBogus(); michael@0: return bogus; michael@0: } michael@0: michael@0: UnicodeString michael@0: RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) { michael@0: if (localizations) { michael@0: UnicodeString rsn(ruleSetName); michael@0: int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); michael@0: return getRuleSetDisplayName(ix, localeParam); michael@0: } michael@0: UnicodeString bogus; michael@0: bogus.setToBogus(); michael@0: return bogus; michael@0: } michael@0: michael@0: NFRuleSet* michael@0: RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const michael@0: { michael@0: if (U_SUCCESS(status) && ruleSets) { michael@0: for (NFRuleSet** p = ruleSets; *p; ++p) { michael@0: NFRuleSet* rs = *p; michael@0: if (rs->isNamed(name)) { michael@0: return rs; michael@0: } michael@0: } michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: UnicodeString& michael@0: RuleBasedNumberFormat::format(int32_t number, michael@0: UnicodeString& toAppendTo, michael@0: FieldPosition& /* pos */) const michael@0: { michael@0: if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length()); michael@0: return toAppendTo; michael@0: } michael@0: michael@0: michael@0: UnicodeString& michael@0: RuleBasedNumberFormat::format(int64_t number, michael@0: UnicodeString& toAppendTo, michael@0: FieldPosition& /* pos */) const michael@0: { michael@0: if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); michael@0: return toAppendTo; michael@0: } michael@0: michael@0: michael@0: UnicodeString& michael@0: RuleBasedNumberFormat::format(double number, michael@0: UnicodeString& toAppendTo, michael@0: FieldPosition& /* pos */) const michael@0: { michael@0: // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does. michael@0: if (uprv_isNaN(number)) { michael@0: DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal michael@0: if (decFmtSyms) { michael@0: toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol); michael@0: } michael@0: } else if (defaultRuleSet) { michael@0: defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); michael@0: } michael@0: return toAppendTo; michael@0: } michael@0: michael@0: michael@0: UnicodeString& michael@0: RuleBasedNumberFormat::format(int32_t number, michael@0: const UnicodeString& ruleSetName, michael@0: UnicodeString& toAppendTo, michael@0: FieldPosition& /* pos */, michael@0: UErrorCode& status) const michael@0: { michael@0: // return format((int64_t)number, ruleSetName, toAppendTo, pos, status); michael@0: if (U_SUCCESS(status)) { michael@0: if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { michael@0: // throw new IllegalArgumentException("Can't use internal rule set"); michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } else { michael@0: NFRuleSet *rs = findRuleSet(ruleSetName, status); michael@0: if (rs) { michael@0: rs->format((int64_t)number, toAppendTo, toAppendTo.length()); michael@0: } michael@0: } michael@0: } michael@0: return toAppendTo; michael@0: } michael@0: michael@0: michael@0: UnicodeString& michael@0: RuleBasedNumberFormat::format(int64_t number, michael@0: const UnicodeString& ruleSetName, michael@0: UnicodeString& toAppendTo, michael@0: FieldPosition& /* pos */, michael@0: UErrorCode& status) const michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { michael@0: // throw new IllegalArgumentException("Can't use internal rule set"); michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } else { michael@0: NFRuleSet *rs = findRuleSet(ruleSetName, status); michael@0: if (rs) { michael@0: rs->format(number, toAppendTo, toAppendTo.length()); michael@0: } michael@0: } michael@0: } michael@0: return toAppendTo; michael@0: } michael@0: michael@0: michael@0: UnicodeString& michael@0: RuleBasedNumberFormat::format(double number, michael@0: const UnicodeString& ruleSetName, michael@0: UnicodeString& toAppendTo, michael@0: FieldPosition& /* pos */, michael@0: UErrorCode& status) const michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { michael@0: // throw new IllegalArgumentException("Can't use internal rule set"); michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } else { michael@0: NFRuleSet *rs = findRuleSet(ruleSetName, status); michael@0: if (rs) { michael@0: rs->format(number, toAppendTo, toAppendTo.length()); michael@0: } michael@0: } michael@0: } michael@0: return toAppendTo; michael@0: } michael@0: michael@0: void michael@0: RuleBasedNumberFormat::parse(const UnicodeString& text, michael@0: Formattable& result, michael@0: ParsePosition& parsePosition) const michael@0: { michael@0: if (!ruleSets) { michael@0: parsePosition.setErrorIndex(0); michael@0: return; michael@0: } michael@0: michael@0: UnicodeString workingText(text, parsePosition.getIndex()); michael@0: ParsePosition workingPos(0); michael@0: michael@0: ParsePosition high_pp(0); michael@0: Formattable high_result; michael@0: michael@0: for (NFRuleSet** p = ruleSets; *p; ++p) { michael@0: NFRuleSet *rp = *p; michael@0: if (rp->isPublic() && rp->isParseable()) { michael@0: ParsePosition working_pp(0); michael@0: Formattable working_result; michael@0: michael@0: rp->parse(workingText, working_pp, kMaxDouble, working_result); michael@0: if (working_pp.getIndex() > high_pp.getIndex()) { michael@0: high_pp = working_pp; michael@0: high_result = working_result; michael@0: michael@0: if (high_pp.getIndex() == workingText.length()) { michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: int32_t startIndex = parsePosition.getIndex(); michael@0: parsePosition.setIndex(startIndex + high_pp.getIndex()); michael@0: if (high_pp.getIndex() > 0) { michael@0: parsePosition.setErrorIndex(-1); michael@0: } else { michael@0: int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; michael@0: parsePosition.setErrorIndex(startIndex + errorIndex); michael@0: } michael@0: result = high_result; michael@0: if (result.getType() == Formattable::kDouble) { michael@0: int32_t r = (int32_t)result.getDouble(); michael@0: if ((double)r == result.getDouble()) { michael@0: result.setLong(r); michael@0: } michael@0: } michael@0: } michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: void michael@0: RuleBasedNumberFormat::setLenient(UBool enabled) michael@0: { michael@0: lenient = enabled; michael@0: if (!enabled && collator) { michael@0: delete collator; michael@0: collator = NULL; michael@0: } michael@0: } michael@0: michael@0: #endif michael@0: michael@0: void michael@0: RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { michael@0: if (U_SUCCESS(status)) { michael@0: if (ruleSetName.isEmpty()) { michael@0: if (localizations) { michael@0: UnicodeString name(TRUE, localizations->getRuleSetName(0), -1); michael@0: defaultRuleSet = findRuleSet(name, status); michael@0: } else { michael@0: initDefaultRuleSet(); michael@0: } michael@0: } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } else { michael@0: NFRuleSet* result = findRuleSet(ruleSetName, status); michael@0: if (result != NULL) { michael@0: defaultRuleSet = result; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: UnicodeString michael@0: RuleBasedNumberFormat::getDefaultRuleSetName() const { michael@0: UnicodeString result; michael@0: if (defaultRuleSet && defaultRuleSet->isPublic()) { michael@0: defaultRuleSet->getName(result); michael@0: } else { michael@0: result.setToBogus(); michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: void michael@0: RuleBasedNumberFormat::initDefaultRuleSet() michael@0: { michael@0: defaultRuleSet = NULL; michael@0: if (!ruleSets) { michael@0: return; michael@0: } michael@0: michael@0: const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering"); michael@0: const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal"); michael@0: const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration"); michael@0: michael@0: NFRuleSet**p = &ruleSets[0]; michael@0: while (*p) { michael@0: if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { michael@0: defaultRuleSet = *p; michael@0: return; michael@0: } else { michael@0: ++p; michael@0: } michael@0: } michael@0: michael@0: defaultRuleSet = *--p; michael@0: if (!defaultRuleSet->isPublic()) { michael@0: while (p != ruleSets) { michael@0: if ((*--p)->isPublic()) { michael@0: defaultRuleSet = *p; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: michael@0: void michael@0: RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, michael@0: UParseError& pErr, UErrorCode& status) michael@0: { michael@0: // TODO: implement UParseError michael@0: uprv_memset(&pErr, 0, sizeof(UParseError)); michael@0: // Note: this can leave ruleSets == NULL, so remaining code should check michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: michael@0: this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref(); michael@0: michael@0: UnicodeString description(rules); michael@0: if (!description.length()) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: michael@0: // start by stripping the trailing whitespace from all the rules michael@0: // (this is all the whitespace follwing each semicolon in the michael@0: // description). This allows us to look for rule-set boundaries michael@0: // by searching for ";%" without having to worry about whitespace michael@0: // between the ; and the % michael@0: stripWhitespace(description); michael@0: michael@0: // check to see if there's a set of lenient-parse rules. If there michael@0: // is, pull them out into our temporary holding place for them, michael@0: // and delete them from the description before the real desciption- michael@0: // parsing code sees them michael@0: int32_t lp = description.indexOf(gLenientParse, -1, 0); michael@0: if (lp != -1) { michael@0: // we've got to make sure we're not in the middle of a rule michael@0: // (where "%%lenient-parse" would actually get treated as michael@0: // rule text) michael@0: if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { michael@0: // locate the beginning and end of the actual collation michael@0: // rules (there may be whitespace between the name and michael@0: // the first token in the description) michael@0: int lpEnd = description.indexOf(gSemiPercent, 2, lp); michael@0: michael@0: if (lpEnd == -1) { michael@0: lpEnd = description.length() - 1; michael@0: } michael@0: int lpStart = lp + u_strlen(gLenientParse); michael@0: while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { michael@0: ++lpStart; michael@0: } michael@0: michael@0: // copy out the lenient-parse rules and delete them michael@0: // from the description michael@0: lenientParseRules = new UnicodeString(); michael@0: /* test for NULL */ michael@0: if (lenientParseRules == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); michael@0: michael@0: description.remove(lp, lpEnd + 1 - lp); michael@0: } michael@0: } michael@0: michael@0: // pre-flight parsing the description and count the number of michael@0: // rule sets (";%" marks the end of one rule set and the beginning michael@0: // of the next) michael@0: numRuleSets = 0; michael@0: for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) { michael@0: ++numRuleSets; michael@0: ++p; michael@0: } michael@0: ++numRuleSets; michael@0: michael@0: // our rule list is an array of the appropriate size michael@0: ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); michael@0: /* test for NULL */ michael@0: if (ruleSets == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: michael@0: for (int i = 0; i <= numRuleSets; ++i) { michael@0: ruleSets[i] = NULL; michael@0: } michael@0: michael@0: // divide up the descriptions into individual rule-set descriptions michael@0: // and store them in a temporary array. At each step, we also michael@0: // new up a rule set, but all this does is initialize its name michael@0: // and remove it from its description. We can't actually parse michael@0: // the rest of the descriptions and finish initializing everything michael@0: // because we have to know the names and locations of all the rule michael@0: // sets before we can actually set everything up michael@0: if(!numRuleSets) { michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: ruleSetDescriptions = new UnicodeString[numRuleSets]; michael@0: if (ruleSetDescriptions == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: michael@0: { michael@0: int curRuleSet = 0; michael@0: int32_t start = 0; michael@0: for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { michael@0: ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); michael@0: ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); michael@0: if (ruleSets[curRuleSet] == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: ++curRuleSet; michael@0: start = p + 1; michael@0: } michael@0: ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); michael@0: ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); michael@0: if (ruleSets[curRuleSet] == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: } michael@0: michael@0: // now we can take note of the formatter's default rule set, which michael@0: // is the last public rule set in the description (it's the last michael@0: // rather than the first so that a user can create a new formatter michael@0: // from an existing formatter and change its default behavior just michael@0: // by appending more rule sets to the end) michael@0: michael@0: // {dlf} Initialization of a fraction rule set requires the default rule michael@0: // set to be known. For purposes of initialization, this is always the michael@0: // last public rule set, no matter what the localization data says. michael@0: initDefaultRuleSet(); michael@0: michael@0: // finally, we can go back through the temporary descriptions michael@0: // list and finish seting up the substructure (and we throw michael@0: // away the temporary descriptions as we go) michael@0: { michael@0: for (int i = 0; i < numRuleSets; i++) { michael@0: ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); michael@0: } michael@0: } michael@0: michael@0: // Now that the rules are initialized, the 'real' default rule michael@0: // set can be adjusted by the localization data. michael@0: michael@0: // The C code keeps the localization array as is, rather than building michael@0: // a separate array of the public rule set names, so we have less work michael@0: // to do here-- but we still need to check the names. michael@0: michael@0: if (localizationInfos) { michael@0: // confirm the names, if any aren't in the rules, that's an error michael@0: // it is ok if the rules contain public rule sets that are not in this list michael@0: for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { michael@0: UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1); michael@0: NFRuleSet* rs = findRuleSet(name, status); michael@0: if (rs == NULL) { michael@0: break; // error michael@0: } michael@0: if (i == 0) { michael@0: defaultRuleSet = rs; michael@0: } michael@0: } michael@0: } else { michael@0: defaultRuleSet = getDefaultRuleSet(); michael@0: } michael@0: } michael@0: michael@0: void michael@0: RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) michael@0: { michael@0: // iterate through the characters... michael@0: UnicodeString result; michael@0: michael@0: int start = 0; michael@0: while (start != -1 && start < description.length()) { michael@0: // seek to the first non-whitespace character... michael@0: while (start < description.length() michael@0: && PatternProps::isWhiteSpace(description.charAt(start))) { michael@0: ++start; michael@0: } michael@0: michael@0: // locate the next semicolon in the text and copy the text from michael@0: // our current position up to that semicolon into the result michael@0: int32_t p = description.indexOf(gSemiColon, start); michael@0: if (p == -1) { michael@0: // or if we don't find a semicolon, just copy the rest of michael@0: // the string into the result michael@0: result.append(description, start, description.length() - start); michael@0: start = -1; michael@0: } michael@0: else if (p < description.length()) { michael@0: result.append(description, start, p + 1 - start); michael@0: start = p + 1; michael@0: } michael@0: michael@0: // when we get here, we've seeked off the end of the sring, and michael@0: // we terminate the loop (we continue until *start* is -1 rather michael@0: // than until *p* is -1, because otherwise we'd miss the last michael@0: // rule in the description) michael@0: else { michael@0: start = -1; michael@0: } michael@0: } michael@0: michael@0: description.setTo(result); michael@0: } michael@0: michael@0: michael@0: void michael@0: RuleBasedNumberFormat::dispose() michael@0: { michael@0: if (ruleSets) { michael@0: for (NFRuleSet** p = ruleSets; *p; ++p) { michael@0: delete *p; michael@0: } michael@0: uprv_free(ruleSets); michael@0: ruleSets = NULL; michael@0: } michael@0: michael@0: if (ruleSetDescriptions) { michael@0: delete [] ruleSetDescriptions; michael@0: } michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: delete collator; michael@0: #endif michael@0: collator = NULL; michael@0: michael@0: delete decimalFormatSymbols; michael@0: decimalFormatSymbols = NULL; michael@0: michael@0: delete lenientParseRules; michael@0: lenientParseRules = NULL; michael@0: michael@0: if (localizations) localizations = localizations->unref(); michael@0: } michael@0: michael@0: michael@0: //----------------------------------------------------------------------- michael@0: // package-internal API michael@0: //----------------------------------------------------------------------- michael@0: michael@0: /** michael@0: * Returns the collator to use for lenient parsing. The collator is lazily created: michael@0: * this function creates it the first time it's called. michael@0: * @return The collator to use for lenient parsing, or null if lenient parsing michael@0: * is turned off. michael@0: */ michael@0: Collator* michael@0: RuleBasedNumberFormat::getCollator() const michael@0: { michael@0: #if !UCONFIG_NO_COLLATION michael@0: if (!ruleSets) { michael@0: return NULL; michael@0: } michael@0: michael@0: // lazy-evaulate the collator michael@0: if (collator == NULL && lenient) { michael@0: // create a default collator based on the formatter's locale, michael@0: // then pull out that collator's rules, append any additional michael@0: // rules specified in the description, and create a _new_ michael@0: // collator based on the combinaiton of those rules michael@0: michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: michael@0: Collator* temp = Collator::createInstance(locale, status); michael@0: RuleBasedCollator* newCollator; michael@0: if (U_SUCCESS(status) && (newCollator = dynamic_cast(temp)) != NULL) { michael@0: if (lenientParseRules) { michael@0: UnicodeString rules(newCollator->getRules()); michael@0: rules.append(*lenientParseRules); michael@0: michael@0: newCollator = new RuleBasedCollator(rules, status); michael@0: // Exit if newCollator could not be created. michael@0: if (newCollator == NULL) { michael@0: return NULL; michael@0: } michael@0: } else { michael@0: temp = NULL; michael@0: } michael@0: if (U_SUCCESS(status)) { michael@0: newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); michael@0: // cast away const michael@0: ((RuleBasedNumberFormat*)this)->collator = newCollator; michael@0: } else { michael@0: delete newCollator; michael@0: } michael@0: } michael@0: delete temp; michael@0: } michael@0: #endif michael@0: michael@0: // if lenient-parse mode is off, this will be null michael@0: // (see setLenientParseMode()) michael@0: return collator; michael@0: } michael@0: michael@0: michael@0: /** michael@0: * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat michael@0: * instances owned by this formatter. This object is lazily created: this function michael@0: * creates it the first time it's called. michael@0: * @return The DecimalFormatSymbols object that should be used by all DecimalFormat michael@0: * instances owned by this formatter. michael@0: */ michael@0: DecimalFormatSymbols* michael@0: RuleBasedNumberFormat::getDecimalFormatSymbols() const michael@0: { michael@0: // lazy-evaluate the DecimalFormatSymbols object. This object michael@0: // is shared by all DecimalFormat instances belonging to this michael@0: // formatter michael@0: if (decimalFormatSymbols == NULL) { michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status); michael@0: if (U_SUCCESS(status)) { michael@0: ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp; michael@0: } else { michael@0: delete temp; michael@0: } michael@0: } michael@0: return decimalFormatSymbols; michael@0: } michael@0: michael@0: // De-owning the current localized symbols and adopt the new symbols. michael@0: void michael@0: RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) michael@0: { michael@0: if (symbolsToAdopt == NULL) { michael@0: return; // do not allow caller to set decimalFormatSymbols to NULL michael@0: } michael@0: michael@0: if (decimalFormatSymbols != NULL) { michael@0: delete decimalFormatSymbols; michael@0: } michael@0: michael@0: decimalFormatSymbols = symbolsToAdopt; michael@0: michael@0: { michael@0: // Apply the new decimalFormatSymbols by reparsing the rulesets michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: michael@0: for (int32_t i = 0; i < numRuleSets; i++) { michael@0: ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); michael@0: } michael@0: } michael@0: } michael@0: michael@0: // Setting the symbols is equlivalent to adopting a newly created localized symbols. michael@0: void michael@0: RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) michael@0: { michael@0: adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: /* U_HAVE_RBNF */ michael@0: #endif