/*
*******************************************************************************
* Copyright (C) 1997-2013, International Business Machines Corporation and   *
* others. All Rights Reserved.                                                *
*******************************************************************************
*
* File CHOICFMT.CPP
*
* Modification History:
*
*   Date        Name        Description
*   02/19/97    aliu        Converted from java.
*   03/20/97    helena      Finished first cut of implementation and got rid
*                           of nextDouble/previousDouble and replaced with
*                           boolean array.
*   4/10/97     aliu        Clean up.  Modified to work on AIX.
*   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
*                           wchar.h.
*   07/09/97    helena      Made ParsePosition into a class.
*   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
*   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
*   02/22/99    stephen     Removed character literals for EBCDIC safety
********************************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

#include "unicode/choicfmt.h"
#include "unicode/numfmt.h"
#include "unicode/locid.h"
#include "cpputils.h"
#include "cstring.h"
#include "messageimpl.h"
#include "putilimp.h"
#include "uassert.h"
#include <stdio.h>
#include <float.h>
#include <ctype.h>

// *****************************************************************************
// class ChoiceFormat
// *****************************************************************************

U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat) michael@0: michael@0: // Special characters used by ChoiceFormat. There are two characters michael@0: // used interchangeably to indicate <=. Either is parsed, but only michael@0: // LESS_EQUAL is generated by toPattern(). michael@0: #define SINGLE_QUOTE ((UChar)0x0027) /*'*/ michael@0: #define LESS_THAN ((UChar)0x003C) /*<*/ michael@0: #define LESS_EQUAL ((UChar)0x0023) /*#*/ michael@0: #define LESS_EQUAL2 ((UChar)0x2264) michael@0: #define VERTICAL_BAR ((UChar)0x007C) /*|*/ michael@0: #define MINUS ((UChar)0x002D) /*-*/ michael@0: michael@0: static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/ michael@0: static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/ michael@0: michael@0: #ifdef INFINITY michael@0: #undef INFINITY michael@0: #endif michael@0: #define INFINITY ((UChar)0x221E) michael@0: michael@0: //static const UChar gPositiveInfinity[] = {INFINITY, 0}; michael@0: //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0}; michael@0: #define POSITIVE_INF_STRLEN 1 michael@0: #define NEGATIVE_INF_STRLEN 2 michael@0: michael@0: // ------------------------------------- michael@0: // Creates a ChoiceFormat instance based on the pattern. michael@0: michael@0: ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, michael@0: UErrorCode& status) michael@0: : constructorErrorCode(status), michael@0: msgPattern(status) michael@0: { michael@0: applyPattern(newPattern, status); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Creates a ChoiceFormat instance with the limit array and michael@0: // format strings for each limit. 
michael@0: michael@0: ChoiceFormat::ChoiceFormat(const double* limits, michael@0: const UnicodeString* formats, michael@0: int32_t cnt ) michael@0: : constructorErrorCode(U_ZERO_ERROR), michael@0: msgPattern(constructorErrorCode) michael@0: { michael@0: setChoices(limits, NULL, formats, cnt, constructorErrorCode); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: ChoiceFormat::ChoiceFormat(const double* limits, michael@0: const UBool* closures, michael@0: const UnicodeString* formats, michael@0: int32_t cnt ) michael@0: : constructorErrorCode(U_ZERO_ERROR), michael@0: msgPattern(constructorErrorCode) michael@0: { michael@0: setChoices(limits, closures, formats, cnt, constructorErrorCode); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // copy constructor michael@0: michael@0: ChoiceFormat::ChoiceFormat(const ChoiceFormat& that) michael@0: : NumberFormat(that), michael@0: constructorErrorCode(that.constructorErrorCode), michael@0: msgPattern(that.msgPattern) michael@0: { michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Private constructor that creates a michael@0: // ChoiceFormat instance based on the michael@0: // pattern and populates UParseError michael@0: michael@0: ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, michael@0: UParseError& parseError, michael@0: UErrorCode& status) michael@0: : constructorErrorCode(status), michael@0: msgPattern(status) michael@0: { michael@0: applyPattern(newPattern,parseError, status); michael@0: } michael@0: // ------------------------------------- michael@0: michael@0: UBool michael@0: ChoiceFormat::operator==(const Format& that) const michael@0: { michael@0: if (this == &that) return TRUE; michael@0: if (!NumberFormat::operator==(that)) return FALSE; michael@0: ChoiceFormat& thatAlias = (ChoiceFormat&)that; michael@0: return msgPattern == thatAlias.msgPattern; michael@0: } michael@0: 
michael@0: // ------------------------------------- michael@0: // copy constructor michael@0: michael@0: const ChoiceFormat& michael@0: ChoiceFormat::operator=(const ChoiceFormat& that) michael@0: { michael@0: if (this != &that) { michael@0: NumberFormat::operator=(that); michael@0: constructorErrorCode = that.constructorErrorCode; michael@0: msgPattern = that.msgPattern; michael@0: } michael@0: return *this; michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: ChoiceFormat::~ChoiceFormat() michael@0: { michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: /** michael@0: * Convert a double value to a string without the overhead of NumberFormat. michael@0: */ michael@0: UnicodeString& michael@0: ChoiceFormat::dtos(double value, michael@0: UnicodeString& string) michael@0: { michael@0: /* Buffer to contain the digits and any extra formatting stuff. */ michael@0: char temp[DBL_DIG + 16]; michael@0: char *itrPtr = temp; michael@0: char *expPtr; michael@0: michael@0: sprintf(temp, "%.*g", DBL_DIG, value); michael@0: michael@0: /* Find and convert the decimal point. michael@0: Using setlocale on some machines will cause sprintf to use a comma for certain locales. michael@0: */ michael@0: while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) { michael@0: itrPtr++; michael@0: } michael@0: if (*itrPtr != 0 && *itrPtr != 'e') { michael@0: /* We reached something that looks like a decimal point. michael@0: In case someone used setlocale(), which changes the decimal point. */ michael@0: *itrPtr = '.'; michael@0: itrPtr++; michael@0: } michael@0: /* Search for the exponent */ michael@0: while (*itrPtr && *itrPtr != 'e') { michael@0: itrPtr++; michael@0: } michael@0: if (*itrPtr == 'e') { michael@0: itrPtr++; michael@0: /* Verify the exponent sign */ michael@0: if (*itrPtr == '+' || *itrPtr == '-') { michael@0: itrPtr++; michael@0: } michael@0: /* Remove leading zeros. 
You will see this on Windows machines. */ michael@0: expPtr = itrPtr; michael@0: while (*itrPtr == '0') { michael@0: itrPtr++; michael@0: } michael@0: if (*itrPtr && expPtr != itrPtr) { michael@0: /* Shift the exponent without zeros. */ michael@0: while (*itrPtr) { michael@0: *(expPtr++) = *(itrPtr++); michael@0: } michael@0: // NULL terminate michael@0: *expPtr = 0; michael@0: } michael@0: } michael@0: michael@0: string = UnicodeString(temp, -1, US_INV); /* invariant codepage */ michael@0: return string; michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // calls the overloaded applyPattern method. michael@0: michael@0: void michael@0: ChoiceFormat::applyPattern(const UnicodeString& pattern, michael@0: UErrorCode& status) michael@0: { michael@0: msgPattern.parseChoiceStyle(pattern, NULL, status); michael@0: constructorErrorCode = status; michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Applies the pattern to this ChoiceFormat instance. michael@0: michael@0: void michael@0: ChoiceFormat::applyPattern(const UnicodeString& pattern, michael@0: UParseError& parseError, michael@0: UErrorCode& status) michael@0: { michael@0: msgPattern.parseChoiceStyle(pattern, &parseError, status); michael@0: constructorErrorCode = status; michael@0: } michael@0: // ------------------------------------- michael@0: // Returns the input pattern string. michael@0: michael@0: UnicodeString& michael@0: ChoiceFormat::toPattern(UnicodeString& result) const michael@0: { michael@0: return result = msgPattern.getPatternString(); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Sets the limit and format arrays. 
michael@0: void michael@0: ChoiceFormat::setChoices( const double* limits, michael@0: const UnicodeString* formats, michael@0: int32_t cnt ) michael@0: { michael@0: UErrorCode errorCode = U_ZERO_ERROR; michael@0: setChoices(limits, NULL, formats, cnt, errorCode); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Sets the limit and format arrays. michael@0: void michael@0: ChoiceFormat::setChoices( const double* limits, michael@0: const UBool* closures, michael@0: const UnicodeString* formats, michael@0: int32_t cnt ) michael@0: { michael@0: UErrorCode errorCode = U_ZERO_ERROR; michael@0: setChoices(limits, closures, formats, cnt, errorCode); michael@0: } michael@0: michael@0: void michael@0: ChoiceFormat::setChoices(const double* limits, michael@0: const UBool* closures, michael@0: const UnicodeString* formats, michael@0: int32_t count, michael@0: UErrorCode &errorCode) { michael@0: if (U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: if (limits == NULL || formats == NULL) { michael@0: errorCode = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: // Reconstruct the original input pattern. michael@0: // Modified version of the pre-ICU 4.8 toPattern() implementation. michael@0: UnicodeString result; michael@0: for (int32_t i = 0; i < count; ++i) { michael@0: if (i != 0) { michael@0: result += VERTICAL_BAR; michael@0: } michael@0: UnicodeString buf; michael@0: if (uprv_isPositiveInfinity(limits[i])) { michael@0: result += INFINITY; michael@0: } else if (uprv_isNegativeInfinity(limits[i])) { michael@0: result += MINUS; michael@0: result += INFINITY; michael@0: } else { michael@0: result += dtos(limits[i], buf); michael@0: } michael@0: if (closures != NULL && closures[i]) { michael@0: result += LESS_THAN; michael@0: } else { michael@0: result += LESS_EQUAL; michael@0: } michael@0: // Append formats[i], using quotes if there are special michael@0: // characters. 
Single quotes themselves must be escaped in michael@0: // either case. michael@0: const UnicodeString& text = formats[i]; michael@0: int32_t textLength = text.length(); michael@0: int32_t nestingLevel = 0; michael@0: for (int32_t j = 0; j < textLength; ++j) { michael@0: UChar c = text[j]; michael@0: if (c == SINGLE_QUOTE && nestingLevel == 0) { michael@0: // Double each top-level apostrophe. michael@0: result.append(c); michael@0: } else if (c == VERTICAL_BAR && nestingLevel == 0) { michael@0: // Surround each pipe symbol with apostrophes for quoting. michael@0: // If the next character is an apostrophe, then that will be doubled, michael@0: // and although the parser will see the apostrophe pairs beginning michael@0: // and ending one character earlier than our doubling, the result michael@0: // is as desired. michael@0: // | -> '|' michael@0: // |' -> '|''' michael@0: // |'' -> '|''''' etc. michael@0: result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE); michael@0: continue; // Skip the append(c) at the end of the loop body. michael@0: } else if (c == LEFT_CURLY_BRACE) { michael@0: ++nestingLevel; michael@0: } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) { michael@0: --nestingLevel; michael@0: } michael@0: result.append(c); michael@0: } michael@0: } michael@0: // Apply the reconstructed pattern. michael@0: applyPattern(result, errorCode); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Gets the limit array. michael@0: michael@0: const double* michael@0: ChoiceFormat::getLimits(int32_t& cnt) const michael@0: { michael@0: cnt = 0; michael@0: return NULL; michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Gets the closures array. 
michael@0: michael@0: const UBool* michael@0: ChoiceFormat::getClosures(int32_t& cnt) const michael@0: { michael@0: cnt = 0; michael@0: return NULL; michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Gets the format array. michael@0: michael@0: const UnicodeString* michael@0: ChoiceFormat::getFormats(int32_t& cnt) const michael@0: { michael@0: cnt = 0; michael@0: return NULL; michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Formats an int64 number, it's actually formatted as michael@0: // a double. The returned format string may differ michael@0: // from the input number because of this. michael@0: michael@0: UnicodeString& michael@0: ChoiceFormat::format(int64_t number, michael@0: UnicodeString& appendTo, michael@0: FieldPosition& status) const michael@0: { michael@0: return format((double) number, appendTo, status); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Formats an int32_t number, it's actually formatted as michael@0: // a double. michael@0: michael@0: UnicodeString& michael@0: ChoiceFormat::format(int32_t number, michael@0: UnicodeString& appendTo, michael@0: FieldPosition& status) const michael@0: { michael@0: return format((double) number, appendTo, status); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: // Formats a double number. michael@0: michael@0: UnicodeString& michael@0: ChoiceFormat::format(double number, michael@0: UnicodeString& appendTo, michael@0: FieldPosition& /*pos*/) const michael@0: { michael@0: if (msgPattern.countParts() == 0) { michael@0: // No pattern was applied, or it failed. michael@0: return appendTo; michael@0: } michael@0: // Get the appropriate sub-message. 
michael@0: int32_t msgStart = findSubMessage(msgPattern, 0, number); michael@0: if (!MessageImpl::jdkAposMode(msgPattern)) { michael@0: int32_t patternStart = msgPattern.getPart(msgStart).getLimit(); michael@0: int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart); michael@0: appendTo.append(msgPattern.getPatternString(), michael@0: patternStart, michael@0: msgPattern.getPatternIndex(msgLimit) - patternStart); michael@0: return appendTo; michael@0: } michael@0: // JDK compatibility mode: Remove SKIP_SYNTAX. michael@0: return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo); michael@0: } michael@0: michael@0: int32_t michael@0: ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) { michael@0: int32_t count = pattern.countParts(); michael@0: int32_t msgStart; michael@0: // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples michael@0: // until ARG_LIMIT or end of choice-only pattern. michael@0: // Ignore the first number and selector and start the loop on the first message. michael@0: partIndex += 2; michael@0: for (;;) { michael@0: // Skip but remember the current sub-message. michael@0: msgStart = partIndex; michael@0: partIndex = pattern.getLimitPartIndex(partIndex); michael@0: if (++partIndex >= count) { michael@0: // Reached the end of the choice-only pattern. michael@0: // Return with the last sub-message. michael@0: break; michael@0: } michael@0: const MessagePattern::Part &part = pattern.getPart(partIndex++); michael@0: UMessagePatternPartType type = part.getType(); michael@0: if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) { michael@0: // Reached the end of the ChoiceFormat style. michael@0: // Return with the last sub-message. michael@0: break; michael@0: } michael@0: // part is an ARG_INT or ARG_DOUBLE michael@0: U_ASSERT(MessagePattern::Part::hasNumericValue(type)); michael@0: double boundary = pattern.getNumericValue(part); michael@0: // Fetch the ARG_SELECTOR character. 
michael@0: int32_t selectorIndex = pattern.getPatternIndex(partIndex++); michael@0: UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex); michael@0: if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) { michael@0: // The number is in the interval between the previous boundary and the current one. michael@0: // Return with the sub-message between them. michael@0: // The !(a>b) and !(a>=b) comparisons are equivalent to michael@0: // (a<=b) and (a= 0) { michael@0: int32_t newIndex = start + len; michael@0: if (newIndex > furthest) { michael@0: furthest = newIndex; michael@0: bestNumber = tempNumber; michael@0: if (furthest == source.length()) { michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: partIndex = msgLimit + 1; michael@0: } michael@0: if (furthest == start) { michael@0: pos.setErrorIndex(start); michael@0: } else { michael@0: pos.setIndex(furthest); michael@0: } michael@0: return bestNumber; michael@0: } michael@0: michael@0: int32_t michael@0: ChoiceFormat::matchStringUntilLimitPart( michael@0: const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex, michael@0: const UnicodeString &source, int32_t sourceOffset) { michael@0: int32_t matchingSourceLength = 0; michael@0: const UnicodeString &msgString = pattern.getPatternString(); michael@0: int32_t prevIndex = pattern.getPart(partIndex).getLimit(); michael@0: for (;;) { michael@0: const MessagePattern::Part &part = pattern.getPart(++partIndex); michael@0: if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { michael@0: int32_t index = part.getIndex(); michael@0: int32_t length = index - prevIndex; michael@0: if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) { michael@0: return -1; // mismatch michael@0: } michael@0: matchingSourceLength += length; michael@0: if (partIndex == limitPartIndex) { michael@0: return matchingSourceLength; michael@0: } michael@0: 
prevIndex = part.getLimit(); // SKIP_SYNTAX michael@0: } michael@0: } michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: Format* michael@0: ChoiceFormat::clone() const michael@0: { michael@0: ChoiceFormat *aCopy = new ChoiceFormat(*this); michael@0: return aCopy; michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_FORMATTING */ michael@0: michael@0: //eof