michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2011-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * file name: messagepattern.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2011mar14 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_FORMATTING michael@0: michael@0: #include "unicode/messagepattern.h" michael@0: #include "unicode/unistr.h" michael@0: #include "unicode/utf16.h" michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "messageimpl.h" michael@0: #include "patternprops.h" michael@0: #include "putilimp.h" michael@0: #include "uassert.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: // Unicode character/code point constants ---------------------------------- *** michael@0: michael@0: static const UChar u_pound=0x23; michael@0: static const UChar u_apos=0x27; michael@0: static const UChar u_plus=0x2B; michael@0: static const UChar u_comma=0x2C; michael@0: static const UChar u_minus=0x2D; michael@0: static const UChar u_dot=0x2E; michael@0: static const UChar u_colon=0x3A; michael@0: static const UChar u_lessThan=0x3C; michael@0: static const UChar u_equal=0x3D; michael@0: static const UChar u_A=0x41; michael@0: static const UChar u_C=0x43; michael@0: static const UChar u_D=0x44; michael@0: static const UChar u_E=0x45; michael@0: static const UChar u_H=0x48; michael@0: static const UChar u_I=0x49; michael@0: static const UChar u_L=0x4C; michael@0: static const UChar u_N=0x4E; michael@0: static const UChar u_O=0x4F; michael@0: static const UChar u_P=0x50; michael@0: static const UChar u_R=0x52; michael@0: static const UChar u_S=0x53; michael@0: static const UChar u_T=0x54; michael@0: static const UChar u_U=0x55; michael@0: static const UChar u_Z=0x5A; michael@0: static const UChar u_a=0x61; michael@0: static const UChar u_c=0x63; michael@0: static const UChar u_d=0x64; michael@0: static const UChar u_e=0x65; michael@0: static const UChar u_f=0x66; michael@0: static const UChar u_h=0x68; michael@0: static const UChar u_i=0x69; michael@0: static const UChar u_l=0x6C; michael@0: static const UChar u_n=0x6E; michael@0: static const UChar u_o=0x6F; michael@0: static const UChar u_p=0x70; michael@0: static const UChar u_r=0x72; michael@0: static const UChar u_s=0x73; michael@0: static const UChar u_t=0x74; michael@0: static const UChar u_u=0x75; michael@0: static const UChar u_z=0x7A; michael@0: static const UChar u_leftCurlyBrace=0x7B; michael@0: static const UChar u_pipe=0x7C; michael@0: static const UChar u_rightCurlyBrace=0x7D; michael@0: static const UChar u_lessOrEqual=0x2264; // U+2264 is <= michael@0: michael@0: static const UChar kOffsetColon[]={ // "offset:" michael@0: u_o, u_f, u_f, u_s, u_e, u_t, u_colon michael@0: }; michael@0: michael@0: static const UChar kOther[]={ // "other" michael@0: u_o, u_t, u_h, u_e, u_r michael@0: }; michael@0: michael@0: // MessagePatternList ------------------------------------------------------ *** michael@0: michael@0: template michael@0: class MessagePatternList : public UMemory { michael@0: public: michael@0: MessagePatternList() {} michael@0: void copyFrom(const MessagePatternList &other, michael@0: int32_t length, michael@0: UErrorCode &errorCode); michael@0: UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode); michael@0: UBool equals(const MessagePatternList &other, int32_t length) const { michael@0: for(int32_t i=0; i a; michael@0: }; michael@0: michael@0: template michael@0: void michael@0: MessagePatternList::copyFrom( michael@0: const MessagePatternList &other, michael@0: int32_t length, michael@0: UErrorCode &errorCode) { michael@0: if(U_SUCCESS(errorCode) && length>0) { michael@0: if(length>a.getCapacity() && NULL==a.resize(length)) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: uprv_memcpy(a.getAlias(), other.a.getAlias(), length*sizeof(T)); michael@0: } michael@0: } michael@0: michael@0: template michael@0: UBool michael@0: MessagePatternList::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return FALSE; michael@0: } michael@0: if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=NULL) { michael@0: return TRUE; michael@0: } michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return FALSE; michael@0: } michael@0: michael@0: // MessagePatternList specializations -------------------------------------- *** michael@0: michael@0: class MessagePatternDoubleList : public MessagePatternList { michael@0: }; michael@0: michael@0: class MessagePatternPartsList : public MessagePatternList { michael@0: }; michael@0: michael@0: // MessagePattern constructors etc. ---------------------------------------- *** michael@0: michael@0: MessagePattern::MessagePattern(UErrorCode &errorCode) michael@0: : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), michael@0: partsList(NULL), parts(NULL), partsLength(0), michael@0: numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), michael@0: hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { michael@0: init(errorCode); michael@0: } michael@0: michael@0: MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode) michael@0: : aposMode(mode), michael@0: partsList(NULL), parts(NULL), partsLength(0), michael@0: numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), michael@0: hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { michael@0: init(errorCode); michael@0: } michael@0: michael@0: MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) michael@0: : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), michael@0: partsList(NULL), parts(NULL), partsLength(0), michael@0: numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), michael@0: hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { michael@0: if(init(errorCode)) { michael@0: parse(pattern, parseError, errorCode); michael@0: } michael@0: } michael@0: michael@0: UBool michael@0: MessagePattern::init(UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return FALSE; michael@0: } michael@0: partsList=new MessagePatternPartsList(); michael@0: if(partsList==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return FALSE; michael@0: } michael@0: parts=partsList->a.getAlias(); michael@0: return TRUE; michael@0: } michael@0: michael@0: MessagePattern::MessagePattern(const MessagePattern &other) michael@0: : UObject(other), aposMode(other.aposMode), msg(other.msg), michael@0: partsList(NULL), parts(NULL), partsLength(0), michael@0: numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), michael@0: hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers), michael@0: needsAutoQuoting(other.needsAutoQuoting) { michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: if(!copyStorage(other, errorCode)) { michael@0: clear(); michael@0: } michael@0: } michael@0: michael@0: MessagePattern & michael@0: MessagePattern::operator=(const MessagePattern &other) { michael@0: if(this==&other) { michael@0: return *this; michael@0: } michael@0: aposMode=other.aposMode; michael@0: msg=other.msg; michael@0: hasArgNames=other.hasArgNames; michael@0: hasArgNumbers=other.hasArgNumbers; michael@0: needsAutoQuoting=other.needsAutoQuoting; michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: if(!copyStorage(other, errorCode)) { michael@0: clear(); michael@0: } michael@0: return *this; michael@0: } michael@0: michael@0: UBool michael@0: MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return FALSE; michael@0: } michael@0: parts=NULL; michael@0: partsLength=0; michael@0: numericValues=NULL; michael@0: numericValuesLength=0; michael@0: if(partsList==NULL) { michael@0: partsList=new MessagePatternPartsList(); michael@0: if(partsList==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return FALSE; michael@0: } michael@0: parts=partsList->a.getAlias(); michael@0: } michael@0: if(other.partsLength>0) { michael@0: partsList->copyFrom(*other.partsList, other.partsLength, errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return FALSE; michael@0: } michael@0: parts=partsList->a.getAlias(); michael@0: partsLength=other.partsLength; michael@0: } michael@0: if(other.numericValuesLength>0) { michael@0: if(numericValuesList==NULL) { michael@0: numericValuesList=new MessagePatternDoubleList(); michael@0: if(numericValuesList==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return FALSE; michael@0: } michael@0: numericValues=numericValuesList->a.getAlias(); michael@0: } michael@0: numericValuesList->copyFrom( michael@0: *other.numericValuesList, other.numericValuesLength, errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return FALSE; michael@0: } michael@0: numericValues=numericValuesList->a.getAlias(); michael@0: numericValuesLength=other.numericValuesLength; michael@0: } michael@0: return TRUE; michael@0: } michael@0: michael@0: MessagePattern::~MessagePattern() { michael@0: delete partsList; michael@0: delete numericValuesList; michael@0: } michael@0: michael@0: // MessagePattern API ------------------------------------------------------ *** michael@0: michael@0: MessagePattern & michael@0: MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { michael@0: preParse(pattern, parseError, errorCode); michael@0: parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode); michael@0: postParse(); michael@0: return *this; michael@0: } michael@0: michael@0: MessagePattern & michael@0: MessagePattern::parseChoiceStyle(const UnicodeString &pattern, michael@0: UParseError *parseError, UErrorCode &errorCode) { michael@0: preParse(pattern, parseError, errorCode); michael@0: parseChoiceStyle(0, 0, parseError, errorCode); michael@0: postParse(); michael@0: return *this; michael@0: } michael@0: michael@0: MessagePattern & michael@0: MessagePattern::parsePluralStyle(const UnicodeString &pattern, michael@0: UParseError *parseError, UErrorCode &errorCode) { michael@0: preParse(pattern, parseError, errorCode); michael@0: parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode); michael@0: postParse(); michael@0: return *this; michael@0: } michael@0: michael@0: MessagePattern & michael@0: MessagePattern::parseSelectStyle(const UnicodeString &pattern, michael@0: UParseError *parseError, UErrorCode &errorCode) { michael@0: preParse(pattern, parseError, errorCode); michael@0: parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode); michael@0: postParse(); michael@0: return *this; michael@0: } michael@0: michael@0: void michael@0: MessagePattern::clear() { michael@0: // Mostly the same as preParse(). michael@0: msg.remove(); michael@0: hasArgNames=hasArgNumbers=FALSE; michael@0: needsAutoQuoting=FALSE; michael@0: partsLength=0; michael@0: numericValuesLength=0; michael@0: } michael@0: michael@0: UBool michael@0: MessagePattern::operator==(const MessagePattern &other) const { michael@0: if(this==&other) { michael@0: return TRUE; michael@0: } michael@0: return michael@0: aposMode==other.aposMode && michael@0: msg==other.msg && michael@0: // parts.equals(o.parts) michael@0: partsLength==other.partsLength && michael@0: (partsLength==0 || partsList->equals(*other.partsList, partsLength)); michael@0: // No need to compare numericValues if msg and parts are the same. michael@0: } michael@0: michael@0: int32_t michael@0: MessagePattern::hashCode() const { michael@0: int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength; michael@0: for(int32_t i=0; i0;) { michael@0: const Part &part=getPart(--i); michael@0: if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) { michael@0: modified.insert(part.index, (UChar)part.value); michael@0: } michael@0: } michael@0: return modified; michael@0: } michael@0: michael@0: double michael@0: MessagePattern::getNumericValue(const Part &part) const { michael@0: UMessagePatternPartType type=part.type; michael@0: if(type==UMSGPAT_PART_TYPE_ARG_INT) { michael@0: return part.value; michael@0: } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) { michael@0: return numericValues[part.value]; michael@0: } else { michael@0: return UMSGPAT_NO_NUMERIC_VALUE; michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. michael@0: * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) michael@0: * @return the "offset:" value. michael@0: * @draft ICU 4.8 michael@0: */ michael@0: double michael@0: MessagePattern::getPluralOffset(int32_t pluralStart) const { michael@0: const Part &part=getPart(pluralStart); michael@0: if(Part::hasNumericValue(part.type)) { michael@0: return getNumericValue(part); michael@0: } else { michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: // MessagePattern::Part ---------------------------------------------------- *** michael@0: michael@0: UBool michael@0: MessagePattern::Part::operator==(const Part &other) const { michael@0: if(this==&other) { michael@0: return TRUE; michael@0: } michael@0: return michael@0: type==other.type && michael@0: index==other.index && michael@0: length==other.length && michael@0: value==other.value && michael@0: limitPartIndex==other.limitPartIndex; michael@0: } michael@0: michael@0: // MessagePattern parser --------------------------------------------------- *** michael@0: michael@0: void michael@0: MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: if(parseError!=NULL) { michael@0: parseError->line=0; michael@0: parseError->offset=0; michael@0: parseError->preContext[0]=0; michael@0: parseError->postContext[0]=0; michael@0: } michael@0: msg=pattern; michael@0: hasArgNames=hasArgNumbers=FALSE; michael@0: needsAutoQuoting=FALSE; michael@0: partsLength=0; michael@0: numericValuesLength=0; michael@0: } michael@0: michael@0: void michael@0: MessagePattern::postParse() { michael@0: if(partsList!=NULL) { michael@0: parts=partsList->a.getAlias(); michael@0: } michael@0: if(numericValuesList!=NULL) { michael@0: numericValues=numericValuesList->a.getAlias(); michael@0: } michael@0: } michael@0: michael@0: int32_t michael@0: MessagePattern::parseMessage(int32_t index, int32_t msgStartLength, michael@0: int32_t nestingLevel, UMessagePatternArgType parentType, michael@0: UParseError *parseError, UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: if(nestingLevel>Part::MAX_VALUE) { michael@0: errorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: int32_t msgStart=partsLength; michael@0: addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode); michael@0: index+=msgStartLength; michael@0: for(;;) { // while(index=msg.length()) { michael@0: break; michael@0: } michael@0: UChar c=msg.charAt(index++); michael@0: if(c==u_apos) { michael@0: if(index==msg.length()) { michael@0: // The apostrophe is the last character in the pattern. michael@0: // Add a Part for auto-quoting. michael@0: addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, michael@0: u_apos, errorCode); // value=char to be inserted michael@0: needsAutoQuoting=TRUE; michael@0: } else { michael@0: c=msg.charAt(index); michael@0: if(c==u_apos) { michael@0: // double apostrophe, skip the second one michael@0: addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode); michael@0: } else if( michael@0: aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED || michael@0: c==u_leftCurlyBrace || c==u_rightCurlyBrace || michael@0: (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) || michael@0: (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) michael@0: ) { michael@0: // skip the quote-starting apostrophe michael@0: addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode); michael@0: // find the end of the quoted literal text michael@0: for(;;) { michael@0: index=msg.indexOf(u_apos, index+1); michael@0: if(index>=0) { michael@0: if(/*(index+1)0 && c==u_rightCurlyBrace) || michael@0: (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) { michael@0: // Finish the message before the terminator. michael@0: // In a choice style, report the "}" substring only for the following ARG_LIMIT, michael@0: // not for this MSG_LIMIT. michael@0: int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1; michael@0: addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength, michael@0: nestingLevel, errorCode); michael@0: if(parentType==UMSGPAT_ARG_TYPE_CHOICE) { michael@0: // Let the choice style parser see the '}' or '|'. michael@0: return index-1; michael@0: } else { michael@0: // continue parsing after the '}' michael@0: return index; michael@0: } michael@0: } // else: c is part of literal text michael@0: } michael@0: if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) { michael@0: setParseError(parseError, 0); // Unmatched '{' braces in message. michael@0: errorCode=U_UNMATCHED_BRACES; michael@0: return 0; michael@0: } michael@0: addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode); michael@0: return index; michael@0: } michael@0: michael@0: int32_t michael@0: MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, michael@0: UParseError *parseError, UErrorCode &errorCode) { michael@0: int32_t argStart=partsLength; michael@0: UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE; michael@0: addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: int32_t nameIndex=index=skipWhiteSpace(index+argStartLength); michael@0: if(index==msg.length()) { michael@0: setParseError(parseError, 0); // Unmatched '{' braces in message. michael@0: errorCode=U_UNMATCHED_BRACES; michael@0: return 0; michael@0: } michael@0: // parse argument name or number michael@0: index=skipIdentifier(index); michael@0: int32_t number=parseArgNumber(nameIndex, index); michael@0: if(number>=0) { michael@0: int32_t length=index-nameIndex; michael@0: if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) { michael@0: setParseError(parseError, nameIndex); // Argument number too large. michael@0: errorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: hasArgNumbers=TRUE; michael@0: addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode); michael@0: } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) { michael@0: int32_t length=index-nameIndex; michael@0: if(length>Part::MAX_LENGTH) { michael@0: setParseError(parseError, nameIndex); // Argument name too long. michael@0: errorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: hasArgNames=TRUE; michael@0: addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode); michael@0: } else { // number<-1 (ARG_NAME_NOT_VALID) michael@0: setParseError(parseError, nameIndex); // Bad argument syntax. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: index=skipWhiteSpace(index); michael@0: if(index==msg.length()) { michael@0: setParseError(parseError, 0); // Unmatched '{' braces in message. michael@0: errorCode=U_UNMATCHED_BRACES; michael@0: return 0; michael@0: } michael@0: UChar c=msg.charAt(index); michael@0: if(c==u_rightCurlyBrace) { michael@0: // all done michael@0: } else if(c!=u_comma) { michael@0: setParseError(parseError, nameIndex); // Bad argument syntax. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } else /* ',' */ { michael@0: // parse argument type: case-sensitive a-zA-Z michael@0: int32_t typeIndex=index=skipWhiteSpace(index+1); michael@0: while(indexPart::MAX_LENGTH) { michael@0: setParseError(parseError, nameIndex); // Argument type name too long. michael@0: errorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: argType=UMSGPAT_ARG_TYPE_SIMPLE; michael@0: if(length==6) { michael@0: // case-insensitive comparisons for complex-type names michael@0: if(isChoice(typeIndex)) { michael@0: argType=UMSGPAT_ARG_TYPE_CHOICE; michael@0: } else if(isPlural(typeIndex)) { michael@0: argType=UMSGPAT_ARG_TYPE_PLURAL; michael@0: } else if(isSelect(typeIndex)) { michael@0: argType=UMSGPAT_ARG_TYPE_SELECT; michael@0: } michael@0: } else if(length==13) { michael@0: if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) { michael@0: argType=UMSGPAT_ARG_TYPE_SELECTORDINAL; michael@0: } michael@0: } michael@0: // change the ARG_START type from NONE to argType michael@0: partsList->a[argStart].value=(int16_t)argType; michael@0: if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { michael@0: addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode); michael@0: } michael@0: // look for an argument style (pattern) michael@0: if(c==u_rightCurlyBrace) { michael@0: if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) { michael@0: setParseError(parseError, nameIndex); // No style field for complex argument. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: } else /* ',' */ { michael@0: ++index; michael@0: if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { michael@0: index=parseSimpleStyle(index, parseError, errorCode); michael@0: } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) { michael@0: index=parseChoiceStyle(index, nestingLevel, parseError, errorCode); michael@0: } else { michael@0: index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode); michael@0: } michael@0: } michael@0: } michael@0: // Argument parsing stopped on the '}'. michael@0: addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode); michael@0: return index+1; michael@0: } michael@0: michael@0: int32_t michael@0: MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: int32_t start=index; michael@0: int32_t nestedBraces=0; michael@0: while(index0) { michael@0: --nestedBraces; michael@0: } else { michael@0: int32_t length=--index-start; michael@0: if(length>Part::MAX_LENGTH) { michael@0: setParseError(parseError, start); // Argument style text too long. michael@0: errorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode); michael@0: return index; michael@0: } michael@0: } // c is part of literal text michael@0: } michael@0: setParseError(parseError, 0); // Unmatched '{' braces in message. michael@0: errorCode=U_UNMATCHED_BRACES; michael@0: return 0; michael@0: } michael@0: michael@0: int32_t michael@0: MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel, michael@0: UParseError *parseError, UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: int32_t start=index; michael@0: index=skipWhiteSpace(index); michael@0: if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) { michael@0: setParseError(parseError, 0); // Missing choice argument pattern. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: for(;;) { michael@0: // The choice argument style contains |-separated (number, separator, message) triples. michael@0: // Parse the number. michael@0: int32_t numberIndex=index; michael@0: index=skipDouble(index); michael@0: int32_t length=index-numberIndex; michael@0: if(length==0) { michael@0: setParseError(parseError, start); // Bad choice pattern syntax. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: if(length>Part::MAX_LENGTH) { michael@0: setParseError(parseError, numberIndex); // Choice number too long. michael@0: errorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: parseDouble(numberIndex, index, TRUE, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: // Parse the separator. michael@0: index=skipWhiteSpace(index); michael@0: if(index==msg.length()) { michael@0: setParseError(parseError, start); // Bad choice pattern syntax. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: UChar c=msg.charAt(index); michael@0: if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <= michael@0: setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode); michael@0: // Parse the message fragment. michael@0: index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length(). michael@0: if(index==msg.length()) { michael@0: return index; michael@0: } michael@0: if(msg.charAt(index)==u_rightCurlyBrace) { michael@0: if(!inMessageFormatPattern(nestingLevel)) { michael@0: setParseError(parseError, start); // Bad choice pattern syntax. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: return index; michael@0: } // else the terminator is '|' michael@0: index=skipWhiteSpace(index+1); michael@0: } michael@0: } michael@0: michael@0: int32_t michael@0: MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType, michael@0: int32_t index, int32_t nestingLevel, michael@0: UParseError *parseError, UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: int32_t start=index; michael@0: UBool isEmpty=TRUE; michael@0: UBool hasOther=FALSE; michael@0: for(;;) { michael@0: // First, collect the selector looking for a small set of terminators. michael@0: // It would be a little faster to consider the syntax of each possible michael@0: // token right here, but that makes the code too complicated. michael@0: index=skipWhiteSpace(index); michael@0: UBool eos=index==msg.length(); michael@0: if(eos || msg.charAt(index)==u_rightCurlyBrace) { michael@0: if(eos==inMessageFormatPattern(nestingLevel)) { michael@0: setParseError(parseError, start); // Bad plural/select pattern syntax. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: if(!hasOther) { michael@0: setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern. michael@0: errorCode=U_DEFAULT_KEYWORD_MISSING; michael@0: return 0; michael@0: } michael@0: return index; michael@0: } michael@0: int32_t selectorIndex=index; michael@0: if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) { michael@0: // explicit-value plural selector: =double michael@0: index=skipDouble(index+1); michael@0: int32_t length=index-selectorIndex; michael@0: if(length==1) { michael@0: setParseError(parseError, start); // Bad plural/select pattern syntax. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: if(length>Part::MAX_LENGTH) { michael@0: setParseError(parseError, selectorIndex); // Argument selector too long. michael@0: errorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); michael@0: parseDouble(selectorIndex+1, index, FALSE, michael@0: parseError, errorCode); // adds ARG_INT or ARG_DOUBLE michael@0: } else { michael@0: index=skipIdentifier(index); michael@0: int32_t length=index-selectorIndex; michael@0: if(length==0) { michael@0: setParseError(parseError, start); // Bad plural/select pattern syntax. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: // Note: The ':' in "offset:" is just beyond the skipIdentifier() range. michael@0: if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && indexPart::MAX_LENGTH) { michael@0: setParseError(parseError, valueIndex); // Plural offset value too long. michael@0: errorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: parseDouble(valueIndex, index, FALSE, michael@0: parseError, errorCode); // adds ARG_INT or ARG_DOUBLE michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: isEmpty=FALSE; michael@0: continue; // no message fragment after the offset michael@0: } else { michael@0: // normal selector word michael@0: if(length>Part::MAX_LENGTH) { michael@0: setParseError(parseError, selectorIndex); // Argument selector too long. michael@0: errorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); michael@0: if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) { michael@0: hasOther=TRUE; michael@0: } michael@0: } michael@0: } michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: // parse the message fragment following the selector michael@0: index=skipWhiteSpace(index); michael@0: if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) { michael@0: setParseError(parseError, selectorIndex); // No message fragment after plural/select selector. michael@0: errorCode=U_PATTERN_SYNTAX_ERROR; michael@0: return 0; michael@0: } michael@0: index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: isEmpty=FALSE; michael@0: } michael@0: } michael@0: michael@0: int32_t michael@0: MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) { michael@0: // If the identifier contains only ASCII digits, then it is an argument _number_ michael@0: // and must not have leading zeros (except "0" itself). michael@0: // Otherwise it is an argument _name_. michael@0: if(start>=limit) { michael@0: return UMSGPAT_ARG_NAME_NOT_VALID; michael@0: } michael@0: int32_t number; michael@0: // Defer numeric errors until we know there are only digits. michael@0: UBool badNumber; michael@0: UChar c=s.charAt(start++); michael@0: if(c==0x30) { michael@0: if(start==limit) { michael@0: return 0; michael@0: } else { michael@0: number=0; michael@0: badNumber=TRUE; // leading zero michael@0: } michael@0: } else if(0x31<=c && c<=0x39) { michael@0: number=c-0x30; michael@0: badNumber=FALSE; michael@0: } else { michael@0: return UMSGPAT_ARG_NAME_NOT_NUMBER; michael@0: } michael@0: while(start=INT32_MAX/10) { michael@0: badNumber=TRUE; // overflow michael@0: } michael@0: number=number*10+(c-0x30); michael@0: } else { michael@0: return UMSGPAT_ARG_NAME_NOT_NUMBER; michael@0: } michael@0: } michael@0: // There are only ASCII digits. michael@0: if(badNumber) { michael@0: return UMSGPAT_ARG_NAME_NOT_VALID; michael@0: } else { michael@0: return number; michael@0: } michael@0: } michael@0: michael@0: void michael@0: MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity, michael@0: UParseError *parseError, UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: U_ASSERT(start(Part::MAX_VALUE+isNegative)) { michael@0: break; // not a small-enough integer michael@0: } michael@0: if(index==limit) { michael@0: addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start, michael@0: isNegative!=0 ? -value : value, errorCode); michael@0: return; michael@0: } michael@0: c=msg.charAt(index++); michael@0: } michael@0: // Let Double.parseDouble() throw a NumberFormatException. michael@0: char numberChars[128]; michael@0: int32_t capacity=(int32_t)sizeof(numberChars); michael@0: int32_t length=limit-start; michael@0: if(length>=capacity) { michael@0: break; // number too long michael@0: } michael@0: msg.extract(start, length, numberChars, capacity, US_INV); michael@0: if((int32_t)uprv_strlen(numberChars)0x39 && c!=u_e && c!=u_E && c!=0x221e)) { michael@0: break; michael@0: } michael@0: ++index; michael@0: } michael@0: return index; michael@0: } michael@0: michael@0: UBool michael@0: MessagePattern::isArgTypeChar(UChar32 c) { michael@0: return (u_a<=c && c<=u_z) || (u_A<=c && c<=u_Z); michael@0: } michael@0: michael@0: UBool michael@0: MessagePattern::isChoice(int32_t index) { michael@0: UChar c; michael@0: return michael@0: ((c=msg.charAt(index++))==u_c || c==u_C) && michael@0: ((c=msg.charAt(index++))==u_h || c==u_H) && michael@0: ((c=msg.charAt(index++))==u_o || c==u_O) && michael@0: ((c=msg.charAt(index++))==u_i || c==u_I) && michael@0: ((c=msg.charAt(index++))==u_c || c==u_C) && michael@0: ((c=msg.charAt(index))==u_e || c==u_E); michael@0: } michael@0: michael@0: UBool michael@0: MessagePattern::isPlural(int32_t index) { michael@0: UChar c; michael@0: return michael@0: ((c=msg.charAt(index++))==u_p || c==u_P) && michael@0: ((c=msg.charAt(index++))==u_l || c==u_L) && michael@0: ((c=msg.charAt(index++))==u_u || c==u_U) && michael@0: ((c=msg.charAt(index++))==u_r || c==u_R) && michael@0: ((c=msg.charAt(index++))==u_a || c==u_A) && michael@0: ((c=msg.charAt(index))==u_l || c==u_L); michael@0: } michael@0: michael@0: UBool michael@0: MessagePattern::isSelect(int32_t index) { michael@0: UChar c; michael@0: return michael@0: ((c=msg.charAt(index++))==u_s || c==u_S) && michael@0: ((c=msg.charAt(index++))==u_e || c==u_E) && michael@0: ((c=msg.charAt(index++))==u_l || c==u_L) && michael@0: ((c=msg.charAt(index++))==u_e || c==u_E) && michael@0: ((c=msg.charAt(index++))==u_c || c==u_C) && michael@0: ((c=msg.charAt(index))==u_t || c==u_T); michael@0: } michael@0: michael@0: UBool michael@0: MessagePattern::isOrdinal(int32_t index) { michael@0: UChar c; michael@0: return michael@0: ((c=msg.charAt(index++))==u_o || c==u_O) && michael@0: ((c=msg.charAt(index++))==u_r || c==u_R) && michael@0: ((c=msg.charAt(index++))==u_d || c==u_D) && michael@0: ((c=msg.charAt(index++))==u_i || c==u_I) && michael@0: ((c=msg.charAt(index++))==u_n || c==u_N) && michael@0: ((c=msg.charAt(index++))==u_a || c==u_A) && michael@0: ((c=msg.charAt(index))==u_l || c==u_L); michael@0: } michael@0: michael@0: UBool michael@0: MessagePattern::inMessageFormatPattern(int32_t nestingLevel) { michael@0: return nestingLevel>0 || partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START; michael@0: } michael@0: michael@0: UBool michael@0: MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) { michael@0: return michael@0: nestingLevel==1 && michael@0: parentType==UMSGPAT_ARG_TYPE_CHOICE && michael@0: partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START; michael@0: } michael@0: michael@0: void michael@0: MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length, michael@0: int32_t value, UErrorCode &errorCode) { michael@0: if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) { michael@0: Part &part=partsList->a[partsLength++]; michael@0: part.type=type; michael@0: part.index=index; michael@0: part.length=(uint16_t)length; michael@0: part.value=(int16_t)value; michael@0: part.limitPartIndex=0; michael@0: } michael@0: } michael@0: michael@0: void michael@0: MessagePattern::addLimitPart(int32_t start, michael@0: UMessagePatternPartType type, int32_t index, int32_t length, michael@0: int32_t value, UErrorCode &errorCode) { michael@0: partsList->a[start].limitPartIndex=partsLength; michael@0: addPart(type, index, length, value, errorCode); michael@0: } michael@0: michael@0: void michael@0: MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length, michael@0: UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: int32_t numericIndex=numericValuesLength; michael@0: if(numericValuesList==NULL) { michael@0: numericValuesList=new MessagePatternDoubleList(); michael@0: if(numericValuesList==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) { michael@0: return; michael@0: } else { michael@0: if(numericIndex>Part::MAX_VALUE) { michael@0: errorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return; michael@0: } michael@0: } michael@0: numericValuesList->a[numericValuesLength++]=numericValue; michael@0: addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode); michael@0: } michael@0: michael@0: void michael@0: MessagePattern::setParseError(UParseError *parseError, int32_t index) { michael@0: if(parseError==NULL) { michael@0: return; michael@0: } michael@0: parseError->offset=index; michael@0: michael@0: // Set preContext to some of msg before index. michael@0: // Avoid splitting a surrogate pair. michael@0: int32_t length=index; michael@0: if(length>=U_PARSE_CONTEXT_LEN) { michael@0: length=U_PARSE_CONTEXT_LEN-1; michael@0: if(length>0 && U16_IS_TRAIL(msg[index-length])) { michael@0: --length; michael@0: } michael@0: } michael@0: msg.extract(index-length, length, parseError->preContext); michael@0: parseError->preContext[length]=0; michael@0: michael@0: // Set postContext to some of msg starting at index. michael@0: length=msg.length()-index; michael@0: if(length>=U_PARSE_CONTEXT_LEN) { michael@0: length=U_PARSE_CONTEXT_LEN-1; michael@0: if(length>0 && U16_IS_LEAD(msg[index+length-1])) { michael@0: --length; michael@0: } michael@0: } michael@0: msg.extract(index, length, parseError->postContext); michael@0: parseError->postContext[length]=0; michael@0: } michael@0: michael@0: // MessageImpl ------------------------------------------------------------- *** michael@0: michael@0: void michael@0: MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit, michael@0: UnicodeString &sb) { michael@0: int32_t doubleApos=-1; michael@0: for(;;) { michael@0: int32_t i=s.indexOf(u_apos, start); michael@0: if(i<0 || i>=limit) { michael@0: sb.append(s, start, limit-start); michael@0: break; michael@0: } michael@0: if(i==doubleApos) { michael@0: // Double apostrophe at start-1 and start==i, append one. michael@0: sb.append(u_apos); michael@0: ++start; michael@0: doubleApos=-1; michael@0: } else { michael@0: // Append text between apostrophes and skip this one. michael@0: sb.append(s, start, i-start); michael@0: doubleApos=start=i+1; michael@0: } michael@0: } michael@0: } michael@0: michael@0: // Ported from second half of ICU4J SelectFormat.format(String). michael@0: UnicodeString & michael@0: MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern, michael@0: int32_t msgStart, michael@0: UnicodeString &result) { michael@0: const UnicodeString &msgString=msgPattern.getPatternString(); michael@0: int32_t prevIndex=msgPattern.getPart(msgStart).getLimit(); michael@0: for(int32_t i=msgStart;;) { michael@0: const MessagePattern::Part &part=msgPattern.getPart(++i); michael@0: UMessagePatternPartType type=part.getType(); michael@0: int32_t index=part.getIndex(); michael@0: if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) { michael@0: return result.append(msgString, prevIndex, index-prevIndex); michael@0: } else if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX) { michael@0: result.append(msgString, prevIndex, index-prevIndex); michael@0: prevIndex=part.getLimit(); michael@0: } else if(type==UMSGPAT_PART_TYPE_ARG_START) { michael@0: result.append(msgString, prevIndex, index-prevIndex); michael@0: prevIndex=index; michael@0: i=msgPattern.getLimitPartIndex(i); michael@0: index=msgPattern.getPart(i).getLimit(); michael@0: appendReducedApostrophes(msgString, prevIndex, index, result); michael@0: prevIndex=index; michael@0: } michael@0: } michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif // !UCONFIG_NO_FORMATTING