intl/icu/source/common/messagepattern.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2011-2012, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 * file name: messagepattern.cpp
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2011mar14
michael@0 12 * created by: Markus W. Scherer
michael@0 13 */
michael@0 14
michael@0 15 #include "unicode/utypes.h"
michael@0 16
michael@0 17 #if !UCONFIG_NO_FORMATTING
michael@0 18
michael@0 19 #include "unicode/messagepattern.h"
michael@0 20 #include "unicode/unistr.h"
michael@0 21 #include "unicode/utf16.h"
michael@0 22 #include "cmemory.h"
michael@0 23 #include "cstring.h"
michael@0 24 #include "messageimpl.h"
michael@0 25 #include "patternprops.h"
michael@0 26 #include "putilimp.h"
michael@0 27 #include "uassert.h"
michael@0 28
michael@0 29 U_NAMESPACE_BEGIN
michael@0 30
michael@0 31 // Unicode character/code point constants ---------------------------------- ***
michael@0 32
michael@0 33 static const UChar u_pound=0x23;
michael@0 34 static const UChar u_apos=0x27;
michael@0 35 static const UChar u_plus=0x2B;
michael@0 36 static const UChar u_comma=0x2C;
michael@0 37 static const UChar u_minus=0x2D;
michael@0 38 static const UChar u_dot=0x2E;
michael@0 39 static const UChar u_colon=0x3A;
michael@0 40 static const UChar u_lessThan=0x3C;
michael@0 41 static const UChar u_equal=0x3D;
michael@0 42 static const UChar u_A=0x41;
michael@0 43 static const UChar u_C=0x43;
michael@0 44 static const UChar u_D=0x44;
michael@0 45 static const UChar u_E=0x45;
michael@0 46 static const UChar u_H=0x48;
michael@0 47 static const UChar u_I=0x49;
michael@0 48 static const UChar u_L=0x4C;
michael@0 49 static const UChar u_N=0x4E;
michael@0 50 static const UChar u_O=0x4F;
michael@0 51 static const UChar u_P=0x50;
michael@0 52 static const UChar u_R=0x52;
michael@0 53 static const UChar u_S=0x53;
michael@0 54 static const UChar u_T=0x54;
michael@0 55 static const UChar u_U=0x55;
michael@0 56 static const UChar u_Z=0x5A;
michael@0 57 static const UChar u_a=0x61;
michael@0 58 static const UChar u_c=0x63;
michael@0 59 static const UChar u_d=0x64;
michael@0 60 static const UChar u_e=0x65;
michael@0 61 static const UChar u_f=0x66;
michael@0 62 static const UChar u_h=0x68;
michael@0 63 static const UChar u_i=0x69;
michael@0 64 static const UChar u_l=0x6C;
michael@0 65 static const UChar u_n=0x6E;
michael@0 66 static const UChar u_o=0x6F;
michael@0 67 static const UChar u_p=0x70;
michael@0 68 static const UChar u_r=0x72;
michael@0 69 static const UChar u_s=0x73;
michael@0 70 static const UChar u_t=0x74;
michael@0 71 static const UChar u_u=0x75;
michael@0 72 static const UChar u_z=0x7A;
michael@0 73 static const UChar u_leftCurlyBrace=0x7B;
michael@0 74 static const UChar u_pipe=0x7C;
michael@0 75 static const UChar u_rightCurlyBrace=0x7D;
michael@0 76 static const UChar u_lessOrEqual=0x2264; // U+2264 is <=
michael@0 77
michael@0 78 static const UChar kOffsetColon[]={ // "offset:"
michael@0 79 u_o, u_f, u_f, u_s, u_e, u_t, u_colon
michael@0 80 };
michael@0 81
michael@0 82 static const UChar kOther[]={ // "other"
michael@0 83 u_o, u_t, u_h, u_e, u_r
michael@0 84 };
michael@0 85
michael@0 86 // MessagePatternList ------------------------------------------------------ ***
michael@0 87
michael@0 88 template<typename T, int32_t stackCapacity>
michael@0 89 class MessagePatternList : public UMemory {
michael@0 90 public:
michael@0 91 MessagePatternList() {}
michael@0 92 void copyFrom(const MessagePatternList<T, stackCapacity> &other,
michael@0 93 int32_t length,
michael@0 94 UErrorCode &errorCode);
michael@0 95 UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode);
michael@0 96 UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const {
michael@0 97 for(int32_t i=0; i<length; ++i) {
michael@0 98 if(a[i]!=other.a[i]) { return FALSE; }
michael@0 99 }
michael@0 100 return TRUE;
michael@0 101 }
michael@0 102
michael@0 103 MaybeStackArray<T, stackCapacity> a;
michael@0 104 };
michael@0 105
michael@0 106 template<typename T, int32_t stackCapacity>
michael@0 107 void
michael@0 108 MessagePatternList<T, stackCapacity>::copyFrom(
michael@0 109 const MessagePatternList<T, stackCapacity> &other,
michael@0 110 int32_t length,
michael@0 111 UErrorCode &errorCode) {
michael@0 112 if(U_SUCCESS(errorCode) && length>0) {
michael@0 113 if(length>a.getCapacity() && NULL==a.resize(length)) {
michael@0 114 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 115 return;
michael@0 116 }
michael@0 117 uprv_memcpy(a.getAlias(), other.a.getAlias(), length*sizeof(T));
michael@0 118 }
michael@0 119 }
michael@0 120
michael@0 121 template<typename T, int32_t stackCapacity>
michael@0 122 UBool
michael@0 123 MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
michael@0 124 if(U_FAILURE(errorCode)) {
michael@0 125 return FALSE;
michael@0 126 }
michael@0 127 if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=NULL) {
michael@0 128 return TRUE;
michael@0 129 }
michael@0 130 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 131 return FALSE;
michael@0 132 }
michael@0 133
michael@0 134 // MessagePatternList specializations -------------------------------------- ***
michael@0 135
michael@0 136 class MessagePatternDoubleList : public MessagePatternList<double, 8> {
michael@0 137 };
michael@0 138
michael@0 139 class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> {
michael@0 140 };
michael@0 141
michael@0 142 // MessagePattern constructors etc. ---------------------------------------- ***
michael@0 143
michael@0 144 MessagePattern::MessagePattern(UErrorCode &errorCode)
michael@0 145 : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
michael@0 146 partsList(NULL), parts(NULL), partsLength(0),
michael@0 147 numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
michael@0 148 hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
michael@0 149 init(errorCode);
michael@0 150 }
michael@0 151
michael@0 152 MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
michael@0 153 : aposMode(mode),
michael@0 154 partsList(NULL), parts(NULL), partsLength(0),
michael@0 155 numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
michael@0 156 hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
michael@0 157 init(errorCode);
michael@0 158 }
michael@0 159
michael@0 160 MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
michael@0 161 : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
michael@0 162 partsList(NULL), parts(NULL), partsLength(0),
michael@0 163 numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
michael@0 164 hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
michael@0 165 if(init(errorCode)) {
michael@0 166 parse(pattern, parseError, errorCode);
michael@0 167 }
michael@0 168 }
michael@0 169
michael@0 170 UBool
michael@0 171 MessagePattern::init(UErrorCode &errorCode) {
michael@0 172 if(U_FAILURE(errorCode)) {
michael@0 173 return FALSE;
michael@0 174 }
michael@0 175 partsList=new MessagePatternPartsList();
michael@0 176 if(partsList==NULL) {
michael@0 177 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 178 return FALSE;
michael@0 179 }
michael@0 180 parts=partsList->a.getAlias();
michael@0 181 return TRUE;
michael@0 182 }
michael@0 183
michael@0 184 MessagePattern::MessagePattern(const MessagePattern &other)
michael@0 185 : UObject(other), aposMode(other.aposMode), msg(other.msg),
michael@0 186 partsList(NULL), parts(NULL), partsLength(0),
michael@0 187 numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
michael@0 188 hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers),
michael@0 189 needsAutoQuoting(other.needsAutoQuoting) {
michael@0 190 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 191 if(!copyStorage(other, errorCode)) {
michael@0 192 clear();
michael@0 193 }
michael@0 194 }
michael@0 195
michael@0 196 MessagePattern &
michael@0 197 MessagePattern::operator=(const MessagePattern &other) {
michael@0 198 if(this==&other) {
michael@0 199 return *this;
michael@0 200 }
michael@0 201 aposMode=other.aposMode;
michael@0 202 msg=other.msg;
michael@0 203 hasArgNames=other.hasArgNames;
michael@0 204 hasArgNumbers=other.hasArgNumbers;
michael@0 205 needsAutoQuoting=other.needsAutoQuoting;
michael@0 206 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 207 if(!copyStorage(other, errorCode)) {
michael@0 208 clear();
michael@0 209 }
michael@0 210 return *this;
michael@0 211 }
michael@0 212
michael@0 213 UBool
michael@0 214 MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) {
michael@0 215 if(U_FAILURE(errorCode)) {
michael@0 216 return FALSE;
michael@0 217 }
michael@0 218 parts=NULL;
michael@0 219 partsLength=0;
michael@0 220 numericValues=NULL;
michael@0 221 numericValuesLength=0;
michael@0 222 if(partsList==NULL) {
michael@0 223 partsList=new MessagePatternPartsList();
michael@0 224 if(partsList==NULL) {
michael@0 225 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 226 return FALSE;
michael@0 227 }
michael@0 228 parts=partsList->a.getAlias();
michael@0 229 }
michael@0 230 if(other.partsLength>0) {
michael@0 231 partsList->copyFrom(*other.partsList, other.partsLength, errorCode);
michael@0 232 if(U_FAILURE(errorCode)) {
michael@0 233 return FALSE;
michael@0 234 }
michael@0 235 parts=partsList->a.getAlias();
michael@0 236 partsLength=other.partsLength;
michael@0 237 }
michael@0 238 if(other.numericValuesLength>0) {
michael@0 239 if(numericValuesList==NULL) {
michael@0 240 numericValuesList=new MessagePatternDoubleList();
michael@0 241 if(numericValuesList==NULL) {
michael@0 242 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 243 return FALSE;
michael@0 244 }
michael@0 245 numericValues=numericValuesList->a.getAlias();
michael@0 246 }
michael@0 247 numericValuesList->copyFrom(
michael@0 248 *other.numericValuesList, other.numericValuesLength, errorCode);
michael@0 249 if(U_FAILURE(errorCode)) {
michael@0 250 return FALSE;
michael@0 251 }
michael@0 252 numericValues=numericValuesList->a.getAlias();
michael@0 253 numericValuesLength=other.numericValuesLength;
michael@0 254 }
michael@0 255 return TRUE;
michael@0 256 }
michael@0 257
michael@0 258 MessagePattern::~MessagePattern() {
michael@0 259 delete partsList;
michael@0 260 delete numericValuesList;
michael@0 261 }
michael@0 262
michael@0 263 // MessagePattern API ------------------------------------------------------ ***
michael@0 264
michael@0 265 MessagePattern &
michael@0 266 MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
michael@0 267 preParse(pattern, parseError, errorCode);
michael@0 268 parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode);
michael@0 269 postParse();
michael@0 270 return *this;
michael@0 271 }
michael@0 272
michael@0 273 MessagePattern &
michael@0 274 MessagePattern::parseChoiceStyle(const UnicodeString &pattern,
michael@0 275 UParseError *parseError, UErrorCode &errorCode) {
michael@0 276 preParse(pattern, parseError, errorCode);
michael@0 277 parseChoiceStyle(0, 0, parseError, errorCode);
michael@0 278 postParse();
michael@0 279 return *this;
michael@0 280 }
michael@0 281
michael@0 282 MessagePattern &
michael@0 283 MessagePattern::parsePluralStyle(const UnicodeString &pattern,
michael@0 284 UParseError *parseError, UErrorCode &errorCode) {
michael@0 285 preParse(pattern, parseError, errorCode);
michael@0 286 parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode);
michael@0 287 postParse();
michael@0 288 return *this;
michael@0 289 }
michael@0 290
michael@0 291 MessagePattern &
michael@0 292 MessagePattern::parseSelectStyle(const UnicodeString &pattern,
michael@0 293 UParseError *parseError, UErrorCode &errorCode) {
michael@0 294 preParse(pattern, parseError, errorCode);
michael@0 295 parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode);
michael@0 296 postParse();
michael@0 297 return *this;
michael@0 298 }
michael@0 299
michael@0 300 void
michael@0 301 MessagePattern::clear() {
michael@0 302 // Mostly the same as preParse().
michael@0 303 msg.remove();
michael@0 304 hasArgNames=hasArgNumbers=FALSE;
michael@0 305 needsAutoQuoting=FALSE;
michael@0 306 partsLength=0;
michael@0 307 numericValuesLength=0;
michael@0 308 }
michael@0 309
michael@0 310 UBool
michael@0 311 MessagePattern::operator==(const MessagePattern &other) const {
michael@0 312 if(this==&other) {
michael@0 313 return TRUE;
michael@0 314 }
michael@0 315 return
michael@0 316 aposMode==other.aposMode &&
michael@0 317 msg==other.msg &&
michael@0 318 // parts.equals(o.parts)
michael@0 319 partsLength==other.partsLength &&
michael@0 320 (partsLength==0 || partsList->equals(*other.partsList, partsLength));
michael@0 321 // No need to compare numericValues if msg and parts are the same.
michael@0 322 }
michael@0 323
michael@0 324 int32_t
michael@0 325 MessagePattern::hashCode() const {
michael@0 326 int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength;
michael@0 327 for(int32_t i=0; i<partsLength; ++i) {
michael@0 328 hash=hash*37+parts[i].hashCode();
michael@0 329 }
michael@0 330 return hash;
michael@0 331 }
michael@0 332
michael@0 333 int32_t
michael@0 334 MessagePattern::validateArgumentName(const UnicodeString &name) {
michael@0 335 if(!PatternProps::isIdentifier(name.getBuffer(), name.length())) {
michael@0 336 return UMSGPAT_ARG_NAME_NOT_VALID;
michael@0 337 }
michael@0 338 return parseArgNumber(name, 0, name.length());
michael@0 339 }
michael@0 340
michael@0 341 UnicodeString
michael@0 342 MessagePattern::autoQuoteApostropheDeep() const {
michael@0 343 if(!needsAutoQuoting) {
michael@0 344 return msg;
michael@0 345 }
michael@0 346 UnicodeString modified(msg);
michael@0 347 // Iterate backward so that the insertion indexes do not change.
michael@0 348 int32_t count=countParts();
michael@0 349 for(int32_t i=count; i>0;) {
michael@0 350 const Part &part=getPart(--i);
michael@0 351 if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) {
michael@0 352 modified.insert(part.index, (UChar)part.value);
michael@0 353 }
michael@0 354 }
michael@0 355 return modified;
michael@0 356 }
michael@0 357
michael@0 358 double
michael@0 359 MessagePattern::getNumericValue(const Part &part) const {
michael@0 360 UMessagePatternPartType type=part.type;
michael@0 361 if(type==UMSGPAT_PART_TYPE_ARG_INT) {
michael@0 362 return part.value;
michael@0 363 } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) {
michael@0 364 return numericValues[part.value];
michael@0 365 } else {
michael@0 366 return UMSGPAT_NO_NUMERIC_VALUE;
michael@0 367 }
michael@0 368 }
michael@0 369
michael@0 370 /**
michael@0 371 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
michael@0 372 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
michael@0 373 * @return the "offset:" value.
michael@0 374 * @draft ICU 4.8
michael@0 375 */
michael@0 376 double
michael@0 377 MessagePattern::getPluralOffset(int32_t pluralStart) const {
michael@0 378 const Part &part=getPart(pluralStart);
michael@0 379 if(Part::hasNumericValue(part.type)) {
michael@0 380 return getNumericValue(part);
michael@0 381 } else {
michael@0 382 return 0;
michael@0 383 }
michael@0 384 }
michael@0 385
michael@0 386 // MessagePattern::Part ---------------------------------------------------- ***
michael@0 387
michael@0 388 UBool
michael@0 389 MessagePattern::Part::operator==(const Part &other) const {
michael@0 390 if(this==&other) {
michael@0 391 return TRUE;
michael@0 392 }
michael@0 393 return
michael@0 394 type==other.type &&
michael@0 395 index==other.index &&
michael@0 396 length==other.length &&
michael@0 397 value==other.value &&
michael@0 398 limitPartIndex==other.limitPartIndex;
michael@0 399 }
michael@0 400
michael@0 401 // MessagePattern parser --------------------------------------------------- ***
michael@0 402
michael@0 403 void
michael@0 404 MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
michael@0 405 if(U_FAILURE(errorCode)) {
michael@0 406 return;
michael@0 407 }
michael@0 408 if(parseError!=NULL) {
michael@0 409 parseError->line=0;
michael@0 410 parseError->offset=0;
michael@0 411 parseError->preContext[0]=0;
michael@0 412 parseError->postContext[0]=0;
michael@0 413 }
michael@0 414 msg=pattern;
michael@0 415 hasArgNames=hasArgNumbers=FALSE;
michael@0 416 needsAutoQuoting=FALSE;
michael@0 417 partsLength=0;
michael@0 418 numericValuesLength=0;
michael@0 419 }
michael@0 420
michael@0 421 void
michael@0 422 MessagePattern::postParse() {
michael@0 423 if(partsList!=NULL) {
michael@0 424 parts=partsList->a.getAlias();
michael@0 425 }
michael@0 426 if(numericValuesList!=NULL) {
michael@0 427 numericValues=numericValuesList->a.getAlias();
michael@0 428 }
michael@0 429 }
michael@0 430
/**
 * Parses one message (the top-level message or a nested sub-message) of msg
 * and records it as a MSG_START ... MSG_LIMIT sequence of parts.
 *
 * @param index          index in msg where the message starts
 * @param msgStartLength length of the MSG_START part; parsing of the message
 *                       text continues at index+msgStartLength
 * @param nestingLevel   nesting depth, stored as the value of the MSG_START and
 *                       MSG_LIMIT parts; must not exceed Part::MAX_VALUE
 * @param parentType     type of the argument that contains this message;
 *                       decides whether '|' and '#' are syntax characters
 * @param parseError     optional error details; may be NULL
 * @param errorCode      in/out error code; set to U_INDEX_OUTOFBOUNDS_ERROR if the
 *                       nesting is too deep and to U_UNMATCHED_BRACES if the
 *                       string ends inside a nested message
 * @return the index where the caller continues parsing, or 0 on failure
 */
michael@0 431 int32_t
michael@0 432 MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
michael@0 433 int32_t nestingLevel, UMessagePatternArgType parentType,
michael@0 434 UParseError *parseError, UErrorCode &errorCode) {
michael@0 435 if(U_FAILURE(errorCode)) {
michael@0 436 return 0;
michael@0 437 }
michael@0 438 if(nestingLevel>Part::MAX_VALUE) {
michael@0 439 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 440 return 0;
michael@0 441 }
    // Remember the index of the MSG_START part so that addLimitPart() can be given it later.
michael@0 442 int32_t msgStart=partsLength;
michael@0 443 addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode);
michael@0 444 index+=msgStartLength;
michael@0 445 for(;;) { // while(index<msg.length()) with U_FAILURE(errorCode) check
michael@0 446 if(U_FAILURE(errorCode)) {
michael@0 447 return 0;
michael@0 448 }
michael@0 449 if(index>=msg.length()) {
michael@0 450 break;
michael@0 451 }
    // From here on, index points just past the character c.
michael@0 452 UChar c=msg.charAt(index++);
michael@0 453 if(c==u_apos) {
michael@0 454 if(index==msg.length()) {
michael@0 455 // The apostrophe is the last character in the pattern.
michael@0 456 // Add a Part for auto-quoting.
michael@0 457 addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
michael@0 458 u_apos, errorCode); // value=char to be inserted
michael@0 459 needsAutoQuoting=TRUE;
michael@0 460 } else {
michael@0 461 c=msg.charAt(index);
michael@0 462 if(c==u_apos) {
michael@0 463 // double apostrophe, skip the second one
michael@0 464 addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
michael@0 465 } else if(
    // The apostrophe starts quoted text in DOUBLE_REQUIRED mode, or when it
    // directly precedes a character that is syntax in the current context.
michael@0 466 aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED ||
michael@0 467 c==u_leftCurlyBrace || c==u_rightCurlyBrace ||
michael@0 468 (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) ||
michael@0 469 (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound)
michael@0 470 ) {
michael@0 471 // skip the quote-starting apostrophe
michael@0 472 addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode);
michael@0 473 // find the end of the quoted literal text
michael@0 474 for(;;) {
    // The search starts at index+1, i.e. after the character that follows the apostrophe.
michael@0 475 index=msg.indexOf(u_apos, index+1);
michael@0 476 if(index>=0) {
    // NOTE(review): the explicit bounds check is commented out; this presumably relies on
    // charAt() returning a non-apostrophe value for an out-of-range index -- confirm.
michael@0 477 if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) {
michael@0 478 // double apostrophe inside quoted literal text
michael@0 479 // still encodes a single apostrophe, skip the second one
michael@0 480 addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode);
michael@0 481 } else {
michael@0 482 // skip the quote-ending apostrophe
michael@0 483 addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
michael@0 484 break;
michael@0 485 }
michael@0 486 } else {
michael@0 487 // The quoted text reaches to the end of the message.
michael@0 488 index=msg.length();
michael@0 489 // Add a Part for auto-quoting.
michael@0 490 addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
michael@0 491 u_apos, errorCode); // value=char to be inserted
michael@0 492 needsAutoQuoting=TRUE;
michael@0 493 break;
michael@0 494 }
michael@0 495 }
michael@0 496 } else {
michael@0 497 // Interpret the apostrophe as literal text.
michael@0 498 // Add a Part for auto-quoting.
michael@0 499 addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
michael@0 500 u_apos, errorCode); // value=char to be inserted
michael@0 501 needsAutoQuoting=TRUE;
michael@0 502 }
michael@0 503 }
michael@0 504 } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) {
michael@0 505 // The unquoted # in a plural message fragment will be replaced
michael@0 506 // with the (number-offset).
michael@0 507 addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode);
michael@0 508 } else if(c==u_leftCurlyBrace) {
    // parseArg() is given the index of the '{' and returns the index to continue at.
michael@0 509 index=parseArg(index-1, 1, nestingLevel, parseError, errorCode);
michael@0 510 } else if((nestingLevel>0 && c==u_rightCurlyBrace) ||
michael@0 511 (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) {
michael@0 512 // Finish the message before the terminator.
michael@0 513 // In a choice style, report the "}" substring only for the following ARG_LIMIT,
michael@0 514 // not for this MSG_LIMIT.
michael@0 515 int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1;
michael@0 516 addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength,
michael@0 517 nestingLevel, errorCode);
michael@0 518 if(parentType==UMSGPAT_ARG_TYPE_CHOICE) {
michael@0 519 // Let the choice style parser see the '}' or '|'.
michael@0 520 return index-1;
michael@0 521 } else {
michael@0 522 // continue parsing after the '}'
michael@0 523 return index;
michael@0 524 }
michael@0 525 } // else: c is part of literal text
michael@0 526 }
    // The end of the string was reached without a terminator. That is an error for a
    // nested message, unless inTopLevelChoiceMessage() accepts it.
michael@0 527 if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
michael@0 528 setParseError(parseError, 0); // Unmatched '{' braces in message.
michael@0 529 errorCode=U_UNMATCHED_BRACES;
michael@0 530 return 0;
michael@0 531 }
michael@0 532 addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode);
michael@0 533 return index;
michael@0 534 }
michael@0 535
/**
 * Parses one argument: a name or number, optionally followed by a comma and
 * a type, optionally followed by a comma and a style, ending with '}'.
 * Records an ARG_START part, an ARG_NUMBER or ARG_NAME part, the parts added
 * for type and style, and finally an ARG_LIMIT part.
 *
 * @param index          index in msg where the argument starts
 * @param argStartLength length of the ARG_START part; the name or number is
 *                       looked for after index+argStartLength
 * @param nestingLevel   nesting level passed on to the choice and
 *                       plural/select style parsers
 * @param parseError     optional error details; may be NULL
 * @param errorCode      in/out error code; set to U_UNMATCHED_BRACES when the
 *                       string ends inside the argument, U_PATTERN_SYNTAX_ERROR
 *                       for bad syntax, U_INDEX_OUTOFBOUNDS_ERROR when a name,
 *                       number or type does not fit into a Part
 * @return the index just after the closing '}', or 0 where an error is detected
 *         directly in this function
 */
michael@0 536 int32_t
michael@0 537 MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
michael@0 538 UParseError *parseError, UErrorCode &errorCode) {
    // Remember the index of the ARG_START part: its value is patched once the
    // argument type is known, and addLimitPart() is given it at the end.
michael@0 539 int32_t argStart=partsLength;
michael@0 540 UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE;
michael@0 541 addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode);
michael@0 542 if(U_FAILURE(errorCode)) {
michael@0 543 return 0;
michael@0 544 }
michael@0 545 int32_t nameIndex=index=skipWhiteSpace(index+argStartLength);
michael@0 546 if(index==msg.length()) {
michael@0 547 setParseError(parseError, 0); // Unmatched '{' braces in message.
michael@0 548 errorCode=U_UNMATCHED_BRACES;
michael@0 549 return 0;
michael@0 550 }
michael@0 551 // parse argument name or number
michael@0 552 index=skipIdentifier(index);
    // number>=0: argument number; UMSGPAT_ARG_NAME_NOT_NUMBER: argument name;
    // anything else: invalid.
michael@0 553 int32_t number=parseArgNumber(nameIndex, index);
michael@0 554 if(number>=0) {
michael@0 555 int32_t length=index-nameIndex;
michael@0 556 if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) {
michael@0 557 setParseError(parseError, nameIndex); // Argument number too large.
michael@0 558 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 559 return 0;
michael@0 560 }
michael@0 561 hasArgNumbers=TRUE;
michael@0 562 addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode);
michael@0 563 } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) {
michael@0 564 int32_t length=index-nameIndex;
michael@0 565 if(length>Part::MAX_LENGTH) {
michael@0 566 setParseError(parseError, nameIndex); // Argument name too long.
michael@0 567 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 568 return 0;
michael@0 569 }
michael@0 570 hasArgNames=TRUE;
michael@0 571 addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode);
michael@0 572 } else { // number<-1 (ARG_NAME_NOT_VALID)
michael@0 573 setParseError(parseError, nameIndex); // Bad argument syntax.
michael@0 574 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 575 return 0;
michael@0 576 }
michael@0 577 index=skipWhiteSpace(index);
michael@0 578 if(index==msg.length()) {
michael@0 579 setParseError(parseError, 0); // Unmatched '{' braces in message.
michael@0 580 errorCode=U_UNMATCHED_BRACES;
michael@0 581 return 0;
michael@0 582 }
michael@0 583 UChar c=msg.charAt(index);
michael@0 584 if(c==u_rightCurlyBrace) {
michael@0 585 // all done
michael@0 586 } else if(c!=u_comma) {
michael@0 587 setParseError(parseError, nameIndex); // Bad argument syntax.
michael@0 588 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 589 return 0;
michael@0 590 } else /* ',' */ {
michael@0 591 // parse argument type: case-sensitive a-zA-Z
michael@0 592 int32_t typeIndex=index=skipWhiteSpace(index+1);
michael@0 593 while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
michael@0 594 ++index;
michael@0 595 }
michael@0 596 int32_t length=index-typeIndex;
michael@0 597 index=skipWhiteSpace(index);
michael@0 598 if(index==msg.length()) {
michael@0 599 setParseError(parseError, 0); // Unmatched '{' braces in message.
michael@0 600 errorCode=U_UNMATCHED_BRACES;
michael@0 601 return 0;
michael@0 602 }
    // The type must be non-empty and be followed by ',' or '}'.
    // As a side effect, c is updated to the character after the type.
michael@0 603 if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) {
michael@0 604 setParseError(parseError, nameIndex); // Bad argument syntax.
michael@0 605 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 606 return 0;
michael@0 607 }
michael@0 608 if(length>Part::MAX_LENGTH) {
michael@0 609 setParseError(parseError, nameIndex); // Argument type name too long.
michael@0 610 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 611 return 0;
michael@0 612 }
    // Every type name is SIMPLE unless it matches one of the complex type names below.
michael@0 613 argType=UMSGPAT_ARG_TYPE_SIMPLE;
michael@0 614 if(length==6) {
michael@0 615 // case-insensitive comparisons for complex-type names
michael@0 616 if(isChoice(typeIndex)) {
michael@0 617 argType=UMSGPAT_ARG_TYPE_CHOICE;
michael@0 618 } else if(isPlural(typeIndex)) {
michael@0 619 argType=UMSGPAT_ARG_TYPE_PLURAL;
michael@0 620 } else if(isSelect(typeIndex)) {
michael@0 621 argType=UMSGPAT_ARG_TYPE_SELECT;
michael@0 622 }
michael@0 623 } else if(length==13) {
    // 13 characters: a 6-character "select" match followed by an isOrdinal() match.
michael@0 624 if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
michael@0 625 argType=UMSGPAT_ARG_TYPE_SELECTORDINAL;
michael@0 626 }
michael@0 627 }
michael@0 628 // change the ARG_START type from NONE to argType
michael@0 629 partsList->a[argStart].value=(int16_t)argType;
michael@0 630 if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
michael@0 631 addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode);
michael@0 632 }
michael@0 633 // look for an argument style (pattern)
michael@0 634 if(c==u_rightCurlyBrace) {
michael@0 635 if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) {
michael@0 636 setParseError(parseError, nameIndex); // No style field for complex argument.
michael@0 637 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 638 return 0;
michael@0 639 }
michael@0 640 } else /* ',' */ {
michael@0 641 ++index;
michael@0 642 if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
michael@0 643 index=parseSimpleStyle(index, parseError, errorCode);
michael@0 644 } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
michael@0 645 index=parseChoiceStyle(index, nestingLevel, parseError, errorCode);
michael@0 646 } else {
michael@0 647 index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode);
michael@0 648 }
michael@0 649 }
michael@0 650 }
    // NOTE(review): errorCode is not re-checked after the style parsers above, so on a style
    // error this code still runs with the index they returned; presumably addLimitPart()
    // adds nothing once errorCode is a failure, and the caller checks errorCode -- confirm.
michael@0 651 // Argument parsing stopped on the '}'.
michael@0 652 addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode);
michael@0 653 return index+1;
michael@0 654 }
michael@0 655
michael@0 656 int32_t
michael@0 657 MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) {
michael@0 658 if(U_FAILURE(errorCode)) {
michael@0 659 return 0;
michael@0 660 }
michael@0 661 int32_t start=index;
michael@0 662 int32_t nestedBraces=0;
michael@0 663 while(index<msg.length()) {
michael@0 664 UChar c=msg.charAt(index++);
michael@0 665 if(c==u_apos) {
michael@0 666 // Treat apostrophe as quoting but include it in the style part.
michael@0 667 // Find the end of the quoted literal text.
michael@0 668 index=msg.indexOf(u_apos, index);
michael@0 669 if(index<0) {
michael@0 670 // Quoted literal argument style text reaches to the end of the message.
michael@0 671 setParseError(parseError, start);
michael@0 672 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 673 return 0;
michael@0 674 }
michael@0 675 // skip the quote-ending apostrophe
michael@0 676 ++index;
michael@0 677 } else if(c==u_leftCurlyBrace) {
michael@0 678 ++nestedBraces;
michael@0 679 } else if(c==u_rightCurlyBrace) {
michael@0 680 if(nestedBraces>0) {
michael@0 681 --nestedBraces;
michael@0 682 } else {
michael@0 683 int32_t length=--index-start;
michael@0 684 if(length>Part::MAX_LENGTH) {
michael@0 685 setParseError(parseError, start); // Argument style text too long.
michael@0 686 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 687 return 0;
michael@0 688 }
michael@0 689 addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode);
michael@0 690 return index;
michael@0 691 }
michael@0 692 } // c is part of literal text
michael@0 693 }
michael@0 694 setParseError(parseError, 0); // Unmatched '{' braces in message.
michael@0 695 errorCode=U_UNMATCHED_BRACES;
michael@0 696 return 0;
michael@0 697 }
michael@0 698
michael@0 699 int32_t
michael@0 700 MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel,
michael@0 701 UParseError *parseError, UErrorCode &errorCode) {
michael@0 702 if(U_FAILURE(errorCode)) {
michael@0 703 return 0;
michael@0 704 }
michael@0 705 int32_t start=index;
michael@0 706 index=skipWhiteSpace(index);
michael@0 707 if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) {
michael@0 708 setParseError(parseError, 0); // Missing choice argument pattern.
michael@0 709 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 710 return 0;
michael@0 711 }
michael@0 712 for(;;) {
michael@0 713 // The choice argument style contains |-separated (number, separator, message) triples.
michael@0 714 // Parse the number.
michael@0 715 int32_t numberIndex=index;
michael@0 716 index=skipDouble(index);
michael@0 717 int32_t length=index-numberIndex;
michael@0 718 if(length==0) {
michael@0 719 setParseError(parseError, start); // Bad choice pattern syntax.
michael@0 720 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 721 return 0;
michael@0 722 }
michael@0 723 if(length>Part::MAX_LENGTH) {
michael@0 724 setParseError(parseError, numberIndex); // Choice number too long.
michael@0 725 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 726 return 0;
michael@0 727 }
michael@0 728 parseDouble(numberIndex, index, TRUE, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
michael@0 729 if(U_FAILURE(errorCode)) {
michael@0 730 return 0;
michael@0 731 }
michael@0 732 // Parse the separator.
michael@0 733 index=skipWhiteSpace(index);
michael@0 734 if(index==msg.length()) {
michael@0 735 setParseError(parseError, start); // Bad choice pattern syntax.
michael@0 736 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 737 return 0;
michael@0 738 }
michael@0 739 UChar c=msg.charAt(index);
michael@0 740 if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <=
michael@0 741 setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c.
michael@0 742 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 743 return 0;
michael@0 744 }
michael@0 745 addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode);
michael@0 746 // Parse the message fragment.
michael@0 747 index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode);
michael@0 748 if(U_FAILURE(errorCode)) {
michael@0 749 return 0;
michael@0 750 }
michael@0 751 // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
michael@0 752 if(index==msg.length()) {
michael@0 753 return index;
michael@0 754 }
michael@0 755 if(msg.charAt(index)==u_rightCurlyBrace) {
michael@0 756 if(!inMessageFormatPattern(nestingLevel)) {
michael@0 757 setParseError(parseError, start); // Bad choice pattern syntax.
michael@0 758 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 759 return 0;
michael@0 760 }
michael@0 761 return index;
michael@0 762 } // else the terminator is '|'
michael@0 763 index=skipWhiteSpace(index+1);
michael@0 764 }
michael@0 765 }
michael@0 766
// Parses the style text of a plural-type or select argument, starting at index.
// The style is a sequence of (selector, {message}) pairs. When
// UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) holds, an "offset:" value may come first
// and selectors may also be explicit values of the form "=number".
//
// Adds an ARG_SELECTOR part per selector, numeric parts via parseDouble(), and the
// parts produced by parseMessage() for each message fragment.
//
// Returns the index at which parsing stopped (a '}' or msg.length()) on success.
// On failure returns 0 after setting errorCode to U_PATTERN_SYNTAX_ERROR,
// U_DEFAULT_KEYWORD_MISSING (no selector compared equal to kOther) or
// U_INDEX_OUTOFBOUNDS_ERROR (a selector or offset value longer than Part::MAX_LENGTH),
// and calls setParseError() to record the error position.
michael@0 767 int32_t
michael@0 768 MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
michael@0 769 int32_t index, int32_t nestingLevel,
michael@0 770 UParseError *parseError, UErrorCode &errorCode) {
michael@0 771 if(U_FAILURE(errorCode)) {
michael@0 772 return 0;
michael@0 773 }
michael@0 774 int32_t start=index;
// isEmpty: nothing (neither an offset nor a selector/message pair) has been parsed yet.
michael@0 775 UBool isEmpty=TRUE;
// hasOther: a selector equal to kOther has been seen.
michael@0 776 UBool hasOther=FALSE;
michael@0 777 for(;;) {
michael@0 778 // First, collect the selector looking for a small set of terminators.
michael@0 779 // It would be a little faster to consider the syntax of each possible
michael@0 780 // token right here, but that makes the code too complicated.
michael@0 781 index=skipWhiteSpace(index);
michael@0 782 UBool eos=index==msg.length();
michael@0 783 if(eos || msg.charAt(index)==u_rightCurlyBrace) {
// End of string is only acceptable when we are not inside a MessageFormat pattern,
// and a closing '}' only when we are; either mismatch is a syntax error.
michael@0 784 if(eos==inMessageFormatPattern(nestingLevel)) {
michael@0 785 setParseError(parseError, start); // Bad plural/select pattern syntax.
michael@0 786 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 787 return 0;
michael@0 788 }
michael@0 789 if(!hasOther) {
michael@0 790 setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern.
michael@0 791 errorCode=U_DEFAULT_KEYWORD_MISSING;
michael@0 792 return 0;
michael@0 793 }
michael@0 794 return index;
michael@0 795 }
michael@0 796 int32_t selectorIndex=index;
michael@0 797 if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) {
michael@0 798 // explicit-value plural selector: =double
michael@0 799 index=skipDouble(index+1);
// length includes the leading '='; a length of 1 means no number characters followed it.
michael@0 800 int32_t length=index-selectorIndex;
michael@0 801 if(length==1) {
michael@0 802 setParseError(parseError, start); // Bad plural/select pattern syntax.
michael@0 803 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 804 return 0;
michael@0 805 }
michael@0 806 if(length>Part::MAX_LENGTH) {
michael@0 807 setParseError(parseError, selectorIndex); // Argument selector too long.
michael@0 808 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 809 return 0;
michael@0 810 }
// The selector part covers "=number"; the numeric part that follows covers only the number.
// Any failure from these two calls is caught by the U_FAILURE check after this if/else.
michael@0 811 addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
michael@0 812 parseDouble(selectorIndex+1, index, FALSE,
michael@0 813 parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
michael@0 814 } else {
michael@0 815 index=skipIdentifier(index);
michael@0 816 int32_t length=index-selectorIndex;
michael@0 817 if(length==0) {
michael@0 818 setParseError(parseError, start); // Bad plural/select pattern syntax.
michael@0 819 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 820 return 0;
michael@0 821 }
michael@0 822 // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
// Hence the identifier length of 6 but a 7-unit comparison against kOffsetColon.
michael@0 823 if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && index<msg.length() &&
michael@0 824 0==msg.compare(selectorIndex, 7, kOffsetColon, 0, 7)
michael@0 825 ) {
michael@0 826 // plural offset, not a selector
michael@0 827 if(!isEmpty) {
michael@0 828 // Plural argument 'offset:' (if present) must precede key-message pairs.
michael@0 829 setParseError(parseError, start);
michael@0 830 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 831 return 0;
michael@0 832 }
michael@0 833 // allow whitespace between offset: and its value
michael@0 834 int32_t valueIndex=skipWhiteSpace(index+1); // The ':' is at index.
michael@0 835 index=skipDouble(valueIndex);
michael@0 836 if(index==valueIndex) {
michael@0 837 setParseError(parseError, start); // Missing value for plural 'offset:'.
michael@0 838 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 839 return 0;
michael@0 840 }
michael@0 841 if((index-valueIndex)>Part::MAX_LENGTH) {
michael@0 842 setParseError(parseError, valueIndex); // Plural offset value too long.
michael@0 843 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 844 return 0;
michael@0 845 }
michael@0 846 parseDouble(valueIndex, index, FALSE,
michael@0 847 parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
michael@0 848 if(U_FAILURE(errorCode)) {
michael@0 849 return 0;
michael@0 850 }
// Marking the style non-empty here also rejects a second "offset:" on a later iteration.
michael@0 851 isEmpty=FALSE;
michael@0 852 continue; // no message fragment after the offset
michael@0 853 } else {
michael@0 854 // normal selector word
michael@0 855 if(length>Part::MAX_LENGTH) {
michael@0 856 setParseError(parseError, selectorIndex); // Argument selector too long.
michael@0 857 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 858 return 0;
michael@0 859 }
michael@0 860 addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
michael@0 861 if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) {
michael@0 862 hasOther=TRUE;
michael@0 863 }
michael@0 864 }
michael@0 865 }
// Covers failures from addPart()/parseDouble() in the selector branches above.
michael@0 866 if(U_FAILURE(errorCode)) {
michael@0 867 return 0;
michael@0 868 }
michael@0 869
michael@0 870 // parse the message fragment following the selector
michael@0 871 index=skipWhiteSpace(index);
michael@0 872 if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) {
michael@0 873 setParseError(parseError, selectorIndex); // No message fragment after plural/select selector.
michael@0 874 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 875 return 0;
michael@0 876 }
// index is at the '{'; parseMessage() is passed 1 as its second argument here.
// NOTE(review): presumably that is the length of the opening '{' — confirm against parseMessage().
michael@0 877 index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode);
michael@0 878 if(U_FAILURE(errorCode)) {
michael@0 879 return 0;
michael@0 880 }
michael@0 881 isEmpty=FALSE;
michael@0 882 }
michael@0 883 }
michael@0 884
michael@0 885 int32_t
michael@0 886 MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) {
michael@0 887 // If the identifier contains only ASCII digits, then it is an argument _number_
michael@0 888 // and must not have leading zeros (except "0" itself).
michael@0 889 // Otherwise it is an argument _name_.
michael@0 890 if(start>=limit) {
michael@0 891 return UMSGPAT_ARG_NAME_NOT_VALID;
michael@0 892 }
michael@0 893 int32_t number;
michael@0 894 // Defer numeric errors until we know there are only digits.
michael@0 895 UBool badNumber;
michael@0 896 UChar c=s.charAt(start++);
michael@0 897 if(c==0x30) {
michael@0 898 if(start==limit) {
michael@0 899 return 0;
michael@0 900 } else {
michael@0 901 number=0;
michael@0 902 badNumber=TRUE; // leading zero
michael@0 903 }
michael@0 904 } else if(0x31<=c && c<=0x39) {
michael@0 905 number=c-0x30;
michael@0 906 badNumber=FALSE;
michael@0 907 } else {
michael@0 908 return UMSGPAT_ARG_NAME_NOT_NUMBER;
michael@0 909 }
michael@0 910 while(start<limit) {
michael@0 911 c=s.charAt(start++);
michael@0 912 if(0x30<=c && c<=0x39) {
michael@0 913 if(number>=INT32_MAX/10) {
michael@0 914 badNumber=TRUE; // overflow
michael@0 915 }
michael@0 916 number=number*10+(c-0x30);
michael@0 917 } else {
michael@0 918 return UMSGPAT_ARG_NAME_NOT_NUMBER;
michael@0 919 }
michael@0 920 }
michael@0 921 // There are only ASCII digits.
michael@0 922 if(badNumber) {
michael@0 923 return UMSGPAT_ARG_NAME_NOT_VALID;
michael@0 924 } else {
michael@0 925 return number;
michael@0 926 }
michael@0 927 }
michael@0 928
michael@0 929 void
michael@0 930 MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
michael@0 931 UParseError *parseError, UErrorCode &errorCode) {
michael@0 932 if(U_FAILURE(errorCode)) {
michael@0 933 return;
michael@0 934 }
michael@0 935 U_ASSERT(start<limit);
michael@0 936 // fake loop for easy exit and single throw statement
michael@0 937 for(;;) { /*loop doesn't iterate*/
michael@0 938 // fast path for small integers and infinity
michael@0 939 int32_t value=0;
michael@0 940 int32_t isNegative=0; // not boolean so that we can easily add it to value
michael@0 941 int32_t index=start;
michael@0 942 UChar c=msg.charAt(index++);
michael@0 943 if(c==u_minus) {
michael@0 944 isNegative=1;
michael@0 945 if(index==limit) {
michael@0 946 break; // no number
michael@0 947 }
michael@0 948 c=msg.charAt(index++);
michael@0 949 } else if(c==u_plus) {
michael@0 950 if(index==limit) {
michael@0 951 break; // no number
michael@0 952 }
michael@0 953 c=msg.charAt(index++);
michael@0 954 }
michael@0 955 if(c==0x221e) { // infinity
michael@0 956 if(allowInfinity && index==limit) {
michael@0 957 double infinity=uprv_getInfinity();
michael@0 958 addArgDoublePart(
michael@0 959 isNegative!=0 ? -infinity : infinity,
michael@0 960 start, limit-start, errorCode);
michael@0 961 return;
michael@0 962 } else {
michael@0 963 break;
michael@0 964 }
michael@0 965 }
michael@0 966 // try to parse the number as a small integer but fall back to a double
michael@0 967 while('0'<=c && c<='9') {
michael@0 968 value=value*10+(c-'0');
michael@0 969 if(value>(Part::MAX_VALUE+isNegative)) {
michael@0 970 break; // not a small-enough integer
michael@0 971 }
michael@0 972 if(index==limit) {
michael@0 973 addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start,
michael@0 974 isNegative!=0 ? -value : value, errorCode);
michael@0 975 return;
michael@0 976 }
michael@0 977 c=msg.charAt(index++);
michael@0 978 }
michael@0 979 // Let Double.parseDouble() throw a NumberFormatException.
michael@0 980 char numberChars[128];
michael@0 981 int32_t capacity=(int32_t)sizeof(numberChars);
michael@0 982 int32_t length=limit-start;
michael@0 983 if(length>=capacity) {
michael@0 984 break; // number too long
michael@0 985 }
michael@0 986 msg.extract(start, length, numberChars, capacity, US_INV);
michael@0 987 if((int32_t)uprv_strlen(numberChars)<length) {
michael@0 988 break; // contains non-invariant character that was turned into NUL
michael@0 989 }
michael@0 990 char *end;
michael@0 991 double numericValue=uprv_strtod(numberChars, &end);
michael@0 992 if(end!=(numberChars+length)) {
michael@0 993 break; // parsing error
michael@0 994 }
michael@0 995 addArgDoublePart(numericValue, start, length, errorCode);
michael@0 996 return;
michael@0 997 }
michael@0 998 setParseError(parseError, start /*, limit*/); // Bad syntax for numeric value.
michael@0 999 errorCode=U_PATTERN_SYNTAX_ERROR;
michael@0 1000 return;
michael@0 1001 }
michael@0 1002
michael@0 1003 int32_t
michael@0 1004 MessagePattern::skipWhiteSpace(int32_t index) {
michael@0 1005 const UChar *s=msg.getBuffer();
michael@0 1006 int32_t msgLength=msg.length();
michael@0 1007 const UChar *t=PatternProps::skipWhiteSpace(s+index, msgLength-index);
michael@0 1008 return (int32_t)(t-s);
michael@0 1009 }
michael@0 1010
michael@0 1011 int32_t
michael@0 1012 MessagePattern::skipIdentifier(int32_t index) {
michael@0 1013 const UChar *s=msg.getBuffer();
michael@0 1014 int32_t msgLength=msg.length();
michael@0 1015 const UChar *t=PatternProps::skipIdentifier(s+index, msgLength-index);
michael@0 1016 return (int32_t)(t-s);
michael@0 1017 }
michael@0 1018
michael@0 1019 int32_t
michael@0 1020 MessagePattern::skipDouble(int32_t index) {
michael@0 1021 int32_t msgLength=msg.length();
michael@0 1022 while(index<msgLength) {
michael@0 1023 UChar c=msg.charAt(index);
michael@0 1024 // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
michael@0 1025 if((c<0x30 && c!=u_plus && c!=u_minus && c!=u_dot) || (c>0x39 && c!=u_e && c!=u_E && c!=0x221e)) {
michael@0 1026 break;
michael@0 1027 }
michael@0 1028 ++index;
michael@0 1029 }
michael@0 1030 return index;
michael@0 1031 }
michael@0 1032
michael@0 1033 UBool
michael@0 1034 MessagePattern::isArgTypeChar(UChar32 c) {
michael@0 1035 return (u_a<=c && c<=u_z) || (u_A<=c && c<=u_Z);
michael@0 1036 }
michael@0 1037
michael@0 1038 UBool
michael@0 1039 MessagePattern::isChoice(int32_t index) {
michael@0 1040 UChar c;
michael@0 1041 return
michael@0 1042 ((c=msg.charAt(index++))==u_c || c==u_C) &&
michael@0 1043 ((c=msg.charAt(index++))==u_h || c==u_H) &&
michael@0 1044 ((c=msg.charAt(index++))==u_o || c==u_O) &&
michael@0 1045 ((c=msg.charAt(index++))==u_i || c==u_I) &&
michael@0 1046 ((c=msg.charAt(index++))==u_c || c==u_C) &&
michael@0 1047 ((c=msg.charAt(index))==u_e || c==u_E);
michael@0 1048 }
michael@0 1049
michael@0 1050 UBool
michael@0 1051 MessagePattern::isPlural(int32_t index) {
michael@0 1052 UChar c;
michael@0 1053 return
michael@0 1054 ((c=msg.charAt(index++))==u_p || c==u_P) &&
michael@0 1055 ((c=msg.charAt(index++))==u_l || c==u_L) &&
michael@0 1056 ((c=msg.charAt(index++))==u_u || c==u_U) &&
michael@0 1057 ((c=msg.charAt(index++))==u_r || c==u_R) &&
michael@0 1058 ((c=msg.charAt(index++))==u_a || c==u_A) &&
michael@0 1059 ((c=msg.charAt(index))==u_l || c==u_L);
michael@0 1060 }
michael@0 1061
michael@0 1062 UBool
michael@0 1063 MessagePattern::isSelect(int32_t index) {
michael@0 1064 UChar c;
michael@0 1065 return
michael@0 1066 ((c=msg.charAt(index++))==u_s || c==u_S) &&
michael@0 1067 ((c=msg.charAt(index++))==u_e || c==u_E) &&
michael@0 1068 ((c=msg.charAt(index++))==u_l || c==u_L) &&
michael@0 1069 ((c=msg.charAt(index++))==u_e || c==u_E) &&
michael@0 1070 ((c=msg.charAt(index++))==u_c || c==u_C) &&
michael@0 1071 ((c=msg.charAt(index))==u_t || c==u_T);
michael@0 1072 }
michael@0 1073
michael@0 1074 UBool
michael@0 1075 MessagePattern::isOrdinal(int32_t index) {
michael@0 1076 UChar c;
michael@0 1077 return
michael@0 1078 ((c=msg.charAt(index++))==u_o || c==u_O) &&
michael@0 1079 ((c=msg.charAt(index++))==u_r || c==u_R) &&
michael@0 1080 ((c=msg.charAt(index++))==u_d || c==u_D) &&
michael@0 1081 ((c=msg.charAt(index++))==u_i || c==u_I) &&
michael@0 1082 ((c=msg.charAt(index++))==u_n || c==u_N) &&
michael@0 1083 ((c=msg.charAt(index++))==u_a || c==u_A) &&
michael@0 1084 ((c=msg.charAt(index))==u_l || c==u_L);
michael@0 1085 }
michael@0 1086
michael@0 1087 UBool
michael@0 1088 MessagePattern::inMessageFormatPattern(int32_t nestingLevel) {
michael@0 1089 return nestingLevel>0 || partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START;
michael@0 1090 }
michael@0 1091
michael@0 1092 UBool
michael@0 1093 MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) {
michael@0 1094 return
michael@0 1095 nestingLevel==1 &&
michael@0 1096 parentType==UMSGPAT_ARG_TYPE_CHOICE &&
michael@0 1097 partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START;
michael@0 1098 }
michael@0 1099
michael@0 1100 void
michael@0 1101 MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length,
michael@0 1102 int32_t value, UErrorCode &errorCode) {
michael@0 1103 if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) {
michael@0 1104 Part &part=partsList->a[partsLength++];
michael@0 1105 part.type=type;
michael@0 1106 part.index=index;
michael@0 1107 part.length=(uint16_t)length;
michael@0 1108 part.value=(int16_t)value;
michael@0 1109 part.limitPartIndex=0;
michael@0 1110 }
michael@0 1111 }
michael@0 1112
michael@0 1113 void
michael@0 1114 MessagePattern::addLimitPart(int32_t start,
michael@0 1115 UMessagePatternPartType type, int32_t index, int32_t length,
michael@0 1116 int32_t value, UErrorCode &errorCode) {
michael@0 1117 partsList->a[start].limitPartIndex=partsLength;
michael@0 1118 addPart(type, index, length, value, errorCode);
michael@0 1119 }
michael@0 1120
michael@0 1121 void
michael@0 1122 MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length,
michael@0 1123 UErrorCode &errorCode) {
michael@0 1124 if(U_FAILURE(errorCode)) {
michael@0 1125 return;
michael@0 1126 }
michael@0 1127 int32_t numericIndex=numericValuesLength;
michael@0 1128 if(numericValuesList==NULL) {
michael@0 1129 numericValuesList=new MessagePatternDoubleList();
michael@0 1130 if(numericValuesList==NULL) {
michael@0 1131 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 1132 return;
michael@0 1133 }
michael@0 1134 } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) {
michael@0 1135 return;
michael@0 1136 } else {
michael@0 1137 if(numericIndex>Part::MAX_VALUE) {
michael@0 1138 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 1139 return;
michael@0 1140 }
michael@0 1141 }
michael@0 1142 numericValuesList->a[numericValuesLength++]=numericValue;
michael@0 1143 addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode);
michael@0 1144 }
michael@0 1145
michael@0 1146 void
michael@0 1147 MessagePattern::setParseError(UParseError *parseError, int32_t index) {
michael@0 1148 if(parseError==NULL) {
michael@0 1149 return;
michael@0 1150 }
michael@0 1151 parseError->offset=index;
michael@0 1152
michael@0 1153 // Set preContext to some of msg before index.
michael@0 1154 // Avoid splitting a surrogate pair.
michael@0 1155 int32_t length=index;
michael@0 1156 if(length>=U_PARSE_CONTEXT_LEN) {
michael@0 1157 length=U_PARSE_CONTEXT_LEN-1;
michael@0 1158 if(length>0 && U16_IS_TRAIL(msg[index-length])) {
michael@0 1159 --length;
michael@0 1160 }
michael@0 1161 }
michael@0 1162 msg.extract(index-length, length, parseError->preContext);
michael@0 1163 parseError->preContext[length]=0;
michael@0 1164
michael@0 1165 // Set postContext to some of msg starting at index.
michael@0 1166 length=msg.length()-index;
michael@0 1167 if(length>=U_PARSE_CONTEXT_LEN) {
michael@0 1168 length=U_PARSE_CONTEXT_LEN-1;
michael@0 1169 if(length>0 && U16_IS_LEAD(msg[index+length-1])) {
michael@0 1170 --length;
michael@0 1171 }
michael@0 1172 }
michael@0 1173 msg.extract(index, length, parseError->postContext);
michael@0 1174 parseError->postContext[length]=0;
michael@0 1175 }
michael@0 1176
michael@0 1177 // MessageImpl ------------------------------------------------------------- ***
michael@0 1178
michael@0 1179 void
michael@0 1180 MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
michael@0 1181 UnicodeString &sb) {
michael@0 1182 int32_t doubleApos=-1;
michael@0 1183 for(;;) {
michael@0 1184 int32_t i=s.indexOf(u_apos, start);
michael@0 1185 if(i<0 || i>=limit) {
michael@0 1186 sb.append(s, start, limit-start);
michael@0 1187 break;
michael@0 1188 }
michael@0 1189 if(i==doubleApos) {
michael@0 1190 // Double apostrophe at start-1 and start==i, append one.
michael@0 1191 sb.append(u_apos);
michael@0 1192 ++start;
michael@0 1193 doubleApos=-1;
michael@0 1194 } else {
michael@0 1195 // Append text between apostrophes and skip this one.
michael@0 1196 sb.append(s, start, i-start);
michael@0 1197 doubleApos=start=i+1;
michael@0 1198 }
michael@0 1199 }
michael@0 1200 }
michael@0 1201
michael@0 1202 // Ported from second half of ICU4J SelectFormat.format(String).
michael@0 1203 UnicodeString &
michael@0 1204 MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
michael@0 1205 int32_t msgStart,
michael@0 1206 UnicodeString &result) {
michael@0 1207 const UnicodeString &msgString=msgPattern.getPatternString();
michael@0 1208 int32_t prevIndex=msgPattern.getPart(msgStart).getLimit();
michael@0 1209 for(int32_t i=msgStart;;) {
michael@0 1210 const MessagePattern::Part &part=msgPattern.getPart(++i);
michael@0 1211 UMessagePatternPartType type=part.getType();
michael@0 1212 int32_t index=part.getIndex();
michael@0 1213 if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
michael@0 1214 return result.append(msgString, prevIndex, index-prevIndex);
michael@0 1215 } else if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
michael@0 1216 result.append(msgString, prevIndex, index-prevIndex);
michael@0 1217 prevIndex=part.getLimit();
michael@0 1218 } else if(type==UMSGPAT_PART_TYPE_ARG_START) {
michael@0 1219 result.append(msgString, prevIndex, index-prevIndex);
michael@0 1220 prevIndex=index;
michael@0 1221 i=msgPattern.getLimitPartIndex(i);
michael@0 1222 index=msgPattern.getPart(i).getLimit();
michael@0 1223 appendReducedApostrophes(msgString, prevIndex, index, result);
michael@0 1224 prevIndex=index;
michael@0 1225 }
michael@0 1226 }
michael@0 1227 }
michael@0 1228
michael@0 1229 U_NAMESPACE_END
michael@0 1230
michael@0 1231 #endif // !UCONFIG_NO_FORMATTING

mercurial