intl/icu/source/common/unicode/messagepattern.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2011-2013, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 * file name: messagepattern.h
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2011mar14
michael@0 12 * created by: Markus W. Scherer
michael@0 13 */
michael@0 14
michael@0 15 #ifndef __MESSAGEPATTERN_H__
michael@0 16 #define __MESSAGEPATTERN_H__
michael@0 17
michael@0 18 /**
michael@0 19 * \file
michael@0 20 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
michael@0 21 */
michael@0 22
michael@0 23 #include "unicode/utypes.h"
michael@0 24
michael@0 25 #if !UCONFIG_NO_FORMATTING
michael@0 26
michael@0 27 #include "unicode/parseerr.h"
michael@0 28 #include "unicode/unistr.h"
michael@0 29
michael@0 30 /**
michael@0 31 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
michael@0 32 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
michael@0 33 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
michael@0 34 * <p>
michael@0 35 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
michael@0 36 * even when the pair is between two single, text-quoting apostrophes.
michael@0 37 * <p>
michael@0 38 * The following table shows examples of desired MessageFormat.format() output
michael@0 39 * with the pattern strings that yield that output.
michael@0 40 * <p>
michael@0 41 * <table>
michael@0 42 * <tr>
michael@0 43 * <th>Desired output</th>
michael@0 44 * <th>DOUBLE_OPTIONAL</th>
michael@0 45 * <th>DOUBLE_REQUIRED</th>
michael@0 46 * </tr>
michael@0 47 * <tr>
michael@0 48 * <td>I see {many}</td>
michael@0 49 * <td>I see '{many}'</td>
michael@0 50 * <td>(same)</td>
michael@0 51 * </tr>
michael@0 52 * <tr>
michael@0 53 * <td>I said {'Wow!'}</td>
michael@0 54 * <td>I said '{''Wow!''}'</td>
michael@0 55 * <td>(same)</td>
michael@0 56 * </tr>
michael@0 57 * <tr>
michael@0 58 * <td>I don't know</td>
michael@0 59 * <td>I don't know OR<br> I don''t know</td>
michael@0 60 * <td>I don''t know</td>
michael@0 61 * </tr>
michael@0 62 * </table>
michael@0 63 * @stable ICU 4.8
michael@0 64 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
michael@0 65 */
michael@0 66 enum UMessagePatternApostropheMode {
michael@0 67 /**
michael@0 68 * A literal apostrophe is represented by
michael@0 69 * either a single or a double apostrophe pattern character.
michael@0 70 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
michael@0 71 * if it immediately precedes a curly brace ({ or }),
michael@0 72 * or a pipe symbol | if inside a choice format,
michael@0 73 * or a pound symbol # if inside a plural format.
michael@0 74 * <p>
michael@0 75 * This is the default behavior starting with ICU 4.8.
michael@0 76 * @stable ICU 4.8
michael@0 77 */
michael@0 78 UMSGPAT_APOS_DOUBLE_OPTIONAL,
michael@0 79 /**
michael@0 80 * A literal apostrophe must be represented by
michael@0 81 * a pair of adjacent apostrophe pattern characters.
michael@0 82 * A single apostrophe always starts quoted literal text.
michael@0 83 * <p>
michael@0 84 * This is the behavior of ICU 4.6 and earlier, and of the JDK.
michael@0 85 * @stable ICU 4.8
michael@0 86 */
michael@0 87 UMSGPAT_APOS_DOUBLE_REQUIRED
michael@0 88 };
michael@0 89 /** Lets the apostrophe-mode type be named without the enum keyword.
michael@0 90 * @stable ICU 4.8
michael@0 91 */
michael@0 92 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
michael@0 93
michael@0 94 /**
michael@0 95 * MessagePattern::Part type constants.
michael@0 96 * @stable ICU 4.8
michael@0 97 */
michael@0 98 enum UMessagePatternPartType {
michael@0 99 /**
michael@0 100 * Start of a message pattern (main or nested).
michael@0 101 * The length is 0 for the top-level message
michael@0 102 * and for a choice argument sub-message, otherwise 1 for the '{'.
michael@0 103 * The value indicates the nesting level, starting with 0 for the main message.
michael@0 104 * <p>
michael@0 105 * There is always a later MSG_LIMIT part.
michael@0 106 * @stable ICU 4.8
michael@0 107 */
michael@0 108 UMSGPAT_PART_TYPE_MSG_START,
michael@0 109 /**
michael@0 110 * End of a message pattern (main or nested).
michael@0 111 * The length is 0 for the top-level message and
michael@0 112 * the last sub-message of a choice argument,
michael@0 113 * otherwise 1 for the '}' or (in a choice argument style) the '|'.
michael@0 114 * The value indicates the nesting level, starting with 0 for the main message.
michael@0 115 * @stable ICU 4.8
michael@0 116 */
michael@0 117 UMSGPAT_PART_TYPE_MSG_LIMIT,
michael@0 118 /**
michael@0 119 * Indicates a substring of the pattern string which is to be skipped when formatting.
michael@0 120 * For example, an apostrophe that begins or ends quoted text
michael@0 121 * would be indicated with such a part.
michael@0 122 * The value is undefined and currently always 0.
michael@0 123 * @stable ICU 4.8
michael@0 124 */
michael@0 125 UMSGPAT_PART_TYPE_SKIP_SYNTAX,
michael@0 126 /**
michael@0 127 * Indicates that a syntax character needs to be inserted for auto-quoting.
michael@0 128 * The length is 0.
michael@0 129 * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
michael@0 130 * @stable ICU 4.8
michael@0 131 */
michael@0 132 UMSGPAT_PART_TYPE_INSERT_CHAR,
michael@0 133 /**
michael@0 134 * Indicates a syntactic (non-escaped) # symbol in a plural variant.
michael@0 135 * When formatting, replace this part's substring with the
michael@0 136 * (value-offset) for the plural argument value.
michael@0 137 * The value is undefined and currently always 0.
michael@0 138 * @stable ICU 4.8
michael@0 139 */
michael@0 140 UMSGPAT_PART_TYPE_REPLACE_NUMBER,
michael@0 141 /**
michael@0 142 * Start of an argument.
michael@0 143 * The length is 1 for the '{'.
michael@0 144 * The value is the ordinal value of the ArgType. Use Part::getArgType().
michael@0 145 * <p>
michael@0 146 * This part is followed by either an ARG_NUMBER or ARG_NAME,
michael@0 147 * followed by optional argument sub-parts (see UMessagePatternArgType constants)
michael@0 148 * and finally an ARG_LIMIT part.
michael@0 149 * @stable ICU 4.8
michael@0 150 */
michael@0 151 UMSGPAT_PART_TYPE_ARG_START,
michael@0 152 /**
michael@0 153 * End of an argument.
michael@0 154 * The length is 1 for the '}'.
michael@0 155 * The value is the ordinal value of the ArgType. Use Part::getArgType().
michael@0 156 * @stable ICU 4.8
michael@0 157 */
michael@0 158 UMSGPAT_PART_TYPE_ARG_LIMIT,
michael@0 159 /**
michael@0 160 * The argument number, provided by the value.
michael@0 161 * @stable ICU 4.8
michael@0 162 */
michael@0 163 UMSGPAT_PART_TYPE_ARG_NUMBER,
michael@0 164 /**
michael@0 165 * The argument name.
michael@0 166 * The value is undefined and currently always 0.
michael@0 167 * @stable ICU 4.8
michael@0 168 */
michael@0 169 UMSGPAT_PART_TYPE_ARG_NAME,
michael@0 170 /**
michael@0 171 * The argument type.
michael@0 172 * The value is undefined and currently always 0.
michael@0 173 * @stable ICU 4.8
michael@0 174 */
michael@0 175 UMSGPAT_PART_TYPE_ARG_TYPE,
michael@0 176 /**
michael@0 177 * The argument style text.
michael@0 178 * The value is undefined and currently always 0.
michael@0 179 * @stable ICU 4.8
michael@0 180 */
michael@0 181 UMSGPAT_PART_TYPE_ARG_STYLE,
michael@0 182 /**
michael@0 183 * A selector substring in a "complex" argument style.
michael@0 184 * The value is undefined and currently always 0.
michael@0 185 * @stable ICU 4.8
michael@0 186 */
michael@0 187 UMSGPAT_PART_TYPE_ARG_SELECTOR,
michael@0 188 /**
michael@0 189 * An integer value, for example the offset or an explicit selector value
michael@0 190 * in a PluralFormat style.
michael@0 191 * The part value is the integer value.
michael@0 192 * @stable ICU 4.8
michael@0 193 */
michael@0 194 UMSGPAT_PART_TYPE_ARG_INT,
michael@0 195 /**
michael@0 196 * A numeric value, for example the offset or an explicit selector value
michael@0 197 * in a PluralFormat style.
michael@0 198 * The part value is an index into an internal array of numeric values;
michael@0 199 * use MessagePattern::getNumericValue().
michael@0 200 * @stable ICU 4.8
michael@0 201 */
michael@0 202 UMSGPAT_PART_TYPE_ARG_DOUBLE
michael@0 203 };
michael@0 204 /** Lets the part-type enum be named without the enum keyword.
michael@0 205 * @stable ICU 4.8
michael@0 206 */
michael@0 207 typedef enum UMessagePatternPartType UMessagePatternPartType;
michael@0 208
michael@0 209 /**
michael@0 210 * Argument type constants.
michael@0 211 * Returned by Part::getArgType() for ARG_START and ARG_LIMIT parts.
michael@0 212 *
michael@0 213 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
michael@0 214 * with a nesting level one greater than the surrounding message.
michael@0 215 * @stable ICU 4.8
michael@0 216 */
michael@0 217 enum UMessagePatternArgType {
michael@0 218 /**
michael@0 219 * The argument has no specified type.
michael@0 220 * @stable ICU 4.8
michael@0 221 */
michael@0 222 UMSGPAT_ARG_TYPE_NONE,
michael@0 223 /**
michael@0 224 * The argument has a "simple" type which is provided by the ARG_TYPE part.
michael@0 225 * An ARG_STYLE part might follow that.
michael@0 226 * @stable ICU 4.8
michael@0 227 */
michael@0 228 UMSGPAT_ARG_TYPE_SIMPLE,
michael@0 229 /**
michael@0 230 * The argument is a ChoiceFormat with one or more
michael@0 231 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
michael@0 232 * @stable ICU 4.8
michael@0 233 */
michael@0 234 UMSGPAT_ARG_TYPE_CHOICE,
michael@0 235 /**
michael@0 236 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
michael@0 237 * (e.g., offset:1)
michael@0 238 * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
michael@0 239 * If the selector has an explicit value (e.g., =2), then
michael@0 240 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
michael@0 241 * Otherwise the message immediately follows the ARG_SELECTOR.
michael@0 242 * @stable ICU 4.8
michael@0 243 */
michael@0 244 UMSGPAT_ARG_TYPE_PLURAL,
michael@0 245 /**
michael@0 246 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
michael@0 247 * @stable ICU 4.8
michael@0 248 */
michael@0 249 UMSGPAT_ARG_TYPE_SELECT,
michael@0 250 /**
michael@0 251 * The argument is an ordinal-number PluralFormat
michael@0 252 * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
michael@0 253 * @stable ICU 50
michael@0 254 */
michael@0 255 UMSGPAT_ARG_TYPE_SELECTORDINAL
michael@0 256 };
michael@0 257 /** Lets the argument-type enum be named without the enum keyword.
michael@0 258 * @stable ICU 4.8
michael@0 259 */
michael@0 260 typedef enum UMessagePatternArgType UMessagePatternArgType;
michael@0 261
michael@0 262 /**
michael@0 263 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
michael@0 264 * Returns TRUE if the argument type has a plural style part sequence and semantics,
michael@0 265 * that is, for UMSGPAT_ARG_TYPE_PLURAL or UMSGPAT_ARG_TYPE_SELECTORDINAL. The macro argument may be evaluated twice.
michael@0 266 * @stable ICU 50
michael@0 267 */
michael@0 268 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
michael@0 269 ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
michael@0 270
michael@0 271 enum {
michael@0 272 /**
michael@0 273 * Return value from MessagePattern::validateArgumentName() for when
michael@0 274 * the string is a valid "pattern identifier" but not a number.
michael@0 275 * @stable ICU 4.8
michael@0 276 */
michael@0 277 UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
michael@0 278
michael@0 279 /**
michael@0 280 * Return value from MessagePattern::validateArgumentName() for when
michael@0 281 * the string is invalid.
michael@0 282 * It might not be a valid "pattern identifier",
michael@0 283 * or it has only ASCII digits but there is a leading zero or the number is too large.
michael@0 284 * @stable ICU 4.8
michael@0 285 */
michael@0 286 UMSGPAT_ARG_NAME_NOT_VALID=-2
michael@0 287 };
michael@0 288
michael@0 289 /**
michael@0 290 * Special value that is returned by MessagePattern::getNumericValue() when no
michael@0 291 * numeric value is defined for a part.
michael@0 292 * @see MessagePattern::getNumericValue()
michael@0 293 * @stable ICU 4.8
michael@0 294 */
michael@0 295 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
michael@0 296
michael@0 297 U_NAMESPACE_BEGIN
michael@0 298
michael@0 299 class MessagePatternDoubleList;
michael@0 300 class MessagePatternPartsList;
michael@0 301
michael@0 302 /**
michael@0 303 * Parses and represents ICU MessageFormat patterns.
michael@0 304 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
michael@0 305 * Used in the implementations of those classes as well as in tools
michael@0 306 * for message validation, translation and format conversion.
michael@0 307 * <p>
michael@0 308 * The parser handles all syntax relevant for identifying message arguments.
michael@0 309 * This includes "complex" arguments whose style strings contain
michael@0 310 * nested MessageFormat pattern substrings.
michael@0 311 * For "simple" arguments (with no nested MessageFormat pattern substrings),
michael@0 312 * the argument style is not parsed any further.
michael@0 313 * <p>
michael@0 314 * The parser handles named and numbered message arguments and allows both in one message.
michael@0 315 * <p>
michael@0 316 * Once a pattern has been parsed successfully, iterate through the parsed data
michael@0 317 * with countParts(), getPart() and related methods.
michael@0 318 * <p>
michael@0 319 * The data logically represents a parse tree, but is stored and accessed
michael@0 320 * as a list of "parts" for fast and simple parsing and to minimize object allocations.
michael@0 321 * Arguments and nested messages are best handled via recursion.
michael@0 322 * For every _START "part", MessagePattern::getLimitPartIndex() efficiently returns
michael@0 323 * the index of the corresponding _LIMIT "part".
michael@0 324 * <p>
michael@0 325 * List of "parts":
michael@0 326 * <pre>
michael@0 327 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
michael@0 328 * argument = noneArg | simpleArg | complexArg
michael@0 329 * complexArg = choiceArg | pluralArg | selectArg
michael@0 330 *
michael@0 331 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
michael@0 332 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
michael@0 333 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
michael@0 334 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
michael@0 335 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
michael@0 336 *
michael@0 337 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
michael@0 338 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
michael@0 339 * selectStyle = (ARG_SELECTOR message)+
michael@0 340 * </pre>
michael@0 341 * <ul>
michael@0 342 * <li>Literal output text is not represented directly by "parts" but accessed
michael@0 343 * between parts of a message, from one part's getLimit() to the next part's getIndex().
michael@0 344 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
michael@0 345 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
michael@0 346 * the less-than-or-equal-to sign (U+2264).
michael@0 347 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
michael@0 348 * The optional numeric Part between each (ARG_SELECTOR, message) pair
michael@0 349 * is the value of an explicit-number selector like "=2",
michael@0 350 * otherwise the selector is a non-numeric identifier.
michael@0 351 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
michael@0 352 * </ul>
michael@0 353 * <p>
michael@0 354 * This class is not intended for public subclassing.
michael@0 355 *
michael@0 356 * @stable ICU 4.8
michael@0 357 */
michael@0 358 class U_COMMON_API MessagePattern : public UObject {
michael@0 359 public:
michael@0 360 /**
michael@0 361 * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
michael@0 362 * @param errorCode Standard ICU error code. Its input value must
michael@0 363 * pass the U_SUCCESS() test, or else the function returns
michael@0 364 * immediately. Check for U_FAILURE() on output or use with
michael@0 365 * function chaining. (See User Guide for details.)
michael@0 366 * @stable ICU 4.8
michael@0 367 */
michael@0 368 MessagePattern(UErrorCode &errorCode);
michael@0 369
michael@0 370 /**
michael@0 371 * Constructs an empty MessagePattern.
michael@0 372 * @param mode Explicit UMessagePatternApostropheMode.
michael@0 373 * @param errorCode Standard ICU error code. Its input value must
michael@0 374 * pass the U_SUCCESS() test, or else the function returns
michael@0 375 * immediately. Check for U_FAILURE() on output or use with
michael@0 376 * function chaining. (See User Guide for details.)
michael@0 377 * @stable ICU 4.8
michael@0 378 */
michael@0 379 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
michael@0 380
michael@0 381 /**
michael@0 382 * Constructs a MessagePattern with default UMessagePatternApostropheMode and
michael@0 383 * parses the MessageFormat pattern string.
michael@0 384 * @param pattern a MessageFormat pattern string
michael@0 385 * @param parseError Struct to receive information on the position
michael@0 386 * of an error within the pattern.
michael@0 387 * Can be NULL.
michael@0 388 * @param errorCode Standard ICU error code. Its input value must
michael@0 389 * pass the U_SUCCESS() test, or else the function returns
michael@0 390 * immediately. Check for U_FAILURE() on output or use with
michael@0 391 * function chaining. (See User Guide for details.)
michael@0 392 * TODO: turn @throws into UErrorCode specifics?
michael@0 393 * @throws IllegalArgumentException for syntax errors in the pattern string
michael@0 394 * @throws IndexOutOfBoundsException if certain limits are exceeded
michael@0 395 * (e.g., argument number too high, argument name too long, etc.)
michael@0 396 * @throws NumberFormatException if a number could not be parsed
michael@0 397 * @stable ICU 4.8
michael@0 398 */
michael@0 399 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
michael@0 400
michael@0 401 /**
michael@0 402 * Copy constructor.
michael@0 403 * @param other Object to copy.
michael@0 404 * @stable ICU 4.8
michael@0 405 */
michael@0 406 MessagePattern(const MessagePattern &other);
michael@0 407
michael@0 408 /**
michael@0 409 * Assignment operator.
michael@0 410 * @param other Object to copy.
michael@0 411 * @return *this=other
michael@0 412 * @stable ICU 4.8
michael@0 413 */
michael@0 414 MessagePattern &operator=(const MessagePattern &other);
michael@0 415
michael@0 416 /**
michael@0 417 * Destructor.
michael@0 418 * @stable ICU 4.8
michael@0 419 */
michael@0 420 virtual ~MessagePattern();
michael@0 421
michael@0 422 /**
michael@0 423 * Parses a MessageFormat pattern string.
michael@0 424 * @param pattern a MessageFormat pattern string
michael@0 425 * @param parseError Struct to receive information on the position
michael@0 426 * of an error within the pattern.
michael@0 427 * Can be NULL.
michael@0 428 * @param errorCode Standard ICU error code. Its input value must
michael@0 429 * pass the U_SUCCESS() test, or else the function returns
michael@0 430 * immediately. Check for U_FAILURE() on output or use with
michael@0 431 * function chaining. (See User Guide for details.)
michael@0 432 * @return *this
michael@0 433 * @throws IllegalArgumentException for syntax errors in the pattern string
michael@0 434 * @throws IndexOutOfBoundsException if certain limits are exceeded
michael@0 435 * (e.g., argument number too high, argument name too long, etc.)
michael@0 436 * @throws NumberFormatException if a number could not be parsed
michael@0 437 * @stable ICU 4.8
michael@0 438 */
michael@0 439 MessagePattern &parse(const UnicodeString &pattern,
michael@0 440 UParseError *parseError, UErrorCode &errorCode);
michael@0 441
michael@0 442 /**
michael@0 443 * Parses a ChoiceFormat pattern string.
michael@0 444 * @param pattern a ChoiceFormat pattern string
michael@0 445 * @param parseError Struct to receive information on the position
michael@0 446 * of an error within the pattern.
michael@0 447 * Can be NULL.
michael@0 448 * @param errorCode Standard ICU error code. Its input value must
michael@0 449 * pass the U_SUCCESS() test, or else the function returns
michael@0 450 * immediately. Check for U_FAILURE() on output or use with
michael@0 451 * function chaining. (See User Guide for details.)
michael@0 452 * @return *this
michael@0 453 * @throws IllegalArgumentException for syntax errors in the pattern string
michael@0 454 * @throws IndexOutOfBoundsException if certain limits are exceeded
michael@0 455 * (e.g., argument number too high, argument name too long, etc.)
michael@0 456 * @throws NumberFormatException if a number could not be parsed
michael@0 457 * @stable ICU 4.8
michael@0 458 */
michael@0 459 MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
michael@0 460 UParseError *parseError, UErrorCode &errorCode);
michael@0 461
michael@0 462 /**
michael@0 463 * Parses a PluralFormat pattern string.
michael@0 464 * @param pattern a PluralFormat pattern string
michael@0 465 * @param parseError Struct to receive information on the position
michael@0 466 * of an error within the pattern.
michael@0 467 * Can be NULL.
michael@0 468 * @param errorCode Standard ICU error code. Its input value must
michael@0 469 * pass the U_SUCCESS() test, or else the function returns
michael@0 470 * immediately. Check for U_FAILURE() on output or use with
michael@0 471 * function chaining. (See User Guide for details.)
michael@0 472 * @return *this
michael@0 473 * @throws IllegalArgumentException for syntax errors in the pattern string
michael@0 474 * @throws IndexOutOfBoundsException if certain limits are exceeded
michael@0 475 * (e.g., argument number too high, argument name too long, etc.)
michael@0 476 * @throws NumberFormatException if a number could not be parsed
michael@0 477 * @stable ICU 4.8
michael@0 478 */
michael@0 479 MessagePattern &parsePluralStyle(const UnicodeString &pattern,
michael@0 480 UParseError *parseError, UErrorCode &errorCode);
michael@0 481
michael@0 482 /**
michael@0 483 * Parses a SelectFormat pattern string.
michael@0 484 * @param pattern a SelectFormat pattern string
michael@0 485 * @param parseError Struct to receive information on the position
michael@0 486 * of an error within the pattern.
michael@0 487 * Can be NULL.
michael@0 488 * @param errorCode Standard ICU error code. Its input value must
michael@0 489 * pass the U_SUCCESS() test, or else the function returns
michael@0 490 * immediately. Check for U_FAILURE() on output or use with
michael@0 491 * function chaining. (See User Guide for details.)
michael@0 492 * @return *this
michael@0 493 * @throws IllegalArgumentException for syntax errors in the pattern string
michael@0 494 * @throws IndexOutOfBoundsException if certain limits are exceeded
michael@0 495 * (e.g., argument number too high, argument name too long, etc.)
michael@0 496 * @throws NumberFormatException if a number could not be parsed
michael@0 497 * @stable ICU 4.8
michael@0 498 */
michael@0 499 MessagePattern &parseSelectStyle(const UnicodeString &pattern,
michael@0 500 UParseError *parseError, UErrorCode &errorCode);
michael@0 501
michael@0 502 /**
michael@0 503 * Clears this MessagePattern.
michael@0 504 * countParts() will return 0.
michael@0 505 * @stable ICU 4.8
michael@0 506 */
michael@0 507 void clear();
michael@0 508
michael@0 509 /**
michael@0 510 * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
michael@0 511 * countParts() will return 0.
michael@0 512 * @param mode The new UMessagePatternApostropheMode.
michael@0 513 * @stable ICU 4.8
michael@0 514 */
michael@0 515 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
michael@0 516 clear();
michael@0 517 aposMode=mode;
michael@0 518 }
michael@0 519
michael@0 520 /**
michael@0 521 * @param other another object to compare with.
michael@0 522 * @return TRUE if this object is equivalent to the other one.
michael@0 523 * @stable ICU 4.8
michael@0 524 */
michael@0 525 UBool operator==(const MessagePattern &other) const;
michael@0 526
michael@0 527 /**
michael@0 528 * @param other another object to compare with.
michael@0 529 * @return FALSE if this object is equivalent to the other one.
michael@0 530 * @stable ICU 4.8
michael@0 531 */
michael@0 532 inline UBool operator!=(const MessagePattern &other) const {
michael@0 533 return !operator==(other);
michael@0 534 }
michael@0 535
michael@0 536 /**
michael@0 537 * @return A hash code for this object.
michael@0 538 * @stable ICU 4.8
michael@0 539 */
michael@0 540 int32_t hashCode() const;
michael@0 541
michael@0 542 /**
michael@0 543 * @return this instance's UMessagePatternApostropheMode.
michael@0 544 * @stable ICU 4.8
michael@0 545 */
michael@0 546 UMessagePatternApostropheMode getApostropheMode() const {
michael@0 547 return aposMode;
michael@0 548 }
michael@0 549
michael@0 550 // Java has package-private jdkAposMode() here.
michael@0 551 // In C++, this is declared in the MessageImpl class.
michael@0 552
michael@0 553 /**
michael@0 554 * @return the parsed pattern string, by reference (never null in C++; presumably empty if none was parsed).
michael@0 555 * @stable ICU 4.8
michael@0 556 */
michael@0 557 const UnicodeString &getPatternString() const {
michael@0 558 return msg;
michael@0 559 }
michael@0 560
michael@0 561 /**
michael@0 562 * Does the parsed pattern have named arguments like {first_name}?
michael@0 563 * @return TRUE if the parsed pattern has at least one named argument.
michael@0 564 * @stable ICU 4.8
michael@0 565 */
michael@0 566 UBool hasNamedArguments() const {
michael@0 567 return hasArgNames;
michael@0 568 }
michael@0 569
michael@0 570 /**
michael@0 571 * Does the parsed pattern have numbered arguments like {2}?
michael@0 572 * @return TRUE if the parsed pattern has at least one numbered argument.
michael@0 573 * @stable ICU 4.8
michael@0 574 */
michael@0 575 UBool hasNumberedArguments() const {
michael@0 576 return hasArgNumbers;
michael@0 577 }
michael@0 578
michael@0 579 /**
michael@0 580 * Validates and parses an argument name or argument number string.
michael@0 581 * An argument name must be a "pattern identifier", that is, it must contain
michael@0 582 * no Unicode Pattern_Syntax or Pattern_White_Space characters.
michael@0 583 * If it only contains ASCII digits, then it must be a small integer with no leading zero.
michael@0 584 * @param name Input string.
michael@0 585 * @return &gt;=0 if the name is a valid number,
michael@0 586 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
michael@0 587 * ARG_NAME_NOT_VALID (-2) if it is neither.
michael@0 588 * @stable ICU 4.8
michael@0 589 */
michael@0 590 static int32_t validateArgumentName(const UnicodeString &name);
michael@0 591
michael@0 592 /**
michael@0 593 * Returns a version of the parsed pattern string where each ASCII apostrophe
michael@0 594 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
michael@0 595 * <p>
michael@0 596 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
michael@0 597 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
michael@0 598 * @return the deep-auto-quoted version of the parsed pattern string.
michael@0 599 * @see MessageFormat::autoQuoteApostrophe()
michael@0 600 * @stable ICU 4.8
michael@0 601 */
michael@0 602 UnicodeString autoQuoteApostropheDeep() const;
michael@0 603
michael@0 604 class Part;
michael@0 605
michael@0 606 /**
michael@0 607 * Returns the number of "parts" created by parsing the pattern string.
michael@0 608 * Returns 0 if no pattern has been parsed or clear() was called.
michael@0 609 * @return the number of pattern parts.
michael@0 610 * @stable ICU 4.8
michael@0 611 */
michael@0 612 int32_t countParts() const {
michael@0 613 return partsLength;
michael@0 614 }
michael@0 615
michael@0 616 /**
michael@0 617 * Gets the i-th pattern "part".
michael@0 618 * @param i The index of the Part data. (0..countParts()-1)
michael@0 619 * @return the i-th pattern "part".
michael@0 620 * @stable ICU 4.8
michael@0 621 */
michael@0 622 const Part &getPart(int32_t i) const {
michael@0 623 return parts[i];
michael@0 624 }
michael@0 625
michael@0 626 /**
michael@0 627 * Returns the UMessagePatternPartType of the i-th pattern "part".
michael@0 628 * Convenience method for getPart(i).getType().
michael@0 629 * @param i The index of the Part data. (0..countParts()-1)
michael@0 630 * @return The UMessagePatternPartType of the i-th Part.
michael@0 631 * @stable ICU 4.8
michael@0 632 */
michael@0 633 UMessagePatternPartType getPartType(int32_t i) const {
michael@0 634 return getPart(i).type;
michael@0 635 }
michael@0 636
michael@0 637 /**
michael@0 638 * Returns the pattern index of the specified pattern "part".
michael@0 639 * Convenience method for getPart(partIndex).getIndex().
michael@0 640 * @param partIndex The index of the Part data. (0..countParts()-1)
michael@0 641 * @return The pattern index of this Part.
michael@0 642 * @stable ICU 4.8
michael@0 643 */
michael@0 644 int32_t getPatternIndex(int32_t partIndex) const {
michael@0 645 return getPart(partIndex).index;
michael@0 646 }
michael@0 647
michael@0 648 /**
michael@0 649 * Returns the substring of the pattern string indicated by the Part.
michael@0 650 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
michael@0 651 * @param part a part of this MessagePattern.
michael@0 652 * @return the substring associated with part.
michael@0 653 * @stable ICU 4.8
michael@0 654 */
michael@0 655 UnicodeString getSubstring(const Part &part) const {
michael@0 656 return msg.tempSubString(part.index, part.length);
michael@0 657 }
michael@0 658
michael@0 659 /**
michael@0 660 * Compares the part's substring with the input string s.
michael@0 661 * @param part a part of this MessagePattern.
michael@0 662 * @param s a string.
michael@0 663 * @return TRUE if getSubstring(part).equals(s).
michael@0 664 * @stable ICU 4.8
michael@0 665 */
michael@0 666 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
michael@0 667 return 0==msg.compare(part.index, part.length, s);
michael@0 668 }
michael@0 669
michael@0 670 /**
michael@0 671 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
michael@0 672 * @param part a part of this MessagePattern.
michael@0 673 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
michael@0 674 * @stable ICU 4.8
michael@0 675 */
michael@0 676 double getNumericValue(const Part &part) const;
michael@0 677
michael@0 678 /**
michael@0 679 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
michael@0 680 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
michael@0 681 * @return the "offset:" value.
michael@0 682 * @stable ICU 4.8
michael@0 683 */
michael@0 684 double getPluralOffset(int32_t pluralStart) const;
michael@0 685
michael@0 686 /**
michael@0 687 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
michael@0 688 * @param start The index of some Part data (0..countParts()-1);
michael@0 689 * this Part should be of Type ARG_START or MSG_START.
michael@0 690 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
michael@0 691 * or start itself if getPartType(start)!=ARG|MSG_START.
michael@0 692 * @stable ICU 4.8
michael@0 693 */
michael@0 694 int32_t getLimitPartIndex(int32_t start) const {
michael@0 695 const int32_t limitIndex=getPart(start).limitPartIndex;
michael@0 696 // A stored limit that lies before start is not usable as a match,
michael@0 697 // so in that case the start index itself is handed back.
michael@0 698 return (limitIndex>=start) ?
michael@0 699 limitIndex : start;
michael@0 700 }
michael@0 701
michael@0 702 /**
michael@0 703 * A message pattern "part", representing a pattern parsing event.
michael@0 704 * There is a part for the start and end of a message or argument,
michael@0 705 * for quoting and escaping of and with ASCII apostrophes,
michael@0 706 * and for syntax elements of "complex" arguments.
michael@0 707 * @stable ICU 4.8
michael@0 708 */
michael@0 709 class Part : public UMemory {
michael@0 710 public:
michael@0 711 /**
michael@0 712 * Default constructor, do not use: it leaves every field uninitialized.
michael@0 713 * @internal
michael@0 714 */
michael@0 715 Part() {}
michael@0 716
michael@0 717 /**
michael@0 718 * Returns the type of this part.
michael@0 719 * @return the part type.
michael@0 720 * @stable ICU 4.8
michael@0 721 */
michael@0 722 UMessagePatternPartType getType() const {
michael@0 723 return type;
michael@0 724 }
michael@0 725
michael@0 726 /**
michael@0 727 * Returns the pattern string index associated with this Part.
michael@0 728 * @return this part's pattern string index.
michael@0 729 * @stable ICU 4.8
michael@0 730 */
michael@0 731 int32_t getIndex() const {
michael@0 732 return index;
michael@0 733 }
michael@0 734
michael@0 735 /**
michael@0 736 * Returns the length of the pattern substring associated with this Part.
michael@0 737 * This is 0 for some parts.
michael@0 738 * @return this part's pattern substring length.
michael@0 739 * @stable ICU 4.8
michael@0 740 */
michael@0 741 int32_t getLength() const {
michael@0 742 return length; // stored as uint16_t, widened to int32_t on return
michael@0 743 }
michael@0 744
michael@0 745 /**
michael@0 746 * Returns the pattern string limit (exclusive-end) index associated with this Part.
michael@0 747 * Convenience method for getIndex()+getLength().
michael@0 748 * @return this part's pattern string limit index, same as getIndex()+getLength().
michael@0 749 * @stable ICU 4.8
michael@0 750 */
michael@0 751 int32_t getLimit() const {
michael@0 752 return index+length;
michael@0 753 }
michael@0 754
michael@0 755 /**
michael@0 756 * Returns a value associated with this part.
michael@0 757 * See the documentation of each part type for details.
michael@0 758 * @return the part value.
michael@0 759 * @stable ICU 4.8
michael@0 760 */
michael@0 761 int32_t getValue() const {
michael@0 762 return value; // stored as int16_t, widened to int32_t on return
michael@0 763 }
michael@0 764
michael@0 765 /**
michael@0 766 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
michael@0 767 * otherwise UMSGPAT_ARG_TYPE_NONE.
michael@0 768 * @return the argument type for this part.
michael@0 769 * @stable ICU 4.8
michael@0 770 */
michael@0 771 UMessagePatternArgType getArgType() const {
michael@0 772 UMessagePatternPartType partType=getType(); // local name chosen so it does not shadow the 'type' field
michael@0 773 if(partType==UMSGPAT_PART_TYPE_ARG_START || partType==UMSGPAT_PART_TYPE_ARG_LIMIT) {
michael@0 774 return (UMessagePatternArgType)value; // for these two part types, 'value' holds the argument type
michael@0 775 } else {
michael@0 776 return UMSGPAT_ARG_TYPE_NONE;
michael@0 777 }
michael@0 778 }
michael@0 779
michael@0 780 /**
michael@0 781 * Indicates whether the Part type has a numeric value.
michael@0 782 * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
michael@0 783 * @param type The Part type to be tested.
michael@0 784 * @return TRUE if the Part type has a numeric value.
michael@0 785 * @stable ICU 4.8
michael@0 786 */
michael@0 787 static UBool hasNumericValue(UMessagePatternPartType type) {
michael@0 788 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
michael@0 789 }
michael@0 790
michael@0 791 /**
michael@0 792 * @param other another object to compare with.
michael@0 793 * @return TRUE if this object is equivalent to the other one.
michael@0 794 * @stable ICU 4.8
michael@0 795 */
michael@0 796 UBool operator==(const Part &other) const;
michael@0 797
michael@0 798 /**
michael@0 799 * @param other another object to compare with.
michael@0 800 * @return FALSE if this object is equivalent to the other one.
michael@0 801 * @stable ICU 4.8
michael@0 802 */
michael@0 803 inline UBool operator!=(const Part &other) const {
michael@0 804 return !operator==(other);
michael@0 805 }
michael@0 806
michael@0 807 /**
michael@0 808 * @return A hash code for this object, mixing type, index, length and value.
michael@0 809 * @stable ICU 4.8
michael@0 810 */
michael@0 811 int32_t hashCode() const {
michael@0 812 return (int32_t)((((uint32_t)type*37+(uint32_t)index)*37+length)*37+(uint32_t)value); // unsigned math wraps; avoids signed-overflow UB for large index values
michael@0 813 }
michael@0 814
michael@0 815 private:
michael@0 816 friend class MessagePattern;
michael@0 817
michael@0 818 static const int32_t MAX_LENGTH=0xffff; // largest value the uint16_t 'length' field can hold
michael@0 819 static const int32_t MAX_VALUE=0x7fff; // largest value the int16_t 'value' field can hold
michael@0 820
michael@0 821 // Some fields are not const because they are modified during pattern parsing.
michael@0 822 // After pattern parsing, the parts are effectively immutable.
michael@0 823 UMessagePatternPartType type;
michael@0 824 int32_t index; // pattern string index, see getIndex()
michael@0 825 uint16_t length; // pattern substring length, see getLength()
michael@0 826 int16_t value; // type-dependent value, see getValue()
michael@0 827 int32_t limitPartIndex; // read by MessagePattern::getLimitPartIndex()
michael@0 828 };
michael@0 829
michael@0 830 private:
michael@0 831 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); // NOTE(review): presumably prepares state before a pattern is parsed - confirm in the implementation file
michael@0 832
michael@0 833 void postParse(); // NOTE(review): presumably finalizes state after a pattern is parsed - confirm in the implementation file
michael@0 834
michael@0 835 int32_t parseMessage(int32_t index, int32_t msgStartLength,
michael@0 836 int32_t nestingLevel, UMessagePatternArgType parentType,
michael@0 837 UParseError *parseError, UErrorCode &errorCode); // NOTE(review): the int32_t result is presumably the pattern index after the parsed message - confirm
michael@0 838
michael@0 839 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
michael@0 840 UParseError *parseError, UErrorCode &errorCode); // NOTE(review): result presumably the pattern index after the parsed argument - confirm
michael@0 841
michael@0 842 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); // NOTE(review): result presumably the pattern index after the style text - confirm
michael@0 843
michael@0 844 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
michael@0 845 UParseError *parseError, UErrorCode &errorCode); // NOTE(review): result presumably the pattern index after the choice style - confirm
michael@0 846
michael@0 847 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
michael@0 848 UParseError *parseError, UErrorCode &errorCode); // NOTE(review): result presumably the pattern index after the plural/select style - confirm
michael@0 849
michael@0 850 /**
michael@0 851 * Validates and parses an argument name or argument number string.
michael@0 852 * This internal method assumes that the input substring is a "pattern identifier".
michael@0 853 * @return &gt;=0 if the name is a valid number,
michael@0 854 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
michael@0 855 * ARG_NAME_NOT_VALID (-2) if it is neither.
michael@0 856 * @see #validateArgumentName(String)
michael@0 857 */
michael@0 858 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
michael@0 859
michael@0 860 int32_t parseArgNumber(int32_t start, int32_t limit) {
michael@0 861 return MessagePattern::parseArgNumber(msg, start, limit); // forwards to the static overload, passing this object's msg string
michael@0 862 }
michael@0 863
michael@0 864 /**
michael@0 865 * Parses a number from the specified message substring.
michael@0 866 * @param start start index into the message string
michael@0 867 * @param limit limit index into the message string, must be start<limit
michael@0 868 * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
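michael@0 869 * @param parseError Struct to receive information on the position of an error within the pattern.
michael@0 870 * @param errorCode Standard ICU error code.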
michael@0 871 */
michael@0 872 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
michael@0 873 UParseError *parseError, UErrorCode &errorCode); // returns void; NOTE(review): the parsed number is presumably recorded as a part (cf. addArgDoublePart) - confirm
michael@0 874
michael@0 875 // Java has package-private appendReducedApostrophes() here.
michael@0 876 // In C++, this is declared in the MessageImpl class.
michael@0 877
michael@0 878 int32_t skipWhiteSpace(int32_t index); // NOTE(review): presumably returns the pattern index after any white space found at index - confirm
michael@0 879
michael@0 880 int32_t skipIdentifier(int32_t index); // NOTE(review): presumably returns the pattern index after an identifier found at index - confirm
michael@0 881
michael@0 882 /**
michael@0 883 * Skips a sequence of characters that could occur in a double value.
michael@0 884 * Does not fully parse or validate the value.
michael@0 885 */
michael@0 886 int32_t skipDouble(int32_t index); // NOTE(review): result presumably the pattern index after the skipped characters - confirm
michael@0 887
michael@0 888 static UBool isArgTypeChar(UChar32 c); // static: takes only the code point, no per-object state
michael@0 889
michael@0 890 UBool isChoice(int32_t index); // NOTE(review): presumably tests for the "choice" keyword at index - confirm
michael@0 891
michael@0 892 UBool isPlural(int32_t index); // NOTE(review): presumably tests for the "plural" keyword at index - confirm
michael@0 893
michael@0 894 UBool isSelect(int32_t index); // NOTE(review): presumably tests for the "select" keyword at index - confirm
michael@0 895
michael@0 896 UBool isOrdinal(int32_t index); // NOTE(review): presumably tests for an ordinal-style keyword at index - confirm
michael@0 897
michael@0 898 /**
michael@0 899 * @return TRUE if we are inside a MessageFormat (sub-)pattern,
michael@0 900 * as opposed to inside a top-level choice/plural/select pattern.
michael@0 901 */
michael@0 902 UBool inMessageFormatPattern(int32_t nestingLevel); // declaration only; non-static, non-const member
michael@0 903
michael@0 904 /**
michael@0 905 * @return TRUE if we are in a MessageFormat sub-pattern
michael@0 906 * of a top-level ChoiceFormat pattern.
michael@0 907 */
michael@0 908 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); // declaration only; non-static, non-const member
michael@0 909
michael@0 910 void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
michael@0 911 int32_t value, UErrorCode &errorCode); // NOTE(review): presumably appends a Part built from these four values - confirm
michael@0 912
michael@0 913 void addLimitPart(int32_t start,
michael@0 914 UMessagePatternPartType type, int32_t index, int32_t length,
michael@0 915 int32_t value, UErrorCode &errorCode); // NOTE(review): presumably also links the part at 'start' to the new limit part (cf. Part::limitPartIndex) - confirm
michael@0 916
michael@0 917 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); // NOTE(review): presumably stores numericValue and adds an ARG_DOUBLE part - confirm
michael@0 918
michael@0 919 void setParseError(UParseError *parseError, int32_t index); // NOTE(review): presumably fills *parseError for the given pattern index - confirm
michael@0 920
michael@0 921 UBool init(UErrorCode &errorCode); // NOTE(review): meaning of the UBool result is not visible here - presumably TRUE on success
michael@0 922 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); // NOTE(review): presumably copies other's parts and numeric values - confirm
michael@0 923
michael@0 924 UMessagePatternApostropheMode aposMode; // apostrophe mode used for this pattern
michael@0 925 UnicodeString msg; // the string that getSubstring() and partSubstringMatches() take ranges of
michael@0 926 // Java equivalent: ArrayList<Part> parts=new ArrayList<Part>();
michael@0 927 MessagePatternPartsList *partsList; // NOTE(review): presumably owns the storage that 'parts' points into - confirm
michael@0 928 Part *parts; // NOTE(review): presumably the array of parsed parts - confirm
michael@0 929 int32_t partsLength; // NOTE(review): presumably the number of valid entries in 'parts' - confirm
michael@0 930 // Java equivalent: ArrayList<Double> numericValues;
michael@0 931 MessagePatternDoubleList *numericValuesList; // NOTE(review): presumably owns the storage that 'numericValues' points into - confirm
michael@0 932 double *numericValues; // NOTE(review): presumably the values returned by getNumericValue() - confirm
michael@0 933 int32_t numericValuesLength; // NOTE(review): presumably the number of valid entries in 'numericValues' - confirm
michael@0 934 UBool hasArgNames; // NOTE(review): presumably set when the pattern uses named arguments - confirm
michael@0 935 UBool hasArgNumbers; // NOTE(review): presumably set when the pattern uses numbered arguments - confirm
michael@0 936 UBool needsAutoQuoting; // NOTE(review): meaning not visible in this section - confirm in the implementation file
michael@0 937 };
michael@0 938
michael@0 939 U_NAMESPACE_END
michael@0 940
michael@0 941 #endif // !UCONFIG_NO_FORMATTING
michael@0 942
michael@0 943 #endif // __MESSAGEPATTERN_H__

mercurial