Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * Copyright (C) 2011-2013, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ******************************************************************************* |
michael@0 | 6 | * file name: messagepattern.h |
michael@0 | 7 | * encoding: US-ASCII |
michael@0 | 8 | * tab size: 8 (not used) |
michael@0 | 9 | * indentation:4 |
michael@0 | 10 | * |
michael@0 | 11 | * created on: 2011mar14 |
michael@0 | 12 | * created by: Markus W. Scherer |
michael@0 | 13 | */ |
michael@0 | 14 | |
michael@0 | 15 | #ifndef __MESSAGEPATTERN_H__ |
michael@0 | 16 | #define __MESSAGEPATTERN_H__ |
michael@0 | 17 | |
michael@0 | 18 | /** |
michael@0 | 19 | * \file |
michael@0 | 20 | * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. |
michael@0 | 21 | */ |
michael@0 | 22 | |
michael@0 | 23 | #include "unicode/utypes.h" |
michael@0 | 24 | |
michael@0 | 25 | #if !UCONFIG_NO_FORMATTING |
michael@0 | 26 | |
michael@0 | 27 | #include "unicode/parseerr.h" |
michael@0 | 28 | #include "unicode/unistr.h" |
michael@0 | 29 | |
michael@0 | 30 | /** |
michael@0 | 31 | * Mode for when an apostrophe starts quoted literal text for MessageFormat output. |
michael@0 | 32 | * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h |
michael@0 | 33 | * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). |
michael@0 | 34 | * <p> |
michael@0 | 35 | * A pair of adjacent apostrophes always results in a single apostrophe in the output, |
michael@0 | 36 | * even when the pair is between two single, text-quoting apostrophes. |
michael@0 | 37 | * <p> |
michael@0 | 38 | * The following table shows examples of desired MessageFormat.format() output |
michael@0 | 39 | * with the pattern strings that yield that output. |
michael@0 | 40 | * <p> |
michael@0 | 41 | * <table> |
michael@0 | 42 | * <tr> |
michael@0 | 43 | * <th>Desired output</th> |
michael@0 | 44 | * <th>DOUBLE_OPTIONAL</th> |
michael@0 | 45 | * <th>DOUBLE_REQUIRED</th> |
michael@0 | 46 | * </tr> |
michael@0 | 47 | * <tr> |
michael@0 | 48 | * <td>I see {many}</td> |
michael@0 | 49 | * <td>I see '{many}'</td> |
michael@0 | 50 | * <td>(same)</td> |
michael@0 | 51 | * </tr> |
michael@0 | 52 | * <tr> |
michael@0 | 53 | * <td>I said {'Wow!'}</td> |
michael@0 | 54 | * <td>I said '{''Wow!''}'</td> |
michael@0 | 55 | * <td>(same)</td> |
michael@0 | 56 | * </tr> |
michael@0 | 57 | * <tr> |
michael@0 | 58 | * <td>I don't know</td> |
michael@0 | 59 | * <td>I don't know OR<br> I don''t know</td> |
michael@0 | 60 | * <td>I don''t know</td> |
michael@0 | 61 | * </tr> |
michael@0 | 62 | * </table> |
michael@0 | 63 | * @stable ICU 4.8 |
michael@0 | 64 | * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE |
michael@0 | 65 | */ |
michael@0 | 66 | enum UMessagePatternApostropheMode { |
michael@0 | 67 | /** |
michael@0 | 68 | * A literal apostrophe is represented by |
michael@0 | 69 | * either a single or a double apostrophe pattern character. |
michael@0 | 70 | * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text |
michael@0 | 71 | * if it immediately precedes a curly brace {}, |
michael@0 | 72 | * or a pipe symbol | if inside a choice format, |
michael@0 | 73 | * or a pound symbol # if inside a plural format. |
michael@0 | 74 | * <p> |
michael@0 | 75 | * This is the default behavior starting with ICU 4.8. |
michael@0 | 76 | * @stable ICU 4.8 |
michael@0 | 77 | */ |
michael@0 | 78 | UMSGPAT_APOS_DOUBLE_OPTIONAL, |
michael@0 | 79 | /** |
michael@0 | 80 | * A literal apostrophe must be represented by |
michael@0 | 81 | * a double apostrophe pattern character. |
michael@0 | 82 | * A single apostrophe always starts quoted literal text. |
michael@0 | 83 | * <p> |
michael@0 | 84 | * This is the behavior of ICU 4.6 and earlier, and of the JDK. |
michael@0 | 85 | * @stable ICU 4.8 |
michael@0 | 86 | */ |
michael@0 | 87 | UMSGPAT_APOS_DOUBLE_REQUIRED |
michael@0 | 88 | }; |
michael@0 | 89 | /** |
michael@0 | 90 | * @stable ICU 4.8 |
michael@0 | 91 | */ |
michael@0 | 92 | typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; |
michael@0 | 93 | |
michael@0 | 94 | /** |
michael@0 | 95 | * MessagePattern::Part type constants. |
michael@0 | 96 | * @stable ICU 4.8 |
michael@0 | 97 | */ |
michael@0 | 98 | enum UMessagePatternPartType { |
michael@0 | 99 | /** |
michael@0 | 100 | * Start of a message pattern (main or nested). |
michael@0 | 101 | * The length is 0 for the top-level message |
michael@0 | 102 | * and for a choice argument sub-message, otherwise 1 for the '{'. |
michael@0 | 103 | * The value indicates the nesting level, starting with 0 for the main message. |
michael@0 | 104 | * <p> |
michael@0 | 105 | * There is always a later MSG_LIMIT part. |
michael@0 | 106 | * @stable ICU 4.8 |
michael@0 | 107 | */ |
michael@0 | 108 | UMSGPAT_PART_TYPE_MSG_START, |
michael@0 | 109 | /** |
michael@0 | 110 | * End of a message pattern (main or nested). |
michael@0 | 111 | * The length is 0 for the top-level message and |
michael@0 | 112 | * the last sub-message of a choice argument, |
michael@0 | 113 | * otherwise 1 for the '}' or (in a choice argument style) the '|'. |
michael@0 | 114 | * The value indicates the nesting level, starting with 0 for the main message. |
michael@0 | 115 | * @stable ICU 4.8 |
michael@0 | 116 | */ |
michael@0 | 117 | UMSGPAT_PART_TYPE_MSG_LIMIT, |
michael@0 | 118 | /** |
michael@0 | 119 | * Indicates a substring of the pattern string which is to be skipped when formatting. |
michael@0 | 120 | * For example, an apostrophe that begins or ends quoted text |
michael@0 | 121 | * would be indicated with such a part. |
michael@0 | 122 | * The value is undefined and currently always 0. |
michael@0 | 123 | * @stable ICU 4.8 |
michael@0 | 124 | */ |
michael@0 | 125 | UMSGPAT_PART_TYPE_SKIP_SYNTAX, |
michael@0 | 126 | /** |
michael@0 | 127 | * Indicates that a syntax character needs to be inserted for auto-quoting. |
michael@0 | 128 | * The length is 0. |
michael@0 | 129 | * The value is the character code of the insertion character. (U+0027=APOSTROPHE) |
michael@0 | 130 | * @stable ICU 4.8 |
michael@0 | 131 | */ |
michael@0 | 132 | UMSGPAT_PART_TYPE_INSERT_CHAR, |
michael@0 | 133 | /** |
michael@0 | 134 | * Indicates a syntactic (non-escaped) # symbol in a plural variant. |
michael@0 | 135 | * When formatting, replace this part's substring with the |
michael@0 | 136 | * (value-offset) for the plural argument value. |
michael@0 | 137 | * The value is undefined and currently always 0. |
michael@0 | 138 | * @stable ICU 4.8 |
michael@0 | 139 | */ |
michael@0 | 140 | UMSGPAT_PART_TYPE_REPLACE_NUMBER, |
michael@0 | 141 | /** |
michael@0 | 142 | * Start of an argument. |
michael@0 | 143 | * The length is 1 for the '{'. |
michael@0 | 144 | * The value is the ordinal value of the ArgType. Use getArgType(). |
michael@0 | 145 | * <p> |
michael@0 | 146 | * This part is followed by either an ARG_NUMBER or ARG_NAME, |
michael@0 | 147 | * followed by optional argument sub-parts (see UMessagePatternArgType constants) |
michael@0 | 148 | * and finally an ARG_LIMIT part. |
michael@0 | 149 | * @stable ICU 4.8 |
michael@0 | 150 | */ |
michael@0 | 151 | UMSGPAT_PART_TYPE_ARG_START, |
michael@0 | 152 | /** |
michael@0 | 153 | * End of an argument. |
michael@0 | 154 | * The length is 1 for the '}'. |
michael@0 | 155 | * The value is the ordinal value of the ArgType. Use getArgType(). |
michael@0 | 156 | * @stable ICU 4.8 |
michael@0 | 157 | */ |
michael@0 | 158 | UMSGPAT_PART_TYPE_ARG_LIMIT, |
michael@0 | 159 | /** |
michael@0 | 160 | * The argument number, provided by the value. |
michael@0 | 161 | * @stable ICU 4.8 |
michael@0 | 162 | */ |
michael@0 | 163 | UMSGPAT_PART_TYPE_ARG_NUMBER, |
michael@0 | 164 | /** |
michael@0 | 165 | * The argument name. |
michael@0 | 166 | * The value is undefined and currently always 0. |
michael@0 | 167 | * @stable ICU 4.8 |
michael@0 | 168 | */ |
michael@0 | 169 | UMSGPAT_PART_TYPE_ARG_NAME, |
michael@0 | 170 | /** |
michael@0 | 171 | * The argument type. |
michael@0 | 172 | * The value is undefined and currently always 0. |
michael@0 | 173 | * @stable ICU 4.8 |
michael@0 | 174 | */ |
michael@0 | 175 | UMSGPAT_PART_TYPE_ARG_TYPE, |
michael@0 | 176 | /** |
michael@0 | 177 | * The argument style text. |
michael@0 | 178 | * The value is undefined and currently always 0. |
michael@0 | 179 | * @stable ICU 4.8 |
michael@0 | 180 | */ |
michael@0 | 181 | UMSGPAT_PART_TYPE_ARG_STYLE, |
michael@0 | 182 | /** |
michael@0 | 183 | * A selector substring in a "complex" argument style. |
michael@0 | 184 | * The value is undefined and currently always 0. |
michael@0 | 185 | * @stable ICU 4.8 |
michael@0 | 186 | */ |
michael@0 | 187 | UMSGPAT_PART_TYPE_ARG_SELECTOR, |
michael@0 | 188 | /** |
michael@0 | 189 | * An integer value, for example the offset or an explicit selector value |
michael@0 | 190 | * in a PluralFormat style. |
michael@0 | 191 | * The part value is the integer value. |
michael@0 | 192 | * @stable ICU 4.8 |
michael@0 | 193 | */ |
michael@0 | 194 | UMSGPAT_PART_TYPE_ARG_INT, |
michael@0 | 195 | /** |
michael@0 | 196 | * A numeric value, for example the offset or an explicit selector value |
michael@0 | 197 | * in a PluralFormat style. |
michael@0 | 198 | * The part value is an index into an internal array of numeric values; |
michael@0 | 199 | * use getNumericValue(). |
michael@0 | 200 | * @stable ICU 4.8 |
michael@0 | 201 | */ |
michael@0 | 202 | UMSGPAT_PART_TYPE_ARG_DOUBLE |
michael@0 | 203 | }; |
michael@0 | 204 | /** |
michael@0 | 205 | * @stable ICU 4.8 |
michael@0 | 206 | */ |
michael@0 | 207 | typedef enum UMessagePatternPartType UMessagePatternPartType; |
michael@0 | 208 | |
michael@0 | 209 | /** |
michael@0 | 210 | * Argument type constants. |
michael@0 | 211 | * Returned by MessagePattern::Part::getArgType() for ARG_START and ARG_LIMIT parts. |
michael@0 | 212 | * |
michael@0 | 213 | * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, |
michael@0 | 214 | * with a nesting level one greater than the surrounding message. |
michael@0 | 215 | * @stable ICU 4.8 |
michael@0 | 216 | */ |
michael@0 | 217 | enum UMessagePatternArgType { |
michael@0 | 218 | /** |
michael@0 | 219 | * The argument has no specified type. |
michael@0 | 220 | * @stable ICU 4.8 |
michael@0 | 221 | */ |
michael@0 | 222 | UMSGPAT_ARG_TYPE_NONE, |
michael@0 | 223 | /** |
michael@0 | 224 | * The argument has a "simple" type which is provided by the ARG_TYPE part. |
michael@0 | 225 | * An ARG_STYLE part might follow that. |
michael@0 | 226 | * @stable ICU 4.8 |
michael@0 | 227 | */ |
michael@0 | 228 | UMSGPAT_ARG_TYPE_SIMPLE, |
michael@0 | 229 | /** |
michael@0 | 230 | * The argument is a ChoiceFormat with one or more |
michael@0 | 231 | * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. |
michael@0 | 232 | * @stable ICU 4.8 |
michael@0 | 233 | */ |
michael@0 | 234 | UMSGPAT_ARG_TYPE_CHOICE, |
michael@0 | 235 | /** |
michael@0 | 236 | * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset |
michael@0 | 237 | * (e.g., offset:1) |
michael@0 | 238 | * and one or more (ARG_SELECTOR [explicit-value] message) tuples. |
michael@0 | 239 | * If the selector has an explicit value (e.g., =2), then |
michael@0 | 240 | * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. |
michael@0 | 241 | * Otherwise the message immediately follows the ARG_SELECTOR. |
michael@0 | 242 | * @stable ICU 4.8 |
michael@0 | 243 | */ |
michael@0 | 244 | UMSGPAT_ARG_TYPE_PLURAL, |
michael@0 | 245 | /** |
michael@0 | 246 | * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. |
michael@0 | 247 | * @stable ICU 4.8 |
michael@0 | 248 | */ |
michael@0 | 249 | UMSGPAT_ARG_TYPE_SELECT, |
michael@0 | 250 | /** |
michael@0 | 251 | * The argument is an ordinal-number PluralFormat |
michael@0 | 252 | * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL. |
michael@0 | 253 | * @stable ICU 50 |
michael@0 | 254 | */ |
michael@0 | 255 | UMSGPAT_ARG_TYPE_SELECTORDINAL |
michael@0 | 256 | }; |
michael@0 | 257 | /** |
michael@0 | 258 | * @stable ICU 4.8 |
michael@0 | 259 | */ |
michael@0 | 260 | typedef enum UMessagePatternArgType UMessagePatternArgType; |
michael@0 | 261 | |
michael@0 | 262 | /** |
michael@0 | 263 | * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE |
michael@0 | 264 | * Returns TRUE if the argument type has a plural style part sequence and semantics, |
michael@0 | 265 | * currently exactly UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. The argType expression may be evaluated twice, so it should have no side effects. |
michael@0 | 266 | * @stable ICU 50 |
michael@0 | 267 | */ |
michael@0 | 268 | #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \ |
michael@0 | 269 | ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) |
michael@0 | 270 | |
michael@0 | 271 | enum { |
michael@0 | 272 | /** |
michael@0 | 273 | * Return value from MessagePattern::validateArgumentName() for when |
michael@0 | 274 | * the string is a valid "pattern identifier" but not a number. |
michael@0 | 275 | * @stable ICU 4.8 |
michael@0 | 276 | */ |
michael@0 | 277 | UMSGPAT_ARG_NAME_NOT_NUMBER=-1, |
michael@0 | 278 | |
michael@0 | 279 | /** |
michael@0 | 280 | * Return value from MessagePattern::validateArgumentName() for when |
michael@0 | 281 | * the string is invalid. |
michael@0 | 282 | * It might not be a valid "pattern identifier", |
michael@0 | 283 | * or it has only ASCII digits but there is a leading zero or the number is too large. |
michael@0 | 284 | * @stable ICU 4.8 |
michael@0 | 285 | */ |
michael@0 | 286 | UMSGPAT_ARG_NAME_NOT_VALID=-2 |
michael@0 | 287 | }; |
michael@0 | 288 | |
michael@0 | 289 | /** |
michael@0 | 290 | * Special value (the double -123456789) that is returned by getNumericValue(Part) when no |
michael@0 | 291 | * numeric value is defined for a part. |
michael@0 | 292 | * @see MessagePattern::getNumericValue() |
michael@0 | 293 | * @stable ICU 4.8 |
michael@0 | 294 | */ |
michael@0 | 295 | #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) |
michael@0 | 296 | |
michael@0 | 297 | U_NAMESPACE_BEGIN |
michael@0 | 298 | |
michael@0 | 299 | class MessagePatternDoubleList; |
michael@0 | 300 | class MessagePatternPartsList; |
michael@0 | 301 | |
michael@0 | 302 | /** |
michael@0 | 303 | * Parses and represents ICU MessageFormat patterns. |
michael@0 | 304 | * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. |
michael@0 | 305 | * Used in the implementations of those classes as well as in tools |
michael@0 | 306 | * for message validation, translation and format conversion. |
michael@0 | 307 | * <p> |
michael@0 | 308 | * The parser handles all syntax relevant for identifying message arguments. |
michael@0 | 309 | * This includes "complex" arguments whose style strings contain |
michael@0 | 310 | * nested MessageFormat pattern substrings. |
michael@0 | 311 | * For "simple" arguments (with no nested MessageFormat pattern substrings), |
michael@0 | 312 | * the argument style is not parsed any further. |
michael@0 | 313 | * <p> |
michael@0 | 314 | * The parser handles named and numbered message arguments and allows both in one message. |
michael@0 | 315 | * <p> |
michael@0 | 316 | * Once a pattern has been parsed successfully, iterate through the parsed data |
michael@0 | 317 | * with countParts(), getPart() and related methods. |
michael@0 | 318 | * <p> |
michael@0 | 319 | * The data logically represents a parse tree, but is stored and accessed |
michael@0 | 320 | * as a list of "parts" for fast and simple parsing and to minimize object allocations. |
michael@0 | 321 | * Arguments and nested messages are best handled via recursion. |
michael@0 | 322 | * For every _START "part", MessagePattern::getLimitPartIndex() efficiently returns |
michael@0 | 323 | * the index of the corresponding _LIMIT "part". |
michael@0 | 324 | * <p> |
michael@0 | 325 | * List of "parts": |
michael@0 | 326 | * <pre> |
michael@0 | 327 | * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT |
michael@0 | 328 | * argument = noneArg | simpleArg | complexArg |
michael@0 | 329 | * complexArg = choiceArg | pluralArg | selectArg |
michael@0 | 330 | * |
michael@0 | 331 | * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE |
michael@0 | 332 | * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE |
michael@0 | 333 | * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE |
michael@0 | 334 | * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL |
michael@0 | 335 | * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT |
michael@0 | 336 | * |
michael@0 | 337 | * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ |
michael@0 | 338 | * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ |
michael@0 | 339 | * selectStyle = (ARG_SELECTOR message)+ |
michael@0 | 340 | * </pre> |
michael@0 | 341 | * <ul> |
michael@0 | 342 | * <li>Literal output text is not represented directly by "parts" but accessed |
michael@0 | 343 | * between parts of a message, from one part's getLimit() to the next part's getIndex(). |
michael@0 | 344 | * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. |
michael@0 | 345 | * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or |
michael@0 | 346 | * the less-than-or-equal-to sign (U+2264). |
michael@0 | 347 | * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. |
michael@0 | 348 | * The optional numeric Part between each (ARG_SELECTOR, message) pair |
michael@0 | 349 | * is the value of an explicit-number selector like "=2", |
michael@0 | 350 | * otherwise the selector is a non-numeric identifier. |
michael@0 | 351 | * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. |
michael@0 | 352 | * </ul> |
michael@0 | 353 | * <p> |
michael@0 | 354 | * This class is not intended for public subclassing. |
michael@0 | 355 | * |
michael@0 | 356 | * @stable ICU 4.8 |
michael@0 | 357 | */ |
michael@0 | 358 | class U_COMMON_API MessagePattern : public UObject { |
michael@0 | 359 | public: |
michael@0 | 360 | /** |
michael@0 | 361 | * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. |
michael@0 | 362 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 363 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 364 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 365 | * function chaining. (See User Guide for details.) |
michael@0 | 366 | * @stable ICU 4.8 |
michael@0 | 367 | */ |
michael@0 | 368 | MessagePattern(UErrorCode &errorCode); |
michael@0 | 369 | |
michael@0 | 370 | /** |
michael@0 | 371 | * Constructs an empty MessagePattern. |
michael@0 | 372 | * @param mode Explicit UMessagePatternApostropheMode. |
michael@0 | 373 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 374 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 375 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 376 | * function chaining. (See User Guide for details.) |
michael@0 | 377 | * @stable ICU 4.8 |
michael@0 | 378 | */ |
michael@0 | 379 | MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); |
michael@0 | 380 | |
michael@0 | 381 | /** |
michael@0 | 382 | * Constructs a MessagePattern with default UMessagePatternApostropheMode and |
michael@0 | 383 | * parses the MessageFormat pattern string. |
michael@0 | 384 | * @param pattern a MessageFormat pattern string |
michael@0 | 385 | * @param parseError Struct to receive information on the position |
michael@0 | 386 | * of an error within the pattern. |
michael@0 | 387 | * Can be NULL. |
michael@0 | 388 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 389 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 390 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 391 | * function chaining. (See User Guide for details.) |
michael@0 | 392 | * TODO: turn @throws into UErrorCode specifics? |
michael@0 | 393 | * @throws IllegalArgumentException for syntax errors in the pattern string |
michael@0 | 394 | * @throws IndexOutOfBoundsException if certain limits are exceeded |
michael@0 | 395 | * (e.g., argument number too high, argument name too long, etc.) |
michael@0 | 396 | * @throws NumberFormatException if a number could not be parsed |
michael@0 | 397 | * @stable ICU 4.8 |
michael@0 | 398 | */ |
michael@0 | 399 | MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); |
michael@0 | 400 | |
michael@0 | 401 | /** |
michael@0 | 402 | * Copy constructor. |
michael@0 | 403 | * @param other Object to copy. |
michael@0 | 404 | * @stable ICU 4.8 |
michael@0 | 405 | */ |
michael@0 | 406 | MessagePattern(const MessagePattern &other); |
michael@0 | 407 | |
michael@0 | 408 | /** |
michael@0 | 409 | * Assignment operator. |
michael@0 | 410 | * @param other Object to copy. |
michael@0 | 411 | * @return *this=other |
michael@0 | 412 | * @stable ICU 4.8 |
michael@0 | 413 | */ |
michael@0 | 414 | MessagePattern &operator=(const MessagePattern &other); |
michael@0 | 415 | |
michael@0 | 416 | /** |
michael@0 | 417 | * Destructor. |
michael@0 | 418 | * @stable ICU 4.8 |
michael@0 | 419 | */ |
michael@0 | 420 | virtual ~MessagePattern(); |
michael@0 | 421 | |
michael@0 | 422 | /** |
michael@0 | 423 | * Parses a MessageFormat pattern string. |
michael@0 | 424 | * @param pattern a MessageFormat pattern string |
michael@0 | 425 | * @param parseError Struct to receive information on the position |
michael@0 | 426 | * of an error within the pattern. |
michael@0 | 427 | * Can be NULL. |
michael@0 | 428 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 429 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 430 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 431 | * function chaining. (See User Guide for details.) |
michael@0 | 432 | * @return *this |
michael@0 | 433 | * @throws IllegalArgumentException for syntax errors in the pattern string |
michael@0 | 434 | * @throws IndexOutOfBoundsException if certain limits are exceeded |
michael@0 | 435 | * (e.g., argument number too high, argument name too long, etc.) |
michael@0 | 436 | * @throws NumberFormatException if a number could not be parsed |
michael@0 | 437 | * @stable ICU 4.8 |
michael@0 | 438 | */ |
michael@0 | 439 | MessagePattern &parse(const UnicodeString &pattern, |
michael@0 | 440 | UParseError *parseError, UErrorCode &errorCode); |
michael@0 | 441 | |
michael@0 | 442 | /** |
michael@0 | 443 | * Parses a ChoiceFormat pattern string. |
michael@0 | 444 | * @param pattern a ChoiceFormat pattern string |
michael@0 | 445 | * @param parseError Struct to receive information on the position |
michael@0 | 446 | * of an error within the pattern. |
michael@0 | 447 | * Can be NULL. |
michael@0 | 448 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 449 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 450 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 451 | * function chaining. (See User Guide for details.) |
michael@0 | 452 | * @return *this |
michael@0 | 453 | * @throws IllegalArgumentException for syntax errors in the pattern string |
michael@0 | 454 | * @throws IndexOutOfBoundsException if certain limits are exceeded |
michael@0 | 455 | * (e.g., argument number too high, argument name too long, etc.) |
michael@0 | 456 | * @throws NumberFormatException if a number could not be parsed |
michael@0 | 457 | * @stable ICU 4.8 |
michael@0 | 458 | */ |
michael@0 | 459 | MessagePattern &parseChoiceStyle(const UnicodeString &pattern, |
michael@0 | 460 | UParseError *parseError, UErrorCode &errorCode); |
michael@0 | 461 | |
michael@0 | 462 | /** |
michael@0 | 463 | * Parses a PluralFormat pattern string. |
michael@0 | 464 | * @param pattern a PluralFormat pattern string |
michael@0 | 465 | * @param parseError Struct to receive information on the position |
michael@0 | 466 | * of an error within the pattern. |
michael@0 | 467 | * Can be NULL. |
michael@0 | 468 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 469 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 470 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 471 | * function chaining. (See User Guide for details.) |
michael@0 | 472 | * @return *this |
michael@0 | 473 | * @throws IllegalArgumentException for syntax errors in the pattern string |
michael@0 | 474 | * @throws IndexOutOfBoundsException if certain limits are exceeded |
michael@0 | 475 | * (e.g., argument number too high, argument name too long, etc.) |
michael@0 | 476 | * @throws NumberFormatException if a number could not be parsed |
michael@0 | 477 | * @stable ICU 4.8 |
michael@0 | 478 | */ |
michael@0 | 479 | MessagePattern &parsePluralStyle(const UnicodeString &pattern, |
michael@0 | 480 | UParseError *parseError, UErrorCode &errorCode); |
michael@0 | 481 | |
michael@0 | 482 | /** |
michael@0 | 483 | * Parses a SelectFormat pattern string. |
michael@0 | 484 | * @param pattern a SelectFormat pattern string |
michael@0 | 485 | * @param parseError Struct to receive information on the position |
michael@0 | 486 | * of an error within the pattern. |
michael@0 | 487 | * Can be NULL. |
michael@0 | 488 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 489 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 490 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 491 | * function chaining. (See User Guide for details.) |
michael@0 | 492 | * @return *this |
michael@0 | 493 | * @throws IllegalArgumentException for syntax errors in the pattern string |
michael@0 | 494 | * @throws IndexOutOfBoundsException if certain limits are exceeded |
michael@0 | 495 | * (e.g., argument number too high, argument name too long, etc.) |
michael@0 | 496 | * @throws NumberFormatException if a number could not be parsed |
michael@0 | 497 | * @stable ICU 4.8 |
michael@0 | 498 | */ |
michael@0 | 499 | MessagePattern &parseSelectStyle(const UnicodeString &pattern, |
michael@0 | 500 | UParseError *parseError, UErrorCode &errorCode); |
michael@0 | 501 | |
michael@0 | 502 | /** |
michael@0 | 503 | * Clears this MessagePattern. |
michael@0 | 504 | * countParts() will return 0. |
michael@0 | 505 | * @stable ICU 4.8 |
michael@0 | 506 | */ |
michael@0 | 507 | void clear(); |
michael@0 | 508 | |
michael@0 | 509 | /** |
michael@0 | 510 | * Clears this MessagePattern and sets the UMessagePatternApostropheMode. |
michael@0 | 511 | * countParts() will return 0. |
michael@0 | 512 | * @param mode The new UMessagePatternApostropheMode. |
michael@0 | 513 | * @stable ICU 4.8 |
michael@0 | 514 | */ |
michael@0 | 515 | void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { |
michael@0 | 516 | clear(); |
michael@0 | 517 | aposMode=mode; |
michael@0 | 518 | } |
michael@0 | 519 | |
michael@0 | 520 | /** |
michael@0 | 521 | * @param other another object to compare with. |
michael@0 | 522 | * @return TRUE if this object is equivalent to the other one. |
michael@0 | 523 | * @stable ICU 4.8 |
michael@0 | 524 | */ |
michael@0 | 525 | UBool operator==(const MessagePattern &other) const; |
michael@0 | 526 | |
michael@0 | 527 | /** |
michael@0 | 528 | * @param other another object to compare with. |
michael@0 | 529 | * @return FALSE if this object is equivalent to the other one. |
michael@0 | 530 | * @stable ICU 4.8 |
michael@0 | 531 | */ |
michael@0 | 532 | inline UBool operator!=(const MessagePattern &other) const { |
michael@0 | 533 | return !operator==(other); |
michael@0 | 534 | } |
michael@0 | 535 | |
michael@0 | 536 | /** |
michael@0 | 537 | * @return A hash code for this object. |
michael@0 | 538 | * @stable ICU 4.8 |
michael@0 | 539 | */ |
michael@0 | 540 | int32_t hashCode() const; |
michael@0 | 541 | |
michael@0 | 542 | /** |
michael@0 | 543 | * @return this instance's UMessagePatternApostropheMode. |
michael@0 | 544 | * @stable ICU 4.8 |
michael@0 | 545 | */ |
michael@0 | 546 | UMessagePatternApostropheMode getApostropheMode() const { |
michael@0 | 547 | return aposMode; |
michael@0 | 548 | } |
michael@0 | 549 | |
michael@0 | 550 | // Java has package-private jdkAposMode() here. |
michael@0 | 551 | // In C++, this is declared in the MessageImpl class. |
michael@0 | 552 | |
michael@0 | 553 | /** |
michael@0 | 554 | * @return the parsed pattern string, by reference (never null; presumably empty if none was parsed -- TODO confirm). |
michael@0 | 555 | * @stable ICU 4.8 |
michael@0 | 556 | */ |
michael@0 | 557 | const UnicodeString &getPatternString() const { |
michael@0 | 558 | return msg; |
michael@0 | 559 | } |
michael@0 | 560 | |
michael@0 | 561 | /** |
michael@0 | 562 | * Does the parsed pattern have named arguments like {first_name}? |
michael@0 | 563 | * @return TRUE if the parsed pattern has at least one named argument. |
michael@0 | 564 | * @stable ICU 4.8 |
michael@0 | 565 | */ |
michael@0 | 566 | UBool hasNamedArguments() const { |
michael@0 | 567 | return hasArgNames; |
michael@0 | 568 | } |
michael@0 | 569 | |
michael@0 | 570 | /** |
michael@0 | 571 | * Does the parsed pattern have numbered arguments like {2}? |
michael@0 | 572 | * @return TRUE if the parsed pattern has at least one numbered argument. |
michael@0 | 573 | * @stable ICU 4.8 |
michael@0 | 574 | */ |
michael@0 | 575 | UBool hasNumberedArguments() const { |
michael@0 | 576 | return hasArgNumbers; |
michael@0 | 577 | } |
michael@0 | 578 | |
michael@0 | 579 | /** |
michael@0 | 580 | * Validates and parses an argument name or argument number string. |
michael@0 | 581 | * An argument name must be a "pattern identifier", that is, it must contain |
michael@0 | 582 | * no Unicode Pattern_Syntax or Pattern_White_Space characters. |
michael@0 | 583 | * If it only contains ASCII digits, then it must be a small integer with no leading zero. |
michael@0 | 584 | * @param name Input string. |
michael@0 | 585 | * @return >=0 if the name is a valid number, |
michael@0 | 586 | * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, |
michael@0 | 587 | * ARG_NAME_NOT_VALID (-2) if it is neither. |
michael@0 | 588 | * @stable ICU 4.8 |
michael@0 | 589 | */ |
michael@0 | 590 | static int32_t validateArgumentName(const UnicodeString &name); |
michael@0 | 591 | |
michael@0 | 592 | /** |
michael@0 | 593 | * Returns a version of the parsed pattern string where each ASCII apostrophe |
michael@0 | 594 | * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. |
michael@0 | 595 | * <p> |
michael@0 | 596 | * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." |
michael@0 | 597 | * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." |
michael@0 | 598 | * @return the deep-auto-quoted version of the parsed pattern string. |
michael@0 | 599 | * @see MessageFormat::autoQuoteApostrophe() |
michael@0 | 600 | * @stable ICU 4.8 |
michael@0 | 601 | */ |
michael@0 | 602 | UnicodeString autoQuoteApostropheDeep() const; |
michael@0 | 603 | |
michael@0 | 604 | class Part; |
michael@0 | 605 | |
michael@0 | 606 | /** |
michael@0 | 607 | * Returns the number of "parts" created by parsing the pattern string. |
michael@0 | 608 | * Returns 0 if no pattern has been parsed or clear() was called. |
michael@0 | 609 | * @return the number of pattern parts. |
michael@0 | 610 | * @stable ICU 4.8 |
michael@0 | 611 | */ |
michael@0 | 612 | int32_t countParts() const { |
michael@0 | 613 | return partsLength; |
michael@0 | 614 | } |
michael@0 | 615 | |
michael@0 | 616 | /** |
michael@0 | 617 | * Gets the i-th pattern "part". |
michael@0 | 618 | * @param i The index of the Part data. (0..countParts()-1) |
michael@0 | 619 | * @return the i-th pattern "part". |
michael@0 | 620 | * @stable ICU 4.8 |
michael@0 | 621 | */ |
michael@0 | 622 | const Part &getPart(int32_t i) const { |
michael@0 | 623 | return parts[i]; |
michael@0 | 624 | } |
michael@0 | 625 | |
michael@0 | 626 | /** |
michael@0 | 627 | * Returns the UMessagePatternPartType of the i-th pattern "part". |
michael@0 | 628 | * Convenience method for getPart(i).getType(). |
michael@0 | 629 | * @param i The index of the Part data. (0..countParts()-1) |
michael@0 | 630 | * @return The UMessagePatternPartType of the i-th Part. |
michael@0 | 631 | * @stable ICU 4.8 |
michael@0 | 632 | */ |
michael@0 | 633 | UMessagePatternPartType getPartType(int32_t i) const { |
michael@0 | 634 | return getPart(i).type; |
michael@0 | 635 | } |
michael@0 | 636 | |
michael@0 | 637 | /** |
michael@0 | 638 | * Returns the pattern index of the specified pattern "part". |
michael@0 | 639 | * Convenience method for getPart(partIndex).getIndex(). |
michael@0 | 640 | * @param partIndex The index of the Part data. (0..countParts()-1) |
michael@0 | 641 | * @return The pattern index of this Part. |
michael@0 | 642 | * @stable ICU 4.8 |
michael@0 | 643 | */ |
michael@0 | 644 | int32_t getPatternIndex(int32_t partIndex) const { |
michael@0 | 645 | return getPart(partIndex).index; |
michael@0 | 646 | } |
michael@0 | 647 | |
michael@0 | 648 | /** |
michael@0 | 649 | * Returns the substring of the pattern string indicated by the Part. |
michael@0 | 650 | * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). |
michael@0 | 651 | * @param part a part of this MessagePattern. |
michael@0 | 652 | * @return the substring associated with part. |
michael@0 | 653 | * @stable ICU 4.8 |
michael@0 | 654 | */ |
michael@0 | 655 | UnicodeString getSubstring(const Part &part) const { |
michael@0 | 656 | return msg.tempSubString(part.index, part.length); |
michael@0 | 657 | } |
michael@0 | 658 | |
michael@0 | 659 | /** |
michael@0 | 660 | * Compares the part's substring with the input string s. |
michael@0 | 661 | * @param part a part of this MessagePattern. |
michael@0 | 662 | * @param s a string. |
michael@0 | 663 | * @return TRUE if getSubstring(part).equals(s). |
michael@0 | 664 | * @stable ICU 4.8 |
michael@0 | 665 | */ |
michael@0 | 666 | UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { |
michael@0 | 667 | return 0==msg.compare(part.index, part.length, s); |
michael@0 | 668 | } |
michael@0 | 669 | |
/**
 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
 * @param part a part of this MessagePattern.
 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
 * @see Part::hasNumericValue()
 * @stable ICU 4.8
 */
double getNumericValue(const Part &part) const;

/**
 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
 * The value is reported as a double.
 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
 * @return the "offset:" value.
 * @stable ICU 4.8
 */
double getPluralOffset(int32_t pluralStart) const;
michael@0 | 685 | |
michael@0 | 686 | /** |
michael@0 | 687 | * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. |
michael@0 | 688 | * @param start The index of some Part data (0..countParts()-1); |
michael@0 | 689 | * this Part should be of Type ARG_START or MSG_START. |
michael@0 | 690 | * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, |
michael@0 | 691 | * or start itself if getPartType(msgStart)!=ARG|MSG_START. |
michael@0 | 692 | * @stable ICU 4.8 |
michael@0 | 693 | */ |
michael@0 | 694 | int32_t getLimitPartIndex(int32_t start) const { |
michael@0 | 695 | int32_t limit=getPart(start).limitPartIndex; |
michael@0 | 696 | if(limit<start) { |
michael@0 | 697 | return start; |
michael@0 | 698 | } |
michael@0 | 699 | return limit; |
michael@0 | 700 | } |
michael@0 | 701 | |
michael@0 | 702 | /** |
michael@0 | 703 | * A message pattern "part", representing a pattern parsing event. |
michael@0 | 704 | * There is a part for the start and end of a message or argument, |
michael@0 | 705 | * for quoting and escaping of and with ASCII apostrophes, |
michael@0 | 706 | * and for syntax elements of "complex" arguments. |
michael@0 | 707 | * @stable ICU 4.8 |
michael@0 | 708 | */ |
michael@0 | 709 | class Part : public UMemory { |
michael@0 | 710 | public: |
michael@0 | 711 | /** |
michael@0 | 712 | * Default constructor, do not use. |
michael@0 | 713 | * @internal |
michael@0 | 714 | */ |
michael@0 | 715 | Part() {} |
michael@0 | 716 | |
michael@0 | 717 | /** |
michael@0 | 718 | * Returns the type of this part. |
michael@0 | 719 | * @return the part type. |
michael@0 | 720 | * @stable ICU 4.8 |
michael@0 | 721 | */ |
michael@0 | 722 | UMessagePatternPartType getType() const { |
michael@0 | 723 | return type; |
michael@0 | 724 | } |
michael@0 | 725 | |
michael@0 | 726 | /** |
michael@0 | 727 | * Returns the pattern string index associated with this Part. |
michael@0 | 728 | * @return this part's pattern string index. |
michael@0 | 729 | * @stable ICU 4.8 |
michael@0 | 730 | */ |
michael@0 | 731 | int32_t getIndex() const { |
michael@0 | 732 | return index; |
michael@0 | 733 | } |
michael@0 | 734 | |
michael@0 | 735 | /** |
michael@0 | 736 | * Returns the length of the pattern substring associated with this Part. |
michael@0 | 737 | * This is 0 for some parts. |
michael@0 | 738 | * @return this part's pattern substring length. |
michael@0 | 739 | * @stable ICU 4.8 |
michael@0 | 740 | */ |
michael@0 | 741 | int32_t getLength() const { |
michael@0 | 742 | return length; |
michael@0 | 743 | } |
michael@0 | 744 | |
michael@0 | 745 | /** |
michael@0 | 746 | * Returns the pattern string limit (exclusive-end) index associated with this Part. |
michael@0 | 747 | * Convenience method for getIndex()+getLength(). |
michael@0 | 748 | * @return this part's pattern string limit index, same as getIndex()+getLength(). |
michael@0 | 749 | * @stable ICU 4.8 |
michael@0 | 750 | */ |
michael@0 | 751 | int32_t getLimit() const { |
michael@0 | 752 | return index+length; |
michael@0 | 753 | } |
michael@0 | 754 | |
michael@0 | 755 | /** |
michael@0 | 756 | * Returns a value associated with this part. |
michael@0 | 757 | * See the documentation of each part type for details. |
michael@0 | 758 | * @return the part value. |
michael@0 | 759 | * @stable ICU 4.8 |
michael@0 | 760 | */ |
michael@0 | 761 | int32_t getValue() const { |
michael@0 | 762 | return value; |
michael@0 | 763 | } |
michael@0 | 764 | |
michael@0 | 765 | /** |
michael@0 | 766 | * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, |
michael@0 | 767 | * otherwise UMSGPAT_ARG_TYPE_NONE. |
michael@0 | 768 | * @return the argument type for this part. |
michael@0 | 769 | * @stable ICU 4.8 |
michael@0 | 770 | */ |
michael@0 | 771 | UMessagePatternArgType getArgType() const { |
michael@0 | 772 | UMessagePatternPartType type=getType(); |
michael@0 | 773 | if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) { |
michael@0 | 774 | return (UMessagePatternArgType)value; |
michael@0 | 775 | } else { |
michael@0 | 776 | return UMSGPAT_ARG_TYPE_NONE; |
michael@0 | 777 | } |
michael@0 | 778 | } |
michael@0 | 779 | |
michael@0 | 780 | /** |
michael@0 | 781 | * Indicates whether the Part type has a numeric value. |
michael@0 | 782 | * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). |
michael@0 | 783 | * @param type The Part type to be tested. |
michael@0 | 784 | * @return TRUE if the Part type has a numeric value. |
michael@0 | 785 | * @stable ICU 4.8 |
michael@0 | 786 | */ |
michael@0 | 787 | static UBool hasNumericValue(UMessagePatternPartType type) { |
michael@0 | 788 | return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; |
michael@0 | 789 | } |
michael@0 | 790 | |
michael@0 | 791 | /** |
michael@0 | 792 | * @param other another object to compare with. |
michael@0 | 793 | * @return TRUE if this object is equivalent to the other one. |
michael@0 | 794 | * @stable ICU 4.8 |
michael@0 | 795 | */ |
michael@0 | 796 | UBool operator==(const Part &other) const; |
michael@0 | 797 | |
michael@0 | 798 | /** |
michael@0 | 799 | * @param other another object to compare with. |
michael@0 | 800 | * @return FALSE if this object is equivalent to the other one. |
michael@0 | 801 | * @stable ICU 4.8 |
michael@0 | 802 | */ |
michael@0 | 803 | inline UBool operator!=(const Part &other) const { |
michael@0 | 804 | return !operator==(other); |
michael@0 | 805 | } |
michael@0 | 806 | |
michael@0 | 807 | /** |
michael@0 | 808 | * @return A hash code for this object. |
michael@0 | 809 | * @stable ICU 4.8 |
michael@0 | 810 | */ |
michael@0 | 811 | int32_t hashCode() const { |
michael@0 | 812 | return ((type*37+index)*37+length)*37+value; |
michael@0 | 813 | } |
michael@0 | 814 | |
michael@0 | 815 | private: |
michael@0 | 816 | friend class MessagePattern; |
michael@0 | 817 | |
michael@0 | 818 | static const int32_t MAX_LENGTH=0xffff; |
michael@0 | 819 | static const int32_t MAX_VALUE=0x7fff; |
michael@0 | 820 | |
michael@0 | 821 | // Some fields are not final because they are modified during pattern parsing. |
michael@0 | 822 | // After pattern parsing, the parts are effectively immutable. |
michael@0 | 823 | UMessagePatternPartType type; |
michael@0 | 824 | int32_t index; |
michael@0 | 825 | uint16_t length; |
michael@0 | 826 | int16_t value; |
michael@0 | 827 | int32_t limitPartIndex; |
michael@0 | 828 | }; |
michael@0 | 829 | |
michael@0 | 830 | private: |
// Internal parsing functions; only declared here, without inline bodies.
// NOTE(review): the int32_t-returning parse*() functions all take a pattern
// index and presumably return the index just past what they parsed --
// confirm against the implementation.

void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);

void postParse();

int32_t parseMessage(int32_t index, int32_t msgStartLength,
                     int32_t nestingLevel, UMessagePatternArgType parentType,
                     UParseError *parseError, UErrorCode &errorCode);

int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
                 UParseError *parseError, UErrorCode &errorCode);

int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);

int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
                         UParseError *parseError, UErrorCode &errorCode);

int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
                                 UParseError *parseError, UErrorCode &errorCode);
michael@0 | 849 | |
/**
 * Validates and parses an argument name or argument number string.
 * This internal method assumes that the input substring is a "pattern identifier".
 * @param s The string that contains the argument name or number.
 * @param start Start index of the substring in s.
 * @param limit Limit index of the substring in s.
 *        NOTE(review): presumably exclusive, like other limits in this class -- confirm.
 * @return >=0 if the name is a valid number,
 *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
 *         ARG_NAME_NOT_VALID (-2) if it is neither.
 * @see validateArgumentName()
 */
static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
michael@0 | 859 | |
michael@0 | 860 | int32_t parseArgNumber(int32_t start, int32_t limit) { |
michael@0 | 861 | return parseArgNumber(msg, start, limit); |
michael@0 | 862 | } |
michael@0 | 863 | |
/**
 * Parses a number from the specified message substring.
 * This function returns nothing.
 * NOTE(review): the parsed value is presumably recorded as a part
 * (see addPart() and addArgDoublePart()) -- confirm in the implementation.
 * @param start start index into the message string
 * @param limit limit index into the message string, must be start<limit
 * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
 * @param parseError NOTE(review): undocumented in the original; presumably receives
 *        error details when the substring is not a valid number -- confirm.
 * @param errorCode NOTE(review): undocumented in the original; presumably the usual
 *        ICU in/out error code -- confirm.
 */
void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
                 UParseError *parseError, UErrorCode &errorCode);
michael@0 | 874 | |
michael@0 | 875 | // Java has package-private appendReducedApostrophes() here. |
michael@0 | 876 | // In C++, this is declared in the MessageImpl class. |
michael@0 | 877 | |
// Scanning helpers; only declared here.
// NOTE(review): each skip*() takes a pattern index and presumably returns the
// index after the skipped characters -- confirm in the implementation.

int32_t skipWhiteSpace(int32_t index);

int32_t skipIdentifier(int32_t index);

/**
 * Skips a sequence of characters that could occur in a double value.
 * Does not fully parse or validate the value.
 */
int32_t skipDouble(int32_t index);

// Static: classifies the single code point c, independent of any pattern.
static UBool isArgTypeChar(UChar32 c);

// NOTE(review): the following is*() tests presumably check for the
// corresponding argument-type keyword at the given pattern index -- confirm.

UBool isChoice(int32_t index);

UBool isPlural(int32_t index);

UBool isSelect(int32_t index);

UBool isOrdinal(int32_t index);
michael@0 | 897 | |
/**
 * @param nestingLevel NOTE(review): presumably the current nesting depth
 *        as passed through the parse*() functions -- confirm.
 * @return TRUE if we are inside a MessageFormat (sub-)pattern,
 *         as opposed to inside a top-level choice/plural/select pattern.
 */
UBool inMessageFormatPattern(int32_t nestingLevel);

/**
 * @param nestingLevel NOTE(review): presumably the current nesting depth
 *        as passed through the parse*() functions -- confirm.
 * @param parentType NOTE(review): presumably the argument type of the
 *        enclosing argument -- confirm.
 * @return TRUE if we are in a MessageFormat sub-pattern
 *         of a top-level ChoiceFormat pattern.
 */
UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
michael@0 | 909 | |
// Functions that record parts and errors; only declared here.
// NOTE(review): the type/index/length/value parameters presumably become the
// fields of the same names in the new Part -- confirm in the implementation.

void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
             int32_t value, UErrorCode &errorCode);

void addLimitPart(int32_t start,
                  UMessagePatternPartType type, int32_t index, int32_t length,
                  int32_t value, UErrorCode &errorCode);

void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);

void setParseError(UParseError *parseError, int32_t index);

// NOTE(review): init() and copyStorage() presumably set up or duplicate the
// parts and numeric-values storage and report success via UBool -- confirm.
UBool init(UErrorCode &errorCode);
UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
michael@0 | 923 | |
// NOTE(review): presumably the apostrophe mode in effect for this pattern -- confirm.
UMessagePatternApostropheMode aposMode;
// The pattern string; getSubstring() and partSubstringMatches() read from it.
UnicodeString msg;
// Java counterpart: ArrayList<Part> parts=new ArrayList<Part>();
// NOTE(review): partsList presumably owns the storage that parts points to -- confirm.
MessagePatternPartsList *partsList;
// Array indexed by getPart().
Part *parts;
// Returned by countParts().
int32_t partsLength;
// Java counterpart: ArrayList<Double> numericValues;
// NOTE(review): numericValuesList presumably owns the storage that
// numericValues points to -- confirm.
MessagePatternDoubleList *numericValuesList;
double *numericValues;
int32_t numericValuesLength;
UBool hasArgNames;
// Returned by hasNumberedArguments().
UBool hasArgNumbers;
UBool needsAutoQuoting;
michael@0 | 937 | }; |
michael@0 | 938 | |
michael@0 | 939 | U_NAMESPACE_END |
michael@0 | 940 | |
michael@0 | 941 | #endif // !UCONFIG_NO_FORMATTING |
michael@0 | 942 | |
michael@0 | 943 | #endif // __MESSAGEPATTERN_H__ |