1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/messagepattern.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,943 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 2011-2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* file name: messagepattern.h 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2011mar14 1.15 +* created by: Markus W. Scherer 1.16 +*/ 1.17 + 1.18 +#ifndef __MESSAGEPATTERN_H__ 1.19 +#define __MESSAGEPATTERN_H__ 1.20 + 1.21 +/** 1.22 + * \file 1.23 + * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. 1.24 + */ 1.25 + 1.26 +#include "unicode/utypes.h" 1.27 + 1.28 +#if !UCONFIG_NO_FORMATTING 1.29 + 1.30 +#include "unicode/parseerr.h" 1.31 +#include "unicode/unistr.h" 1.32 + 1.33 +/** 1.34 + * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 1.35 + * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h 1.36 + * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). 1.37 + * <p> 1.38 + * A pair of adjacent apostrophes always results in a single apostrophe in the output, 1.39 + * even when the pair is between two single, text-quoting apostrophes. 1.40 + * <p> 1.41 + * The following table shows examples of desired MessageFormat.format() output 1.42 + * with the pattern strings that yield that output. 
1.43 + * <p> 1.44 + * <table> 1.45 + * <tr> 1.46 + * <th>Desired output</th> 1.47 + * <th>DOUBLE_OPTIONAL</th> 1.48 + * <th>DOUBLE_REQUIRED</th> 1.49 + * </tr> 1.50 + * <tr> 1.51 + * <td>I see {many}</td> 1.52 + * <td>I see '{many}'</td> 1.53 + * <td>(same)</td> 1.54 + * </tr> 1.55 + * <tr> 1.56 + * <td>I said {'Wow!'}</td> 1.57 + * <td>I said '{''Wow!''}'</td> 1.58 + * <td>(same)</td> 1.59 + * </tr> 1.60 + * <tr> 1.61 + * <td>I don't know</td> 1.62 + * <td>I don't know OR<br> I don''t know</td> 1.63 + * <td>I don''t know</td> 1.64 + * </tr> 1.65 + * </table> 1.66 + * @stable ICU 4.8 1.67 + * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE 1.68 + */ 1.69 +enum UMessagePatternApostropheMode { 1.70 + /** 1.71 + * A literal apostrophe is represented by 1.72 + * either a single or a double apostrophe pattern character. 1.73 + * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text 1.74 + * if it immediately precedes a curly brace {}, 1.75 + * or a pipe symbol | if inside a choice format, 1.76 + * or a pound symbol # if inside a plural format. 1.77 + * <p> 1.78 + * This is the default behavior starting with ICU 4.8. 1.79 + * @stable ICU 4.8 1.80 + */ 1.81 + UMSGPAT_APOS_DOUBLE_OPTIONAL, 1.82 + /** 1.83 + * A literal apostrophe must be represented by 1.84 + * a double apostrophe pattern character. 1.85 + * A single apostrophe always starts quoted literal text. 1.86 + * <p> 1.87 + * This is the behavior of ICU 4.6 and earlier, and of the JDK. 1.88 + * @stable ICU 4.8 1.89 + */ 1.90 + UMSGPAT_APOS_DOUBLE_REQUIRED 1.91 +}; 1.92 +/** 1.93 + * @stable ICU 4.8 1.94 + */ 1.95 +typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; 1.96 + 1.97 +/** 1.98 + * MessagePattern::Part type constants. 1.99 + * @stable ICU 4.8 1.100 + */ 1.101 +enum UMessagePatternPartType { 1.102 + /** 1.103 + * Start of a message pattern (main or nested). 
1.104 + * The length is 0 for the top-level message 1.105 + * and for a choice argument sub-message, otherwise 1 for the '{'. 1.106 + * The value indicates the nesting level, starting with 0 for the main message. 1.107 + * <p> 1.108 + * There is always a later MSG_LIMIT part. 1.109 + * @stable ICU 4.8 1.110 + */ 1.111 + UMSGPAT_PART_TYPE_MSG_START, 1.112 + /** 1.113 + * End of a message pattern (main or nested). 1.114 + * The length is 0 for the top-level message and 1.115 + * the last sub-message of a choice argument, 1.116 + * otherwise 1 for the '}' or (in a choice argument style) the '|'. 1.117 + * The value indicates the nesting level, starting with 0 for the main message. 1.118 + * @stable ICU 4.8 1.119 + */ 1.120 + UMSGPAT_PART_TYPE_MSG_LIMIT, 1.121 + /** 1.122 + * Indicates a substring of the pattern string which is to be skipped when formatting. 1.123 + * For example, an apostrophe that begins or ends quoted text 1.124 + * would be indicated with such a part. 1.125 + * The value is undefined and currently always 0. 1.126 + * @stable ICU 4.8 1.127 + */ 1.128 + UMSGPAT_PART_TYPE_SKIP_SYNTAX, 1.129 + /** 1.130 + * Indicates that a syntax character needs to be inserted for auto-quoting. 1.131 + * The length is 0. 1.132 + * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 1.133 + * @stable ICU 4.8 1.134 + */ 1.135 + UMSGPAT_PART_TYPE_INSERT_CHAR, 1.136 + /** 1.137 + * Indicates a syntactic (non-escaped) # symbol in a plural variant. 1.138 + * When formatting, replace this part's substring with the 1.139 + * (value-offset) for the plural argument value. 1.140 + * The value is undefined and currently always 0. 1.141 + * @stable ICU 4.8 1.142 + */ 1.143 + UMSGPAT_PART_TYPE_REPLACE_NUMBER, 1.144 + /** 1.145 + * Start of an argument. 1.146 + * The length is 1 for the '{'. 1.147 + * The value is the ordinal value of the ArgType. Use getArgType(). 
1.148 + * <p> 1.149 + * This part is followed by either an ARG_NUMBER or ARG_NAME, 1.150 + * followed by optional argument sub-parts (see UMessagePatternArgType constants) 1.151 + * and finally an ARG_LIMIT part. 1.152 + * @stable ICU 4.8 1.153 + */ 1.154 + UMSGPAT_PART_TYPE_ARG_START, 1.155 + /** 1.156 + * End of an argument. 1.157 + * The length is 1 for the '}'. 1.158 + * The value is the ordinal value of the ArgType. Use getArgType(). 1.159 + * @stable ICU 4.8 1.160 + */ 1.161 + UMSGPAT_PART_TYPE_ARG_LIMIT, 1.162 + /** 1.163 + * The argument number, provided by the value. 1.164 + * @stable ICU 4.8 1.165 + */ 1.166 + UMSGPAT_PART_TYPE_ARG_NUMBER, 1.167 + /** 1.168 + * The argument name. 1.169 + * The value is undefined and currently always 0. 1.170 + * @stable ICU 4.8 1.171 + */ 1.172 + UMSGPAT_PART_TYPE_ARG_NAME, 1.173 + /** 1.174 + * The argument type. 1.175 + * The value is undefined and currently always 0. 1.176 + * @stable ICU 4.8 1.177 + */ 1.178 + UMSGPAT_PART_TYPE_ARG_TYPE, 1.179 + /** 1.180 + * The argument style text. 1.181 + * The value is undefined and currently always 0. 1.182 + * @stable ICU 4.8 1.183 + */ 1.184 + UMSGPAT_PART_TYPE_ARG_STYLE, 1.185 + /** 1.186 + * A selector substring in a "complex" argument style. 1.187 + * The value is undefined and currently always 0. 1.188 + * @stable ICU 4.8 1.189 + */ 1.190 + UMSGPAT_PART_TYPE_ARG_SELECTOR, 1.191 + /** 1.192 + * An integer value, for example the offset or an explicit selector value 1.193 + * in a PluralFormat style. 1.194 + * The part value is the integer value. 1.195 + * @stable ICU 4.8 1.196 + */ 1.197 + UMSGPAT_PART_TYPE_ARG_INT, 1.198 + /** 1.199 + * A numeric value, for example the offset or an explicit selector value 1.200 + * in a PluralFormat style. 1.201 + * The part value is an index into an internal array of numeric values; 1.202 + * use getNumericValue(). 
1.203 + * @stable ICU 4.8 1.204 + */ 1.205 + UMSGPAT_PART_TYPE_ARG_DOUBLE 1.206 +}; 1.207 +/** 1.208 + * @stable ICU 4.8 1.209 + */ 1.210 +typedef enum UMessagePatternPartType UMessagePatternPartType; 1.211 + 1.212 +/** 1.213 + * Argument type constants. 1.214 + * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 1.215 + * 1.216 + * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 1.217 + * with a nesting level one greater than the surrounding message. 1.218 + * @stable ICU 4.8 1.219 + */ 1.220 +enum UMessagePatternArgType { 1.221 + /** 1.222 + * The argument has no specified type. 1.223 + * @stable ICU 4.8 1.224 + */ 1.225 + UMSGPAT_ARG_TYPE_NONE, 1.226 + /** 1.227 + * The argument has a "simple" type which is provided by the ARG_TYPE part. 1.228 + * An ARG_STYLE part might follow that. 1.229 + * @stable ICU 4.8 1.230 + */ 1.231 + UMSGPAT_ARG_TYPE_SIMPLE, 1.232 + /** 1.233 + * The argument is a ChoiceFormat with one or more 1.234 + * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 1.235 + * @stable ICU 4.8 1.236 + */ 1.237 + UMSGPAT_ARG_TYPE_CHOICE, 1.238 + /** 1.239 + * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 1.240 + * (e.g., offset:1) 1.241 + * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 1.242 + * If the selector has an explicit value (e.g., =2), then 1.243 + * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 1.244 + * Otherwise the message immediately follows the ARG_SELECTOR. 1.245 + * @stable ICU 4.8 1.246 + */ 1.247 + UMSGPAT_ARG_TYPE_PLURAL, 1.248 + /** 1.249 + * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 1.250 + * @stable ICU 4.8 1.251 + */ 1.252 + UMSGPAT_ARG_TYPE_SELECT, 1.253 + /** 1.254 + * The argument is an ordinal-number PluralFormat 1.255 + * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL. 
1.256 + * @stable ICU 50 1.257 + */ 1.258 + UMSGPAT_ARG_TYPE_SELECTORDINAL 1.259 +}; 1.260 +/** 1.261 + * @stable ICU 4.8 1.262 + */ 1.263 +typedef enum UMessagePatternArgType UMessagePatternArgType; 1.264 + 1.265 +/** 1.266 + * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE 1.267 + * Returns TRUE if the argument type has a plural style part sequence and semantics, 1.268 + * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. 1.269 + * @stable ICU 50 1.270 + */ 1.271 +#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \ 1.272 + ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) 1.273 + 1.274 +enum { 1.275 + /** 1.276 + * Return value from MessagePattern.validateArgumentName() for when 1.277 + * the string is a valid "pattern identifier" but not a number. 1.278 + * @stable ICU 4.8 1.279 + */ 1.280 + UMSGPAT_ARG_NAME_NOT_NUMBER=-1, 1.281 + 1.282 + /** 1.283 + * Return value from MessagePattern.validateArgumentName() for when 1.284 + * the string is invalid. 1.285 + * It might not be a valid "pattern identifier", 1.286 + * or it has only ASCII digits but there is a leading zero or the number is too large. 1.287 + * @stable ICU 4.8 1.288 + */ 1.289 + UMSGPAT_ARG_NAME_NOT_VALID=-2 1.290 +}; 1.291 + 1.292 +/** 1.293 + * Special value that is returned by getNumericValue(Part) when no 1.294 + * numeric value is defined for a part. 1.295 + * @see MessagePattern.getNumericValue() 1.296 + * @stable ICU 4.8 1.297 + */ 1.298 +#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) 1.299 + 1.300 +U_NAMESPACE_BEGIN 1.301 + 1.302 +class MessagePatternDoubleList; 1.303 +class MessagePatternPartsList; 1.304 + 1.305 +/** 1.306 + * Parses and represents ICU MessageFormat patterns. 1.307 + * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 1.308 + * Used in the implementations of those classes as well as in tools 1.309 + * for message validation, translation and format conversion. 
1.310 + * <p> 1.311 + * The parser handles all syntax relevant for identifying message arguments. 1.312 + * This includes "complex" arguments whose style strings contain 1.313 + * nested MessageFormat pattern substrings. 1.314 + * For "simple" arguments (with no nested MessageFormat pattern substrings), 1.315 + * the argument style is not parsed any further. 1.316 + * <p> 1.317 + * The parser handles named and numbered message arguments and allows both in one message. 1.318 + * <p> 1.319 + * Once a pattern has been parsed successfully, iterate through the parsed data 1.320 + * with countParts(), getPart() and related methods. 1.321 + * <p> 1.322 + * The data logically represents a parse tree, but is stored and accessed 1.323 + * as a list of "parts" for fast and simple parsing and to minimize object allocations. 1.324 + * Arguments and nested messages are best handled via recursion. 1.325 + * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns 1.326 + * the index of the corresponding _LIMIT "part". 
1.327 + * <p> 1.328 + * List of "parts": 1.329 + * <pre> 1.330 + * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 1.331 + * argument = noneArg | simpleArg | complexArg 1.332 + * complexArg = choiceArg | pluralArg | selectArg 1.333 + * 1.334 + * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 1.335 + * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 1.336 + * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 1.337 + * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 1.338 + * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 1.339 + * 1.340 + * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 1.341 + * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 1.342 + * selectStyle = (ARG_SELECTOR message)+ 1.343 + * </pre> 1.344 + * <ul> 1.345 + * <li>Literal output text is not represented directly by "parts" but accessed 1.346 + * between parts of a message, from one part's getLimit() to the next part's getIndex(). 1.347 + * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 1.348 + * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 1.349 + * the less-than-or-equal-to sign (U+2264). 1.350 + * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 1.351 + * The optional numeric Part between each (ARG_SELECTOR, message) pair 1.352 + * is the value of an explicit-number selector like "=2", 1.353 + * otherwise the selector is a non-numeric identifier. 1.354 + * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 1.355 + * </ul> 1.356 + * <p> 1.357 + * This class is not intended for public subclassing. 
1.358 + * 1.359 + * @stable ICU 4.8 1.360 + */ 1.361 +class U_COMMON_API MessagePattern : public UObject { 1.362 +public: 1.363 + /** 1.364 + * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. 1.365 + * @param errorCode Standard ICU error code. Its input value must 1.366 + * pass the U_SUCCESS() test, or else the function returns 1.367 + * immediately. Check for U_FAILURE() on output or use with 1.368 + * function chaining. (See User Guide for details.) 1.369 + * @stable ICU 4.8 1.370 + */ 1.371 + MessagePattern(UErrorCode &errorCode); 1.372 + 1.373 + /** 1.374 + * Constructs an empty MessagePattern. 1.375 + * @param mode Explicit UMessagePatternApostropheMode. 1.376 + * @param errorCode Standard ICU error code. Its input value must 1.377 + * pass the U_SUCCESS() test, or else the function returns 1.378 + * immediately. Check for U_FAILURE() on output or use with 1.379 + * function chaining. (See User Guide for details.) 1.380 + * @stable ICU 4.8 1.381 + */ 1.382 + MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); 1.383 + 1.384 + /** 1.385 + * Constructs a MessagePattern with default UMessagePatternApostropheMode and 1.386 + * parses the MessageFormat pattern string. 1.387 + * @param pattern a MessageFormat pattern string 1.388 + * @param parseError Struct to receive information on the position 1.389 + * of an error within the pattern. 1.390 + * Can be NULL. 1.391 + * @param errorCode Standard ICU error code. Its input value must 1.392 + * pass the U_SUCCESS() test, or else the function returns 1.393 + * immediately. Check for U_FAILURE() on output or use with 1.394 + * function chaining. (See User Guide for details.) 1.395 + * TODO: turn @throws into UErrorCode specifics? 1.396 + * @throws IllegalArgumentException for syntax errors in the pattern string 1.397 + * @throws IndexOutOfBoundsException if certain limits are exceeded 1.398 + * (e.g., argument number too high, argument name too long, etc.) 
1.399 + * @throws NumberFormatException if a number could not be parsed 1.400 + * @stable ICU 4.8 1.401 + */ 1.402 + MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 1.403 + 1.404 + /** 1.405 + * Copy constructor. 1.406 + * @param other Object to copy. 1.407 + * @stable ICU 4.8 1.408 + */ 1.409 + MessagePattern(const MessagePattern &other); 1.410 + 1.411 + /** 1.412 + * Assignment operator. 1.413 + * @param other Object to copy. 1.414 + * @return *this=other 1.415 + * @stable ICU 4.8 1.416 + */ 1.417 + MessagePattern &operator=(const MessagePattern &other); 1.418 + 1.419 + /** 1.420 + * Destructor. 1.421 + * @stable ICU 4.8 1.422 + */ 1.423 + virtual ~MessagePattern(); 1.424 + 1.425 + /** 1.426 + * Parses a MessageFormat pattern string. 1.427 + * @param pattern a MessageFormat pattern string 1.428 + * @param parseError Struct to receive information on the position 1.429 + * of an error within the pattern. 1.430 + * Can be NULL. 1.431 + * @param errorCode Standard ICU error code. Its input value must 1.432 + * pass the U_SUCCESS() test, or else the function returns 1.433 + * immediately. Check for U_FAILURE() on output or use with 1.434 + * function chaining. (See User Guide for details.) 1.435 + * @return *this 1.436 + * @throws IllegalArgumentException for syntax errors in the pattern string 1.437 + * @throws IndexOutOfBoundsException if certain limits are exceeded 1.438 + * (e.g., argument number too high, argument name too long, etc.) 1.439 + * @throws NumberFormatException if a number could not be parsed 1.440 + * @stable ICU 4.8 1.441 + */ 1.442 + MessagePattern &parse(const UnicodeString &pattern, 1.443 + UParseError *parseError, UErrorCode &errorCode); 1.444 + 1.445 + /** 1.446 + * Parses a ChoiceFormat pattern string. 1.447 + * @param pattern a ChoiceFormat pattern string 1.448 + * @param parseError Struct to receive information on the position 1.449 + * of an error within the pattern. 1.450 + * Can be NULL. 
1.451 + * @param errorCode Standard ICU error code. Its input value must 1.452 + * pass the U_SUCCESS() test, or else the function returns 1.453 + * immediately. Check for U_FAILURE() on output or use with 1.454 + * function chaining. (See User Guide for details.) 1.455 + * @return *this 1.456 + * @throws IllegalArgumentException for syntax errors in the pattern string 1.457 + * @throws IndexOutOfBoundsException if certain limits are exceeded 1.458 + * (e.g., argument number too high, argument name too long, etc.) 1.459 + * @throws NumberFormatException if a number could not be parsed 1.460 + * @stable ICU 4.8 1.461 + */ 1.462 + MessagePattern &parseChoiceStyle(const UnicodeString &pattern, 1.463 + UParseError *parseError, UErrorCode &errorCode); 1.464 + 1.465 + /** 1.466 + * Parses a PluralFormat pattern string. 1.467 + * @param pattern a PluralFormat pattern string 1.468 + * @param parseError Struct to receive information on the position 1.469 + * of an error within the pattern. 1.470 + * Can be NULL. 1.471 + * @param errorCode Standard ICU error code. Its input value must 1.472 + * pass the U_SUCCESS() test, or else the function returns 1.473 + * immediately. Check for U_FAILURE() on output or use with 1.474 + * function chaining. (See User Guide for details.) 1.475 + * @return *this 1.476 + * @throws IllegalArgumentException for syntax errors in the pattern string 1.477 + * @throws IndexOutOfBoundsException if certain limits are exceeded 1.478 + * (e.g., argument number too high, argument name too long, etc.) 1.479 + * @throws NumberFormatException if a number could not be parsed 1.480 + * @stable ICU 4.8 1.481 + */ 1.482 + MessagePattern &parsePluralStyle(const UnicodeString &pattern, 1.483 + UParseError *parseError, UErrorCode &errorCode); 1.484 + 1.485 + /** 1.486 + * Parses a SelectFormat pattern string. 
1.487 + * @param pattern a SelectFormat pattern string 1.488 + * @param parseError Struct to receive information on the position 1.489 + * of an error within the pattern. 1.490 + * Can be NULL. 1.491 + * @param errorCode Standard ICU error code. Its input value must 1.492 + * pass the U_SUCCESS() test, or else the function returns 1.493 + * immediately. Check for U_FAILURE() on output or use with 1.494 + * function chaining. (See User Guide for details.) 1.495 + * @return *this 1.496 + * @throws IllegalArgumentException for syntax errors in the pattern string 1.497 + * @throws IndexOutOfBoundsException if certain limits are exceeded 1.498 + * (e.g., argument number too high, argument name too long, etc.) 1.499 + * @throws NumberFormatException if a number could not be parsed 1.500 + * @stable ICU 4.8 1.501 + */ 1.502 + MessagePattern &parseSelectStyle(const UnicodeString &pattern, 1.503 + UParseError *parseError, UErrorCode &errorCode); 1.504 + 1.505 + /** 1.506 + * Clears this MessagePattern. 1.507 + * countParts() will return 0. 1.508 + * @stable ICU 4.8 1.509 + */ 1.510 + void clear(); 1.511 + 1.512 + /** 1.513 + * Clears this MessagePattern and sets the UMessagePatternApostropheMode. 1.514 + * countParts() will return 0. 1.515 + * @param mode The new UMessagePatternApostropheMode. 1.516 + * @stable ICU 4.8 1.517 + */ 1.518 + void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { 1.519 + clear(); 1.520 + aposMode=mode; 1.521 + } 1.522 + 1.523 + /** 1.524 + * @param other another object to compare with. 1.525 + * @return TRUE if this object is equivalent to the other one. 1.526 + * @stable ICU 4.8 1.527 + */ 1.528 + UBool operator==(const MessagePattern &other) const; 1.529 + 1.530 + /** 1.531 + * @param other another object to compare with. 1.532 + * @return FALSE if this object is equivalent to the other one. 
1.533 + * @stable ICU 4.8 1.534 + */ 1.535 + inline UBool operator!=(const MessagePattern &other) const { 1.536 + return !operator==(other); 1.537 + } 1.538 + 1.539 + /** 1.540 + * @return A hash code for this object. 1.541 + * @stable ICU 4.8 1.542 + */ 1.543 + int32_t hashCode() const; 1.544 + 1.545 + /** 1.546 + * @return this instance's UMessagePatternApostropheMode. 1.547 + * @stable ICU 4.8 1.548 + */ 1.549 + UMessagePatternApostropheMode getApostropheMode() const { 1.550 + return aposMode; 1.551 + } 1.552 + 1.553 + // Java has package-private jdkAposMode() here. 1.554 + // In C++, this is declared in the MessageImpl class. 1.555 + 1.556 + /** 1.557 + * @return the parsed pattern string (null if none was parsed). 1.558 + * @stable ICU 4.8 1.559 + */ 1.560 + const UnicodeString &getPatternString() const { 1.561 + return msg; 1.562 + } 1.563 + 1.564 + /** 1.565 + * Does the parsed pattern have named arguments like {first_name}? 1.566 + * @return TRUE if the parsed pattern has at least one named argument. 1.567 + * @stable ICU 4.8 1.568 + */ 1.569 + UBool hasNamedArguments() const { 1.570 + return hasArgNames; 1.571 + } 1.572 + 1.573 + /** 1.574 + * Does the parsed pattern have numbered arguments like {2}? 1.575 + * @return TRUE if the parsed pattern has at least one numbered argument. 1.576 + * @stable ICU 4.8 1.577 + */ 1.578 + UBool hasNumberedArguments() const { 1.579 + return hasArgNumbers; 1.580 + } 1.581 + 1.582 + /** 1.583 + * Validates and parses an argument name or argument number string. 1.584 + * An argument name must be a "pattern identifier", that is, it must contain 1.585 + * no Unicode Pattern_Syntax or Pattern_White_Space characters. 1.586 + * If it only contains ASCII digits, then it must be a small integer with no leading zero. 1.587 + * @param name Input string. 
1.588 + * @return >=0 if the name is a valid number, 1.589 + * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 1.590 + * ARG_NAME_NOT_VALID (-2) if it is neither. 1.591 + * @stable ICU 4.8 1.592 + */ 1.593 + static int32_t validateArgumentName(const UnicodeString &name); 1.594 + 1.595 + /** 1.596 + * Returns a version of the parsed pattern string where each ASCII apostrophe 1.597 + * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 1.598 + * <p> 1.599 + * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 1.600 + * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 1.601 + * @return the deep-auto-quoted version of the parsed pattern string. 1.602 + * @see MessageFormat.autoQuoteApostrophe() 1.603 + * @stable ICU 4.8 1.604 + */ 1.605 + UnicodeString autoQuoteApostropheDeep() const; 1.606 + 1.607 + class Part; 1.608 + 1.609 + /** 1.610 + * Returns the number of "parts" created by parsing the pattern string. 1.611 + * Returns 0 if no pattern has been parsed or clear() was called. 1.612 + * @return the number of pattern parts. 1.613 + * @stable ICU 4.8 1.614 + */ 1.615 + int32_t countParts() const { 1.616 + return partsLength; 1.617 + } 1.618 + 1.619 + /** 1.620 + * Gets the i-th pattern "part". 1.621 + * @param i The index of the Part data. (0..countParts()-1) 1.622 + * @return the i-th pattern "part". 1.623 + * @stable ICU 4.8 1.624 + */ 1.625 + const Part &getPart(int32_t i) const { 1.626 + return parts[i]; 1.627 + } 1.628 + 1.629 + /** 1.630 + * Returns the UMessagePatternPartType of the i-th pattern "part". 1.631 + * Convenience method for getPart(i).getType(). 1.632 + * @param i The index of the Part data. (0..countParts()-1) 1.633 + * @return The UMessagePatternPartType of the i-th Part. 
1.634 + * @stable ICU 4.8 1.635 + */ 1.636 + UMessagePatternPartType getPartType(int32_t i) const { 1.637 + return getPart(i).type; 1.638 + } 1.639 + 1.640 + /** 1.641 + * Returns the pattern index of the specified pattern "part". 1.642 + * Convenience method for getPart(partIndex).getIndex(). 1.643 + * @param partIndex The index of the Part data. (0..countParts()-1) 1.644 + * @return The pattern index of this Part. 1.645 + * @stable ICU 4.8 1.646 + */ 1.647 + int32_t getPatternIndex(int32_t partIndex) const { 1.648 + return getPart(partIndex).index; 1.649 + } 1.650 + 1.651 + /** 1.652 + * Returns the substring of the pattern string indicated by the Part. 1.653 + * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 1.654 + * @param part a part of this MessagePattern. 1.655 + * @return the substring associated with part. 1.656 + * @stable ICU 4.8 1.657 + */ 1.658 + UnicodeString getSubstring(const Part &part) const { 1.659 + return msg.tempSubString(part.index, part.length); 1.660 + } 1.661 + 1.662 + /** 1.663 + * Compares the part's substring with the input string s. 1.664 + * @param part a part of this MessagePattern. 1.665 + * @param s a string. 1.666 + * @return TRUE if getSubstring(part).equals(s). 1.667 + * @stable ICU 4.8 1.668 + */ 1.669 + UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { 1.670 + return 0==msg.compare(part.index, part.length, s); 1.671 + } 1.672 + 1.673 + /** 1.674 + * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 1.675 + * @param part a part of this MessagePattern. 1.676 + * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. 1.677 + * @stable ICU 4.8 1.678 + */ 1.679 + double getNumericValue(const Part &part) const; 1.680 + 1.681 + /** 1.682 + * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 1.683 + * @param pluralStart the index of the first PluralFormat argument style part. 
(0..countParts()-1) 1.684 + * @return the "offset:" value. 1.685 + * @stable ICU 4.8 1.686 + */ 1.687 + double getPluralOffset(int32_t pluralStart) const; 1.688 + 1.689 + /** 1.690 + * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 1.691 + * @param start The index of some Part data (0..countParts()-1); 1.692 + * this Part should be of Type ARG_START or MSG_START. 1.693 + * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 1.694 + * or start itself if getPartType(start)!=ARG|MSG_START. 1.695 + * @stable ICU 4.8 1.696 + */ 1.697 + int32_t getLimitPartIndex(int32_t start) const { 1.698 + int32_t limit=getPart(start).limitPartIndex; 1.699 + if(limit<start) { 1.700 + return start; 1.701 + } 1.702 + return limit; 1.703 + } 1.704 + 1.705 + /** 1.706 + * A message pattern "part", representing a pattern parsing event. 1.707 + * There is a part for the start and end of a message or argument, 1.708 + * for quoting and escaping of and with ASCII apostrophes, 1.709 + * and for syntax elements of "complex" arguments. 1.710 + * @stable ICU 4.8 1.711 + */ 1.712 + class Part : public UMemory { 1.713 + public: 1.714 + /** 1.715 + * Default constructor, do not use. 1.716 + * @internal 1.717 + */ 1.718 + Part() {} 1.719 + 1.720 + /** 1.721 + * Returns the type of this part. 1.722 + * @return the part type. 1.723 + * @stable ICU 4.8 1.724 + */ 1.725 + UMessagePatternPartType getType() const { 1.726 + return type; 1.727 + } 1.728 + 1.729 + /** 1.730 + * Returns the pattern string index associated with this Part. 1.731 + * @return this part's pattern string index. 1.732 + * @stable ICU 4.8 1.733 + */ 1.734 + int32_t getIndex() const { 1.735 + return index; 1.736 + } 1.737 + 1.738 + /** 1.739 + * Returns the length of the pattern substring associated with this Part. 1.740 + * This is 0 for some parts. 1.741 + * @return this part's pattern substring length. 
1.742 + * @stable ICU 4.8 1.743 + */ 1.744 + int32_t getLength() const { 1.745 + return length; 1.746 + } 1.747 + 1.748 + /** 1.749 + * Returns the pattern string limit (exclusive-end) index associated with this Part. 1.750 + * Convenience method for getIndex()+getLength(). 1.751 + * @return this part's pattern string limit index, same as getIndex()+getLength(). 1.752 + * @stable ICU 4.8 1.753 + */ 1.754 + int32_t getLimit() const { 1.755 + return index+length; 1.756 + } 1.757 + 1.758 + /** 1.759 + * Returns a value associated with this part. 1.760 + * See the documentation of each part type for details. 1.761 + * @return the part value. 1.762 + * @stable ICU 4.8 1.763 + */ 1.764 + int32_t getValue() const { 1.765 + return value; 1.766 + } 1.767 + 1.768 + /** 1.769 + * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 1.770 + * otherwise UMSGPAT_ARG_TYPE_NONE. 1.771 + * @return the argument type for this part. 1.772 + * @stable ICU 4.8 1.773 + */ 1.774 + UMessagePatternArgType getArgType() const { 1.775 + UMessagePatternPartType type=getType(); 1.776 + if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 1.777 + return (UMessagePatternArgType)value; 1.778 + } else { 1.779 + return UMSGPAT_ARG_TYPE_NONE; 1.780 + } 1.781 + } 1.782 + 1.783 + /** 1.784 + * Indicates whether the Part type has a numeric value. 1.785 + * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). 1.786 + * @param type The Part type to be tested. 1.787 + * @return TRUE if the Part type has a numeric value. 1.788 + * @stable ICU 4.8 1.789 + */ 1.790 + static UBool hasNumericValue(UMessagePatternPartType type) { 1.791 + return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; 1.792 + } 1.793 + 1.794 + /** 1.795 + * @param other another object to compare with. 1.796 + * @return TRUE if this object is equivalent to the other one. 
1.797 + * @stable ICU 4.8 1.798 + */ 1.799 + UBool operator==(const Part &other) const; 1.800 + 1.801 + /** 1.802 + * @param other another object to compare with. 1.803 + * @return FALSE if this object is equivalent to the other one. 1.804 + * @stable ICU 4.8 1.805 + */ 1.806 + inline UBool operator!=(const Part &other) const { 1.807 + return !operator==(other); 1.808 + } 1.809 + 1.810 + /** 1.811 + * @return A hash code for this object. 1.812 + * @stable ICU 4.8 1.813 + */ 1.814 + int32_t hashCode() const { 1.815 + return ((type*37+index)*37+length)*37+value; 1.816 + } 1.817 + 1.818 + private: 1.819 + friend class MessagePattern; 1.820 + 1.821 + static const int32_t MAX_LENGTH=0xffff; 1.822 + static const int32_t MAX_VALUE=0x7fff; 1.823 + 1.824 + // Some fields are not final because they are modified during pattern parsing. 1.825 + // After pattern parsing, the parts are effectively immutable. 1.826 + UMessagePatternPartType type; 1.827 + int32_t index; 1.828 + uint16_t length; 1.829 + int16_t value; 1.830 + int32_t limitPartIndex; 1.831 + }; 1.832 + 1.833 +private: 1.834 + void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 1.835 + 1.836 + void postParse(); 1.837 + 1.838 + int32_t parseMessage(int32_t index, int32_t msgStartLength, 1.839 + int32_t nestingLevel, UMessagePatternArgType parentType, 1.840 + UParseError *parseError, UErrorCode &errorCode); 1.841 + 1.842 + int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, 1.843 + UParseError *parseError, UErrorCode &errorCode); 1.844 + 1.845 + int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); 1.846 + 1.847 + int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, 1.848 + UParseError *parseError, UErrorCode &errorCode); 1.849 + 1.850 + int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, 1.851 + UParseError *parseError, UErrorCode &errorCode); 1.852 + 1.853 + 
/** 1.854 + * Validates and parses an argument name or argument number string. 1.855 + * This internal method assumes that the input substring (the part of s delimited by start and limit) is a "pattern identifier". 1.856 + * @return >=0 if the name is a valid number, 1.857 + * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 1.858 + * ARG_NAME_NOT_VALID (-2) if it is neither. 1.859 + * @see validateArgumentName 1.860 + */ 1.861 + static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); 1.862 + /** Convenience overload: forwards to the static parseArgNumber() with this object's msg member and the same start and limit; returns that call's result. */ 1.863 + int32_t parseArgNumber(int32_t start, int32_t limit) { 1.864 + return parseArgNumber(msg, start, limit); 1.865 + } 1.866 + 1.867 + /** 1.868 + * Parses a number from the specified message substring. The function returns void; where the parsed value is stored is not visible in this header. 1.869 + * @param start start index into the message string 1.870 + * @param limit limit index into the message string, must be start<limit 1.871 + * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat) 1.872 + * @param parseError UParseError pointer. NOTE(review): presumably receives the error location on failure - confirm in messagepattern.cpp. 1.873 + * @param errorCode UErrorCode in/out reference. NOTE(review): presumably follows the usual ICU error-code convention - confirm. 1.874 + */ 1.875 + void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, 1.876 + UParseError *parseError, UErrorCode &errorCode); 1.877 + 1.878 + // Java has package-private appendReducedApostrophes() here. 1.879 + // In C++, this is declared in the MessageImpl class. 1.880 + /* NOTE(review): skipWhiteSpace() and skipIdentifier() take and return an int32_t index; presumably the result is the index just past the skipped characters - confirm in messagepattern.cpp. */ 1.881 + int32_t skipWhiteSpace(int32_t index); 1.882 + 1.883 + int32_t skipIdentifier(int32_t index); 1.884 + 1.885 + /** 1.886 + * Skips a sequence of characters that could occur in a double value. 1.887 + * Does not fully parse or validate the value. 
1.888 + */ 1.889 + int32_t skipDouble(int32_t index); 1.890 + 1.891 + static UBool isArgTypeChar(UChar32 c); 1.892 + /* NOTE(review): the four is*() predicates below each take an index, presumably a position in msg at which the corresponding argument-type keyword is tested - confirm in messagepattern.cpp. */ 1.893 + UBool isChoice(int32_t index); 1.894 + 1.895 + UBool isPlural(int32_t index); 1.896 + 1.897 + UBool isSelect(int32_t index); 1.898 + 1.899 + UBool isOrdinal(int32_t index); 1.900 + 1.901 + /** 1.902 + * @return TRUE if we are inside a MessageFormat (sub-)pattern, 1.903 + * as opposed to inside a top-level choice/plural/select pattern. 1.904 + */ 1.905 + UBool inMessageFormatPattern(int32_t nestingLevel); 1.906 + 1.907 + /** 1.908 + * @return TRUE if we are in a MessageFormat sub-pattern 1.909 + * of a top-level ChoiceFormat pattern. 1.910 + */ 1.911 + UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); 1.912 + /* addPart() and addLimitPart() take length and value as int32_t although Part stores them as uint16_t and int16_t. NOTE(review): range checking against Part::MAX_LENGTH / Part::MAX_VALUE is presumably done before these are called - confirm in messagepattern.cpp. */ 1.913 + void addPart(UMessagePatternPartType type, int32_t index, int32_t length, 1.914 + int32_t value, UErrorCode &errorCode); 1.915 + 1.916 + void addLimitPart(int32_t start, 1.917 + UMessagePatternPartType type, int32_t index, int32_t length, 1.918 + int32_t value, UErrorCode &errorCode); 1.919 + 1.920 + void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); 1.921 + 1.922 + void setParseError(UParseError *parseError, int32_t index); 1.923 + 1.924 + UBool init(UErrorCode &errorCode); 1.925 + UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); 1.926 + /* Data members. NOTE(review): partsList/parts/partsLength and numericValuesList/numericValues/numericValuesLength appear to be the C++ counterparts of the Java ArrayList fields quoted in the comments below; ownership of the raw pointers is not visible in this header - confirm in messagepattern.cpp. */ 1.927 + UMessagePatternApostropheMode aposMode; 1.928 + UnicodeString msg; 1.929 + // Java: ArrayList<Part> parts=new ArrayList<Part>(); 1.930 + MessagePatternPartsList *partsList; 1.931 + Part *parts; 1.932 + int32_t partsLength; 1.933 + // Java: ArrayList<Double> numericValues; 1.934 + MessagePatternDoubleList *numericValuesList; 1.935 + double *numericValues; 1.936 + int32_t numericValuesLength; 1.937 + UBool hasArgNames; 1.938 + UBool hasArgNumbers; 1.939 + UBool needsAutoQuoting; 1.940 +}; 1.941 + 1.942 +U_NAMESPACE_END 1.943 + 1.944 +#endif // !UCONFIG_NO_FORMATTING 1.945 + 1.946 +#endif // 
__MESSAGEPATTERN_H__