intl/icu/source/common/unicode/messagepattern.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unicode/messagepattern.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,943 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*   Copyright (C) 2011-2013, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +*******************************************************************************
     1.9 +*   file name:  messagepattern.h
    1.10 +*   encoding:   US-ASCII
    1.11 +*   tab size:   8 (not used)
    1.12 +*   indentation:4
    1.13 +*
    1.14 +*   created on: 2011mar14
    1.15 +*   created by: Markus W. Scherer
    1.16 +*/
    1.17 +
    1.18 +#ifndef __MESSAGEPATTERN_H__
    1.19 +#define __MESSAGEPATTERN_H__
    1.20 +
    1.21 +/**
    1.22 + * \file
    1.23 + * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
    1.24 + */
    1.25 +
    1.26 +#include "unicode/utypes.h"
    1.27 +
    1.28 +#if !UCONFIG_NO_FORMATTING
    1.29 +
    1.30 +#include "unicode/parseerr.h"
    1.31 +#include "unicode/unistr.h"
    1.32 +
    1.33 +/**
    1.34 + * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
    1.35 + * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
    1.36 + * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
    1.37 + * <p>
    1.38 + * A pair of adjacent apostrophes always results in a single apostrophe in the output,
    1.39 + * even when the pair is between two single, text-quoting apostrophes.
    1.40 + * <p>
    1.41 + * The following table shows examples of desired MessageFormat.format() output
    1.42 + * with the pattern strings that yield that output.
    1.43 + * <p>
    1.44 + * <table>
    1.45 + *   <tr>
    1.46 + *     <th>Desired output</th>
    1.47 + *     <th>DOUBLE_OPTIONAL</th>
    1.48 + *     <th>DOUBLE_REQUIRED</th>
    1.49 + *   </tr>
    1.50 + *   <tr>
    1.51 + *     <td>I see {many}</td>
    1.52 + *     <td>I see '{many}'</td>
    1.53 + *     <td>(same)</td>
    1.54 + *   </tr>
    1.55 + *   <tr>
    1.56 + *     <td>I said {'Wow!'}</td>
    1.57 + *     <td>I said '{''Wow!''}'</td>
    1.58 + *     <td>(same)</td>
    1.59 + *   </tr>
    1.60 + *   <tr>
    1.61 + *     <td>I don't know</td>
    1.62 + *     <td>I don't know OR<br> I don''t know</td>
    1.63 + *     <td>I don''t know</td>
    1.64 + *   </tr>
    1.65 + * </table>
    1.66 + * @stable ICU 4.8
    1.67 + * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
    1.68 + */
    1.69 +enum UMessagePatternApostropheMode {
    1.70 +    /**
    1.71 +     * A literal apostrophe is represented by
    1.72 +     * either a single or a double apostrophe pattern character.
    1.73 +     * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
    1.74 +     * if it immediately precedes a curly brace {},
    1.75 +     * or a pipe symbol | if inside a choice format,
    1.76 +     * or a pound symbol # if inside a plural format.
    1.77 +     * <p>
    1.78 +     * This is the default behavior starting with ICU 4.8, unless overridden via UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE in uconfig.h.
    1.79 +     * @stable ICU 4.8
    1.80 +     */
    1.81 +    UMSGPAT_APOS_DOUBLE_OPTIONAL,
    1.82 +    /**
    1.83 +     * A literal apostrophe must be represented by
    1.84 +     * a double apostrophe pattern character.
    1.85 +     * A single apostrophe always starts quoted literal text.
    1.86 +     * <p>
    1.87 +     * This is the behavior of ICU 4.6 and earlier, and of the JDK.
    1.88 +     * @stable ICU 4.8
    1.89 +     */
    1.90 +    UMSGPAT_APOS_DOUBLE_REQUIRED
    1.91 +};
    1.92 +/** Typedef so that the name UMessagePatternApostropheMode also works without the "enum" keyword when compiled as C.
    1.93 + * @stable ICU 4.8
    1.94 + */
    1.95 +typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
    1.96 +
    1.97 +/**
    1.98 + * MessagePattern::Part type constants.
    1.99 + * @stable ICU 4.8
   1.100 + */
   1.101 +enum UMessagePatternPartType {
   1.102 +    /**
   1.103 +     * Start of a message pattern (main or nested).
   1.104 +     * The length is 0 for the top-level message
   1.105 +     * and for a choice argument sub-message, otherwise 1 for the '{'.
   1.106 +     * The value indicates the nesting level, starting with 0 for the main message.
   1.107 +     * <p>
   1.108 +     * There is always a later MSG_LIMIT part.
   1.109 +     * @stable ICU 4.8
   1.110 +     */
   1.111 +    UMSGPAT_PART_TYPE_MSG_START,
   1.112 +    /**
   1.113 +     * End of a message pattern (main or nested).
   1.114 +     * The length is 0 for the top-level message and
   1.115 +     * the last sub-message of a choice argument,
   1.116 +     * otherwise 1 for the '}' or (in a choice argument style) the '|'.
   1.117 +     * The value indicates the nesting level, starting with 0 for the main message.
   1.118 +     * @stable ICU 4.8
   1.119 +     */
   1.120 +    UMSGPAT_PART_TYPE_MSG_LIMIT,
   1.121 +    /**
   1.122 +     * Indicates a substring of the pattern string which is to be skipped when formatting.
   1.123 +     * For example, an apostrophe that begins or ends quoted text
   1.124 +     * would be indicated with such a part.
   1.125 +     * The value is undefined and currently always 0.
   1.126 +     * @stable ICU 4.8
   1.127 +     */
   1.128 +    UMSGPAT_PART_TYPE_SKIP_SYNTAX,
   1.129 +    /**
   1.130 +     * Indicates that a syntax character needs to be inserted for auto-quoting.
   1.131 +     * The length is 0.
   1.132 +     * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
   1.133 +     * @stable ICU 4.8
   1.134 +     */
   1.135 +    UMSGPAT_PART_TYPE_INSERT_CHAR,
   1.136 +    /**
   1.137 +     * Indicates a syntactic (non-escaped) # symbol in a plural variant.
   1.138 +     * When formatting, replace this part's substring with the
   1.139 +     * (value-offset) for the plural argument value.
   1.140 +     * The value is undefined and currently always 0.
   1.141 +     * @stable ICU 4.8
   1.142 +     */
   1.143 +    UMSGPAT_PART_TYPE_REPLACE_NUMBER,
   1.144 +    /**
   1.145 +     * Start of an argument.
   1.146 +     * The length is 1 for the '{'.
   1.147 +     * The value is the ordinal value of the ArgType. Use getArgType().
   1.148 +     * <p>
   1.149 +     * This part is followed by either an ARG_NUMBER or ARG_NAME,
   1.150 +     * followed by optional argument sub-parts (see UMessagePatternArgType constants)
   1.151 +     * and finally an ARG_LIMIT part.
   1.152 +     * @stable ICU 4.8
   1.153 +     */
   1.154 +    UMSGPAT_PART_TYPE_ARG_START,
   1.155 +    /**
   1.156 +     * End of an argument.
   1.157 +     * The length is 1 for the '}'.
   1.158 +     * The value is the ordinal value of the ArgType. Use getArgType().
   1.159 +     * @stable ICU 4.8
   1.160 +     */
   1.161 +    UMSGPAT_PART_TYPE_ARG_LIMIT,
   1.162 +    /**
   1.163 +     * The argument number, provided by the value.
   1.164 +     * @stable ICU 4.8
   1.165 +     */
   1.166 +    UMSGPAT_PART_TYPE_ARG_NUMBER,
   1.167 +    /**
   1.168 +     * The argument name.
   1.169 +     * The value is undefined and currently always 0.
   1.170 +     * @stable ICU 4.8
   1.171 +     */
   1.172 +    UMSGPAT_PART_TYPE_ARG_NAME,
   1.173 +    /**
   1.174 +     * The argument type text of a "simple" argument (see UMSGPAT_ARG_TYPE_SIMPLE).
   1.175 +     * The value is undefined and currently always 0.
   1.176 +     * @stable ICU 4.8
   1.177 +     */
   1.178 +    UMSGPAT_PART_TYPE_ARG_TYPE,
   1.179 +    /**
   1.180 +     * The optional argument style text of a "simple" argument (see UMSGPAT_ARG_TYPE_SIMPLE).
   1.181 +     * The value is undefined and currently always 0.
   1.182 +     * @stable ICU 4.8
   1.183 +     */
   1.184 +    UMSGPAT_PART_TYPE_ARG_STYLE,
   1.185 +    /**
   1.186 +     * A selector substring in a "complex" argument style.
   1.187 +     * The value is undefined and currently always 0.
   1.188 +     * @stable ICU 4.8
   1.189 +     */
   1.190 +    UMSGPAT_PART_TYPE_ARG_SELECTOR,
   1.191 +    /**
   1.192 +     * An integer value, for example the offset or an explicit selector value
   1.193 +     * in a PluralFormat style.
   1.194 +     * The part value is the integer value.
   1.195 +     * @stable ICU 4.8
   1.196 +     */
   1.197 +    UMSGPAT_PART_TYPE_ARG_INT,
   1.198 +    /**
   1.199 +     * A numeric value, for example the offset or an explicit selector value
   1.200 +     * in a PluralFormat style.
   1.201 +     * The part value is an index into an internal array of numeric values;
   1.202 +     * use getNumericValue().
   1.203 +     * @stable ICU 4.8
   1.204 +     */
   1.205 +    UMSGPAT_PART_TYPE_ARG_DOUBLE
   1.206 +};
   1.207 +/** Typedef so that the name UMessagePatternPartType also works without the "enum" keyword when compiled as C.
   1.208 + * @stable ICU 4.8
   1.209 + */
   1.210 +typedef enum UMessagePatternPartType UMessagePatternPartType;
   1.211 +
   1.212 +/**
   1.213 + * Argument type constants.
   1.214 + * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
   1.215 + *
   1.216 + * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
   1.217 + * with a nesting level one greater than the surrounding message.
   1.218 + * @stable ICU 4.8
   1.219 + */
   1.220 +enum UMessagePatternArgType {
   1.221 +    /**
   1.222 +     * The argument has no specified type.
   1.223 +     * @stable ICU 4.8
   1.224 +     */
   1.225 +    UMSGPAT_ARG_TYPE_NONE,  // = 0; ARG_START/ARG_LIMIT parts carry these ordinals as their value
   1.226 +    /**
   1.227 +     * The argument has a "simple" type which is provided by the ARG_TYPE part.
   1.228 +     * An ARG_STYLE part might follow that.
   1.229 +     * @stable ICU 4.8
   1.230 +     */
   1.231 +    UMSGPAT_ARG_TYPE_SIMPLE,  // = 1
   1.232 +    /**
   1.233 +     * The argument is a ChoiceFormat with one or more
   1.234 +     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
   1.235 +     * @stable ICU 4.8
   1.236 +     */
   1.237 +    UMSGPAT_ARG_TYPE_CHOICE,  // = 2
   1.238 +    /**
   1.239 +     * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
   1.240 +     * (e.g., offset:1)
   1.241 +     * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
   1.242 +     * If the selector has an explicit value (e.g., =2), then
   1.243 +     * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
   1.244 +     * Otherwise the message immediately follows the ARG_SELECTOR.
   1.245 +     * @stable ICU 4.8
   1.246 +     */
   1.247 +    UMSGPAT_ARG_TYPE_PLURAL,  // = 3
   1.248 +    /**
   1.249 +     * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
   1.250 +     * @stable ICU 4.8
   1.251 +     */
   1.252 +    UMSGPAT_ARG_TYPE_SELECT,  // = 4
   1.253 +    /**
   1.254 +     * The argument is an ordinal-number PluralFormat
   1.255 +     * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
   1.256 +     * @stable ICU 50
   1.257 +     */
   1.258 +    UMSGPAT_ARG_TYPE_SELECTORDINAL  // = 5
   1.259 +};
   1.260 +/** Typedef so that the name UMessagePatternArgType also works without the "enum" keyword when compiled as C.
   1.261 + * @stable ICU 4.8
   1.262 + */
   1.263 +typedef enum UMessagePatternArgType UMessagePatternArgType;
   1.264 +
   1.265 +/**
   1.266 + * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
   1.267 + * Returns TRUE if the argument type has a plural style part sequence and semantics,
   1.268 + * that is, if it is UMSGPAT_ARG_TYPE_PLURAL or UMSGPAT_ARG_TYPE_SELECTORDINAL. The argument appears twice in the expansion, so pass an expression without side effects.
   1.269 + * @stable ICU 50
   1.270 + */
   1.271 +#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
   1.272 +    ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
   1.273 +
   1.274 +enum {
   1.275 +    /**
   1.276 +     * Return value from MessagePattern::validateArgumentName() for when
   1.277 +     * the string is a valid "pattern identifier" but not a number.
   1.278 +     * @stable ICU 4.8
   1.279 +     */
   1.280 +    UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
   1.281 +
   1.282 +    /**
   1.283 +     * Return value from MessagePattern::validateArgumentName() for when
   1.284 +     * the string is invalid.
   1.285 +     * It might not be a valid "pattern identifier",
   1.286 +     * or it might have only ASCII digits but with a leading zero or a number that is too large.
   1.287 +     * @stable ICU 4.8
   1.288 +     */
   1.289 +    UMSGPAT_ARG_NAME_NOT_VALID=-2
   1.290 +};
   1.291 +
   1.292 +/**
   1.293 + * Special value that is returned by getNumericValue(Part) when no
   1.294 + * numeric value is defined for a part (the double value -123456789).
   1.295 + * @see MessagePattern.getNumericValue()
   1.296 + * @stable ICU 4.8
   1.297 + */
   1.298 +#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
   1.299 +
   1.300 +U_NAMESPACE_BEGIN
   1.301 +
   1.302 +class MessagePatternDoubleList;
   1.303 +class MessagePatternPartsList;
   1.304 +
   1.305 +/**
   1.306 + * Parses and represents ICU MessageFormat patterns.
   1.307 + * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
   1.308 + * Used in the implementations of those classes as well as in tools
   1.309 + * for message validation, translation and format conversion.
   1.310 + * <p>
   1.311 + * The parser handles all syntax relevant for identifying message arguments.
   1.312 + * This includes "complex" arguments whose style strings contain
   1.313 + * nested MessageFormat pattern substrings.
   1.314 + * For "simple" arguments (with no nested MessageFormat pattern substrings),
   1.315 + * the argument style is not parsed any further.
   1.316 + * <p>
   1.317 + * The parser handles named and numbered message arguments and allows both in one message.
   1.318 + * <p>
   1.319 + * Once a pattern has been parsed successfully, iterate through the parsed data
   1.320 + * with countParts(), getPart() and related methods.
   1.321 + * <p>
   1.322 + * The data logically represents a parse tree, but is stored and accessed
   1.323 + * as a list of "parts" for fast and simple parsing and to minimize object allocations.
   1.324 + * Arguments and nested messages are best handled via recursion.
   1.325 + * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
   1.326 + * the index of the corresponding _LIMIT "part".
   1.327 + * <p>
   1.328 + * List of "parts":
   1.329 + * <pre>
   1.330 + * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
   1.331 + * argument = noneArg | simpleArg | complexArg
   1.332 + * complexArg = choiceArg | pluralArg | selectArg
   1.333 + *
   1.334 + * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
   1.335 + * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
   1.336 + * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
   1.337 + * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
   1.338 + * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
   1.339 + *
   1.340 + * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
   1.341 + * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
   1.342 + * selectStyle = (ARG_SELECTOR message)+
   1.343 + * </pre>
   1.344 + * <ul>
   1.345 + *   <li>Literal output text is not represented directly by "parts" but accessed
   1.346 + *       between parts of a message, from one part's getLimit() to the next part's getIndex().
   1.347 + *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
   1.348 + *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
   1.349 + *       the less-than-or-equal-to sign (U+2264).
   1.350 + *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
   1.351 + *       The optional numeric Part between each (ARG_SELECTOR, message) pair
   1.352 + *       is the value of an explicit-number selector like "=2",
   1.353 + *       otherwise the selector is a non-numeric identifier.
   1.354 + *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
   1.355 + * </ul>
   1.356 + * <p>
   1.357 + * This class is not intended for public subclassing.
   1.358 + *
   1.359 + * @stable ICU 4.8
   1.360 + */
   1.361 +class U_COMMON_API MessagePattern : public UObject {
   1.362 +public:
   1.363 +    /**
   1.364 +     * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
   1.365 +     * @param errorCode Standard ICU error code. Its input value must
   1.366 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.367 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.368 +     *                  function chaining. (See User Guide for details.)
   1.369 +     * @stable ICU 4.8
   1.370 +     */
   1.371 +    MessagePattern(UErrorCode &errorCode);
   1.372 +
   1.373 +    /**
   1.374 +     * Constructs an empty MessagePattern.
   1.375 +     * @param mode Explicit UMessagePatternApostropheMode.
   1.376 +     * @param errorCode Standard ICU error code. Its input value must
   1.377 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.378 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.379 +     *                  function chaining. (See User Guide for details.)
   1.380 +     * @stable ICU 4.8
   1.381 +     */
   1.382 +    MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
   1.383 +
   1.384 +    /**
   1.385 +     * Constructs a MessagePattern with default UMessagePatternApostropheMode and
   1.386 +     * parses the MessageFormat pattern string.
   1.387 +     * @param pattern a MessageFormat pattern string
   1.388 +     * @param parseError Struct to receive information on the position
   1.389 +     *                   of an error within the pattern.
   1.390 +     *                   Can be NULL.
   1.391 +     * @param errorCode Standard ICU error code. Its input value must
   1.392 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.393 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.394 +     *                  function chaining. (See User Guide for details.)
   1.395 +     * TODO: turn @throws into UErrorCode specifics?
   1.396 +     * @throws IllegalArgumentException for syntax errors in the pattern string
   1.397 +     * @throws IndexOutOfBoundsException if certain limits are exceeded
   1.398 +     *         (e.g., argument number too high, argument name too long, etc.)
   1.399 +     * @throws NumberFormatException if a number could not be parsed
   1.400 +     * @stable ICU 4.8
   1.401 +     */
   1.402 +    MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
   1.403 +
   1.404 +    /**
   1.405 +     * Copy constructor.
   1.406 +     * @param other Object to copy.
   1.407 +     * @stable ICU 4.8
   1.408 +     */
   1.409 +    MessagePattern(const MessagePattern &other);
   1.410 +
   1.411 +    /**
   1.412 +     * Assignment operator.
   1.413 +     * @param other Object to copy.
   1.414 +     * @return *this=other
   1.415 +     * @stable ICU 4.8
   1.416 +     */
   1.417 +    MessagePattern &operator=(const MessagePattern &other);
   1.418 +
   1.419 +    /**
   1.420 +     * Destructor.
   1.421 +     * @stable ICU 4.8
   1.422 +     */
   1.423 +    virtual ~MessagePattern();
   1.424 +
   1.425 +    /**
   1.426 +     * Parses a MessageFormat pattern string.
   1.427 +     * @param pattern a MessageFormat pattern string
   1.428 +     * @param parseError Struct to receive information on the position
   1.429 +     *                   of an error within the pattern.
   1.430 +     *                   Can be NULL.
   1.431 +     * @param errorCode Standard ICU error code. Its input value must
   1.432 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.433 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.434 +     *                  function chaining. (See User Guide for details.)
   1.435 +     * @return *this
   1.436 +     * @throws IllegalArgumentException for syntax errors in the pattern string
   1.437 +     * @throws IndexOutOfBoundsException if certain limits are exceeded
   1.438 +     *         (e.g., argument number too high, argument name too long, etc.)
   1.439 +     * @throws NumberFormatException if a number could not be parsed
   1.440 +     * @stable ICU 4.8
   1.441 +     */
   1.442 +    MessagePattern &parse(const UnicodeString &pattern,
   1.443 +                          UParseError *parseError, UErrorCode &errorCode);
   1.444 +
   1.445 +    /**
   1.446 +     * Parses a ChoiceFormat pattern string.
   1.447 +     * @param pattern a ChoiceFormat pattern string
   1.448 +     * @param parseError Struct to receive information on the position
   1.449 +     *                   of an error within the pattern.
   1.450 +     *                   Can be NULL.
   1.451 +     * @param errorCode Standard ICU error code. Its input value must
   1.452 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.453 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.454 +     *                  function chaining. (See User Guide for details.)
   1.455 +     * @return *this
   1.456 +     * @throws IllegalArgumentException for syntax errors in the pattern string
   1.457 +     * @throws IndexOutOfBoundsException if certain limits are exceeded
   1.458 +     *         (e.g., argument number too high, argument name too long, etc.)
   1.459 +     * @throws NumberFormatException if a number could not be parsed
   1.460 +     * @stable ICU 4.8
   1.461 +     */
   1.462 +    MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
   1.463 +                                     UParseError *parseError, UErrorCode &errorCode);
   1.464 +
   1.465 +    /**
   1.466 +     * Parses a PluralFormat pattern string.
   1.467 +     * @param pattern a PluralFormat pattern string
   1.468 +     * @param parseError Struct to receive information on the position
   1.469 +     *                   of an error within the pattern.
   1.470 +     *                   Can be NULL.
   1.471 +     * @param errorCode Standard ICU error code. Its input value must
   1.472 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.473 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.474 +     *                  function chaining. (See User Guide for details.)
   1.475 +     * @return *this
   1.476 +     * @throws IllegalArgumentException for syntax errors in the pattern string
   1.477 +     * @throws IndexOutOfBoundsException if certain limits are exceeded
   1.478 +     *         (e.g., argument number too high, argument name too long, etc.)
   1.479 +     * @throws NumberFormatException if a number could not be parsed
   1.480 +     * @stable ICU 4.8
   1.481 +     */
   1.482 +    MessagePattern &parsePluralStyle(const UnicodeString &pattern,
   1.483 +                                     UParseError *parseError, UErrorCode &errorCode);
   1.484 +
   1.485 +    /**
   1.486 +     * Parses a SelectFormat pattern string.
   1.487 +     * @param pattern a SelectFormat pattern string
   1.488 +     * @param parseError Struct to receive information on the position
   1.489 +     *                   of an error within the pattern.
   1.490 +     *                   Can be NULL.
   1.491 +     * @param errorCode Standard ICU error code. Its input value must
   1.492 +     *                  pass the U_SUCCESS() test, or else the function returns
   1.493 +     *                  immediately. Check for U_FAILURE() on output or use with
   1.494 +     *                  function chaining. (See User Guide for details.)
   1.495 +     * @return *this
   1.496 +     * @throws IllegalArgumentException for syntax errors in the pattern string
   1.497 +     * @throws IndexOutOfBoundsException if certain limits are exceeded
   1.498 +     *         (e.g., argument number too high, argument name too long, etc.)
   1.499 +     * @throws NumberFormatException if a number could not be parsed
   1.500 +     * @stable ICU 4.8
   1.501 +     */
   1.502 +    MessagePattern &parseSelectStyle(const UnicodeString &pattern,
   1.503 +                                     UParseError *parseError, UErrorCode &errorCode);
   1.504 +
   1.505 +    /**
   1.506 +     * Clears this MessagePattern.
   1.507 +     * countParts() will return 0.
   1.508 +     * @stable ICU 4.8
   1.509 +     */
   1.510 +    void clear();
   1.511 +
   1.512 +    /**
   1.513 +     * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
   1.514 +     * countParts() will return 0.
   1.515 +     * @param mode The new UMessagePatternApostropheMode; stored after clear() and returned by getApostropheMode() from then on.
   1.516 +     * @stable ICU 4.8
   1.517 +     */
   1.518 +    void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
   1.519 +        clear();
   1.520 +        aposMode=mode;
   1.521 +    }
   1.522 +
   1.523 +    /**
   1.524 +     * @param other another object to compare with.
   1.525 +     * @return TRUE if this object is equivalent to the other one.
   1.526 +     * @stable ICU 4.8
   1.527 +     */
   1.528 +    UBool operator==(const MessagePattern &other) const;
   1.529 +
   1.530 +    /**
   1.531 +     * @param other another object to compare with.
   1.532 +     * @return FALSE if this object is equivalent to the other one (the negation of operator==(other)).
   1.533 +     * @stable ICU 4.8
   1.534 +     */
   1.535 +    inline UBool operator!=(const MessagePattern &other) const {
   1.536 +        return !operator==(other);
   1.537 +    }
   1.538 +
   1.539 +    /**
   1.540 +     * @return A hash code for this object.
   1.541 +     * @stable ICU 4.8
   1.542 +     */
   1.543 +    int32_t hashCode() const;
   1.544 +
   1.545 +    /**
   1.546 +     * @return this instance's UMessagePatternApostropheMode (the stored aposMode, e.g. as set by clearPatternAndSetApostropheMode()).
   1.547 +     * @stable ICU 4.8
   1.548 +     */
   1.549 +    UMessagePatternApostropheMode getApostropheMode() const {
   1.550 +        return aposMode;
   1.551 +    }
   1.552 +
   1.553 +    // Java has package-private jdkAposMode() here.
   1.554 +    // In C++, this is declared in the MessageImpl class.
   1.555 +
   1.556 +    /**
   1.557 +     * @return a reference to the stored pattern string; being a reference, it is never null (presumably empty if none was parsed -- TODO confirm).
   1.558 +     * @stable ICU 4.8
   1.559 +     */
   1.560 +    const UnicodeString &getPatternString() const {
   1.561 +        return msg;
   1.562 +    }
   1.563 +
   1.564 +    /**
   1.565 +     * Does the parsed pattern have named arguments like {first_name}?
   1.566 +     * @return TRUE if the parsed pattern has at least one named argument.
   1.567 +     * @stable ICU 4.8
   1.568 +     */
   1.569 +    UBool hasNamedArguments() const {
   1.570 +        return hasArgNames;
   1.571 +    }
   1.572 +
   1.573 +    /**
   1.574 +     * Does the parsed pattern have numbered arguments like {2}?
   1.575 +     * @return TRUE if the parsed pattern has at least one numbered argument.
   1.576 +     * @stable ICU 4.8
   1.577 +     */
   1.578 +    UBool hasNumberedArguments() const {
   1.579 +        return hasArgNumbers;
   1.580 +    }
   1.581 +
   1.582 +    /**
   1.583 +     * Validates and parses an argument name or argument number string.
   1.584 +     * An argument name must be a "pattern identifier", that is, it must contain
   1.585 +     * no Unicode Pattern_Syntax or Pattern_White_Space characters.
   1.586 +     * If it only contains ASCII digits, then it must be a small integer with no leading zero.
   1.587 +     * @param name Input string.
   1.588 +     * @return &gt;=0 if the name is a valid number,
   1.589 +     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
   1.590 +     *         ARG_NAME_NOT_VALID (-2) if it is neither.
   1.591 +     * @stable ICU 4.8
   1.592 +     */
   1.593 +    static int32_t validateArgumentName(const UnicodeString &name);
   1.594 +
   1.595 +    /**
   1.596 +     * Returns a version of the parsed pattern string where each ASCII apostrophe
   1.597 +     * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
   1.598 +     * <p>
   1.599 +     * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
   1.600 +     * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
   1.601 +     * @return the deep-auto-quoted version of the parsed pattern string.
   1.602 +     * @see MessageFormat.autoQuoteApostrophe()
   1.603 +     * @stable ICU 4.8
   1.604 +     */
   1.605 +    UnicodeString autoQuoteApostropheDeep() const;
   1.606 +
   1.607 +    class Part;
   1.608 +
   1.609 +    /**
   1.610 +     * Returns the number of "parts" created by parsing the pattern string.
   1.611 +     * Returns 0 if no pattern has been parsed or clear() was called.
   1.612 +     * @return the number of pattern parts.
   1.613 +     * @stable ICU 4.8
   1.614 +     */
   1.615 +    int32_t countParts() const {
   1.616 +        return partsLength;
   1.617 +    }
   1.618 +
   1.619 +    /**
   1.620 +     * Gets the i-th pattern "part".
   1.621 +     * @param i The index of the Part data. (0..countParts()-1) This method itself does not range-check i.
   1.622 +     * @return a reference to the i-th pattern "part".
   1.623 +     * @stable ICU 4.8
   1.624 +     */
   1.625 +    const Part &getPart(int32_t i) const {
   1.626 +        return parts[i];
   1.627 +    }
   1.628 +
   1.629 +    /**
   1.630 +     * Returns the UMessagePatternPartType of the i-th pattern "part".
   1.631 +     * Convenience method that reads the type field of getPart(i).
   1.632 +     * @param i The index of the Part data. (0..countParts()-1) Passed to getPart(i) without a range check here.
   1.633 +     * @return The UMessagePatternPartType of the i-th Part.
   1.634 +     * @stable ICU 4.8
   1.635 +     */
   1.636 +    UMessagePatternPartType getPartType(int32_t i) const {
   1.637 +        return getPart(i).type;
   1.638 +    }
   1.639 +
   1.640 +    /**
   1.641 +     * Returns the pattern index of the specified pattern "part".
   1.642 +     * Convenience method that reads the index field of getPart(partIndex).
   1.643 +     * @param partIndex The index of the Part data. (0..countParts()-1) Passed to getPart(partIndex) without a range check here.
   1.644 +     * @return The pattern index of this Part.
   1.645 +     * @stable ICU 4.8
   1.646 +     */
   1.647 +    int32_t getPatternIndex(int32_t partIndex) const {
   1.648 +        return getPart(partIndex).index;
   1.649 +    }
   1.650 +
   1.651 +    /**
   1.652 +     * Returns the substring of the pattern string indicated by the Part.
   1.653 +     * Implemented as tempSubString(part.index, part.length) on the stored pattern string.
   1.654 +     * @param part a part of this MessagePattern.
   1.655 +     * @return the substring associated with part.
   1.656 +     * @stable ICU 4.8
   1.657 +     */
   1.658 +    UnicodeString getSubstring(const Part &part) const {
   1.659 +        return msg.tempSubString(part.index, part.length);
   1.660 +    }
   1.661 +
   1.662 +    /**
   1.663 +     * Compares the part's substring with the input string s.
   1.664 +     * @param part a part of this MessagePattern.
   1.665 +     * @param s a string.
   1.666 +     * @return TRUE if the part's range of the pattern string (part.index, part.length) compares equal to s.
   1.667 +     * @stable ICU 4.8
   1.668 +     */
   1.669 +    UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
   1.670 +        return 0==msg.compare(part.index, part.length, s);
   1.671 +    }
   1.672 +
   1.673 +    /**
   1.674 +     * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
   1.675 +     * @param part a part of this MessagePattern.
   1.676 +     * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
   1.677 +     * @stable ICU 4.8
   1.678 +     */
   1.679 +    double getNumericValue(const Part &part) const;  // declaration only: no inline body in this header
   1.680 +
   1.681 +    /**
   1.682 +     * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
   1.683 +     * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
   1.684 +     * @return the "offset:" value.
   1.685 +     * @stable ICU 4.8
   1.686 +     */
   1.687 +    double getPluralOffset(int32_t pluralStart) const;  // declaration only: no inline body in this header
   1.688 +
   1.689 +    /**
   1.690 +     * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
   1.691 +     * @param start The index of some Part data (0..countParts()-1);
   1.692 +     *        this Part should be of Type ARG_START or MSG_START.
   1.693 +     * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
   1.694 +     *         or start itself if getPartType(start)!=ARG|MSG_START.
   1.695 +     * @stable ICU 4.8
   1.696 +     */
   1.697 +    int32_t getLimitPartIndex(int32_t start) const {
   1.698 +        // Fetch the limit-part index recorded on the part at start.
   1.699 +        const int32_t recorded=getPart(start).limitPartIndex;
   1.700 +        // A recorded index below start is answered with start itself; per the
   1.701 +        // documented contract that is the case for parts that are not ARG|MSG_START.
   1.702 +        return recorded<start ? start : recorded;
   1.703 +    }
   1.704 +
   1.705 +    /**
   1.706 +     * A message pattern "part", representing a pattern parsing event.
   1.707 +     * There is a part for the start and end of a message or argument,
   1.708 +     * for quoting and escaping of and with ASCII apostrophes,
   1.709 +     * and for syntax elements of "complex" arguments.
   1.710 +     * @stable ICU 4.8
   1.711 +     */
   1.712 +    class Part : public UMemory {
   1.713 +    public:
   1.714 +        /**
   1.715 +         * Default constructor, do not use: it leaves every field uninitialized.
   1.716 +         * @internal
   1.717 +         */
   1.718 +        Part() {}
   1.719 +
   1.720 +        /**
   1.721 +         * Returns the type of this part.
   1.722 +         * @return the part type.
   1.723 +         * @stable ICU 4.8
   1.724 +         */
   1.725 +        UMessagePatternPartType getType() const {
   1.726 +            return type;
   1.727 +        }
   1.728 +
   1.729 +        /**
   1.730 +         * Returns the pattern string index associated with this Part.
   1.731 +         * @return this part's pattern string index.
   1.732 +         * @stable ICU 4.8
   1.733 +         */
   1.734 +        int32_t getIndex() const {
   1.735 +            return index;
   1.736 +        }
   1.737 +
   1.738 +        /**
   1.739 +         * Returns the length of the pattern substring associated with this Part.
   1.740 +         * This is 0 for some parts.
   1.741 +         * @return this part's pattern substring length.
   1.742 +         * @stable ICU 4.8
   1.743 +         */
   1.744 +        int32_t getLength() const {
   1.745 +            return length;
   1.746 +        }
   1.747 +
   1.748 +        /**
   1.749 +         * Returns the pattern string limit (exclusive-end) index associated with this Part.
   1.750 +         * Convenience method for getIndex()+getLength().
   1.751 +         * @return this part's pattern string limit index, same as getIndex()+getLength().
   1.752 +         * @stable ICU 4.8
   1.753 +         */
   1.754 +        int32_t getLimit() const {
   1.755 +            return index+length;
   1.756 +        }
   1.757 +
   1.758 +        /**
   1.759 +         * Returns a value associated with this part.
   1.760 +         * See the documentation of each part type for details.
   1.761 +         * @return the part value.
   1.762 +         * @stable ICU 4.8
   1.763 +         */
   1.764 +        int32_t getValue() const {
   1.765 +            return value;
   1.766 +        }
   1.767 +
   1.768 +        /**
   1.769 +         * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
   1.770 +         * otherwise UMSGPAT_ARG_TYPE_NONE.
   1.771 +         * @return the argument type for this part.
   1.772 +         * @stable ICU 4.8
   1.773 +         */
   1.774 +        UMessagePatternArgType getArgType() const {
   1.775 +            // Test the member directly; a local named type would shadow it.
   1.776 +            if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
   1.777 +                return (UMessagePatternArgType)value;
   1.778 +            }
   1.779 +            // Any other part type has no argument type to report.
   1.780 +            return UMSGPAT_ARG_TYPE_NONE;
   1.781 +        }
   1.782 +
   1.783 +        /**
   1.784 +         * Indicates whether the Part type has a numeric value.
   1.785 +         * If so, then that numeric value can be retrieved via MessagePattern::getNumericValue().
   1.786 +         * @param type The Part type to be tested.
   1.787 +         * @return TRUE if the Part type has a numeric value.
   1.788 +         * @stable ICU 4.8
   1.789 +         */
   1.790 +        static UBool hasNumericValue(UMessagePatternPartType type) {
   1.791 +            return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
   1.792 +        }
   1.793 +
   1.794 +        /**
   1.795 +         * @param other another object to compare with.
   1.796 +         * @return TRUE if this object is equivalent to the other one.
   1.797 +         * @stable ICU 4.8
   1.798 +         */
   1.799 +        UBool operator==(const Part &other) const;
   1.800 +
   1.801 +        /**
   1.802 +         * @param other another object to compare with.
   1.803 +         * @return FALSE if this object is equivalent to the other one.
   1.804 +         * @stable ICU 4.8
   1.805 +         */
   1.806 +        inline UBool operator!=(const Part &other) const {
   1.807 +            return !operator==(other);
   1.808 +        }
   1.809 +
   1.810 +        /**
   1.811 +         * @return A hash code for this object, combining type, index, length and value.
   1.812 +         * @stable ICU 4.8
   1.813 +         */
   1.814 +        int32_t hashCode() const {
   1.815 +            return (int32_t)((((uint32_t)type*37u+(uint32_t)index)*37u+(uint32_t)length)*37u+(uint32_t)value);  // unsigned math wraps; signed overflow would be undefined
   1.816 +        }
   1.817 +
   1.818 +    private:
   1.819 +        friend class MessagePattern;
   1.820 +
   1.821 +        static const int32_t MAX_LENGTH=0xffff;  // largest value the uint16_t length field can hold
   1.822 +        static const int32_t MAX_VALUE=0x7fff;  // largest value the int16_t value field can hold
   1.823 +
   1.824 +        // Some fields are not const because they are modified during pattern parsing.
   1.825 +        // After pattern parsing, the parts are effectively immutable.
   1.826 +        UMessagePatternPartType type;
   1.827 +        int32_t index;
   1.828 +        uint16_t length;
   1.829 +        int16_t value;
   1.830 +        int32_t limitPartIndex;  // not part of hashCode(); read by MessagePattern::getLimitPartIndex()
   1.831 +    };
   1.832 +
   1.833 +private:
   1.834 +    void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
   1.835 +    // NOTE(review): preParse()/postParse() presumably bracket a parse (set-up before, finalization after); their bodies are not in this header -- confirm.
   1.836 +    void postParse();
   1.837 +    // NOTE(review): each parse*() helper below takes an index and returns an int32_t, presumably the pattern index where it stopped -- confirm in the implementation.
   1.838 +    int32_t parseMessage(int32_t index, int32_t msgStartLength,
   1.839 +                         int32_t nestingLevel, UMessagePatternArgType parentType,
   1.840 +                         UParseError *parseError, UErrorCode &errorCode);
   1.841 +
   1.842 +    int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
   1.843 +                     UParseError *parseError, UErrorCode &errorCode);
   1.844 +
   1.845 +    int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
   1.846 +
   1.847 +    int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
   1.848 +                             UParseError *parseError, UErrorCode &errorCode);
   1.849 +
   1.850 +    int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
   1.851 +                                     UParseError *parseError, UErrorCode &errorCode);
   1.852 +
   1.853 +    /**
   1.854 +     * Validates and parses an argument name or argument number string.
   1.855 +     * This internal method assumes that the input substring is a "pattern identifier".
   1.856 +     * @return &gt;=0 if the name is a valid number,
   1.857 +     *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
   1.858 +     *         ARG_NAME_NOT_VALID (-2) if it is neither.
   1.859 +     * @see #validateArgumentName(String)
   1.860 +     */
   1.861 +    static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
   1.862 +    // Member overload: hands this object's msg string to the static overload above.
   1.863 +    int32_t parseArgNumber(int32_t start, int32_t limit) {
   1.864 +        return MessagePattern::parseArgNumber(msg, start, limit);
   1.865 +    }
   1.866 +
   1.867 +    /**
   1.868 +     * Parses a number from the specified message substring.
   1.869 +     * @param start start index into the message string
   1.870 +     * @param limit limit index into the message string, must be start<limit
   1.871 +     * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
   1.872 +     * @param parseError
   1.873 +     * @param errorCode
   1.874 +     */
   1.875 +    void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
   1.876 +                     UParseError *parseError, UErrorCode &errorCode);
   1.877 +
   1.878 +    // Java has package-private appendReducedApostrophes() here.
   1.879 +    // In C++, this is declared in the MessageImpl class.
   1.880 +    // NOTE(review): skipWhiteSpace()/skipIdentifier() presumably return the index just past the skipped run that begins at index -- confirm.
   1.881 +    int32_t skipWhiteSpace(int32_t index);
   1.882 +
   1.883 +    int32_t skipIdentifier(int32_t index);
   1.884 +
   1.885 +    /**
   1.886 +     * Skips a sequence of characters that could occur in a double value.
   1.887 +     * Does not fully parse or validate the value.
   1.888 +     */
   1.889 +    int32_t skipDouble(int32_t index);
   1.890 +    // NOTE(review): presumably tests whether c may occur in an argument type keyword -- confirm.
   1.891 +    static UBool isArgTypeChar(UChar32 c);
   1.892 +    // NOTE(review): isChoice/isPlural/isSelect/isOrdinal presumably test for the matching keyword at index in msg -- confirm.
   1.893 +    UBool isChoice(int32_t index);
   1.894 +
   1.895 +    UBool isPlural(int32_t index);
   1.896 +
   1.897 +    UBool isSelect(int32_t index);
   1.898 +
   1.899 +    UBool isOrdinal(int32_t index);
   1.900 +
   1.901 +    /**
   1.902 +     * @return TRUE if we are inside a MessageFormat (sub-)pattern,
   1.903 +     *         as opposed to inside a top-level choice/plural/select pattern.
   1.904 +     */
   1.905 +    UBool inMessageFormatPattern(int32_t nestingLevel);
   1.906 +
   1.907 +    /**
   1.908 +     * @return TRUE if we are in a MessageFormat sub-pattern
   1.909 +     *         of a top-level ChoiceFormat pattern.
   1.910 +     */
   1.911 +    UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
   1.912 +    // NOTE(review): the add*Part() helpers presumably append to parts/numericValues and report failure through errorCode -- confirm.
   1.913 +    void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
   1.914 +                 int32_t value, UErrorCode &errorCode);
   1.915 +
   1.916 +    void addLimitPart(int32_t start,
   1.917 +                      UMessagePatternPartType type, int32_t index, int32_t length,
   1.918 +                      int32_t value, UErrorCode &errorCode);
   1.919 +
   1.920 +    void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
   1.921 +    // NOTE(review): presumably fills *parseError with context around pattern position index -- confirm.
   1.922 +    void setParseError(UParseError *parseError, int32_t index);
   1.923 +    // NOTE(review): init()/copyStorage() return UBool, presumably a success flag that mirrors errorCode -- confirm.
   1.924 +    UBool init(UErrorCode &errorCode);
   1.925 +    UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
   1.926 +
   1.927 +    UMessagePatternApostropheMode aposMode;  // apostrophe-handling mode (a UMessagePatternApostropheMode value)
   1.928 +    UnicodeString msg;  // string that getSubstring()/partSubstringMatches() slice by Part index and length
   1.929 +    // Java original: ArrayList<Part> parts. NOTE(review): here presumably partsList owns the storage and parts/partsLength view it -- confirm.
   1.930 +    MessagePatternPartsList *partsList;
   1.931 +    Part *parts;
   1.932 +    int32_t partsLength;
   1.933 +    // Java original: ArrayList<Double> numericValues. NOTE(review): presumably the same list-plus-raw-view arrangement as for parts -- confirm.
   1.934 +    MessagePatternDoubleList *numericValuesList;
   1.935 +    double *numericValues;
   1.936 +    int32_t numericValuesLength;
   1.937 +    UBool hasArgNames;  // NOTE(review): this and the two flags below are presumably computed while parsing -- confirm
   1.938 +    UBool hasArgNumbers;
   1.939 +    UBool needsAutoQuoting;
   1.940 +};
   1.941 +
   1.942 +U_NAMESPACE_END
   1.943 +
   1.944 +#endif  // !UCONFIG_NO_FORMATTING
   1.945 +
   1.946 +#endif  // __MESSAGEPATTERN_H__

mercurial