intl/icu/source/common/unicode/messagepattern.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *   Copyright (C) 2011-2013, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 *******************************************************************************
     6 *   file name:  messagepattern.h
     7 *   encoding:   US-ASCII
     8 *   tab size:   8 (not used)
     9 *   indentation:4
    10 *
    11 *   created on: 2011mar14
    12 *   created by: Markus W. Scherer
    13 */
    15 #ifndef __MESSAGEPATTERN_H__
    16 #define __MESSAGEPATTERN_H__
    18 /**
    19  * \file
    20  * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
    21  */
    23 #include "unicode/utypes.h"
    25 #if !UCONFIG_NO_FORMATTING
    27 #include "unicode/parseerr.h"
    28 #include "unicode/unistr.h"
    30 /**
    31  * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
    32  * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
    33  * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
    34  * <p>
    35  * A pair of adjacent apostrophes always results in a single apostrophe in the output,
    36  * even when the pair is between two single, text-quoting apostrophes.
    37  * <p>
    38  * The following table shows examples of desired MessageFormat.format() output
    39  * with the pattern strings that yield that output.
    40  * <p>
    41  * <table>
    42  *   <tr>
    43  *     <th>Desired output</th>
    44  *     <th>DOUBLE_OPTIONAL</th>
    45  *     <th>DOUBLE_REQUIRED</th>
    46  *   </tr>
    47  *   <tr>
    48  *     <td>I see {many}</td>
    49  *     <td>I see '{many}'</td>
    50  *     <td>(same)</td>
    51  *   </tr>
    52  *   <tr>
    53  *     <td>I said {'Wow!'}</td>
    54  *     <td>I said '{''Wow!''}'</td>
    55  *     <td>(same)</td>
    56  *   </tr>
    57  *   <tr>
    58  *     <td>I don't know</td>
    59  *     <td>I don't know OR<br> I don''t know</td>
    60  *     <td>I don''t know</td>
    61  *   </tr>
    62  * </table>
    63  * @stable ICU 4.8
    64  * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
    65  */
    66 enum UMessagePatternApostropheMode {
    67     /**
    68      * A literal apostrophe is represented by
    69      * either a single or a double apostrophe pattern character.
    70      * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
    71      * if it immediately precedes a curly brace {},
    72      * or a pipe symbol | if inside a choice format,
    73      * or a pound symbol # if inside a plural format.
    74      * <p>
    75      * This is the default behavior starting with ICU 4.8 unless overridden via UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE.
    76      * @stable ICU 4.8
    77      */
    78     UMSGPAT_APOS_DOUBLE_OPTIONAL,
    79     /**
    80      * A literal apostrophe must be represented by
    81      * a double apostrophe pattern character.
    82      * A single apostrophe always starts quoted literal text.
    83      * <p>
    84      * This is the behavior of ICU 4.6 and earlier, and of the JDK.
    85      * @stable ICU 4.8
    86      */
    87     UMSGPAT_APOS_DOUBLE_REQUIRED
    88 };
    89 /**
    90  * @stable ICU 4.8
    91  */
    92 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
    94 /**
    95  * MessagePattern::Part type constants.
    96  * @stable ICU 4.8
    97  */
    98 enum UMessagePatternPartType {
    99     /**
   100      * Start of a message pattern (main or nested).
   101      * The length is 0 for the top-level message
   102      * and for a choice argument sub-message, otherwise 1 for the '{'.
   103      * The value indicates the nesting level, starting with 0 for the main message.
   104      * <p>
   105      * There is always a later MSG_LIMIT part.
   106      * @stable ICU 4.8
   107      */
   108     UMSGPAT_PART_TYPE_MSG_START,
   109     /**
   110      * End of a message pattern (main or nested).
   111      * The length is 0 for the top-level message and
   112      * the last sub-message of a choice argument,
   113      * otherwise 1 for the '}' or (in a choice argument style) the '|'.
   114      * The value indicates the nesting level, starting with 0 for the main message.
   115      * @stable ICU 4.8
   116      */
   117     UMSGPAT_PART_TYPE_MSG_LIMIT,
   118     /**
   119      * Indicates a substring of the pattern string which is to be skipped when formatting.
   120      * For example, an apostrophe that begins or ends quoted text
   121      * would be indicated with such a part.
   122      * The value is undefined and currently always 0.
   123      * @stable ICU 4.8
   124      */
   125     UMSGPAT_PART_TYPE_SKIP_SYNTAX,
   126     /**
   127      * Indicates that a syntax character needs to be inserted for auto-quoting.
   128      * The length is 0.
   129      * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
   130      * @stable ICU 4.8
   131      */
   132     UMSGPAT_PART_TYPE_INSERT_CHAR,
   133     /**
   134      * Indicates a syntactic (non-escaped) # symbol in a plural variant.
   135      * When formatting, replace this part's substring with the
   136      * (value-offset) for the plural argument value.
   137      * The value is undefined and currently always 0.
   138      * @stable ICU 4.8
   139      */
   140     UMSGPAT_PART_TYPE_REPLACE_NUMBER,
   141     /**
   142      * Start of an argument.
   143      * The length is 1 for the '{'.
   144      * The value is the ordinal value of the ArgType. Use Part::getArgType().
   145      * <p>
   146      * This part is followed by either an ARG_NUMBER or ARG_NAME,
   147      * followed by optional argument sub-parts (see UMessagePatternArgType constants)
   148      * and finally an ARG_LIMIT part.
   149      * @stable ICU 4.8
   150      */
   151     UMSGPAT_PART_TYPE_ARG_START,
   152     /**
   153      * End of an argument.
   154      * The length is 1 for the '}'.
   155      * The value is the ordinal value of the ArgType. Use Part::getArgType().
   156      * @stable ICU 4.8
   157      */
   158     UMSGPAT_PART_TYPE_ARG_LIMIT,
   159     /**
   160      * The argument number, provided by the value.
   161      * @stable ICU 4.8
   162      */
   163     UMSGPAT_PART_TYPE_ARG_NUMBER,
   164     /**
   165      * The argument name.
   166      * The value is undefined and currently always 0.
   167      * @stable ICU 4.8
   168      */
   169     UMSGPAT_PART_TYPE_ARG_NAME,
   170     /**
   171      * The argument type.
   172      * The value is undefined and currently always 0.
   173      * @stable ICU 4.8
   174      */
   175     UMSGPAT_PART_TYPE_ARG_TYPE,
   176     /**
   177      * The argument style text.
   178      * The value is undefined and currently always 0.
   179      * @stable ICU 4.8
   180      */
   181     UMSGPAT_PART_TYPE_ARG_STYLE,
   182     /**
   183      * A selector substring in a "complex" argument style.
   184      * The value is undefined and currently always 0.
   185      * @stable ICU 4.8
   186      */
   187     UMSGPAT_PART_TYPE_ARG_SELECTOR,
   188     /**
   189      * An integer value, for example the offset or an explicit selector value
   190      * in a PluralFormat style.
   191      * The part value is the integer value.
   192      * @stable ICU 4.8
   193      */
   194     UMSGPAT_PART_TYPE_ARG_INT,
   195     /**
   196      * A numeric value, for example the offset or an explicit selector value
   197      * in a PluralFormat style.
   198      * The part value is an index into an internal array of numeric values;
   199      * use MessagePattern::getNumericValue().
   200      * @stable ICU 4.8
   201      */
   202     UMSGPAT_PART_TYPE_ARG_DOUBLE
   203 };
   204 /**
   205  * @stable ICU 4.8
   206  */
   207 typedef enum UMessagePatternPartType UMessagePatternPartType;
   209 /**
   210  * Argument type constants.
   211  * Returned by Part::getArgType() for ARG_START and ARG_LIMIT parts, whose part value stores the ordinal value.
   212  *
   213  * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
   214  * with a nesting level one greater than the surrounding message.
   215  * @stable ICU 4.8
   216  */
   217 enum UMessagePatternArgType {
   218     /**
   219      * The argument has no specified type. Also what Part::getArgType() returns for other part types.
   220      * @stable ICU 4.8
   221      */
   222     UMSGPAT_ARG_TYPE_NONE,
   223     /**
   224      * The argument has a "simple" type which is provided by the ARG_TYPE part.
   225      * An ARG_STYLE part might follow that.
   226      * @stable ICU 4.8
   227      */
   228     UMSGPAT_ARG_TYPE_SIMPLE,
   229     /**
   230      * The argument is a ChoiceFormat with one or more
   231      * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
   232      * @stable ICU 4.8
   233      */
   234     UMSGPAT_ARG_TYPE_CHOICE,
   235     /**
   236      * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
   237      * (e.g., offset:1)
   238      * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
   239      * If the selector has an explicit value (e.g., =2), then
   240      * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
   241      * Otherwise the message immediately follows the ARG_SELECTOR.
   242      * @stable ICU 4.8
   243      */
   244     UMSGPAT_ARG_TYPE_PLURAL,
   245     /**
   246      * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
   247      * @stable ICU 4.8
   248      */
   249     UMSGPAT_ARG_TYPE_SELECT,
   250     /**
   251      * The argument is an ordinal-number PluralFormat
   252      * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
   253      * @stable ICU 50
   254      */
   255     UMSGPAT_ARG_TYPE_SELECTORDINAL
   256 };
   257 /**
   258  * @stable ICU 4.8
   259  */
   260 typedef enum UMessagePatternArgType UMessagePatternArgType;
   262 /**
   263  * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
   264  * Evaluates to TRUE if the argument type has a plural style part sequence and semantics,
   265  * that is, UMSGPAT_ARG_TYPE_PLURAL or UMSGPAT_ARG_TYPE_SELECTORDINAL. argType may be evaluated twice.
   266  * @stable ICU 50
   267  */
   268 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
   269     ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
   271 enum {
   272     /**
   273      * Return value from MessagePattern::validateArgumentName() for when
   274      * the string is a valid "pattern identifier" but not a number.
   275      * @stable ICU 4.8
   276      */
   277     UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
   279     /**
   280      * Return value from MessagePattern::validateArgumentName() for when
   281      * the string is invalid.
   282      * It might not be a valid "pattern identifier",
   283      * or it might have only ASCII digits but with a leading zero or a number that is too large.
   284      * @stable ICU 4.8
   285      */
   286     UMSGPAT_ARG_NAME_NOT_VALID=-2
   287 };
   289 /**
   290  * Special value that is returned by MessagePattern::getNumericValue() when no
   291  * numeric value is defined for a part (the part is not an ARG_INT or ARG_DOUBLE).
   292  * @see MessagePattern::getNumericValue()
   293  * @stable ICU 4.8
   294  */
   295 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
   297 U_NAMESPACE_BEGIN
   299 class MessagePatternDoubleList;
   300 class MessagePatternPartsList;
   302 /**
   303  * Parses and represents ICU MessageFormat patterns.
   304  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
   305  * Used in the implementations of those classes as well as in tools
   306  * for message validation, translation and format conversion.
   307  * <p>
   308  * The parser handles all syntax relevant for identifying message arguments.
   309  * This includes "complex" arguments whose style strings contain
   310  * nested MessageFormat pattern substrings.
   311  * For "simple" arguments (with no nested MessageFormat pattern substrings),
   312  * the argument style is not parsed any further.
   313  * <p>
   314  * The parser handles named and numbered message arguments and allows both in one message.
   315  * <p>
   316  * Once a pattern has been parsed successfully, iterate through the parsed data
   317  * with countParts(), getPart() and related methods.
   318  * <p>
   319  * The data logically represents a parse tree, but is stored and accessed
   320  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
   321  * Arguments and nested messages are best handled via recursion.
   322  * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
   323  * the index of the corresponding _LIMIT "part".
   324  * <p>
   325  * List of "parts":
   326  * <pre>
   327  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
   328  * argument = noneArg | simpleArg | complexArg
   329  * complexArg = choiceArg | pluralArg | selectArg
   330  *
   331  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
   332  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
   333  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
   334  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
   335  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
   336  *
   337  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
   338  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
   339  * selectStyle = (ARG_SELECTOR message)+
   340  * </pre>
   341  * <ul>
   342  *   <li>Literal output text is not represented directly by "parts" but accessed
   343  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
   344  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
   345  *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
   346  *       the less-than-or-equal-to sign (U+2264).
   347  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
   348  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
   349  *       is the value of an explicit-number selector like "=2",
   350  *       otherwise the selector is a non-numeric identifier.
   351  *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
   352  * </ul>
   353  * <p>
   354  * This class is not intended for public subclassing.
   355  *
   356  * @stable ICU 4.8
   357  */
   358 class U_COMMON_API MessagePattern : public UObject {
   359 public:
   360     /**
   361      * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
   362      * @param errorCode Standard ICU error code. Its input value must
   363      *                  pass the U_SUCCESS() test, or else the function returns
   364      *                  immediately. Check for U_FAILURE() on output or use with
   365      *                  function chaining. (See User Guide for details.)
   366      * @stable ICU 4.8
   367      */
   368     MessagePattern(UErrorCode &errorCode);
   370     /**
   371      * Constructs an empty MessagePattern.
   372      * @param mode Explicit UMessagePatternApostropheMode.
   373      * @param errorCode Standard ICU error code. Its input value must
   374      *                  pass the U_SUCCESS() test, or else the function returns
   375      *                  immediately. Check for U_FAILURE() on output or use with
   376      *                  function chaining. (See User Guide for details.)
   377      * @stable ICU 4.8
   378      */
   379     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
   381     /**
   382      * Constructs a MessagePattern with default UMessagePatternApostropheMode and
   383      * parses the MessageFormat pattern string.
   384      * @param pattern a MessageFormat pattern string
   385      * @param parseError Struct to receive information on the position
   386      *                   of an error within the pattern.
   387      *                   Can be NULL.
   388      * @param errorCode Standard ICU error code. Its input value must
   389      *                  pass the U_SUCCESS() test, or else the function returns
   390      *                  immediately. Check for U_FAILURE() on output or use with
   391      *                  function chaining. (See User Guide for details.)
   392      * TODO: turn @throws into UErrorCode specifics?
   393      * @throws IllegalArgumentException for syntax errors in the pattern string
   394      * @throws IndexOutOfBoundsException if certain limits are exceeded
   395      *         (e.g., argument number too high, argument name too long, etc.)
   396      * @throws NumberFormatException if a number could not be parsed
   397      * @stable ICU 4.8
   398      */
   399     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
   401     /**
   402      * Copy constructor.
   403      * @param other Object to copy.
   404      * @stable ICU 4.8
   405      */
   406     MessagePattern(const MessagePattern &other);
   408     /**
   409      * Assignment operator.
   410      * @param other Object to copy.
   411      * @return *this=other
   412      * @stable ICU 4.8
   413      */
   414     MessagePattern &operator=(const MessagePattern &other);
   416     /**
   417      * Destructor.
   418      * @stable ICU 4.8
   419      */
   420     virtual ~MessagePattern();
   422     /**
   423      * Parses a MessageFormat pattern string.
   424      * @param pattern a MessageFormat pattern string
   425      * @param parseError Struct to receive information on the position
   426      *                   of an error within the pattern.
   427      *                   Can be NULL.
   428      * @param errorCode Standard ICU error code. Its input value must
   429      *                  pass the U_SUCCESS() test, or else the function returns
   430      *                  immediately. Check for U_FAILURE() on output or use with
   431      *                  function chaining. (See User Guide for details.)
   432      * @return *this
   433      * @throws IllegalArgumentException for syntax errors in the pattern string
   434      * @throws IndexOutOfBoundsException if certain limits are exceeded
   435      *         (e.g., argument number too high, argument name too long, etc.)
   436      * @throws NumberFormatException if a number could not be parsed
   437      * @stable ICU 4.8
   438      */
   439     MessagePattern &parse(const UnicodeString &pattern,
   440                           UParseError *parseError, UErrorCode &errorCode);
   442     /**
   443      * Parses a ChoiceFormat pattern string.
   444      * @param pattern a ChoiceFormat pattern string
   445      * @param parseError Struct to receive information on the position
   446      *                   of an error within the pattern.
   447      *                   Can be NULL.
   448      * @param errorCode Standard ICU error code. Its input value must
   449      *                  pass the U_SUCCESS() test, or else the function returns
   450      *                  immediately. Check for U_FAILURE() on output or use with
   451      *                  function chaining. (See User Guide for details.)
   452      * @return *this
   453      * @throws IllegalArgumentException for syntax errors in the pattern string
   454      * @throws IndexOutOfBoundsException if certain limits are exceeded
   455      *         (e.g., argument number too high, argument name too long, etc.)
   456      * @throws NumberFormatException if a number could not be parsed
   457      * @stable ICU 4.8
   458      */
   459     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
   460                                      UParseError *parseError, UErrorCode &errorCode);
   462     /**
   463      * Parses a PluralFormat pattern string.
   464      * @param pattern a PluralFormat pattern string
   465      * @param parseError Struct to receive information on the position
   466      *                   of an error within the pattern.
   467      *                   Can be NULL.
   468      * @param errorCode Standard ICU error code. Its input value must
   469      *                  pass the U_SUCCESS() test, or else the function returns
   470      *                  immediately. Check for U_FAILURE() on output or use with
   471      *                  function chaining. (See User Guide for details.)
   472      * @return *this
   473      * @throws IllegalArgumentException for syntax errors in the pattern string
   474      * @throws IndexOutOfBoundsException if certain limits are exceeded
   475      *         (e.g., argument number too high, argument name too long, etc.)
   476      * @throws NumberFormatException if a number could not be parsed
   477      * @stable ICU 4.8
   478      */
   479     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
   480                                      UParseError *parseError, UErrorCode &errorCode);
   482     /**
   483      * Parses a SelectFormat pattern string.
   484      * @param pattern a SelectFormat pattern string
   485      * @param parseError Struct to receive information on the position
   486      *                   of an error within the pattern.
   487      *                   Can be NULL.
   488      * @param errorCode Standard ICU error code. Its input value must
   489      *                  pass the U_SUCCESS() test, or else the function returns
   490      *                  immediately. Check for U_FAILURE() on output or use with
   491      *                  function chaining. (See User Guide for details.)
   492      * @return *this
   493      * @throws IllegalArgumentException for syntax errors in the pattern string
   494      * @throws IndexOutOfBoundsException if certain limits are exceeded
   495      *         (e.g., argument number too high, argument name too long, etc.)
   496      * @throws NumberFormatException if a number could not be parsed
   497      * @stable ICU 4.8
   498      */
   499     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
   500                                      UParseError *parseError, UErrorCode &errorCode);
   502     /**
   503      * Clears this MessagePattern.
   504      * countParts() will return 0.
   505      * @stable ICU 4.8
   506      */
   507     void clear();
   509     /**
   510      * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
   511      * countParts() will return 0.
   512      * @param mode The new UMessagePatternApostropheMode.
   513      * @stable ICU 4.8
   514      */
   515     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
   516         this->clear();          // reset the parsed state first
   517         this->aposMode = mode;  // then store the requested apostrophe mode
   518     }
   520     /**
   521      * @param other another object to compare with.
   522      * @return TRUE if this object is equivalent to the other one.
   523      * @stable ICU 4.8
   524      */
   525     UBool operator==(const MessagePattern &other) const;
   527     /**
   528      * @param other another object to compare with.
   529      * @return FALSE if this object is equivalent to the other one.
   530      * @stable ICU 4.8
   531      */
   532     inline UBool operator!=(const MessagePattern &other) const {
   533         return !(*this == other);  // defined purely as the negation of operator==
   534     }
   536     /**
   537      * @return A hash code for this object.
   538      * @stable ICU 4.8
   539      */
   540     int32_t hashCode() const;
   542     /**
   543      * @return this instance's UMessagePatternApostropheMode.
   544      * @stable ICU 4.8
   545      */
   546     UMessagePatternApostropheMode getApostropheMode() const {
   547         return this->aposMode;
   548     }
   550     // Java has package-private jdkAposMode() here.
   551     // In C++, this is declared in the MessageImpl class.
   553     /**
   554      * @return the parsed pattern string, as a reference to this object's stored string (never null).
   555      * @stable ICU 4.8
   556      */
   557     const UnicodeString &getPatternString() const {
   558         return this->msg;  // returned by reference; no copy is made
   559     }
   561     /**
   562      * Does the parsed pattern have named arguments like {first_name}?
   563      * @return TRUE if the parsed pattern has at least one named argument.
   564      * @stable ICU 4.8
   565      */
   566     UBool hasNamedArguments() const {
   567         return this->hasArgNames;
   568     }
   570     /**
   571      * Does the parsed pattern have numbered arguments like {2}?
   572      * @return TRUE if the parsed pattern has at least one numbered argument.
   573      * @stable ICU 4.8
   574      */
   575     UBool hasNumberedArguments() const {
   576         return this->hasArgNumbers;
   577     }
   579     /**
   580      * Validates and parses an argument name or argument number string.
   581      * An argument name must be a "pattern identifier", that is, it must contain
   582      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
   583      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
   584      * @param name Input string.
   585      * @return &gt;=0 if the name is a valid number,
   586      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
   587      *         ARG_NAME_NOT_VALID (-2) if it is neither.
   588      * @stable ICU 4.8
   589      */
   590     static int32_t validateArgumentName(const UnicodeString &name);
   592     /**
   593      * Returns a version of the parsed pattern string where each ASCII apostrophe
   594      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
   595      * <p>
   596      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
   597      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
   598      * @return the deep-auto-quoted version of the parsed pattern string.
   599      * @see MessageFormat.autoQuoteApostrophe()
   600      * @stable ICU 4.8
   601      */
   602     UnicodeString autoQuoteApostropheDeep() const;
   604     class Part;
   606     /**
   607      * Returns the number of "parts" created by parsing the pattern string.
   608      * Returns 0 if no pattern has been parsed or clear() was called.
   609      * @return the number of pattern parts.
   610      * @stable ICU 4.8
   611      */
   612     int32_t countParts() const {
   613         return this->partsLength;
   614     }
   616     /**
   617      * Gets the i-th pattern "part".
   618      * @param i The index of the Part data. (0..countParts()-1)
   619      * @return the i-th pattern "part".
   620      * @stable ICU 4.8
   621      */
   622     const Part &getPart(int32_t i) const {
   623         return this->parts[i];  // no range check here; i must be in 0..countParts()-1
   624     }
   626     /**
   627      * Returns the UMessagePatternPartType of the i-th pattern "part".
   628      * Convenience method for getPart(i).getType().
   629      * @param i The index of the Part data. (0..countParts()-1)
   630      * @return The UMessagePatternPartType of the i-th Part.
   631      * @stable ICU 4.8
   632      */
   633     UMessagePatternPartType getPartType(int32_t i) const {
   634         return getPart(i).getType();
   635     }
   637     /**
   638      * Returns the pattern index of the specified pattern "part".
   639      * Convenience method for getPart(partIndex).getIndex().
   640      * @param partIndex The index of the Part data. (0..countParts()-1)
   641      * @return The pattern index of this Part.
   642      * @stable ICU 4.8
   643      */
   644     int32_t getPatternIndex(int32_t partIndex) const {
   645         return getPart(partIndex).getIndex();
   646     }
   648     /**
   649      * Returns the substring of the pattern string indicated by the Part.
   650      * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
   651      * @param part a part of this MessagePattern.
   652      * @return the substring associated with part.
   653      * @stable ICU 4.8
   654      */
   655     UnicodeString getSubstring(const Part &part) const {
   656         return msg.tempSubString(part.getIndex(), part.getLength());
   657     }
   659     /**
   660      * Compares the part's substring with the input string s.
   661      * @param part a part of this MessagePattern.
   662      * @param s a string.
   663      * @return TRUE if getSubstring(part).equals(s).
   664      * @stable ICU 4.8
   665      */
   666     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
   667         return msg.compare(part.getIndex(), part.getLength(), s)==0;
   668     }
   670     /**
   671      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
   672      * @param part a part of this MessagePattern.
   673      * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
   674      * @stable ICU 4.8
   675      */
   676     double getNumericValue(const Part &part) const;
   678     /**
   679      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
   680      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
   681      * @return the "offset:" value.
   682      * @stable ICU 4.8
   683      */
   684     double getPluralOffset(int32_t pluralStart) const;
   686     /**
   687      * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
   688      * @param start The index of some Part data (0..countParts()-1);
   689      *        this Part should be of Type ARG_START or MSG_START.
   690      * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
   691      *         or start itself if getPartType(start)!=ARG|MSG_START.
   692      * @stable ICU 4.8
   693      */
   694     int32_t getLimitPartIndex(int32_t start) const {
   695         const int32_t matchingLimit=getPart(start).limitPartIndex;
   696         // Fall back to start itself when the stored limit index lies before it.
   697         return (matchingLimit>=start) ? matchingLimit : start;
   698     }
   702     /**
   703      * A message pattern "part", representing a pattern parsing event.
   704      * There is a part for the start and end of a message or argument,
   705      * for quoting and escaping of and with ASCII apostrophes,
   706      * and for syntax elements of "complex" arguments.
   707      * @stable ICU 4.8
   708      */
   709     class Part : public UMemory {
   710     public:
   711         /**
   712          * Default constructor, do not use.
   713          * @internal
   714          */
   715         Part() {}  // empty body: performs no explicit field initialization
   717         /**
   718          * Returns the type of this part.
   719          * @return the part type.
   720          * @stable ICU 4.8
   721          */
   722         UMessagePatternPartType getType() const {
   723             return this->type;
   724         }
   726         /**
   727          * Returns the pattern string index associated with this Part.
   728          * @return this part's pattern string index.
   729          * @stable ICU 4.8
   730          */
   731         int32_t getIndex() const {
   732             return this->index;
   733         }
   735         /**
   736          * Returns the length of the pattern substring associated with this Part.
   737          * This is 0 for some parts.
   738          * @return this part's pattern substring length.
   739          * @stable ICU 4.8
   740          */
   741         int32_t getLength() const {
   742             return length;
   743         }
   745         /**
   746          * Returns the pattern string limit (exclusive-end) index associated with this Part.
   747          * Convenience method for getIndex()+getLength().
   748          * @return this part's pattern string limit index, same as getIndex()+getLength().
   749          * @stable ICU 4.8
   750          */
   751         int32_t getLimit() const {
   752             return index+length;
   753         }
   755         /**
   756          * Returns a value associated with this part.
   757          * See the documentation of each part type for details.
   758          * @return the part value.
   759          * @stable ICU 4.8
   760          */
   761         int32_t getValue() const {
   762             return value;
   763         }
   765         /**
   766          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
   767          * otherwise UMSGPAT_ARG_TYPE_NONE.
   768          * @return the argument type for this part.
   769          * @stable ICU 4.8
   770          */
   771         UMessagePatternArgType getArgType() const {
   772             UMessagePatternPartType type=getType();
   773             if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
   774                 return (UMessagePatternArgType)value;
   775             } else {
   776                 return UMSGPAT_ARG_TYPE_NONE;
   777             }
   778         }
   780         /**
   781          * Indicates whether the Part type has a numeric value.
   782          * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
   783          * @param type The Part type to be tested.
   784          * @return TRUE if the Part type has a numeric value.
   785          * @stable ICU 4.8
   786          */
   787         static UBool hasNumericValue(UMessagePatternPartType type) {
   788             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
   789         }
   791         /**
   792          * @param other another object to compare with.
   793          * @return TRUE if this object is equivalent to the other one.
   794          * @stable ICU 4.8
   795          */
   796         UBool operator==(const Part &other) const;
   798         /**
   799          * @param other another object to compare with.
   800          * @return FALSE if this object is equivalent to the other one.
   801          * @stable ICU 4.8
   802          */
   803         inline UBool operator!=(const Part &other) const {
   804             return !operator==(other);
   805         }
   807         /**
   808          * @return A hash code for this object.
   809          * @stable ICU 4.8
   810          */
   811         int32_t hashCode() const {
   812             return ((type*37+index)*37+length)*37+value;
   813         }
   815     private:
   816         friend class MessagePattern;
   818         static const int32_t MAX_LENGTH=0xffff;
   819         static const int32_t MAX_VALUE=0x7fff;
   821         // Some fields are not final because they are modified during pattern parsing.
   822         // After pattern parsing, the parts are effectively immutable.
   823         UMessagePatternPartType type;
   824         int32_t index;
   825         uint16_t length;
   826         int16_t value;
   827         int32_t limitPartIndex;
   828     };
   830 private:
           // Pattern-parsing entry points. None of them is defined in the visible part of
           // this header, so the notes here are based on names and signatures only.
           // NOTE(review): the parse*() functions take a pattern index and return int32_t,
           // presumably the index just past the text they consumed; failures are presumably
           // reported through errorCode and parseError -- confirm against the implementation.
   831     void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
   833     void postParse();
           // NOTE(review): nestingLevel and parentType presumably describe the enclosing
           // argument when parsing a nested sub-message -- confirm.
   835     int32_t parseMessage(int32_t index, int32_t msgStartLength,
   836                          int32_t nestingLevel, UMessagePatternArgType parentType,
   837                          UParseError *parseError, UErrorCode &errorCode);
   839     int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
   840                      UParseError *parseError, UErrorCode &errorCode);
   842     int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
   844     int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
   845                              UParseError *parseError, UErrorCode &errorCode);
           // One function serves several argument types; argType tells it which one is being parsed.
           // NOTE(review): presumably plural, select and selectordinal styles -- confirm.
   847     int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
   848                                      UParseError *parseError, UErrorCode &errorCode);
   850     /**
   851      * Validates and parses an argument name or argument number string.
   852      * This internal method assumes that the input substring is a "pattern identifier".
   853      * @return &gt;=0 if the name is a valid number,
   854      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
   855      *         ARG_NAME_NOT_VALID (-2) if it is neither.
   856      * @see #validateArgumentName(String)
   857      */
   858     static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
           /**
            * Convenience overload: forwards this object's msg string together with
            * start and limit to the static parseArgNumber().
            * @param start start index into msg
            * @param limit limit index into msg
            * @return the result of the static overload, unchanged.
            */
   860     int32_t parseArgNumber(int32_t start, int32_t limit) {
   861         return parseArgNumber(msg, start, limit);
   862     }
   864     /**
   865      * Parses a number from the specified message substring.
            * NOTE(review): the function returns void, so the parsed number is presumably
            * recorded as a part (see addPart()/addArgDoublePart()) -- confirm.
   866      * @param start start index into the message string
   867      * @param limit limit index into the message string, must be start<limit
   868      * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
   869      * @param parseError NOTE(review): presumably receives error position/context
            *                   when the substring is not a valid number -- confirm.
   870      * @param errorCode NOTE(review): presumably the usual in/out error code, set on
            *                  failure -- confirm.
   871      */
   872     void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
   873                      UParseError *parseError, UErrorCode &errorCode);
   875     // Java has package-private appendReducedApostrophes() here.
   876     // In C++, this is declared in the MessageImpl class.
           // The helpers below are only declared here; their definitions are not in the
           // visible part of this header.
           // NOTE(review): the skip*() functions presumably start at index and return the
           // index of the first character they did not skip -- confirm.
   878     int32_t skipWhiteSpace(int32_t index);
   880     int32_t skipIdentifier(int32_t index);
   882     /**
   883      * Skips a sequence of characters that could occur in a double value.
   884      * Does not fully parse or validate the value.
   885      */
   886     int32_t skipDouble(int32_t index);
           // NOTE(review): presumably tests whether c may occur in an argument type keyword -- confirm.
   888     static UBool isArgTypeChar(UChar32 c);
           // NOTE(review): isChoice/isPlural/isSelect/isOrdinal presumably test whether the
           // pattern text at index spells the corresponding keyword -- confirm.
   890     UBool isChoice(int32_t index);
   892     UBool isPlural(int32_t index);
   894     UBool isSelect(int32_t index);
   896     UBool isOrdinal(int32_t index);
   898     /**
   899      * @return TRUE if we are inside a MessageFormat (sub-)pattern,
   900      *         as opposed to inside a top-level choice/plural/select pattern.
   901      */
   902     UBool inMessageFormatPattern(int32_t nestingLevel);
   904     /**
   905      * @return TRUE if we are in a MessageFormat sub-pattern
   906      *         of a top-level ChoiceFormat pattern.
   907      */
   908     UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
           // The type/index/length/value parameters mirror the Part fields of the same names.
           // NOTE(review): addPart() presumably appends a new Part built from them and
           // reports allocation or range problems via errorCode -- confirm.
   910     void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
   911                  int32_t value, UErrorCode &errorCode);
           // NOTE(review): like addPart(), with start presumably being the index of the
           // matching start part so that its limitPartIndex can be recorded -- confirm.
   913     void addLimitPart(int32_t start,
   914                       UMessagePatternPartType type, int32_t index, int32_t length,
   915                       int32_t value, UErrorCode &errorCode);
           // NOTE(review): presumably stores numericValue in numericValues and adds a
           // matching ARG_DOUBLE part -- confirm.
   917     void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
           // NOTE(review): presumably fills *parseError with context around index -- confirm,
           // including whether a NULL parseError is accepted.
   919     void setParseError(UParseError *parseError, int32_t index);
           // NOTE(review): the UBool results of init() and copyStorage() presumably signal
           // success, with details in errorCode -- confirm.
   921     UBool init(UErrorCode &errorCode);
   922     UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
   924     UMessagePatternApostropheMode aposMode;  // apostrophe mode used by this pattern object
           // The string that the member parseArgNumber() overload passes to the static one.
           // NOTE(review): presumably the pattern string being parsed -- confirm.
   925     UnicodeString msg;
   926     // ArrayList<Part> parts=new ArrayList<Part>();
           // (The line above shows the Java counterpart of the next three members.)
           // NOTE(review): partsList presumably owns the storage that parts points into,
           // with partsLength the number of parts in use -- confirm.
   927     MessagePatternPartsList *partsList;
   928     Part *parts;
   929     int32_t partsLength;
   930     // ArrayList<Double> numericValues;
           // (The line above shows the Java counterpart of the next three members.)
           // NOTE(review): same presumed arrangement as for the parts: list object, pointer
           // into its storage, and number of entries in use -- confirm.
   931     MessagePatternDoubleList *numericValuesList;
   932     double *numericValues;
   933     int32_t numericValuesLength;
           // NOTE(review): the three flags below are presumably computed while parsing -- confirm.
   934     UBool hasArgNames;
   935     UBool hasArgNumbers;
   936     UBool needsAutoQuoting;
   937 };
   939 U_NAMESPACE_END
   941 #endif  // !UCONFIG_NO_FORMATTING
   943 #endif  // __MESSAGEPATTERN_H__

mercurial