intl/icu/source/i18n/unicode/selfmt.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rwxr-xr-x

Correct previous dual key logic pending first delivery installment.

     1 /********************************************************************
     2  * COPYRIGHT:
     3  * Copyright (c) 1997-2011, International Business Machines Corporation and
     4  * others. All Rights Reserved.
     5  * Copyright (C) 2010 , Yahoo! Inc.
     6  ********************************************************************
     7  *
     8  * File SELFMT.H
     9  *
    10  * Modification History:
    11  *
    12  *   Date        Name        Description
    13  *   11/11/09    kirtig      Finished first cut of implementation.
    14  ********************************************************************/
    16 #ifndef SELFMT
    17 #define SELFMT
    19 #include "unicode/messagepattern.h"
    20 #include "unicode/numfmt.h"
    21 #include "unicode/utypes.h"
    23 /**
    24  * \file
    25  * \brief C++ API: SelectFormat object
    26  */
    28 #if !UCONFIG_NO_FORMATTING
    30 U_NAMESPACE_BEGIN
    32 class MessageFormat;
    34 /**
    35   * <p><code>SelectFormat</code> supports the creation of  internationalized
    36   * messages by selecting phrases based on keywords. The pattern  specifies
    37   * how to map keywords to phrases and provides a default phrase. The
    38   * object provided to the format method is a string that's matched
    39   * against the keywords. If there is a match, the corresponding phrase
    40   * is selected; otherwise, the default phrase is used.</p>
    41   *
    42   * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
    43   *
    44   * <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
    45   * with a <code>select</code> argument type,
    46   * rather than using a stand-alone <code>SelectFormat</code>.</p>
    47   *
    48   * <p>The main use case for the select format is gender based  inflection.
    49   * When names or nouns are inserted into sentences, their gender can  affect pronouns,
    50   * verb forms, articles, and adjectives. Special care needs to be
    51   * taken for the case where the gender cannot be determined.
    52   * The impact varies between languages:</p>
    53   * \htmlonly
    54   * <ul>
    55   * <li>English has three genders, and unknown gender is handled as a  special
    56   * case. Names use the gender of the named person (if known), nouns  referring
    57   * to people use natural gender, and inanimate objects are usually  neutral.
    58   * The gender only affects pronouns: "he", "she", "it", "they".
    59   *
    60   * <li>German differs from English in that the gender of nouns is  rather
    61   * arbitrary, even for nouns referring to people ("M&#x00E4;dchen", girl, is  neutral).
    62   * The gender affects pronouns ("er", "sie", "es"), articles ("der",  "die",
    63   * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes  M&#x00E4;dchen").
    64   *
    65   * <li>French has only two genders; as in German the gender of nouns
    66   * is rather arbitrary - for sun and moon, the genders
    67   * are the opposite of those in German. The gender affects
    68   * pronouns ("il", "elle"), articles ("le", "la"),
    69   * adjective forms ("bon", "bonne"), and sometimes
    70   * verb forms ("all&#x00E9;", "all&#x00E9;e").
    71   *
    72   * <li>Polish distinguishes five genders (or noun classes),
    73   * human masculine, animate non-human masculine, inanimate masculine,
    74   * feminine, and neuter.
    75   * </ul>
    76   * \endhtmlonly
    77   * <p>Some other languages have noun classes that are not related to  gender,
    78   * but similar in grammatical use.
    79   * Some African languages have around 20 noun classes.</p>
    80   *
     81   * <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence,
    82   * we usually need to distinguish only between female, male and other/unknown.</p>
    83   *
    84   * <p>To enable localizers to create sentence patterns that take their
    85   * language's gender dependencies into consideration, software has to  provide
    86   * information about the gender associated with a noun or name to
    87   * <code>MessageFormat</code>.
    88   * Two main cases can be distinguished:</p>
    89   *
    90   * <ul>
    91   * <li>For people, natural gender information should be maintained  for each person.
    92   * Keywords like "male", "female", "mixed" (for groups of people)
    93   * and "unknown" could be used.
    94   *
    95   * <li>For nouns, grammatical gender information should be maintained  for
    96   * each noun and per language, e.g., in resource bundles.
    97   * The keywords "masculine", "feminine", and "neuter" are commonly  used,
    98   * but some languages may require other keywords.
    99   * </ul>
   100   *
   101   * <p>The resulting keyword is provided to <code>MessageFormat</code>  as a
   102   * parameter separate from the name or noun it's associated with. For  example,
   103   * to generate a message such as "Jean went to Paris", three separate  arguments
   104   * would be provided: The name of the person as argument 0, the  gender of
   105   * the person as argument 1, and the name of the city as argument 2.
   106   * The sentence pattern for English, where the gender of the person has
   107   * no impact on this simple sentence, would not refer to argument 1  at all:</p>
   108   *
   109   * <pre>{0} went to {2}.</pre>
   110   *
   111   * <p><b>Note:</b> The entire sentence should be included (and partially repeated)
   112   * inside each phrase. Otherwise translators would have to be trained on how to
   113   * move bits of the sentence in and out of the select argument of a message.
   114   * (The examples below do not follow this recommendation!)</p>
   115   *
   116   * <p>The sentence pattern for French, where the gender of the person affects
   117   * the form of the participle, uses a select format based on argument 1:</p>
   118   *
   119   * \htmlonly<pre>{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; {2}.</pre>\endhtmlonly
   120   *
   121   * <p>Patterns can be nested, so that it's possible to handle  interactions of
   122   * number and gender where necessary. For example, if the above  sentence should
   123   * allow for the names of several people to be inserted, the  following sentence
   124   * pattern can be used (with argument 0 the list of people's names,
   125   * argument 1 the number of people, argument 2 their combined gender, and
   126   * argument 3 the city name):</p>
   127   *
   128   * \htmlonly
   129   * <pre>{0} {1, plural,
   130   *                 one {est {2, select, female {all&#x00E9;e} other  {all&#x00E9;}}}
   131   *                 other {sont {2, select, female {all&#x00E9;es} other {all&#x00E9;s}}}
   132   *          }&#x00E0; {3}.</pre>
   133   * \endhtmlonly
   134   *
   135   * <h4>Patterns and Their Interpretation</h4>
   136   *
   137   * <p>The <code>SelectFormat</code> pattern string defines the phrase output
   138   * for each user-defined keyword.
   139   * The pattern is a sequence of (keyword, message) pairs.
   140   * A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
   141   *
   142   * <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
   143   *
   144   * <p>You always have to define a phrase for the default keyword
   145   * <code>other</code>; this phrase is returned when the keyword
   146   * provided to
   147   * the <code>format</code> method matches no other keyword.
   148   * If a pattern does not provide a phrase for <code>other</code>, the  method
   149   * it's provided to returns the error  <code>U_DEFAULT_KEYWORD_MISSING</code>.
   150   * <br>
   151   * Pattern_White_Space between keywords and messages is ignored.
   152   * Pattern_White_Space within a message is preserved and output.</p>
   153   *
   154   * <p><pre>Example:
   155   * \htmlonly
   156   *
   157   * UErrorCode status = U_ZERO_ERROR;
   158   * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est  {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris."), Locale("fr"),  status);
   159   * if (U_FAILURE(status)) {
   160   *       return;
   161   * }
   162   * FieldPosition ignore(FieldPosition::DONT_CARE);
   163   * UnicodeString result;
   164   *
    165   * const char* str1 = "Kirti,female";
   166   * Formattable args1[] = {"Kirti","female"};
   167   * msgFmt->format(args1, 2, result, ignore, status);
   168   * cout << "Input is " << str1 << " and result is: " << result << endl;
   169   * delete msgFmt;
   170   *
   171   * \endhtmlonly
   172   * </pre>
   173   * </p>
   174   *
   175   * Produces the output:<br>
   176   * \htmlonly
   177   * <code>Kirti est all&#x00E9;e &#x00E0; Paris.</code>
   178   * \endhtmlonly
   179   *
   180   * @stable ICU 4.4
   181   */
    183 class U_I18N_API SelectFormat : public Format {
    184 public:
    186     /**
    187      * Creates a new <code>SelectFormat</code> for a given pattern string.
    188      * @param  pattern the pattern for this <code>SelectFormat</code>.
    189      *                 Errors are returned to status if the pattern is invalid.
    190      * @param status   output param set to success/failure code on exit, which
    191      *                 must not indicate a failure before the function call.
    192      * @stable ICU 4.4
    193      */
    194     SelectFormat(const UnicodeString& pattern, UErrorCode& status);
    196     /**
    197      * Copy constructor.
             * @param other    the SelectFormat object to copy from.
    198      * @stable ICU 4.4
    199      */
    200     SelectFormat(const SelectFormat& other);
    202     /**
    203      * Destructor.
    204      * @stable ICU 4.4
    205      */
    206     virtual ~SelectFormat();
    208     /**
    209      * Sets the pattern that this select format uses
    210      * to map keywords to phrases.
    211      * Patterns and their interpretation are specified in the class description.
    212      *
    213      * @param pattern the pattern for this select format.
    214      *                Errors are returned to status if the pattern is invalid.
    215      * @param status  output param set to success/failure code on exit, which
    216      *                must not indicate a failure before the function call.
    217      * @stable ICU 4.4
    218      */
    219     void applyPattern(const UnicodeString& pattern, UErrorCode& status);
            // Re-expose the format() overloads inherited from Format; the format()
            // declarations in this class would otherwise hide them.
    222     using Format::format;
    224     /**
    225      * Selects the phrase for the given keyword.
    226      *
    227      * @param keyword  The keyword that is used to select an alternative.
    228      * @param appendTo output parameter to receive result.
    229      *                 result is appended to existing contents.
    230      * @param pos      On input: an alignment field, if desired.
    231      *                 On output: the offsets of the alignment field.
    232      * @param status   output param set to success/failure code on exit, which
    233      *                 must not indicate a failure before the function call.
    234      * @return         Reference to 'appendTo' parameter.
    235      * @stable ICU 4.4
    236      */
    237     UnicodeString& format(const UnicodeString& keyword,
    238                             UnicodeString& appendTo,
    239                             FieldPosition& pos,
    240                             UErrorCode& status) const;
    242     /**
    243      * Assignment operator
    244      *
    245      * @param other    the SelectFormat object to copy from.
             * @return         a reference to a SelectFormat (per the declared return
             *                 type; presumably this object -- confirm in the
             *                 implementation).
    246      * @stable ICU 4.4
    247      */
    248     SelectFormat& operator=(const SelectFormat& other);
    250     /**
    251      * Return true if another object is semantically equal to this one.
    252      *
    253      * @param other    the Format object to be compared with.
    254      * @return         true if other is semantically equal to this.
    255      * @stable ICU 4.4
    256      */
    257     virtual UBool operator==(const Format& other) const;
    259     /**
    260      * Return true if another object is semantically unequal to this one.
    261      *
    262      * @param other    the Format object to be compared with.
    263      * @return         true if other is semantically unequal to this.
    264      * @stable ICU 4.4
    265      */
    266     virtual UBool operator!=(const Format& other) const;
    268     /**
    269      * Clones this Format object polymorphically.  The caller owns the
    270      * result and should delete it when done.
             * @return a pointer to the clone, owned by the caller.
    271      * @stable ICU 4.4
    272      */
    273     virtual Format* clone(void) const;
    275     /**
    276      * Format an object to produce a string.
    277      * This method handles keyword strings.
    278      * If the Formattable object is not a <code>UnicodeString</code>,
    279      * then it returns a failing UErrorCode.
    280      *
    281      * @param obj       A keyword string that is used to select an alternative.
    282      * @param appendTo  output parameter to receive result.
    283      *                  Result is appended to existing contents.
    284      * @param pos       On input: an alignment field, if desired.
    285      *                  On output: the offsets of the alignment field.
    286      * @param status    output param filled with success/failure status.
    287      * @return          Reference to 'appendTo' parameter.
    288      * @stable ICU 4.4
    289      */
    290     UnicodeString& format(const Formattable& obj,
    291                          UnicodeString& appendTo,
    292                          FieldPosition& pos,
    293                          UErrorCode& status) const;
    295     /**
    296      * Returns the pattern from applyPattern() or constructor.
    297      *
             * NOTE(review): unlike the format() methods, this method is not
             * declared const; confirm whether that is intentional before
             * relying on it from const contexts.
             *
    298      * @param  appendTo  output parameter to receive result.
    299      *                  Result is appended to existing contents.
    300      * @return the UnicodeString with inserted pattern.
    301      * @stable ICU 4.4
    302      */
    303     UnicodeString& toPattern(UnicodeString& appendTo);
    305     /**
    306      * This method is not yet supported by <code>SelectFormat</code>.
             * NOTE(review): the paragraphs below read like the general
             * parseObject() contract rather than behavior of this class;
             * confirm against the implementation what an unsupported call does.
    307      * <P>
    308      * Before calling, set parse_pos.index to the offset you want to start
    309      * parsing at in the source. After calling, parse_pos.index is the end of
    310      * the text you parsed. If error occurs, index is unchanged.
    311      * <P>
    312      * When parsing, leading whitespace is discarded (with a successful parse),
    313      * while trailing whitespace is left as is.
    314      * <P>
    315      * See Format::parseObject() for more.
    316      *
    317      * @param source     The string to be parsed into an object.
    318      * @param result     Formattable to be set to the parse result.
    319      *     If parse fails, return contents are undefined.
    320      * @param parse_pos The position to start parsing at. Upon return
    321      *     this param is set to the position after the
    322      *     last character successfully parsed. If the
    323      *     source is not parsed successfully, this param
    324      *     will remain unchanged.
    325      * @stable ICU 4.4
    326      */
    327     virtual void parseObject(const UnicodeString& source,
    328                             Formattable& result,
    329                             ParsePosition& parse_pos) const;
    331     /**
    332      * ICU "poor man's RTTI", returns a UClassID for this class.
    333      * @stable ICU 4.4
    334      */
    335     static UClassID U_EXPORT2 getStaticClassID(void);
    337     /**
    338      * ICU "poor man's RTTI", returns a UClassID for the actual class.
    339      * @stable ICU 4.4
    340      */
    341     virtual UClassID getDynamicClassID() const;
    343 private:
            // MessageFormat is granted access to the private members below,
            // including the static findSubMessage() helper.
    344     friend class MessageFormat;
    346     SelectFormat();   // default constructor not implemented.
    348     /**
    349      * Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
    350      * @param pattern A MessagePattern.
    351      * @param partIndex the index of the first SelectFormat argument style part.
    352      * @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
    353      * @param ec Error code.
    354      * @return the sub-message start part index.
    355      */
    356     static int32_t findSubMessage(const MessagePattern& pattern, int32_t partIndex,
    357                                   const UnicodeString& keyword, UErrorCode& ec);
            // NOTE(review): presumably the parsed form of the pattern supplied to
            // the constructor or applyPattern() -- confirm in the implementation.
    359     MessagePattern msgPattern;
    360 };
   362 U_NAMESPACE_END
   364 #endif /* #if !UCONFIG_NO_FORMATTING */
    366 #endif // SELFMT
   367 //eof

mercurial