intl/icu/source/i18n/unicode/selfmt.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/unicode/selfmt.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,367 @@
     1.4 +/********************************************************************
     1.5 + * COPYRIGHT:
     1.6 + * Copyright (c) 1997-2011, International Business Machines Corporation and
     1.7 + * others. All Rights Reserved.
     1.8 + * Copyright (C) 2010 , Yahoo! Inc.
     1.9 + ********************************************************************
    1.10 + *
    1.11 + * File SELFMT.H
    1.12 + *
    1.13 + * Modification History:
    1.14 + *
    1.15 + *   Date        Name        Description
    1.16 + *   11/11/09    kirtig      Finished first cut of implementation.
    1.17 + ********************************************************************/
    1.18 +
    1.19 +#ifndef SELFMT
    1.20 +#define SELFMT
    1.21 +
    1.22 +#include "unicode/messagepattern.h"
    1.23 +#include "unicode/numfmt.h"
    1.24 +#include "unicode/utypes.h"
    1.25 +
    1.26 +/**
    1.27 + * \file
    1.28 + * \brief C++ API: SelectFormat object
    1.29 + */
    1.30 +
    1.31 +#if !UCONFIG_NO_FORMATTING
    1.32 +
    1.33 +U_NAMESPACE_BEGIN
    1.34 +
    1.35 +class MessageFormat;
    1.36 +
    1.37 +/**
    1.38 +  * <p><code>SelectFormat</code> supports the creation of  internationalized
    1.39 +  * messages by selecting phrases based on keywords. The pattern  specifies
    1.40 +  * how to map keywords to phrases and provides a default phrase. The
    1.41 +  * object provided to the format method is a string that's matched
    1.42 +  * against the keywords. If there is a match, the corresponding phrase
    1.43 +  * is selected; otherwise, the default phrase is used.</p>
    1.44 +  *
    1.45 +  * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
    1.46 +  *
    1.47 +  * <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
    1.48 +  * with a <code>select</code> argument type,
    1.49 +  * rather than using a stand-alone <code>SelectFormat</code>.</p>
    1.50 +  *
    1.51 +  * <p>The main use case for the select format is gender based  inflection.
    1.52 +  * When names or nouns are inserted into sentences, their gender can  affect pronouns,
    1.53 +  * verb forms, articles, and adjectives. Special care needs to be
    1.54 +  * taken for the case where the gender cannot be determined.
    1.55 +  * The impact varies between languages:</p>
    1.56 +  * \htmlonly
    1.57 +  * <ul>
    1.58 +  * <li>English has three genders, and unknown gender is handled as a  special
    1.59 +  * case. Names use the gender of the named person (if known), nouns  referring
    1.60 +  * to people use natural gender, and inanimate objects are usually  neutral.
    1.61 +  * The gender only affects pronouns: "he", "she", "it", "they".
    1.62 +  *
    1.63 +  * <li>German differs from English in that the gender of nouns is  rather
    1.64 +  * arbitrary, even for nouns referring to people ("M&#x00E4;dchen", girl, is  neutral).
    1.65 +  * The gender affects pronouns ("er", "sie", "es"), articles ("der",  "die",
    1.66 +  * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes  M&#x00E4;dchen").
    1.67 +  *
    1.68 +  * <li>French has only two genders; as in German the gender of nouns
    1.69 +  * is rather arbitrary - for sun and moon, the genders
    1.70 +  * are the opposite of those in German. The gender affects
    1.71 +  * pronouns ("il", "elle"), articles ("le", "la"),
    1.72 +  * adjective forms ("bon", "bonne"), and sometimes
    1.73 +  * verb forms ("all&#x00E9;", "all&#x00E9;e").
    1.74 +  *
    1.75 +  * <li>Polish distinguishes five genders (or noun classes),
    1.76 +  * human masculine, animate non-human masculine, inanimate masculine,
    1.77 +  * feminine, and neuter.
    1.78 +  * </ul>
    1.79 +  * \endhtmlonly
    1.80 +  * <p>Some other languages have noun classes that are not related to  gender,
    1.81 +  * but similar in grammatical use.
    1.82 +  * Some African languages have around 20 noun classes.</p>
    1.83 +  *
    1.84 +  * <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence,
    1.85 +  * we usually need to distinguish only between female, male and other/unknown.</p>
    1.86 +  *
    1.87 +  * <p>To enable localizers to create sentence patterns that take their
    1.88 +  * language's gender dependencies into consideration, software has to  provide
    1.89 +  * information about the gender associated with a noun or name to
    1.90 +  * <code>MessageFormat</code>.
    1.91 +  * Two main cases can be distinguished:</p>
    1.92 +  *
    1.93 +  * <ul>
    1.94 +  * <li>For people, natural gender information should be maintained  for each person.
    1.95 +  * Keywords like "male", "female", "mixed" (for groups of people)
    1.96 +  * and "unknown" could be used.
    1.97 +  *
    1.98 +  * <li>For nouns, grammatical gender information should be maintained  for
    1.99 +  * each noun and per language, e.g., in resource bundles.
   1.100 +  * The keywords "masculine", "feminine", and "neuter" are commonly  used,
   1.101 +  * but some languages may require other keywords.
   1.102 +  * </ul>
   1.103 +  *
   1.104 +  * <p>The resulting keyword is provided to <code>MessageFormat</code>  as a
   1.105 +  * parameter separate from the name or noun it's associated with. For  example,
   1.106 +  * to generate a message such as "Jean went to Paris", three separate  arguments
   1.107 +  * would be provided: The name of the person as argument 0, the  gender of
   1.108 +  * the person as argument 1, and the name of the city as argument 2.
   1.109 +  * The sentence pattern for English, where the gender of the person has
   1.110 +  * no impact on this simple sentence, would not refer to argument 1  at all:</p>
   1.111 +  *
   1.112 +  * <pre>{0} went to {2}.</pre>
   1.113 +  *
   1.114 +  * <p><b>Note:</b> The entire sentence should be included (and partially repeated)
   1.115 +  * inside each phrase. Otherwise translators would have to be trained on how to
   1.116 +  * move bits of the sentence in and out of the select argument of a message.
   1.117 +  * (The examples below do not follow this recommendation!)</p>
   1.118 +  *
   1.119 +  * <p>The sentence pattern for French, where the gender of the person affects
   1.120 +  * the form of the participle, uses a select format based on argument 1:</p>
   1.121 +  *
   1.122 +  * \htmlonly<pre>{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; {2}.</pre>\endhtmlonly
   1.123 +  *
   1.124 +  * <p>Patterns can be nested, so that it's possible to handle  interactions of
   1.125 +  * number and gender where necessary. For example, if the above  sentence should
   1.126 +  * allow for the names of several people to be inserted, the  following sentence
   1.127 +  * pattern can be used (with argument 0 the list of people's names,
   1.128 +  * argument 1 the number of people, argument 2 their combined gender, and
   1.129 +  * argument 3 the city name):</p>
   1.130 +  *
   1.131 +  * \htmlonly
   1.132 +  * <pre>{0} {1, plural,
   1.133 +  *                 one {est {2, select, female {all&#x00E9;e} other  {all&#x00E9;}}}
   1.134 +  *                 other {sont {2, select, female {all&#x00E9;es} other {all&#x00E9;s}}}
   1.135 +  *          }&#x00E0; {3}.</pre>
   1.136 +  * \endhtmlonly
   1.137 +  *
   1.138 +  * <h4>Patterns and Their Interpretation</h4>
   1.139 +  *
   1.140 +  * <p>The <code>SelectFormat</code> pattern string defines the phrase output
   1.141 +  * for each user-defined keyword.
   1.142 +  * The pattern is a sequence of (keyword, message) pairs.
   1.143 +  * A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
   1.144 +  *
   1.145 +  * <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
   1.146 +  *
   1.147 +  * <p>You always have to define a phrase for the default keyword
   1.148 +  * <code>other</code>; this phrase is returned when the keyword
   1.149 +  * provided to
   1.150 +  * the <code>format</code> method matches no other keyword.
   1.151 +  * If a pattern does not provide a phrase for <code>other</code>, the  method
   1.152 +  * it's provided to returns the error  <code>U_DEFAULT_KEYWORD_MISSING</code>.
   1.153 +  * <br>
   1.154 +  * Pattern_White_Space between keywords and messages is ignored.
   1.155 +  * Pattern_White_Space within a message is preserved and output.</p>
   1.156 +  *
   1.157 +  * <p><pre>Example:
   1.158 +  * \htmlonly
   1.159 +  *
   1.160 +  * UErrorCode status = U_ZERO_ERROR;
   1.161 +  * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est  {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris."), Locale("fr"),  status);
   1.162 +  * if (U_FAILURE(status)) {
   1.163 +  *       return;
   1.164 +  * }
   1.165 +  * FieldPosition ignore(FieldPosition::DONT_CARE);
   1.166 +  * UnicodeString result;
   1.167 +  *
   1.168 +  * const char* str1 = "Kirti,female";
   1.169 +  * Formattable args1[] = {"Kirti","female"};
   1.170 +  * msgFmt->format(args1, 2, result, ignore, status);
   1.171 +  * cout << "Input is " << str1 << " and result is: " << result << endl;
   1.172 +  * delete msgFmt;
   1.173 +  *
   1.174 +  * \endhtmlonly
   1.175 +  * </pre>
   1.176 +  * </p>
   1.177 +  *
   1.178 +  * Produces the output:<br>
   1.179 +  * \htmlonly
   1.180 +  * <code>Kirti est all&#x00E9;e &#x00E0; Paris.</code>
   1.181 +  * \endhtmlonly
   1.182 +  *
   1.183 +  * @stable ICU 4.4
   1.184 +  */
   1.185 +
   1.186 +class U_I18N_API SelectFormat : public Format {
   1.187 +public:
   1.188 +
   1.189 +    /**
   1.190 +     * Creates a new <code>SelectFormat</code> for a given pattern string.
   1.191 +     * @param  pattern the pattern for this <code>SelectFormat</code>.
   1.192 +     *                 Errors are returned to status if the pattern is invalid.
   1.193 +     * @param status   output param set to success/failure code on exit, which
   1.194 +     *                 must not indicate a failure before the function call.
   1.195 +     * @stable ICU 4.4
   1.196 +     */
   1.197 +    SelectFormat(const UnicodeString& pattern, UErrorCode& status);
   1.198 +
   1.199 +    /**
   1.200 +     * Copy constructor.
   1.201 +     * @stable ICU 4.4
   1.202 +     */
   1.203 +    SelectFormat(const SelectFormat& other);
   1.204 +
   1.205 +    /**
   1.206 +     * Destructor.
   1.207 +     * @stable ICU 4.4
   1.208 +     */
   1.209 +    virtual ~SelectFormat();
   1.210 +
   1.211 +    /**
   1.212 +     * Sets the pattern used by this select format
   1.213 +     * for the keyword rules.
   1.214 +     * Patterns and their interpretation are specified in the class description.
   1.215 +     *
   1.216 +     * @param pattern the pattern for this select format.
   1.217 +     *                Errors are returned to status if the pattern is invalid.
   1.218 +     * @param status  output param set to success/failure code on exit, which
   1.219 +     *                must not indicate a failure before the function call.
   1.220 +     * @stable ICU 4.4
   1.221 +     */
   1.222 +    void applyPattern(const UnicodeString& pattern, UErrorCode& status);
   1.223 +
   1.224 +
   1.225 +    using Format::format;  // keep the inherited Format::format overloads visible alongside the ones declared here
   1.226 +
   1.227 +    /**
   1.228 +     * Selects the phrase for the given keyword.
   1.229 +     *
   1.230 +     * @param keyword  The keyword that is used to select an alternative.
   1.231 +     * @param appendTo output parameter to receive result.
   1.232 +     *                 Result is appended to existing contents.
   1.233 +     * @param pos      On input: an alignment field, if desired.
   1.234 +     *                 On output: the offsets of the alignment field.
   1.235 +     * @param status   output param set to success/failure code on exit, which
   1.236 +     *                 must not indicate a failure before the function call.
   1.237 +     * @return         Reference to 'appendTo' parameter.
   1.238 +     * @stable ICU 4.4
   1.239 +     */
   1.240 +    UnicodeString& format(const UnicodeString& keyword,
   1.241 +                            UnicodeString& appendTo,
   1.242 +                            FieldPosition& pos,
   1.243 +                            UErrorCode& status) const;
   1.244 +
   1.245 +    /**
   1.246 +     * Assignment operator.
   1.247 +     *
   1.248 +     * @param other    the SelectFormat object to copy from.
   1.249 +     * @stable ICU 4.4
   1.250 +     */
   1.251 +    SelectFormat& operator=(const SelectFormat& other);
   1.252 +
   1.253 +    /**
   1.254 +     * Return true if another object is semantically equal to this one.
   1.255 +     *
   1.256 +     * @param other    the Format object to be compared with.
   1.257 +     * @return         true if other is semantically equal to this.
   1.258 +     * @stable ICU 4.4
   1.259 +     */
   1.260 +    virtual UBool operator==(const Format& other) const;
   1.261 +
   1.262 +    /**
   1.263 +     * Return true if another object is semantically unequal to this one.
   1.264 +     *
   1.265 +     * @param other    the Format object to be compared with.
   1.266 +     * @return         true if other is semantically unequal to this.
   1.267 +     * @stable ICU 4.4
   1.268 +     */
   1.269 +    virtual UBool operator!=(const Format& other) const;
   1.270 +
   1.271 +    /**
   1.272 +     * Clones this Format object polymorphically.  The caller owns the
   1.273 +     * result and should delete it when done.
   1.274 +     * @stable ICU 4.4
   1.275 +     */
   1.276 +    virtual Format* clone(void) const;
   1.277 +
   1.278 +    /**
   1.279 +     * Format an object to produce a string.
   1.280 +     * This method handles keyword strings.
   1.281 +     * If the Formattable object is not a <code>UnicodeString</code>,
   1.282 +     * then it returns a failing UErrorCode.
   1.283 +     *
   1.284 +     * @param obj       A keyword string that is used to select an alternative.
   1.285 +     * @param appendTo  output parameter to receive result.
   1.286 +     *                  Result is appended to existing contents.
   1.287 +     * @param pos       On input: an alignment field, if desired.
   1.288 +     *                  On output: the offsets of the alignment field.
   1.289 +     * @param status    output param filled with success/failure status.
   1.290 +     * @return          Reference to 'appendTo' parameter.
   1.291 +     * @stable ICU 4.4
   1.292 +     */
   1.293 +    UnicodeString& format(const Formattable& obj,
   1.294 +                         UnicodeString& appendTo,
   1.295 +                         FieldPosition& pos,
   1.296 +                         UErrorCode& status) const;
   1.297 +
   1.298 +    /**
   1.299 +     * Returns the pattern from applyPattern() or constructor.
   1.300 +     *
   1.301 +     * @param  appendTo  output parameter to receive result.
   1.302 +     *                  Result is appended to existing contents.
   1.303 +     * @return the UnicodeString with the pattern appended.
   1.304 +     * @stable ICU 4.4
   1.305 +     */
   1.306 +    UnicodeString& toPattern(UnicodeString& appendTo);
   1.307 +
   1.308 +    /**
   1.309 +     * This method is not yet supported by <code>SelectFormat</code>.
   1.310 +     * <P>
   1.311 +     * Before calling, set parse_pos.index to the offset you want to start
   1.312 +     * parsing at in the source. After calling, parse_pos.index is the end of
   1.313 +     * the text you parsed. If an error occurs, index is unchanged.
   1.314 +     * <P>
   1.315 +     * When parsing, leading whitespace is discarded (with a successful parse),
   1.316 +     * while trailing whitespace is left as is.
   1.317 +     * <P>
   1.318 +     * See Format::parseObject() for more.
   1.319 +     *
   1.320 +     * @param source     The string to be parsed into an object.
   1.321 +     * @param result     Formattable to be set to the parse result.
   1.322 +     *     If parse fails, return contents are undefined.
   1.323 +     * @param parse_pos The position to start parsing at. Upon return
   1.324 +     *     this param is set to the position after the
   1.325 +     *     last character successfully parsed. If the
   1.326 +     *     source is not parsed successfully, this param
   1.327 +     *     will remain unchanged.
   1.328 +     * @stable ICU 4.4
   1.329 +     */
   1.330 +    virtual void parseObject(const UnicodeString& source,
   1.331 +                            Formattable& result,
   1.332 +                            ParsePosition& parse_pos) const;
   1.333 +
   1.334 +    /**
   1.335 +     * ICU "poor man's RTTI", returns a UClassID for this class.
   1.336 +     * @stable ICU 4.4
   1.337 +     */
   1.338 +    static UClassID U_EXPORT2 getStaticClassID(void);
   1.339 +
   1.340 +    /**
   1.341 +     * ICU "poor man's RTTI", returns a UClassID for the actual class.
   1.342 +     * @stable ICU 4.4
   1.343 +     */
   1.344 +    virtual UClassID getDynamicClassID() const;
   1.345 +
   1.346 +private:
   1.347 +    friend class MessageFormat;
   1.348 +
   1.349 +    SelectFormat();   // Private default constructor; declared but not implemented.
   1.350 +
   1.351 +    /**
   1.352 +     * Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
   1.353 +     * @param pattern A MessagePattern.
   1.354 +     * @param partIndex the index of the first SelectFormat argument style part.
   1.355 +     * @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
   1.356 +     * @param ec Error code.
   1.357 +     * @return the sub-message start part index.
   1.358 +     */
   1.359 +    static int32_t findSubMessage(const MessagePattern& pattern, int32_t partIndex,
   1.360 +                                  const UnicodeString& keyword, UErrorCode& ec);
   1.361 +
   1.362 +    MessagePattern msgPattern;  // NOTE(review): presumably the parsed form of the applied pattern; confirm in the implementation
   1.363 +};
   1.364 +
   1.365 +U_NAMESPACE_END
   1.366 +
   1.367 +#endif /* #if !UCONFIG_NO_FORMATTING */
   1.368 +
   1.369 +#endif // SELFMT
   1.370 +//eof

mercurial