intl/icu/source/i18n/unicode/selfmt.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rwxr-xr-x

Correct previous dual key logic pending first delivery installment.

michael@0 1 /********************************************************************
michael@0 2 * COPYRIGHT:
michael@0 3 * Copyright (c) 1997-2011, International Business Machines Corporation and
michael@0 4 * others. All Rights Reserved.
michael@0 5 * Copyright (C) 2010 , Yahoo! Inc.
michael@0 6 ********************************************************************
michael@0 7 *
michael@0 8 * File SELFMT.H
michael@0 9 *
michael@0 10 * Modification History:
michael@0 11 *
michael@0 12 * Date Name Description
michael@0 13 * 11/11/09 kirtig Finished first cut of implementation.
michael@0 14 ********************************************************************/
michael@0 15
michael@0 16 #ifndef SELFMT
michael@0 17 #define SELFMT
michael@0 18
michael@0 19 #include "unicode/messagepattern.h"
michael@0 20 #include "unicode/numfmt.h"
michael@0 21 #include "unicode/utypes.h"
michael@0 22
michael@0 23 /**
michael@0 24 * \file
michael@0 25 * \brief C++ API: SelectFormat object
michael@0 26 */
michael@0 27
michael@0 28 #if !UCONFIG_NO_FORMATTING
michael@0 29
michael@0 30 U_NAMESPACE_BEGIN
michael@0 31
michael@0 32 class MessageFormat;
michael@0 33
michael@0 34 /**
michael@0 35 * <p><code>SelectFormat</code> supports the creation of internationalized
michael@0 36 * messages by selecting phrases based on keywords. The pattern specifies
michael@0 37 * how to map keywords to phrases and provides a default phrase. The
michael@0 38 * object provided to the format method is a string that's matched
michael@0 39 * against the keywords. If there is a match, the corresponding phrase
michael@0 40 * is selected; otherwise, the default phrase is used.</p>
michael@0 41 *
michael@0 42 * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
michael@0 43 *
michael@0 44 * <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
michael@0 45 * with a <code>select</code> argument type,
michael@0 46 * rather than using a stand-alone <code>SelectFormat</code>.</p>
michael@0 47 *
michael@0 48 * <p>The main use case for the select format is gender based inflection.
michael@0 49 * When names or nouns are inserted into sentences, their gender can affect pronouns,
michael@0 50 * verb forms, articles, and adjectives. Special care needs to be
michael@0 51 * taken for the case where the gender cannot be determined.
michael@0 52 * The impact varies between languages:</p>
michael@0 53 * \htmlonly
michael@0 54 * <ul>
michael@0 55 * <li>English has three genders, and unknown gender is handled as a special
michael@0 56 * case. Names use the gender of the named person (if known), nouns referring
michael@0 57 * to people use natural gender, and inanimate objects are usually neutral.
michael@0 58 * The gender only affects pronouns: "he", "she", "it", "they".
michael@0 59 *
michael@0 60 * <li>German differs from English in that the gender of nouns is rather
michael@0 61 * arbitrary, even for nouns referring to people ("M&#x00E4;dchen", girl, is neutral).
michael@0 62 * The gender affects pronouns ("er", "sie", "es"), articles ("der", "die",
michael@0 63 * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes M&#x00E4;dchen").
michael@0 64 *
michael@0 65 * <li>French has only two genders; as in German the gender of nouns
michael@0 66 * is rather arbitrary - for sun and moon, the genders
michael@0 67 * are the opposite of those in German. The gender affects
michael@0 68 * pronouns ("il", "elle"), articles ("le", "la"),
michael@0 69 * adjective forms ("bon", "bonne"), and sometimes
michael@0 70 * verb forms ("all&#x00E9;", "all&#x00E9;e").
michael@0 71 *
michael@0 72 * <li>Polish distinguishes five genders (or noun classes),
michael@0 73 * human masculine, animate non-human masculine, inanimate masculine,
michael@0 74 * feminine, and neuter.
michael@0 75 * </ul>
michael@0 76 * \endhtmlonly
michael@0 77 * <p>Some other languages have noun classes that are not related to gender,
michael@0 78 * but similar in grammatical use.
michael@0 79 * Some African languages have around 20 noun classes.</p>
michael@0 80 *
michael@0 81 * <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence,
michael@0 82 * we usually need to distinguish only between female, male and other/unknown.</p>
michael@0 83 *
michael@0 84 * <p>To enable localizers to create sentence patterns that take their
michael@0 85 * language's gender dependencies into consideration, software has to provide
michael@0 86 * information about the gender associated with a noun or name to
michael@0 87 * <code>MessageFormat</code>.
michael@0 88 * Two main cases can be distinguished:</p>
michael@0 89 *
michael@0 90 * <ul>
michael@0 91 * <li>For people, natural gender information should be maintained for each person.
michael@0 92 * Keywords like "male", "female", "mixed" (for groups of people)
michael@0 93 * and "unknown" could be used.
michael@0 94 *
michael@0 95 * <li>For nouns, grammatical gender information should be maintained for
michael@0 96 * each noun and per language, e.g., in resource bundles.
michael@0 97 * The keywords "masculine", "feminine", and "neuter" are commonly used,
michael@0 98 * but some languages may require other keywords.
michael@0 99 * </ul>
michael@0 100 *
michael@0 101 * <p>The resulting keyword is provided to <code>MessageFormat</code> as a
michael@0 102 * parameter separate from the name or noun it's associated with. For example,
michael@0 103 * to generate a message such as "Jean went to Paris", three separate arguments
michael@0 104 * would be provided: The name of the person as argument 0, the gender of
michael@0 105 * the person as argument 1, and the name of the city as argument 2.
michael@0 106 * The sentence pattern for English, where the gender of the person has
michael@0 107 * no impact on this simple sentence, would not refer to argument 1 at all:</p>
michael@0 108 *
michael@0 109 * <pre>{0} went to {2}.</pre>
michael@0 110 *
michael@0 111 * <p><b>Note:</b> The entire sentence should be included (and partially repeated)
michael@0 112 * inside each phrase. Otherwise translators would have to be trained on how to
michael@0 113 * move bits of the sentence in and out of the select argument of a message.
michael@0 114 * (The examples below do not follow this recommendation!)</p>
michael@0 115 *
michael@0 116 * <p>The sentence pattern for French, where the gender of the person affects
michael@0 117 * the form of the participle, uses a select format based on argument 1:</p>
michael@0 118 *
michael@0 119 * \htmlonly<pre>{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; {2}.</pre>\endhtmlonly
michael@0 120 *
michael@0 121 * <p>Patterns can be nested, so that it's possible to handle interactions of
michael@0 122 * number and gender where necessary. For example, if the above sentence should
michael@0 123 * allow for the names of several people to be inserted, the following sentence
michael@0 124 * pattern can be used (with argument 0 the list of people's names,
michael@0 125 * argument 1 the number of people, argument 2 their combined gender, and
michael@0 126 * argument 3 the city name):</p>
michael@0 127 *
michael@0 128 * \htmlonly
michael@0 129 * <pre>{0} {1, plural,
michael@0 130 * one {est {2, select, female {all&#x00E9;e} other {all&#x00E9;}}}
michael@0 131 * other {sont {2, select, female {all&#x00E9;es} other {all&#x00E9;s}}}
michael@0 132 * }&#x00E0; {3}.</pre>
michael@0 133 * \endhtmlonly
michael@0 134 *
michael@0 135 * <h4>Patterns and Their Interpretation</h4>
michael@0 136 *
michael@0 137 * <p>The <code>SelectFormat</code> pattern string defines the phrase output
michael@0 138 * for each user-defined keyword.
michael@0 139 * The pattern is a sequence of (keyword, message) pairs.
michael@0 140 * A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
michael@0 141 *
michael@0 142 * <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
michael@0 143 *
michael@0 144 * <p>You always have to define a phrase for the default keyword
michael@0 145 * <code>other</code>; this phrase is returned when the keyword
michael@0 146 * provided to
michael@0 147 * the <code>format</code> method matches no other keyword.
michael@0 148 * If a pattern does not provide a phrase for <code>other</code>, the method
michael@0 149 * it's provided to returns the error <code>U_DEFAULT_KEYWORD_MISSING</code>.
michael@0 150 * <br>
michael@0 151 * Pattern_White_Space between keywords and messages is ignored.
michael@0 152 * Pattern_White_Space within a message is preserved and output.</p>
michael@0 153 *
michael@0 154 * <p><pre>Example:
michael@0 155 * \htmlonly
michael@0 156 *
michael@0 157 * UErrorCode status = U_ZERO_ERROR;
michael@0 158 * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris."), Locale("fr"), status);
michael@0 159 * if (U_FAILURE(status)) {
michael@0 160 * return;
michael@0 161 * }
michael@0 162 * FieldPosition ignore(FieldPosition::DONT_CARE);
michael@0 163 * UnicodeString result;
michael@0 164 *
michael@0 165 * const char* str1 = "Kirti,female";
michael@0 166 * Formattable args1[] = {"Kirti","female"};
michael@0 167 * msgFmt->format(args1, 2, result, ignore, status);
michael@0 168 * cout << "Input is " << str1 << " and result is: " << result << endl;
michael@0 169 * delete msgFmt;
michael@0 170 *
michael@0 171 * \endhtmlonly
michael@0 172 * </pre>
michael@0 173 * </p>
michael@0 174 *
michael@0 175 * Produces the output:<br>
michael@0 176 * \htmlonly
michael@0 177 * <code>Kirti est all&#x00E9;e &#x00E0; Paris.</code>
michael@0 178 * \endhtmlonly
michael@0 179 *
michael@0 180 * @stable ICU 4.4
michael@0 181 */
michael@0 182
michael@0 183 class U_I18N_API SelectFormat : public Format {
michael@0 184 public:
michael@0 185
michael@0 186 /**
michael@0 187 * Creates a new <code>SelectFormat</code> for a given pattern string.
michael@0 188 * @param pattern the pattern for this <code>SelectFormat</code>.
michael@0 189 * Errors are returned to status if the pattern is invalid.
michael@0 190 * @param status output param set to success/failure code on exit, which
michael@0 191 * must not indicate a failure before the function call.
michael@0 192 * @stable ICU 4.4
michael@0 193 */
michael@0 194 SelectFormat(const UnicodeString& pattern, UErrorCode& status);
michael@0 195
michael@0 196 /**
michael@0 197 * Copy constructor.
michael@0 198 * @stable ICU 4.4
michael@0 199 */
michael@0 200 SelectFormat(const SelectFormat& other);
michael@0 201
michael@0 202 /**
michael@0 203 * Destructor.
michael@0 204 * @stable ICU 4.4
michael@0 205 */
michael@0 206 virtual ~SelectFormat();
michael@0 207
michael@0 208 /**
michael@0 209 * Sets the pattern used by this select format
michael@0 210 * for the keyword rules.
michael@0 211 * Patterns and their interpretation are specified in the class description.
michael@0 212 *
michael@0 213 * @param pattern the pattern for this select format.
michael@0 214 * Errors are returned to status if the pattern is invalid.
michael@0 215 * @param status output param set to success/failure code on exit, which
michael@0 216 * must not indicate a failure before the function call.
michael@0 217 * @stable ICU 4.4
michael@0 218 */
michael@0 219 void applyPattern(const UnicodeString& pattern, UErrorCode& status);
michael@0 220
michael@0 221
michael@0 222 using Format::format; // Keep the inherited Format::format overloads visible; the format() declarations in this class would otherwise hide them.
michael@0 223
michael@0 224 /**
michael@0 225 * Selects the phrase for the given keyword.
michael@0 226 *
michael@0 227 * @param keyword The keyword that is used to select an alternative.
michael@0 228 * @param appendTo output parameter to receive result.
michael@0 229 * Result is appended to existing contents.
michael@0 230 * @param pos On input: an alignment field, if desired.
michael@0 231 * On output: the offsets of the alignment field.
michael@0 232 * @param status output param set to success/failure code on exit, which
michael@0 233 * must not indicate a failure before the function call.
michael@0 234 * @return Reference to 'appendTo' parameter.
michael@0 235 * @stable ICU 4.4
michael@0 236 */
michael@0 237 UnicodeString& format(const UnicodeString& keyword,
michael@0 238 UnicodeString& appendTo,
michael@0 239 FieldPosition& pos,
michael@0 240 UErrorCode& status) const;
michael@0 241
michael@0 242 /**
michael@0 243 * Assignment operator.
michael@0 244 *
michael@0 245 * @param other the SelectFormat object to copy from.
michael@0 246 * @stable ICU 4.4
michael@0 247 */
michael@0 248 SelectFormat& operator=(const SelectFormat& other);
michael@0 249
michael@0 250 /**
michael@0 251 * Return true if another object is semantically equal to this one.
michael@0 252 *
michael@0 253 * @param other the SelectFormat object to be compared with.
michael@0 254 * @return true if other is semantically equal to this.
michael@0 255 * @stable ICU 4.4
michael@0 256 */
michael@0 257 virtual UBool operator==(const Format& other) const;
michael@0 258
michael@0 259 /**
michael@0 260 * Return true if another object is semantically unequal to this one.
michael@0 261 *
michael@0 262 * @param other the SelectFormat object to be compared with.
michael@0 263 * @return true if other is semantically unequal to this.
michael@0 264 * @stable ICU 4.4
michael@0 265 */
michael@0 266 virtual UBool operator!=(const Format& other) const;
michael@0 267
michael@0 268 /**
michael@0 269 * Clones this Format object polymorphically. The caller owns the
michael@0 270 * result and should delete it when done.
michael@0 271 * @stable ICU 4.4
michael@0 272 */
michael@0 273 virtual Format* clone(void) const;
michael@0 274
michael@0 275 /**
michael@0 276 * Format an object to produce a string.
michael@0 277 * This method handles keyword strings.
michael@0 278 * If the Formattable object is not a <code>UnicodeString</code>,
michael@0 279 * then it returns a failing UErrorCode.
michael@0 280 *
michael@0 281 * @param obj A keyword string that is used to select an alternative.
michael@0 282 * @param appendTo output parameter to receive result.
michael@0 283 * Result is appended to existing contents.
michael@0 284 * @param pos On input: an alignment field, if desired.
michael@0 285 * On output: the offsets of the alignment field.
michael@0 286 * @param status output param filled with success/failure status.
michael@0 287 * @return Reference to 'appendTo' parameter.
michael@0 288 * @stable ICU 4.4
michael@0 289 */
michael@0 290 UnicodeString& format(const Formattable& obj,
michael@0 291 UnicodeString& appendTo,
michael@0 292 FieldPosition& pos,
michael@0 293 UErrorCode& status) const;
michael@0 294
michael@0 295 /**
michael@0 296 * Returns the pattern from applyPattern() or constructor.
michael@0 297 *
michael@0 298 * @param appendTo output parameter to receive result.
michael@0 299 * Result is appended to existing contents.
michael@0 300 * @return the UnicodeString with inserted pattern.
michael@0 301 * @stable ICU 4.4
michael@0 302 */
michael@0 303 UnicodeString& toPattern(UnicodeString& appendTo);
michael@0 304
michael@0 305 /**
michael@0 306 * This method is not yet supported by <code>SelectFormat</code>.
michael@0 307 * <P>
michael@0 308 * Before calling, set parse_pos.index to the offset you want to start
michael@0 309 * parsing at in the source. After calling, parse_pos.index is the end of
michael@0 310 * the text you parsed. If error occurs, index is unchanged.
michael@0 311 * <P>
michael@0 312 * When parsing, leading whitespace is discarded (with a successful parse),
michael@0 313 * while trailing whitespace is left as is.
michael@0 314 * <P>
michael@0 315 * See Format::parseObject() for more.
michael@0 316 *
michael@0 317 * @param source The string to be parsed into an object.
michael@0 318 * @param result Formattable to be set to the parse result.
michael@0 319 * If parse fails, return contents are undefined.
michael@0 320 * @param parse_pos The position to start parsing at. Upon return
michael@0 321 * this param is set to the position after the
michael@0 322 * last character successfully parsed. If the
michael@0 323 * source is not parsed successfully, this param
michael@0 324 * will remain unchanged.
michael@0 325 * @stable ICU 4.4
michael@0 326 */
michael@0 327 virtual void parseObject(const UnicodeString& source,
michael@0 328 Formattable& result,
michael@0 329 ParsePosition& parse_pos) const;
michael@0 330
michael@0 331 /**
michael@0 332 * ICU "poor man's RTTI", returns a UClassID for this class.
michael@0 333 * @stable ICU 4.4
michael@0 334 */
michael@0 335 static UClassID U_EXPORT2 getStaticClassID(void);
michael@0 336
michael@0 337 /**
michael@0 338 * ICU "poor man's RTTI", returns a UClassID for the actual class.
michael@0 339 * @stable ICU 4.4
michael@0 340 */
michael@0 341 virtual UClassID getDynamicClassID() const;
michael@0 342
michael@0 343 private:
michael@0 344 friend class MessageFormat; // Grants MessageFormat access to the private members declared below.
michael@0 345
michael@0 346 SelectFormat(); // Default constructor: declared private and not implemented, so it cannot be used.
michael@0 347
michael@0 348 /**
michael@0 349 * Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
michael@0 350 * @param pattern A MessagePattern.
michael@0 351 * @param partIndex the index of the first SelectFormat argument style part.
michael@0 352 * @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
michael@0 353 * @param ec Error code.
michael@0 354 * @return the sub-message start part index.
michael@0 355 */
michael@0 356 static int32_t findSubMessage(const MessagePattern& pattern, int32_t partIndex,
michael@0 357 const UnicodeString& keyword, UErrorCode& ec);
michael@0 358
michael@0 359 MessagePattern msgPattern; // NOTE(review): presumably the parsed form of the pattern given to the constructor/applyPattern() -- confirm in the implementation file.
michael@0 360 };
michael@0 361
michael@0 362 U_NAMESPACE_END
michael@0 363
michael@0 364 #endif /* #if !UCONFIG_NO_FORMATTING */
michael@0 365
michael@0 366 #endif // SELFMT
michael@0 367 //eof

mercurial