1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/unicode/rbnf.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,992 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 1997-2013, International Business Machines Corporation and others. 1.7 +* All Rights Reserved. 1.8 +******************************************************************************* 1.9 +*/ 1.10 + 1.11 +#ifndef RBNF_H 1.12 +#define RBNF_H 1.13 + 1.14 +#include "unicode/utypes.h" 1.15 + 1.16 +/** 1.17 + * \file 1.18 + * \brief C++ API: Rule Based Number Format 1.19 + */ 1.20 + 1.21 +/** 1.22 + * \def U_HAVE_RBNF 1.23 + * This will be 0 if RBNF support is not included in ICU 1.24 + * and 1 if it is. 1.25 + * 1.26 + * @stable ICU 2.4 1.27 + */ 1.28 +#if UCONFIG_NO_FORMATTING 1.29 +#define U_HAVE_RBNF 0 1.30 +#else 1.31 +#define U_HAVE_RBNF 1 1.32 + 1.33 +#include "unicode/coll.h" 1.34 +#include "unicode/dcfmtsym.h" 1.35 +#include "unicode/fmtable.h" 1.36 +#include "unicode/locid.h" 1.37 +#include "unicode/numfmt.h" 1.38 +#include "unicode/unistr.h" 1.39 +#include "unicode/strenum.h" 1.40 + 1.41 +U_NAMESPACE_BEGIN 1.42 + 1.43 +class NFRuleSet; 1.44 +class LocalizationInfo; 1.45 + 1.46 +/** 1.47 + * Tags for the predefined rulesets. 1.48 + * 1.49 + * @stable ICU 2.2 1.50 + */ 1.51 +enum URBNFRuleSetTag { 1.52 + URBNF_SPELLOUT, /**< Selects the formatter that spells out a value in words in the desired language. */ 1.53 + URBNF_ORDINAL, /**< Selects the formatter that attaches an ordinal suffix from the desired language to the end of a number. */ 1.54 + URBNF_DURATION, /**< Selects the formatter that formats a duration in seconds as hours, minutes, and seconds. */ 1.55 + URBNF_NUMBERING_SYSTEM, /**< Selects rules for alternate numbering systems, such as the Hebrew numbering system or Roman numerals. */ 1.56 + URBNF_COUNT /**< One past the last tag above, so it equals the number of tags. NOTE(review): presumably not a valid selector itself -- confirm. */ 1.57 +}; 1.58 + 1.59 +#if UCONFIG_NO_COLLATION 1.60 +class Collator; 1.61 +#endif 1.62 + 1.63 +/** 1.64 + * The RuleBasedNumberFormat class formats numbers according to a set of rules. 
This number formatter is 1.65 + * typically used for spelling out numeric values in words (e.g., 25,376 as 1.66 + * "twenty-five thousand three hundred seventy-six" or "vingt-cinq mille trois 1.67 + * cents soixante-seize" or 1.68 + * "fünfundzwanzigtausenddreihundertsechsundsiebzig"), but can also be used for 1.69 + * other complicated formatting tasks, such as formatting a number of seconds as hours, 1.70 + * minutes and seconds (e.g., 3,730 as "1:02:10"). 1.71 + * 1.72 + * <p>The resources contain three predefined formatters for each locale: spellout, which 1.73 + * spells out a value in words (123 is "one hundred twenty-three"); ordinal, which 1.74 + * appends an ordinal suffix to the end of a numeral (123 is "123rd"); and 1.75 + * duration, which shows a duration in seconds as hours, minutes, and seconds (123 is 1.76 + * "2:03"). The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s 1.77 + * by supplying programmer-defined rule sets.</p> 1.78 + * 1.79 + * <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description 1.80 + * that is either passed to the constructor as a <tt>String</tt> or loaded from a resource 1.81 + * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em> 1.82 + * Each rule has a string of output text and a value or range of values it is applicable to. 
1.83 + * In a typical spellout rule set, the first twenty rules are the words for the numbers from 1.84 + * 0 to 19:</p> 1.85 + * 1.86 + * <pre>zero; one; two; three; four; five; six; seven; eight; nine; 1.87 + * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre> 1.88 + * 1.89 + * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and 1.90 + * we only have to supply the words for the multiples of 10:</p> 1.91 + * 1.92 + * <pre> 20: twenty[->>]; 1.93 + * 30: thirty[->>]; 1.94 + * 40: forty[->>]; 1.95 + * 50: fifty[->>]; 1.96 + * 60: sixty[->>]; 1.97 + * 70: seventy[->>]; 1.98 + * 80: eighty[->>]; 1.99 + * 90: ninety[->>];</pre> 1.100 + * 1.101 + * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the 1.102 + * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable 1.103 + * to all numbers from its own base value to one less than the next rule's base value. The 1.104 + * ">>" token is called a <em>substitution</em> and tells the formatter to 1.105 + * isolate the number's ones digit, format it using this same set of rules, and place the 1.106 + * result at the position of the ">>" token. Text in brackets is omitted if 1.107 + * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24 1.108 + * is "twenty-four," not "twenty four").</p> 1.109 + * 1.110 + * <p>For even larger numbers, we can actually look up several parts of the number in the 1.111 + * list:</p> 1.112 + * 1.113 + * <pre>100: << hundred[ >>];</pre> 1.114 + * 1.115 + * <p>The "<<" represents a new kind of substitution. The << isolates 1.116 + * the hundreds digit (and any digits to its left), formats it using this same rule set, and 1.117 + * places the result where the "<<" was. Notice also that the meaning of 1.118 + * >> has changed: it now refers to both the tens and the ones digits. 
The meaning of 1.119 + * both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em> 1.120 + * which is the highest power of 10 that is less than or equal to the base value (the user 1.121 + * can change this). To fill in the substitutions, the formatter divides the number being 1.122 + * formatted by the divisor. The integral quotient is used to fill in the << 1.123 + * substitution, and the remainder is used to fill in the >> substitution. The meaning 1.124 + * of the brackets changes similarly: text in brackets is omitted if the value being 1.125 + * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so 1.126 + * if a substitution is filled in with text that includes another substitution, that 1.127 + * substitution is also filled in.</p> 1.128 + * 1.129 + * <p>This rule covers values up to 999, at which point we add another rule:</p> 1.130 + * 1.131 + * <pre>1000: << thousand[ >>];</pre> 1.132 + * 1.133 + * <p>Again, the meanings of the brackets and substitution tokens shift because the rule's 1.134 + * base value is a higher power of 10, changing the rule's divisor. This rule can actually be 1.135 + * used all the way up to 999,999. This allows us to finish out the rules as follows:</p> 1.136 + * 1.137 + * <pre> 1,000,000: << million[ >>]; 1.138 + * 1,000,000,000: << billion[ >>]; 1.139 + * 1,000,000,000,000: << trillion[ >>]; 1.140 + * 1,000,000,000,000,000: OUT OF RANGE!;</pre> 1.141 + * 1.142 + * <p>Commas, periods, and spaces can be used in the base values to improve legibility and 1.143 + * are ignored by the rule parser. The last rule in the list is customarily treated as an 1.144 + * "overflow rule," applying to everything from its base value on up, and often (as 1.145 + * in this example) being used to print out an error message or default representation. 
1.146 + * Notice also that the size of the major groupings in large numbers is controlled by the 1.147 + * spacing of the rules: because in English we group numbers by thousand, the higher rules 1.148 + * are separated from each other by a factor of 1,000.</p> 1.149 + * 1.150 + * <p>To see how these rules actually work in practice, consider the following example: 1.151 + * Formatting 25,340 with this rule set would work like this:</p> 1.152 + * 1.153 + * <table border="0" width="100%"> 1.154 + * <tr> 1.155 + * <td><strong><< thousand >></strong></td> 1.156 + * <td>[the rule whose base value is 1,000 is applicable to 25,340]</td> 1.157 + * </tr> 1.158 + * <tr> 1.159 + * <td><strong>twenty->></strong> thousand >></td> 1.160 + * <td>[25,340 over 1,000 is 25. The rule for 20 applies.]</td> 1.161 + * </tr> 1.162 + * <tr> 1.163 + * <td>twenty-<strong>five</strong> thousand >></td> 1.164 + * <td>[25 mod 10 is 5. The rule for 5 is "five."]</td> 1.165 + * </tr> 1.166 + * <tr> 1.167 + * <td>twenty-five thousand <strong><< hundred >></strong></td> 1.168 + * <td>[25,340 mod 1,000 is 340. The rule for 100 applies.]</td> 1.169 + * </tr> 1.170 + * <tr> 1.171 + * <td>twenty-five thousand <strong>three</strong> hundred >></td> 1.172 + * <td>[340 over 100 is 3. The rule for 3 is "three."]</td> 1.173 + * </tr> 1.174 + * <tr> 1.175 + * <td>twenty-five thousand three hundred <strong>forty</strong></td> 1.176 + * <td>[340 mod 100 is 40. The rule for 40 applies. Since 40 divides 1.177 + * evenly by 10, the hyphen and substitution in the brackets are omitted.]</td> 1.178 + * </tr> 1.179 + * </table> 1.180 + * 1.181 + * <p>The above syntax suffices only to format positive integers. To format negative numbers, 1.182 + * we add a special rule:</p> 1.183 + * 1.184 + * <pre>-x: minus >>;</pre> 1.185 + * 1.186 + * <p>This is called a <em>negative-number rule,</em> and is identified by "-x" 1.187 + * where the base value would be. This rule is used to format all negative numbers. 
The 1.188 + * >> token here means "find the number's absolute value, format it with these 1.189 + * rules, and put the result here."</p> 1.190 + * 1.191 + * <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional 1.192 + * parts:</p> 1.193 + * 1.194 + * <pre>x.x: << point >>;</pre> 1.195 + * 1.196 + * <p>This rule is used for all positive non-integers (negative non-integers pass through the 1.197 + * negative-number rule first and then through this rule). Here, the << token refers to 1.198 + * the number's integral part, and the >> to the number's fractional part. The 1.199 + * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be 1.200 + * formatted as "one hundred twenty-three point four five six").</p> 1.201 + * 1.202 + * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p> 1.203 + * 1.204 + * <p>There is actually much more flexibility built into the rule language than the 1.205 + * description above shows. A formatter may own multiple rule sets, which can be selected by 1.206 + * the caller, and which can use each other to fill in their substitutions. Substitutions can 1.207 + * also be filled in with digits, using a DecimalFormat object. There is syntax that can be 1.208 + * used to alter a rule's divisor in various ways. And there is provision for much more 1.209 + * flexible fraction handling. A complete description of the rule syntax follows:</p> 1.210 + * 1.211 + * <hr> 1.212 + * 1.213 + * <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule 1.214 + * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule 1.215 + * set name must begin with a % sign. Rule sets with names that begin with a single % sign 1.216 + * are <em>public:</em> the caller can specify that they be used to format and parse numbers. 
1.217 + * Rule sets with names that begin with %% are <em>private:</em> they exist only for the use 1.218 + * of other rule sets. If a formatter only has one rule set, the name may be omitted.</p> 1.219 + * 1.220 + * <p>The user can also specify a special "rule set" named <tt>%%lenient-parse</tt>. 1.221 + * The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt> 1.222 + * description which is used to define equivalences for lenient parsing. For more information 1.223 + * on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing, 1.224 + * see <tt>setLenientParse()</tt>. <em>Note:</em> symbols that have syntactic meaning 1.225 + * in collation rules, such as '&', have no particular meaning when appearing outside 1.226 + * of the <tt>lenient-parse</tt> rule set.</p> 1.227 + * 1.228 + * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em> 1.229 + * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em> 1.230 + * These parameters are controlled by the description syntax, which consists of a <em>rule 1.231 + * descriptor,</em> a colon, and a <em>rule body.</em></p> 1.232 + * 1.233 + * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the 1.234 + * name of a token):</p> 1.235 + * 1.236 + * <table border="0" width="100%"> 1.237 + * <tr> 1.238 + * <td><em>bv</em>:</td> 1.239 + * <td><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal 1.240 + * number expressed using ASCII digits. <em>bv</em> may contain spaces, period, and commas, 1.241 + * which are ignored. The rule's divisor is the highest power of 10 less than or equal to 1.242 + * the base value.</td> 1.243 + * </tr> 1.244 + * <tr> 1.245 + * <td><em>bv</em>/<em>rad</em>:</td> 1.246 + * <td><em>bv</em> specifies the rule's base value. 
The rule's divisor is the 1.247 + * highest power of <em>rad</em> less than or equal to the base value.</td> 1.248 + * </tr> 1.249 + * <tr> 1.250 + * <td><em>bv</em>>:</td> 1.251 + * <td><em>bv</em> specifies the rule's base value. To calculate the divisor, 1.252 + * let the radix be 10, and the exponent be the highest exponent of the radix that yields a 1.253 + * result less than or equal to the base value. Every > character after the base value 1.254 + * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix 1.255 + * raised to the power of the exponent; otherwise, the divisor is 1.</td> 1.256 + * </tr> 1.257 + * <tr> 1.258 + * <td><em>bv</em>/<em>rad</em>>:</td> 1.259 + * <td><em>bv</em> specifies the rule's base value. To calculate the divisor, 1.260 + * let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that 1.261 + * yields a result less than or equal to the base value. Every > character after the radix 1.262 + * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix 1.263 + * raised to the power of the exponent; otherwise, the divisor is 1.</td> 1.264 + * </tr> 1.265 + * <tr> 1.266 + * <td>-x:</td> 1.267 + * <td>The rule is a negative-number rule.</td> 1.268 + * </tr> 1.269 + * <tr> 1.270 + * <td>x.x:</td> 1.271 + * <td>The rule is an <em>improper fraction rule.</em></td> 1.272 + * </tr> 1.273 + * <tr> 1.274 + * <td>0.x:</td> 1.275 + * <td>The rule is a <em>proper fraction rule.</em></td> 1.276 + * </tr> 1.277 + * <tr> 1.278 + * <td>x.0:</td> 1.279 + * <td>The rule is a <em>master rule.</em></td> 1.280 + * </tr> 1.281 + * <tr> 1.282 + * <td><em>nothing</em></td> 1.283 + * <td>If the rule's rule descriptor is left out, the base value is one plus the 1.284 + * preceding rule's base value (or zero if this is the first rule in the list) in a normal 1.285 + * rule set. 
In a fraction rule set, the base value is the same as the preceding rule's 1.286 + * base value.</td> 1.287 + * </tr> 1.288 + * </table> 1.289 + * 1.290 + * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending 1.291 + * on whether it is used to format a number's integral part (or the whole number) or a 1.292 + * number's fractional part. Using a rule set to format a rule's fractional part makes it a 1.293 + * fraction rule set.</p> 1.294 + * 1.295 + * <p>Which rule is used to format a number is defined according to one of the following 1.296 + * algorithms: If the rule set is a regular rule set, do the following: 1.297 + * 1.298 + * <ul> 1.299 + * <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>), 1.300 + * use the master rule. (If the number being formatted was passed in as a <tt>long</tt>, 1.301 + * the master rule is ignored.)</li> 1.302 + * <li>If the number is negative, use the negative-number rule.</li> 1.303 + * <li>If the number has a fractional part and is greater than 1, use the improper fraction 1.304 + * rule.</li> 1.305 + * <li>If the number has a fractional part and is between 0 and 1, use the proper fraction 1.306 + * rule.</li> 1.307 + * <li>Binary-search the rule list for the rule with the highest base value less than or equal 1.308 + * to the number. If that rule has two substitutions, its base value is not an even multiple 1.309 + * of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the 1.310 + * rule that precedes it in the rule list. Otherwise, use the rule itself.</li> 1.311 + * </ul> 1.312 + * 1.313 + * <p>If the rule set is a fraction rule set, do the following: 1.314 + * 1.315 + * <ul> 1.316 + * <li>Ignore negative-number and fraction rules.</li> 1.317 + * <li>For each rule in the list, multiply the number being formatted (which will always be 1.318 + * between 0 and 1) by the rule's base value. 
Keep track of the distance between the result and 1.319 + * the nearest integer.</li> 1.320 + * <li>Use the rule that produced the result closest to zero in the above calculation. In the 1.321 + * event of a tie or a direct hit, use the first matching rule encountered. (The idea here is 1.322 + * to try each rule's base value as a possible denominator of a fraction. Whichever 1.323 + * denominator produces the fraction closest in value to the number being formatted wins.) If 1.324 + * the rule following the matching rule has the same base value, use it if the numerator of 1.325 + * the fraction is anything other than 1; if the numerator is 1, use the original matching 1.326 + * rule. (This is to allow singular and plural forms of the rule text without a lot of extra 1.327 + * hassle.)</li> 1.328 + * </ul> 1.329 + * 1.330 + * <p>A rule's body consists of a string of characters terminated by a semicolon. The rule 1.331 + * may include zero, one, or two <em>substitution tokens,</em> and a range of text in 1.332 + * brackets. The brackets denote optional text (and may also include one or both 1.333 + * substitutions). The exact meanings of the substitution tokens, and under what conditions 1.334 + * optional text is omitted, depend on the syntax of the substitution token and the context. 1.335 + * The rest of the text in a rule body is literal text that is output when the rule matches 1.336 + * the number being formatted.</p> 1.337 + * 1.338 + * <p>A substitution token begins and ends with a <em>token character.</em> The token 1.339 + * character and the context together specify a mathematical operation to be performed on the 1.340 + * number being formatted. An optional <em>substitution descriptor </em>specifies how the 1.341 + * value resulting from that operation is used to fill in the substitution. 
The position of 1.342 + * the substitution token in the rule body specifies the location of the resultant text in 1.343 + * the original rule text.</p> 1.344 + * 1.345 + * <p>The meanings of the substitution token characters are as follows:</p> 1.346 + * 1.347 + * <table border="0" width="100%"> 1.348 + * <tr> 1.349 + * <td>>></td> 1.350 + * <td>in normal rule</td> 1.351 + * <td>Divide the number by the rule's divisor and format the remainder</td> 1.352 + * </tr> 1.353 + * <tr> 1.354 + * <td></td> 1.355 + * <td>in negative-number rule</td> 1.356 + * <td>Find the absolute value of the number and format the result</td> 1.357 + * </tr> 1.358 + * <tr> 1.359 + * <td></td> 1.360 + * <td>in fraction or master rule</td> 1.361 + * <td>Isolate the number's fractional part and format it.</td> 1.362 + * </tr> 1.363 + * <tr> 1.364 + * <td></td> 1.365 + * <td>in rule in fraction rule set</td> 1.366 + * <td>Not allowed.</td> 1.367 + * </tr> 1.368 + * <tr> 1.369 + * <td>>>></td> 1.370 + * <td>in normal rule</td> 1.371 + * <td>Divide the number by the rule's divisor and format the remainder, 1.372 + * but bypass the normal rule-selection process and just use the 1.373 + * rule that precedes this one in this rule list.</td> 1.374 + * </tr> 1.375 + * <tr> 1.376 + * <td></td> 1.377 + * <td>in all other rules</td> 1.378 + * <td>Not allowed.</td> 1.379 + * </tr> 1.380 + * <tr> 1.381 + * <td><<</td> 1.382 + * <td>in normal rule</td> 1.383 + * <td>Divide the number by the rule's divisor and format the quotient</td> 1.384 + * </tr> 1.385 + * <tr> 1.386 + * <td></td> 1.387 + * <td>in negative-number rule</td> 1.388 + * <td>Not allowed.</td> 1.389 + * </tr> 1.390 + * <tr> 1.391 + * <td></td> 1.392 + * <td>in fraction or master rule</td> 1.393 + * <td>Isolate the number's integral part and format it.</td> 1.394 + * </tr> 1.395 + * <tr> 1.396 + * <td></td> 1.397 + * <td>in rule in fraction rule set</td> 1.398 + * <td>Multiply the number by the rule's base value and format the result.</td> 
1.399 + * </tr> 1.400 + * <tr> 1.401 + * <td>==</td> 1.402 + * <td>in all rule sets</td> 1.403 + * <td>Format the number unchanged</td> 1.404 + * </tr> 1.405 + * <tr> 1.406 + * <td>[]</td> 1.407 + * <td>in normal rule</td> 1.408 + * <td>Omit the optional text if the number is an even multiple of the rule's divisor</td> 1.409 + * </tr> 1.410 + * <tr> 1.411 + * <td></td> 1.412 + * <td>in negative-number rule</td> 1.413 + * <td>Not allowed.</td> 1.414 + * </tr> 1.415 + * <tr> 1.416 + * <td></td> 1.417 + * <td>in improper-fraction rule</td> 1.418 + * <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an 1.419 + * x.x rule and a 0.x rule)</td> 1.420 + * </tr> 1.421 + * <tr> 1.422 + * <td></td> 1.423 + * <td>in master rule</td> 1.424 + * <td>Omit the optional text if the number is an integer (same as specifying both an x.x 1.425 + * rule and an x.0 rule)</td> 1.426 + * </tr> 1.427 + * <tr> 1.428 + * <td></td> 1.429 + * <td>in proper-fraction rule</td> 1.430 + * <td>Not allowed.</td> 1.431 + * </tr> 1.432 + * <tr> 1.433 + * <td></td> 1.434 + * <td>in rule in fraction rule set</td> 1.435 + * <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td> 1.436 + * </tr> 1.437 + * </table> 1.438 + * 1.439 + * <p>The substitution descriptor (i.e., the text between the token characters) may take one 1.440 + * of three forms:</p> 1.441 + * 1.442 + * <table border="0" width="100%"> 1.443 + * <tr> 1.444 + * <td>a rule set name</td> 1.445 + * <td>Perform the mathematical operation on the number, and format the result using the 1.446 + * named rule set.</td> 1.447 + * </tr> 1.448 + * <tr> 1.449 + * <td>a DecimalFormat pattern</td> 1.450 + * <td>Perform the mathematical operation on the number, and format the result using a 1.451 + * DecimalFormat with the specified pattern. 
The pattern must begin with 0 or #.</td> 1.452 + * </tr> 1.453 + * <tr> 1.454 + * <td>nothing</td> 1.455 + * <td>Perform the mathematical operation on the number, and format the result using the rule 1.456 + * set containing the current rule, except: 1.457 + * <ul> 1.458 + * <li>You can't have an empty substitution descriptor with a == substitution.</li> 1.459 + * <li>If you omit the substitution descriptor in a >> substitution in a fraction rule, 1.460 + * format the result one digit at a time using the rule set containing the current rule.</li> 1.461 + * <li>If you omit the substitution descriptor in a << substitution in a rule in a 1.462 + * fraction rule set, format the result using the default rule set for this formatter.</li> 1.463 + * </ul> 1.464 + * </td> 1.465 + * </tr> 1.466 + * </table> 1.467 + * 1.468 + * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule 1.469 + * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe, 1.470 + * the apostrophe is ignored, but all text after it becomes significant (this is how you can 1.471 + * have a rule's rule text begin with whitespace). There is no escape function: the semicolon 1.472 + * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set 1.473 + * names. The characters beginning a substitution token are always treated as the beginning 1.474 + * of a substitution token.</p> 1.475 + * 1.476 + * <p>See the resource data and the demo program for annotated examples of real rule sets 1.477 + * using these features.</p> 1.478 + * 1.479 + * <p><em>User subclasses are not supported.</em> While clients may write 1.480 + * subclasses, such code will not necessarily work and will not be 1.481 + * guaranteed to work stably from release to release. 
1.482 + * 1.483 + * <p><b>Localizations</b></p> 1.484 + * <p>Constructors are available that allow the specification of localizations for the 1.485 + * public rule sets (and also allow more control over what public rule sets are available). 1.486 + * Localization data is represented as a textual description. The description represents 1.487 + * an array of arrays of strings. The first element is an array of the public rule set names, 1.488 + * each of these must be one of the public rule set names that appear in the rules. Only 1.489 + * names in this array will be treated as public rule set names by the API. Each subsequent 1.490 + * element is an array of localizations of these names. The first element of one of these 1.491 + * subarrays is the locale name, and the remaining elements are localizations of the 1.492 + * public rule set names, in the same order as they were listed in the first array.</p> 1.493 + * <p>In the syntax, angle brackets '<', '>' are used to delimit the arrays, and comma ',' is used 1.494 + * to separate elements of an array. Whitespace is ignored, unless quoted.</p> 1.495 + * <p>For example:<pre> 1.496 + * < < %foo, %bar, %baz >, 1.497 + * < en, Foo, Bar, Baz >, 1.498 + * < fr, 'le Foo', 'le Bar', 'le Baz' >, 1.499 + * < zh, \\u7532, \\u4e59, \\u4e19 > > 1.500 + * </pre></p> 1.501 + * @author Richard Gillam 1.502 + * @see NumberFormat 1.503 + * @see DecimalFormat 1.504 + * @stable ICU 2.0 1.505 + */ 1.506 +class U_I18N_API RuleBasedNumberFormat : public NumberFormat { 1.507 +public: 1.508 + 1.509 + //----------------------------------------------------------------------- 1.510 + // constructors 1.511 + //----------------------------------------------------------------------- 1.512 + 1.513 + /** 1.514 + * Creates a RuleBasedNumberFormat that behaves according to the description 1.515 + * passed in. The formatter uses the default locale. 1.516 + * @param rules A description of the formatter's desired behavior. 
1.517 + * See the class documentation for a complete explanation of the description 1.518 + * syntax. 1.519 + * @param perror The parse error if an error was encountered. 1.520 + * @param status The status indicating whether the constructor succeeded. 1.521 + * @stable ICU 3.2 1.522 + */ 1.523 + RuleBasedNumberFormat(const UnicodeString& rules, UParseError& perror, UErrorCode& status); 1.524 + 1.525 + /** 1.526 + * Creates a RuleBasedNumberFormat that behaves according to the description 1.527 + * passed in. The formatter uses the default locale. 1.528 + * <p> 1.529 + * The localizations data provides information about the public 1.530 + * rule sets and their localized display names for different 1.531 + * locales. The first element in the list is an array of the names 1.532 + * of the public rule sets. The first element in this array is 1.533 + * the initial default ruleset. The remaining elements in the 1.534 + * list are arrays of localizations of the names of the public 1.535 + * rule sets. Each of these is one longer than the initial array, 1.536 + * with the first String being the ULocale ID, and the remaining 1.537 + * Strings being the localizations of the rule set names, in the 1.538 + * same order as the initial array. Arrays are NULL-terminated. 1.539 + * @param rules A description of the formatter's desired behavior. 1.540 + * See the class documentation for a complete explanation of the description 1.541 + * syntax. 1.542 + * @param localizations a list of localizations for the rule set 1.543 + * names in the description. These will be copied by the constructor. 1.544 + * @param perror The parse error if an error was encountered. 1.545 + * @param status The status indicating whether the constructor succeeded. 
1.546 + * @stable ICU 3.2 1.547 + */ 1.548 + RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations, 1.549 + UParseError& perror, UErrorCode& status); 1.550 + 1.551 + /** 1.552 + * Creates a RuleBasedNumberFormat that behaves according to the rules 1.553 + * passed in. The formatter uses the specified locale to determine the 1.554 + * characters to use when formatting numerals, and to define equivalences 1.555 + * for lenient parsing. 1.556 + * @param rules The formatter rules. 1.557 + * See the class documentation for a complete explanation of the rule 1.558 + * syntax. 1.559 + * @param locale A locale that governs which characters are used for 1.560 + * formatting values in numerals and which characters are equivalent in 1.561 + * lenient parsing. 1.562 + * @param perror The parse error if an error was encountered. 1.563 + * @param status The status indicating whether the constructor succeeded. 1.564 + * @stable ICU 2.0 1.565 + */ 1.566 + RuleBasedNumberFormat(const UnicodeString& rules, const Locale& locale, 1.567 + UParseError& perror, UErrorCode& status); 1.568 + 1.569 + /** 1.570 + * Creates a RuleBasedNumberFormat that behaves according to the description 1.571 + * passed in. The formatter uses the specified locale. 1.572 + * <p> 1.573 + * The localizations data provides information about the public 1.574 + * rule sets and their localized display names for different 1.575 + * locales. The first element in the list is an array of the names 1.576 + * of the public rule sets. The first element in this array is 1.577 + * the initial default ruleset. The remaining elements in the 1.578 + * list are arrays of localizations of the names of the public 1.579 + * rule sets. Each of these is one longer than the initial array, 1.580 + * with the first String being the ULocale ID, and the remaining 1.581 + * Strings being the localizations of the rule set names, in the 1.582 + * same order as the initial array. Arrays are NULL-terminated. 
1.583 + * @param rules A description of the formatter's desired behavior. 1.584 + * See the class documentation for a complete explanation of the description 1.585 + * syntax. 1.586 + * @param localizations a list of localizations for the rule set 1.587 + * names in the description. These will be copied by the constructor. 1.588 + * @param locale A locale that governs which characters are used for 1.589 + * formatting values in numerals and which characters are equivalent in 1.590 + * lenient parsing. 1.591 + * @param perror The parse error if an error was encountered. 1.592 + * @param status The status indicating whether the constructor succeeded. 1.593 + * @stable ICU 3.2 1.594 + */ 1.595 + RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations, 1.596 + const Locale& locale, UParseError& perror, UErrorCode& status); 1.597 + 1.598 + /** 1.599 + * Creates a RuleBasedNumberFormat from a predefined ruleset. The selector 1.600 + * code chooses among four possible predefined formats: spellout, ordinal, 1.601 + * duration, and numbering system. 1.602 + * @param tag A selector code specifying which kind of formatter to create for that 1.603 + * locale. There are four legal values: URBNF_SPELLOUT, which creates a formatter that 1.604 + * spells out a value in words in the desired language, URBNF_ORDINAL, which attaches 1.605 + * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"), 1.606 + * URBNF_DURATION, which formats a duration in seconds as hours, minutes, and seconds, 1.607 + * and URBNF_NUMBERING_SYSTEM, which is used to invoke rules for alternate numbering 1.608 + * systems such as the Hebrew numbering system, or for Roman Numerals, etc. 1.609 + * @param locale The locale for the formatter. 1.610 + * @param status The status indicating whether the constructor succeeded. 
1.611 + * @stable ICU 2.0 1.612 + */ 1.613 + RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& locale, UErrorCode& status); 1.614 + 1.615 + //----------------------------------------------------------------------- 1.616 + // boilerplate 1.617 + //----------------------------------------------------------------------- 1.618 + 1.619 + /** 1.620 + * Copy constructor 1.621 + * @param rhs the object to be copied from. 1.622 + * @stable ICU 2.6 1.623 + */ 1.624 + RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs); 1.625 + 1.626 + /** 1.627 + * Assignment operator 1.628 + * @param rhs the object to be copied from. 1.629 + * @stable ICU 2.6 1.630 + */ 1.631 + RuleBasedNumberFormat& operator=(const RuleBasedNumberFormat& rhs); 1.632 + 1.633 + /** 1.634 + * Release memory allocated for a RuleBasedNumberFormat when you are finished with it. 1.635 + * @stable ICU 2.6 1.636 + */ 1.637 + virtual ~RuleBasedNumberFormat(); 1.638 + 1.639 + /** 1.640 + * Clone this object polymorphically. The caller is responsible 1.641 + * for deleting the result when done. 1.642 + * @return A copy of the object. 1.643 + * @stable ICU 2.6 1.644 + */ 1.645 + virtual Format* clone(void) const; 1.646 + 1.647 + /** 1.648 + * Return true if the given Format objects are semantically equal. 1.649 + * Objects of different subclasses are considered unequal. 1.650 + * @param other the object to be compared with. 1.651 + * @return true if the given Format objects are semantically equal. 1.652 + * @stable ICU 2.6 1.653 + */ 1.654 + virtual UBool operator==(const Format& other) const; 1.655 + 1.656 +//----------------------------------------------------------------------- 1.657 +// public API functions 1.658 +//----------------------------------------------------------------------- 1.659 + 1.660 + /** 1.661 + * return the rules that were provided to the RuleBasedNumberFormat. 
1.662 + * @return the result String that was passed in 1.663 + * @stable ICU 2.0 1.664 + */ 1.665 + virtual UnicodeString getRules() const; 1.666 + 1.667 + /** 1.668 + * Return the number of public rule set names. 1.669 + * @return the number of public rule set names. 1.670 + * @stable ICU 2.0 1.671 + */ 1.672 + virtual int32_t getNumberOfRuleSetNames() const; 1.673 + 1.674 + /** 1.675 + * Return the name of the index'th public ruleSet. If index is not valid, 1.676 + * the function returns null. (NOTE(review): the return type is a UnicodeString value, so "null" presumably means an empty or bogus string; confirm against the implementation.) 1.677 + * @param index the index of the ruleset 1.678 + * @return the name of the index'th public ruleSet. 1.679 + * @stable ICU 2.0 1.680 + */ 1.681 + virtual UnicodeString getRuleSetName(int32_t index) const; 1.682 + 1.683 + /** 1.684 + * Return the number of locales for which we have localized rule set display names. 1.685 + * @return the number of locales for which we have localized rule set display names. 1.686 + * @stable ICU 3.2 1.687 + */ 1.688 + virtual int32_t getNumberOfRuleSetDisplayNameLocales(void) const; 1.689 + 1.690 + /** 1.691 + * Return the index'th display name locale. 1.692 + * @param index the index of the locale 1.693 + * @param status set to a failure code when this function fails 1.694 + * @return the locale 1.695 + * @see #getNumberOfRuleSetDisplayNameLocales 1.696 + * @stable ICU 3.2 1.697 + */ 1.698 + virtual Locale getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const; 1.699 + 1.700 + /** 1.701 + * Return the display name of the rule set at the given index for the provided locale. Indexes are in the same order 1.702 + * as those used by getRuleSetName. The locale is matched against the locales for 1.703 + * which there is display name data, using normal fallback rules. If no locale matches, 1.704 + * the default display name is returned. (This is the internal rule set name minus 1.705 + * the leading '%'.)
1.706 + * @param index the index of the rule set 1.707 + * @param locale the locale (returned by getRuleSetDisplayNameLocales) for which the localized 1.708 + * display name is desired 1.709 + * @return the display name for the given index, which might be bogus if there is an error 1.710 + * @see #getRuleSetName 1.711 + * @stable ICU 3.2 1.712 + */ 1.713 + virtual UnicodeString getRuleSetDisplayName(int32_t index, 1.714 + const Locale& locale = Locale::getDefault()); 1.715 + 1.716 + /** 1.717 + * Return the rule set display name for the provided rule set and locale. 1.718 + * The locale is matched against the locales for which there is display name data, using 1.719 + * normal fallback rules. If no locale matches, the default display name is returned. * @param ruleSetName the name of the rule set whose display name is desired * @param locale the locale for which the localized display name is desired; defaults to Locale::getDefault() 1.720 + * @return the display name for the rule set 1.721 + * @stable ICU 3.2 1.722 + * @see #getRuleSetDisplayName 1.723 + */ 1.724 + virtual UnicodeString getRuleSetDisplayName(const UnicodeString& ruleSetName, 1.725 + const Locale& locale = Locale::getDefault()); 1.726 + 1.727 + 1.728 + using NumberFormat::format; 1.729 + 1.730 + /** 1.731 + * Formats the specified 32-bit number using the default ruleset. 1.732 + * @param number The number to format. 1.733 + * @param toAppendTo the string that will hold the (appended) result 1.734 + * @param pos the fieldposition 1.735 + * @return A textual representation of the number. 1.736 + * @stable ICU 2.0 1.737 + */ 1.738 + virtual UnicodeString& format(int32_t number, 1.739 + UnicodeString& toAppendTo, 1.740 + FieldPosition& pos) const; 1.741 + 1.742 + /** 1.743 + * Formats the specified 64-bit number using the default ruleset. 1.744 + * @param number The number to format. 1.745 + * @param toAppendTo the string that will hold the (appended) result 1.746 + * @param pos the fieldposition 1.747 + * @return A textual representation of the number.
1.748 + * @stable ICU 2.1 1.749 + */ 1.750 + virtual UnicodeString& format(int64_t number, 1.751 + UnicodeString& toAppendTo, 1.752 + FieldPosition& pos) const; 1.753 + /** 1.754 + * Formats the specified double-precision number using the default ruleset. 1.755 + * @param number The number to format. 1.756 + * @param toAppendTo the string that will hold the (appended) result 1.757 + * @param pos the fieldposition 1.758 + * @return A textual representation of the number. 1.759 + * @stable ICU 2.0 1.760 + */ 1.761 + virtual UnicodeString& format(double number, 1.762 + UnicodeString& toAppendTo, 1.763 + FieldPosition& pos) const; 1.764 + 1.765 + /** 1.766 + * Formats the specified 32-bit number using the named ruleset. 1.767 + * @param number The number to format. 1.768 + * @param ruleSetName The name of the rule set to format the number with. 1.769 + * This must be the name of a valid public rule set for this formatter. 1.770 + * @param toAppendTo the string that will hold the (appended) result 1.771 + * @param pos the fieldposition 1.772 + * @param status the status 1.773 + * @return A textual representation of the number. 1.774 + * @stable ICU 2.0 1.775 + */ 1.776 + virtual UnicodeString& format(int32_t number, 1.777 + const UnicodeString& ruleSetName, 1.778 + UnicodeString& toAppendTo, 1.779 + FieldPosition& pos, 1.780 + UErrorCode& status) const; 1.781 + /** 1.782 + * Formats the specified 64-bit number using the named ruleset. 1.783 + * @param number The number to format. 1.784 + * @param ruleSetName The name of the rule set to format the number with. 1.785 + * This must be the name of a valid public rule set for this formatter. 1.786 + * @param toAppendTo the string that will hold the (appended) result 1.787 + * @param pos the fieldposition 1.788 + * @param status the status 1.789 + * @return A textual representation of the number.
1.790 + * @stable ICU 2.1 1.791 + */ 1.792 + virtual UnicodeString& format(int64_t number, 1.793 + const UnicodeString& ruleSetName, 1.794 + UnicodeString& toAppendTo, 1.795 + FieldPosition& pos, 1.796 + UErrorCode& status) const; 1.797 + /** 1.798 + * Formats the specified double-precision number using the named ruleset. 1.799 + * @param number The number to format. 1.800 + * @param ruleSetName The name of the rule set to format the number with. 1.801 + * This must be the name of a valid public rule set for this formatter. 1.802 + * @param toAppendTo the string that will hold the (appended) result 1.803 + * @param pos the fieldposition 1.804 + * @param status the status 1.805 + * @return A textual representation of the number. 1.806 + * @stable ICU 2.0 1.807 + */ 1.808 + virtual UnicodeString& format(double number, 1.809 + const UnicodeString& ruleSetName, 1.810 + UnicodeString& toAppendTo, 1.811 + FieldPosition& pos, 1.812 + UErrorCode& status) const; 1.813 + 1.814 + using NumberFormat::parse; 1.815 + 1.816 + /** 1.817 + * Parses the specified string, beginning at the specified position, according 1.818 + * to this formatter's rules. This will match the string against all of the 1.819 + * formatter's public rule sets and return the value corresponding to the longest 1.820 + * parseable substring. This function's behavior is affected by the lenient 1.821 + * parse mode. 1.822 + * @param text The string to parse 1.823 + * @param result the result of the parse, either a double or a long. 1.824 + * @param parsePosition On entry, contains the position of the first character 1.825 + * in "text" to examine. On exit, has been updated to contain the position 1.826 + * of the first character in "text" that wasn't consumed by the parse.
1.827 + * @see #setLenient 1.828 + * @stable ICU 2.0 1.829 + */ 1.830 + virtual void parse(const UnicodeString& text, 1.831 + Formattable& result, 1.832 + ParsePosition& parsePosition) const; 1.833 + 1.834 +#if !UCONFIG_NO_COLLATION 1.835 + 1.836 + /** 1.837 + * Turns lenient parse mode on or off. 1.838 + * 1.839 + * When in lenient parse mode, the formatter uses a Collator for parsing the text. 1.840 + * Only primary differences are treated as significant. This means that case 1.841 + * differences, accent differences, alternate spellings of the same letter 1.842 + * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in 1.843 + * matching the text. In many cases, numerals will be accepted in place of words 1.844 + * or phrases as well. 1.845 + * 1.846 + * For example, all of the following will correctly parse as 255 in English in 1.847 + * lenient-parse mode: 1.848 + * <br>"two hundred fifty-five" 1.849 + * <br>"two hundred fifty five" 1.850 + * <br>"TWO HUNDRED FIFTY-FIVE" 1.851 + * <br>"twohundredfiftyfive" 1.852 + * <br>"2 hundred fifty-5" 1.853 + * 1.854 + * The Collator used is determined by the locale that was 1.855 + * passed to this object on construction. The description passed to this object 1.856 + * on construction may supply additional collation rules that are appended to the 1.857 + * end of the default collator for the locale, enabling additional equivalences 1.858 + * (such as adding more ignorable characters or permitting spelled-out versions of 1.859 + * symbols; see the demo program for examples). 1.860 + * 1.861 + * It's important to emphasize that even strict parsing is relatively lenient: it 1.862 + * will accept some text that it won't produce as output. In English, for example, 1.863 + * it will correctly parse "two hundred zero" and "fifteen hundred". 1.864 + * 1.865 + * @param enabled If true, turns lenient-parse mode on; if false, turns it off.
1.866 + * @see RuleBasedCollator 1.867 + * @stable ICU 2.0 1.868 + */ 1.869 + virtual void setLenient(UBool enabled); 1.870 + 1.871 + /** 1.872 + * Returns true if lenient-parse mode is turned on. Lenient parsing is off 1.873 + * by default. 1.874 + * @return true if lenient-parse mode is turned on. 1.875 + * @see #setLenient 1.876 + * @stable ICU 2.0 1.877 + */ 1.878 + virtual inline UBool isLenient(void) const; 1.879 + 1.880 +#endif 1.881 + 1.882 + /** 1.883 + * Override the default rule set to use. If ruleSetName is null, reset 1.884 + * to the initial default rule set. If ruleSetName is not the name of a public rule set, 1.885 + * U_ILLEGAL_ARGUMENT_ERROR is returned in status. (NOTE(review): ruleSetName is a reference, so "null" presumably means an empty or bogus string; confirm against the implementation.) 1.886 + * @param ruleSetName the name of the rule set, or null to reset to the initial default. 1.887 + * @param status set to a failure code when a problem occurs. 1.888 + * @stable ICU 2.6 1.889 + */ 1.890 + virtual void setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status); 1.891 + 1.892 + /** 1.893 + * Return the name of the current default rule set. If the current rule set is 1.894 + * not public, returns a bogus (and empty) UnicodeString. 1.895 + * @return the name of the current default rule set 1.896 + * @stable ICU 3.0 1.897 + */ 1.898 + virtual UnicodeString getDefaultRuleSetName() const; 1.899 + 1.900 +public: 1.901 + /** 1.902 + * ICU "poor man's RTTI", returns a UClassID for this class. 1.903 + * 1.904 + * @stable ICU 2.8 1.905 + */ 1.906 + static UClassID U_EXPORT2 getStaticClassID(void); 1.907 + 1.908 + /** 1.909 + * ICU "poor man's RTTI", returns a UClassID for the actual class. 1.910 + * 1.911 + * @stable ICU 2.8 1.912 + */ 1.913 + virtual UClassID getDynamicClassID(void) const; 1.914 + 1.915 + /** 1.916 + * Sets the decimal format symbols, which are generally not changed 1.917 + * by the programmer or user. The formatter takes ownership of 1.918 + * symbolsToAdopt; the client must not delete it.
1.919 + * 1.920 + * @param symbolsToAdopt DecimalFormatSymbols to be adopted. 1.921 + * @stable ICU 49 1.922 + */ 1.923 + virtual void adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt); 1.924 + 1.925 + /** 1.926 + * Sets the decimal format symbols, which are generally not changed 1.927 + * by the programmer or user. A clone of the symbols is created and 1.928 + * the original is _not_ adopted; the client remains responsible for 1.929 + * deleting it. 1.930 + * 1.931 + * @param symbols the DecimalFormatSymbols to clone. 1.932 + * @stable ICU 49 1.933 + */ 1.934 + virtual void setDecimalFormatSymbols(const DecimalFormatSymbols& symbols); 1.935 + 1.936 +private: 1.937 + RuleBasedNumberFormat(); // default constructor not implemented 1.938 + 1.939 + // this will ref the localizations if they are not NULL 1.940 + // caller must deref to get adoption 1.941 + RuleBasedNumberFormat(const UnicodeString& description, LocalizationInfo* localizations, 1.942 + const Locale& locale, UParseError& perror, UErrorCode& status); 1.943 + 1.944 + void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status); 1.945 + void dispose(); 1.946 + void stripWhitespace(UnicodeString& src); 1.947 + void initDefaultRuleSet(); 1.948 + void format(double number, NFRuleSet& ruleSet); 1.949 + NFRuleSet* findRuleSet(const UnicodeString& name, UErrorCode& status) const; 1.950 + 1.951 + /* friend access */ 1.952 + friend class NFSubstitution; 1.953 + friend class NFRule; 1.954 + friend class FractionalPartSubstitution; 1.955 + 1.956 + inline NFRuleSet * getDefaultRuleSet() const; 1.957 + Collator * getCollator() const; 1.958 + DecimalFormatSymbols * getDecimalFormatSymbols() const; 1.959 + 1.960 +private: 1.961 + NFRuleSet **ruleSets; 1.962 + UnicodeString* ruleSetDescriptions; 1.963 + int32_t numRuleSets; 1.964 + NFRuleSet *defaultRuleSet; 1.965 + Locale locale; 1.966 + Collator* collator; 1.967 + DecimalFormatSymbols* decimalFormatSymbols; 1.968 + 
UBool lenient; 1.969 + UnicodeString* lenientParseRules; 1.970 + LocalizationInfo* localizations; 1.971 +}; 1.972 + 1.973 +// --------------- 1.974 + 1.975 +#if !UCONFIG_NO_COLLATION 1.976 + /** Inline definition: returns the value of the lenient member. */ 1.977 +inline UBool 1.978 +RuleBasedNumberFormat::isLenient(void) const { 1.979 + return lenient; 1.980 +} 1.981 + 1.982 +#endif 1.983 + /** Inline definition: returns the defaultRuleSet member pointer. */ 1.984 +inline NFRuleSet* 1.985 +RuleBasedNumberFormat::getDefaultRuleSet() const { 1.986 + return defaultRuleSet; 1.987 +} 1.988 + 1.989 +U_NAMESPACE_END 1.990 + 1.991 +/* U_HAVE_RBNF */ 1.992 +#endif 1.993 + 1.994 +/* RBNF_H */ 1.995 +#endif