|
1 /* |
|
2 ******************************************************************************* |
|
3 * Copyright (C) 2011-2013, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ******************************************************************************* |
|
6 * file name: messagepattern.h |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2011mar14 |
|
12 * created by: Markus W. Scherer |
|
13 */ |
|
14 |
|
15 #ifndef __MESSAGEPATTERN_H__ |
|
16 #define __MESSAGEPATTERN_H__ |
|
17 |
|
18 /** |
|
19 * \file |
|
20 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. |
|
21 */ |
|
22 |
|
23 #include "unicode/utypes.h" |
|
24 |
|
25 #if !UCONFIG_NO_FORMATTING |
|
26 |
|
27 #include "unicode/parseerr.h" |
|
28 #include "unicode/unistr.h" |
|
29 |
|
/**
 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
 * <p>
 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
 * even when the pair is between two single, text-quoting apostrophes.
 * <p>
 * The following table shows examples of desired MessageFormat.format() output
 * with the pattern strings that yield that output.
 * <p>
 * <table>
 *   <tr>
 *     <th>Desired output</th>
 *     <th>DOUBLE_OPTIONAL</th>
 *     <th>DOUBLE_REQUIRED</th>
 *   </tr>
 *   <tr>
 *     <td>I see {many}</td>
 *     <td>I see '{many}'</td>
 *     <td>(same)</td>
 *   </tr>
 *   <tr>
 *     <td>I said {'Wow!'}</td>
 *     <td>I said '{''Wow!''}'</td>
 *     <td>(same)</td>
 *   </tr>
 *   <tr>
 *     <td>I don't know</td>
 *     <td>I don't know OR<br> I don''t know</td>
 *     <td>I don''t know</td>
 *   </tr>
 * </table>
 * @stable ICU 4.8
 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
 */
enum UMessagePatternApostropheMode {
    /**
     * A literal apostrophe is represented by
     * either a single or a double apostrophe pattern character.
     * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
     * if it immediately precedes a curly brace {},
     * or a pipe symbol | if inside a choice format,
     * or a pound symbol # if inside a plural format.
     * <p>
     * This is the default behavior starting with ICU 4.8.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_OPTIONAL,
    /**
     * A literal apostrophe must be represented by
     * a double apostrophe pattern character.
     * A single apostrophe always starts quoted literal text.
     * <p>
     * This is the behavior of ICU 4.6 and earlier, and of the JDK.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_REQUIRED
};
/**
 * @stable ICU 4.8
 */
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
|
93 |
|
/**
 * MessagePattern::Part type constants.
 * @stable ICU 4.8
 */
enum UMessagePatternPartType {
    /**
     * Start of a message pattern (main or nested).
     * The length is 0 for the top-level message
     * and for a choice argument sub-message, otherwise 1 for the '{'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * <p>
     * There is always a later MSG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_START,
    /**
     * End of a message pattern (main or nested).
     * The length is 0 for the top-level message and
     * the last sub-message of a choice argument,
     * otherwise 1 for the '}' or (in a choice argument style) the '|'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_LIMIT,
    /**
     * Indicates a substring of the pattern string which is to be skipped when formatting.
     * For example, an apostrophe that begins or ends quoted text
     * would be indicated with such a part.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX,
    /**
     * Indicates that a syntax character needs to be inserted for auto-quoting.
     * The length is 0.
     * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_INSERT_CHAR,
    /**
     * Indicates a syntactic (non-escaped) # symbol in a plural variant.
     * When formatting, replace this part's substring with the
     * (value-offset) for the plural argument value.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER,
    /**
     * Start of an argument.
     * The length is 1 for the '{'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * <p>
     * This part is followed by either an ARG_NUMBER or ARG_NAME,
     * followed by optional argument sub-parts (see UMessagePatternArgType constants)
     * and finally an ARG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_START,
    /**
     * End of an argument.
     * The length is 1 for the '}'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_LIMIT,
    /**
     * The argument number, provided by the value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NUMBER,
    /**
     * The argument name.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NAME,
    /**
     * The argument type.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_TYPE,
    /**
     * The argument style text.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_STYLE,
    /**
     * A selector substring in a "complex" argument style.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_SELECTOR,
    /**
     * An integer value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is the integer value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_INT,
    /**
     * A numeric value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is an index into an internal array of numeric values;
     * use getNumericValue().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_DOUBLE
};
/**
 * @stable ICU 4.8
 */
typedef enum UMessagePatternPartType UMessagePatternPartType;
|
208 |
|
/**
 * Argument type constants.
 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
 *
 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
 * with a nesting level one greater than the surrounding message.
 * @stable ICU 4.8
 */
enum UMessagePatternArgType {
    /**
     * The argument has no specified type.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_NONE,
    /**
     * The argument has a "simple" type which is provided by the ARG_TYPE part.
     * An ARG_STYLE part might follow that.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SIMPLE,
    /**
     * The argument is a ChoiceFormat with one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_CHOICE,
    /**
     * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
     * (e.g., offset:1)
     * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * If the selector has an explicit value (e.g., =2), then
     * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
     * Otherwise the message immediately follows the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_PLURAL,
    /**
     * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SELECT,
    /**
     * The argument is an ordinal-number PluralFormat
     * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
     * @stable ICU 50
     */
    UMSGPAT_ARG_TYPE_SELECTORDINAL
};
/**
 * @stable ICU 4.8
 */
typedef enum UMessagePatternArgType UMessagePatternArgType;
|
261 |
|
/**
 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
 * Returns TRUE if the argument type has a plural style part sequence and semantics,
 * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
 * <p>
 * Note: the macro argument is evaluated twice, so do not pass an expression
 * with side effects.
 * @stable ICU 50
 */
#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
|
270 |
|
enum {
    /**
     * Return value from MessagePattern.validateArgumentName() for when
     * the string is a valid "pattern identifier" but not a number.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_NUMBER=-1,

    /**
     * Return value from MessagePattern.validateArgumentName() for when
     * the string is invalid.
     * It might not be a valid "pattern identifier",
     * or it might have only ASCII digits but with a leading zero,
     * or the number might be too large.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_VALID=-2
};
|
288 |
|
/**
 * Special value that is returned by getNumericValue(Part) when no
 * numeric value is defined for a part.
 * @see MessagePattern.getNumericValue()
 * @stable ICU 4.8
 */
#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
|
296 |
|
297 U_NAMESPACE_BEGIN |
|
298 |
|
299 class MessagePatternDoubleList; |
|
300 class MessagePatternPartsList; |
|
301 |
|
302 /** |
|
303 * Parses and represents ICU MessageFormat patterns. |
|
304 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. |
|
305 * Used in the implementations of those classes as well as in tools |
|
306 * for message validation, translation and format conversion. |
|
307 * <p> |
|
308 * The parser handles all syntax relevant for identifying message arguments. |
|
309 * This includes "complex" arguments whose style strings contain |
|
310 * nested MessageFormat pattern substrings. |
|
311 * For "simple" arguments (with no nested MessageFormat pattern substrings), |
|
312 * the argument style is not parsed any further. |
|
313 * <p> |
|
314 * The parser handles named and numbered message arguments and allows both in one message. |
|
315 * <p> |
|
316 * Once a pattern has been parsed successfully, iterate through the parsed data |
|
317 * with countParts(), getPart() and related methods. |
|
318 * <p> |
|
319 * The data logically represents a parse tree, but is stored and accessed |
|
320 * as a list of "parts" for fast and simple parsing and to minimize object allocations. |
|
321 * Arguments and nested messages are best handled via recursion. |
|
322 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns |
|
323 * the index of the corresponding _LIMIT "part". |
|
324 * <p> |
|
325 * List of "parts": |
|
326 * <pre> |
|
327 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT |
|
328 * argument = noneArg | simpleArg | complexArg |
|
 * complexArg = choiceArg | pluralArg | selectArg | selectordinalArg
 *
 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
 * selectordinalArg = ARG_START.SELECTORDINAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.SELECTORDINAL
|
336 * |
|
337 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ |
|
338 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ |
|
339 * selectStyle = (ARG_SELECTOR message)+ |
|
340 * </pre> |
|
341 * <ul> |
|
342 * <li>Literal output text is not represented directly by "parts" but accessed |
|
343 * between parts of a message, from one part's getLimit() to the next part's getIndex(). |
|
344 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. |
|
345 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or |
|
346 * the less-than-or-equal-to sign (U+2264). |
|
347 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. |
|
348 * The optional numeric Part between each (ARG_SELECTOR, message) pair |
|
349 * is the value of an explicit-number selector like "=2", |
|
350 * otherwise the selector is a non-numeric identifier. |
|
351 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. |
|
352 * </ul> |
|
353 * <p> |
|
354 * This class is not intended for public subclassing. |
|
355 * |
|
356 * @stable ICU 4.8 |
|
357 */ |
|
358 class U_COMMON_API MessagePattern : public UObject { |
|
359 public: |
|
360 /** |
|
361 * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. |
|
362 * @param errorCode Standard ICU error code. Its input value must |
|
363 * pass the U_SUCCESS() test, or else the function returns |
|
364 * immediately. Check for U_FAILURE() on output or use with |
|
365 * function chaining. (See User Guide for details.) |
|
366 * @stable ICU 4.8 |
|
367 */ |
|
368 MessagePattern(UErrorCode &errorCode); |
|
369 |
|
370 /** |
|
371 * Constructs an empty MessagePattern. |
|
372 * @param mode Explicit UMessagePatternApostropheMode. |
|
373 * @param errorCode Standard ICU error code. Its input value must |
|
374 * pass the U_SUCCESS() test, or else the function returns |
|
375 * immediately. Check for U_FAILURE() on output or use with |
|
376 * function chaining. (See User Guide for details.) |
|
377 * @stable ICU 4.8 |
|
378 */ |
|
379 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); |
|
380 |
|
381 /** |
|
382 * Constructs a MessagePattern with default UMessagePatternApostropheMode and |
|
383 * parses the MessageFormat pattern string. |
|
384 * @param pattern a MessageFormat pattern string |
|
385 * @param parseError Struct to receive information on the position |
|
386 * of an error within the pattern. |
|
387 * Can be NULL. |
|
388 * @param errorCode Standard ICU error code. Its input value must |
|
389 * pass the U_SUCCESS() test, or else the function returns |
|
390 * immediately. Check for U_FAILURE() on output or use with |
|
391 * function chaining. (See User Guide for details.) |
|
392 * TODO: turn @throws into UErrorCode specifics? |
|
393 * @throws IllegalArgumentException for syntax errors in the pattern string |
|
394 * @throws IndexOutOfBoundsException if certain limits are exceeded |
|
395 * (e.g., argument number too high, argument name too long, etc.) |
|
396 * @throws NumberFormatException if a number could not be parsed |
|
397 * @stable ICU 4.8 |
|
398 */ |
|
399 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); |
|
400 |
|
401 /** |
|
402 * Copy constructor. |
|
403 * @param other Object to copy. |
|
404 * @stable ICU 4.8 |
|
405 */ |
|
406 MessagePattern(const MessagePattern &other); |
|
407 |
|
408 /** |
|
409 * Assignment operator. |
|
410 * @param other Object to copy. |
|
411 * @return *this=other |
|
412 * @stable ICU 4.8 |
|
413 */ |
|
414 MessagePattern &operator=(const MessagePattern &other); |
|
415 |
|
416 /** |
|
417 * Destructor. |
|
418 * @stable ICU 4.8 |
|
419 */ |
|
420 virtual ~MessagePattern(); |
|
421 |
|
422 /** |
|
423 * Parses a MessageFormat pattern string. |
|
424 * @param pattern a MessageFormat pattern string |
|
425 * @param parseError Struct to receive information on the position |
|
426 * of an error within the pattern. |
|
427 * Can be NULL. |
|
428 * @param errorCode Standard ICU error code. Its input value must |
|
429 * pass the U_SUCCESS() test, or else the function returns |
|
430 * immediately. Check for U_FAILURE() on output or use with |
|
431 * function chaining. (See User Guide for details.) |
|
432 * @return *this |
|
433 * @throws IllegalArgumentException for syntax errors in the pattern string |
|
434 * @throws IndexOutOfBoundsException if certain limits are exceeded |
|
435 * (e.g., argument number too high, argument name too long, etc.) |
|
436 * @throws NumberFormatException if a number could not be parsed |
|
437 * @stable ICU 4.8 |
|
438 */ |
|
439 MessagePattern &parse(const UnicodeString &pattern, |
|
440 UParseError *parseError, UErrorCode &errorCode); |
|
441 |
|
442 /** |
|
443 * Parses a ChoiceFormat pattern string. |
|
444 * @param pattern a ChoiceFormat pattern string |
|
445 * @param parseError Struct to receive information on the position |
|
446 * of an error within the pattern. |
|
447 * Can be NULL. |
|
448 * @param errorCode Standard ICU error code. Its input value must |
|
449 * pass the U_SUCCESS() test, or else the function returns |
|
450 * immediately. Check for U_FAILURE() on output or use with |
|
451 * function chaining. (See User Guide for details.) |
|
452 * @return *this |
|
453 * @throws IllegalArgumentException for syntax errors in the pattern string |
|
454 * @throws IndexOutOfBoundsException if certain limits are exceeded |
|
455 * (e.g., argument number too high, argument name too long, etc.) |
|
456 * @throws NumberFormatException if a number could not be parsed |
|
457 * @stable ICU 4.8 |
|
458 */ |
|
459 MessagePattern &parseChoiceStyle(const UnicodeString &pattern, |
|
460 UParseError *parseError, UErrorCode &errorCode); |
|
461 |
|
462 /** |
|
463 * Parses a PluralFormat pattern string. |
|
464 * @param pattern a PluralFormat pattern string |
|
465 * @param parseError Struct to receive information on the position |
|
466 * of an error within the pattern. |
|
467 * Can be NULL. |
|
468 * @param errorCode Standard ICU error code. Its input value must |
|
469 * pass the U_SUCCESS() test, or else the function returns |
|
470 * immediately. Check for U_FAILURE() on output or use with |
|
471 * function chaining. (See User Guide for details.) |
|
472 * @return *this |
|
473 * @throws IllegalArgumentException for syntax errors in the pattern string |
|
474 * @throws IndexOutOfBoundsException if certain limits are exceeded |
|
475 * (e.g., argument number too high, argument name too long, etc.) |
|
476 * @throws NumberFormatException if a number could not be parsed |
|
477 * @stable ICU 4.8 |
|
478 */ |
|
479 MessagePattern &parsePluralStyle(const UnicodeString &pattern, |
|
480 UParseError *parseError, UErrorCode &errorCode); |
|
481 |
|
482 /** |
|
483 * Parses a SelectFormat pattern string. |
|
484 * @param pattern a SelectFormat pattern string |
|
485 * @param parseError Struct to receive information on the position |
|
486 * of an error within the pattern. |
|
487 * Can be NULL. |
|
488 * @param errorCode Standard ICU error code. Its input value must |
|
489 * pass the U_SUCCESS() test, or else the function returns |
|
490 * immediately. Check for U_FAILURE() on output or use with |
|
491 * function chaining. (See User Guide for details.) |
|
492 * @return *this |
|
493 * @throws IllegalArgumentException for syntax errors in the pattern string |
|
494 * @throws IndexOutOfBoundsException if certain limits are exceeded |
|
495 * (e.g., argument number too high, argument name too long, etc.) |
|
496 * @throws NumberFormatException if a number could not be parsed |
|
497 * @stable ICU 4.8 |
|
498 */ |
|
499 MessagePattern &parseSelectStyle(const UnicodeString &pattern, |
|
500 UParseError *parseError, UErrorCode &errorCode); |
|
501 |
|
502 /** |
|
503 * Clears this MessagePattern. |
|
504 * countParts() will return 0. |
|
505 * @stable ICU 4.8 |
|
506 */ |
|
507 void clear(); |
|
508 |
|
509 /** |
|
510 * Clears this MessagePattern and sets the UMessagePatternApostropheMode. |
|
511 * countParts() will return 0. |
|
512 * @param mode The new UMessagePatternApostropheMode. |
|
513 * @stable ICU 4.8 |
|
514 */ |
|
515 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { |
|
516 clear(); |
|
517 aposMode=mode; |
|
518 } |
|
519 |
|
520 /** |
|
521 * @param other another object to compare with. |
|
522 * @return TRUE if this object is equivalent to the other one. |
|
523 * @stable ICU 4.8 |
|
524 */ |
|
525 UBool operator==(const MessagePattern &other) const; |
|
526 |
|
527 /** |
|
528 * @param other another object to compare with. |
|
529 * @return FALSE if this object is equivalent to the other one. |
|
530 * @stable ICU 4.8 |
|
531 */ |
|
532 inline UBool operator!=(const MessagePattern &other) const { |
|
533 return !operator==(other); |
|
534 } |
|
535 |
|
536 /** |
|
537 * @return A hash code for this object. |
|
538 * @stable ICU 4.8 |
|
539 */ |
|
540 int32_t hashCode() const; |
|
541 |
|
542 /** |
|
543 * @return this instance's UMessagePatternApostropheMode. |
|
544 * @stable ICU 4.8 |
|
545 */ |
|
546 UMessagePatternApostropheMode getApostropheMode() const { |
|
547 return aposMode; |
|
548 } |
|
549 |
|
550 // Java has package-private jdkAposMode() here. |
|
551 // In C++, this is declared in the MessageImpl class. |
|
552 |
|
553 /** |
|
554 * @return the parsed pattern string (null if none was parsed). |
|
555 * @stable ICU 4.8 |
|
556 */ |
|
557 const UnicodeString &getPatternString() const { |
|
558 return msg; |
|
559 } |
|
560 |
|
561 /** |
|
562 * Does the parsed pattern have named arguments like {first_name}? |
|
563 * @return TRUE if the parsed pattern has at least one named argument. |
|
564 * @stable ICU 4.8 |
|
565 */ |
|
566 UBool hasNamedArguments() const { |
|
567 return hasArgNames; |
|
568 } |
|
569 |
|
570 /** |
|
571 * Does the parsed pattern have numbered arguments like {2}? |
|
572 * @return TRUE if the parsed pattern has at least one numbered argument. |
|
573 * @stable ICU 4.8 |
|
574 */ |
|
575 UBool hasNumberedArguments() const { |
|
576 return hasArgNumbers; |
|
577 } |
|
578 |
|
579 /** |
|
580 * Validates and parses an argument name or argument number string. |
|
581 * An argument name must be a "pattern identifier", that is, it must contain |
|
582 * no Unicode Pattern_Syntax or Pattern_White_Space characters. |
|
583 * If it only contains ASCII digits, then it must be a small integer with no leading zero. |
|
584 * @param name Input string. |
|
585 * @return >=0 if the name is a valid number, |
|
586 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, |
|
587 * ARG_NAME_NOT_VALID (-2) if it is neither. |
|
588 * @stable ICU 4.8 |
|
589 */ |
|
590 static int32_t validateArgumentName(const UnicodeString &name); |
|
591 |
|
592 /** |
|
593 * Returns a version of the parsed pattern string where each ASCII apostrophe |
|
594 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. |
|
595 * <p> |
|
596 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." |
|
597 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." |
|
598 * @return the deep-auto-quoted version of the parsed pattern string. |
|
599 * @see MessageFormat.autoQuoteApostrophe() |
|
600 * @stable ICU 4.8 |
|
601 */ |
|
602 UnicodeString autoQuoteApostropheDeep() const; |
|
603 |
|
604 class Part; |
|
605 |
|
606 /** |
|
607 * Returns the number of "parts" created by parsing the pattern string. |
|
608 * Returns 0 if no pattern has been parsed or clear() was called. |
|
609 * @return the number of pattern parts. |
|
610 * @stable ICU 4.8 |
|
611 */ |
|
612 int32_t countParts() const { |
|
613 return partsLength; |
|
614 } |
|
615 |
|
616 /** |
|
617 * Gets the i-th pattern "part". |
|
618 * @param i The index of the Part data. (0..countParts()-1) |
|
619 * @return the i-th pattern "part". |
|
620 * @stable ICU 4.8 |
|
621 */ |
|
622 const Part &getPart(int32_t i) const { |
|
623 return parts[i]; |
|
624 } |
|
625 |
|
626 /** |
|
627 * Returns the UMessagePatternPartType of the i-th pattern "part". |
|
628 * Convenience method for getPart(i).getType(). |
|
629 * @param i The index of the Part data. (0..countParts()-1) |
|
630 * @return The UMessagePatternPartType of the i-th Part. |
|
631 * @stable ICU 4.8 |
|
632 */ |
|
633 UMessagePatternPartType getPartType(int32_t i) const { |
|
634 return getPart(i).type; |
|
635 } |
|
636 |
|
637 /** |
|
638 * Returns the pattern index of the specified pattern "part". |
|
639 * Convenience method for getPart(partIndex).getIndex(). |
|
640 * @param partIndex The index of the Part data. (0..countParts()-1) |
|
641 * @return The pattern index of this Part. |
|
642 * @stable ICU 4.8 |
|
643 */ |
|
644 int32_t getPatternIndex(int32_t partIndex) const { |
|
645 return getPart(partIndex).index; |
|
646 } |
|
647 |
|
648 /** |
|
649 * Returns the substring of the pattern string indicated by the Part. |
|
650 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). |
|
651 * @param part a part of this MessagePattern. |
|
652 * @return the substring associated with part. |
|
653 * @stable ICU 4.8 |
|
654 */ |
|
655 UnicodeString getSubstring(const Part &part) const { |
|
656 return msg.tempSubString(part.index, part.length); |
|
657 } |
|
658 |
|
659 /** |
|
660 * Compares the part's substring with the input string s. |
|
661 * @param part a part of this MessagePattern. |
|
662 * @param s a string. |
|
663 * @return TRUE if getSubstring(part).equals(s). |
|
664 * @stable ICU 4.8 |
|
665 */ |
|
666 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { |
|
667 return 0==msg.compare(part.index, part.length, s); |
|
668 } |
|
669 |
|
670 /** |
|
671 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. |
|
672 * @param part a part of this MessagePattern. |
|
673 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. |
|
674 * @stable ICU 4.8 |
|
675 */ |
|
676 double getNumericValue(const Part &part) const; |
|
677 |
|
678 /** |
|
679 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. |
|
680 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) |
|
681 * @return the "offset:" value. |
|
682 * @stable ICU 4.8 |
|
683 */ |
|
684 double getPluralOffset(int32_t pluralStart) const; |
|
685 |
|
686 /** |
|
687 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. |
|
688 * @param start The index of some Part data (0..countParts()-1); |
|
689 * this Part should be of Type ARG_START or MSG_START. |
|
690 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, |
|
691 * or start itself if getPartType(msgStart)!=ARG|MSG_START. |
|
692 * @stable ICU 4.8 |
|
693 */ |
|
694 int32_t getLimitPartIndex(int32_t start) const { |
|
695 int32_t limit=getPart(start).limitPartIndex; |
|
696 if(limit<start) { |
|
697 return start; |
|
698 } |
|
699 return limit; |
|
700 } |
|
701 |
|
702 /** |
|
703 * A message pattern "part", representing a pattern parsing event. |
|
704 * There is a part for the start and end of a message or argument, |
|
705 * for quoting and escaping of and with ASCII apostrophes, |
|
706 * and for syntax elements of "complex" arguments. |
|
707 * @stable ICU 4.8 |
|
708 */ |
|
709 class Part : public UMemory { |
|
710 public: |
|
711 /** |
|
712 * Default constructor, do not use. |
|
713 * @internal |
|
714 */ |
|
715 Part() {} |
|
716 |
|
717 /** |
|
718 * Returns the type of this part. |
|
719 * @return the part type. |
|
720 * @stable ICU 4.8 |
|
721 */ |
|
722 UMessagePatternPartType getType() const { |
|
723 return type; |
|
724 } |
|
725 |
|
726 /** |
|
727 * Returns the pattern string index associated with this Part. |
|
728 * @return this part's pattern string index. |
|
729 * @stable ICU 4.8 |
|
730 */ |
|
731 int32_t getIndex() const { |
|
732 return index; |
|
733 } |
|
734 |
|
735 /** |
|
736 * Returns the length of the pattern substring associated with this Part. |
|
737 * This is 0 for some parts. |
|
738 * @return this part's pattern substring length. |
|
739 * @stable ICU 4.8 |
|
740 */ |
|
741 int32_t getLength() const { |
|
742 return length; |
|
743 } |
|
744 |
|
745 /** |
|
746 * Returns the pattern string limit (exclusive-end) index associated with this Part. |
|
747 * Convenience method for getIndex()+getLength(). |
|
748 * @return this part's pattern string limit index, same as getIndex()+getLength(). |
|
749 * @stable ICU 4.8 |
|
750 */ |
|
751 int32_t getLimit() const { |
|
752 return index+length; |
|
753 } |
|
754 |
|
755 /** |
|
756 * Returns a value associated with this part. |
|
757 * See the documentation of each part type for details. |
|
758 * @return the part value. |
|
759 * @stable ICU 4.8 |
|
760 */ |
|
761 int32_t getValue() const { |
|
762 return value; |
|
763 } |
|
764 |
|
765 /** |
|
766 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, |
|
767 * otherwise UMSGPAT_ARG_TYPE_NONE. |
|
768 * @return the argument type for this part. |
|
769 * @stable ICU 4.8 |
|
770 */ |
|
771 UMessagePatternArgType getArgType() const { |
|
772 UMessagePatternPartType type=getType(); |
|
773 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) { |
|
774 return (UMessagePatternArgType)value; |
|
775 } else { |
|
776 return UMSGPAT_ARG_TYPE_NONE; |
|
777 } |
|
778 } |
|
779 |
|
780 /** |
|
781 * Indicates whether the Part type has a numeric value. |
|
782 * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). |
|
783 * @param type The Part type to be tested. |
|
784 * @return TRUE if the Part type has a numeric value. |
|
785 * @stable ICU 4.8 |
|
786 */ |
|
787 static UBool hasNumericValue(UMessagePatternPartType type) { |
|
788 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; |
|
789 } |
|
790 |
|
791 /** |
|
792 * @param other another object to compare with. |
|
793 * @return TRUE if this object is equivalent to the other one. |
|
794 * @stable ICU 4.8 |
|
795 */ |
|
796 UBool operator==(const Part &other) const; |
|
797 |
|
798 /** |
|
799 * @param other another object to compare with. |
|
800 * @return FALSE if this object is equivalent to the other one. |
|
801 * @stable ICU 4.8 |
|
802 */ |
|
803 inline UBool operator!=(const Part &other) const { |
|
804 return !operator==(other); |
|
805 } |
|
806 |
|
807 /** |
|
808 * @return A hash code for this object. |
|
809 * @stable ICU 4.8 |
|
810 */ |
|
811 int32_t hashCode() const { |
|
812 return ((type*37+index)*37+length)*37+value; |
|
813 } |
|
814 |
|
815 private: |
|
816 friend class MessagePattern; |
|
817 |
|
818 static const int32_t MAX_LENGTH=0xffff; |
|
819 static const int32_t MAX_VALUE=0x7fff; |
|
820 |
|
// Some fields are not const because they are modified during pattern parsing.
// After pattern parsing, the parts are effectively immutable.
|
823 UMessagePatternPartType type; |
|
824 int32_t index; |
|
825 uint16_t length; |
|
826 int16_t value; |
|
827 int32_t limitPartIndex; |
|
828 }; |
|
829 |
|
830 private: |
|
831 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); |
|
832 |
|
833 void postParse(); |
|
834 |
|
835 int32_t parseMessage(int32_t index, int32_t msgStartLength, |
|
836 int32_t nestingLevel, UMessagePatternArgType parentType, |
|
837 UParseError *parseError, UErrorCode &errorCode); |
|
838 |
|
839 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, |
|
840 UParseError *parseError, UErrorCode &errorCode); |
|
841 |
|
842 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); |
|
843 |
|
844 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, |
|
845 UParseError *parseError, UErrorCode &errorCode); |
|
846 |
|
847 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, |
|
848 UParseError *parseError, UErrorCode &errorCode); |
|
849 |
|
/**
 * Validates and parses an argument name or argument number string.
 * This internal method assumes that the input substring is a "pattern identifier".
 * @param s the string containing the argument name or number
 * @param start start index of the substring in s
 * @param limit limit index of the substring in s
 * @return >=0 if the name is a valid number,
 *         UMSGPAT_ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
 *         UMSGPAT_ARG_NAME_NOT_VALID (-2) if it is neither.
 * @see validateArgumentName(const UnicodeString &)
 */
|
858 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); |
|
859 |
|
860 int32_t parseArgNumber(int32_t start, int32_t limit) { |
|
861 return parseArgNumber(msg, start, limit); |
|
862 } |
|
863 |
|
864 /** |
|
865 * Parses a number from the specified message substring. |
|
866 * @param start start index into the message string |
|
867 * @param limit limit index into the message string, must be start<limit |
|
868 * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat) |
|
869 * @param parseError |
|
870 * @param errorCode |
|
871 */ |
|
872 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, |
|
873 UParseError *parseError, UErrorCode &errorCode); |
|
874 |
|
875 // Java has package-private appendReducedApostrophes() here. |
|
876 // In C++, this is declared in the MessageImpl class. |
|
877 |
|
878 int32_t skipWhiteSpace(int32_t index); |
|
879 |
|
880 int32_t skipIdentifier(int32_t index); |
|
881 |
|
882 /** |
|
883 * Skips a sequence of characters that could occur in a double value. |
|
884 * Does not fully parse or validate the value. |
|
885 */ |
|
886 int32_t skipDouble(int32_t index); |
|
887 |
|
888 static UBool isArgTypeChar(UChar32 c); |
|
889 |
|
890 UBool isChoice(int32_t index); |
|
891 |
|
892 UBool isPlural(int32_t index); |
|
893 |
|
894 UBool isSelect(int32_t index); |
|
895 |
|
896 UBool isOrdinal(int32_t index); |
|
897 |
|
898 /** |
|
899 * @return TRUE if we are inside a MessageFormat (sub-)pattern, |
|
900 * as opposed to inside a top-level choice/plural/select pattern. |
|
901 */ |
|
902 UBool inMessageFormatPattern(int32_t nestingLevel); |
|
903 |
|
904 /** |
|
905 * @return TRUE if we are in a MessageFormat sub-pattern |
|
906 * of a top-level ChoiceFormat pattern. |
|
907 */ |
|
908 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); |
|
909 |
|
910 void addPart(UMessagePatternPartType type, int32_t index, int32_t length, |
|
911 int32_t value, UErrorCode &errorCode); |
|
912 |
|
913 void addLimitPart(int32_t start, |
|
914 UMessagePatternPartType type, int32_t index, int32_t length, |
|
915 int32_t value, UErrorCode &errorCode); |
|
916 |
|
917 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); |
|
918 |
|
919 void setParseError(UParseError *parseError, int32_t index); |
|
920 |
|
921 UBool init(UErrorCode &errorCode); |
|
922 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); |
|
923 |
|
924 UMessagePatternApostropheMode aposMode; |
|
925 UnicodeString msg; |
|
926 // ArrayList<Part> parts=new ArrayList<Part>(); |
|
927 MessagePatternPartsList *partsList; |
|
928 Part *parts; |
|
929 int32_t partsLength; |
|
930 // ArrayList<Double> numericValues; |
|
931 MessagePatternDoubleList *numericValuesList; |
|
932 double *numericValues; |
|
933 int32_t numericValuesLength; |
|
934 UBool hasArgNames; |
|
935 UBool hasArgNumbers; |
|
936 UBool needsAutoQuoting; |
|
937 }; |
|
938 |
|
939 U_NAMESPACE_END |
|
940 |
|
941 #endif // !UCONFIG_NO_FORMATTING |
|
942 |
|
943 #endif // __MESSAGEPATTERN_H__ |