|
1 /* |
|
2 ****************************************************************************** |
|
3 * Copyright (C) 1996-2012, International Business Machines * |
|
4 * Corporation and others. All Rights Reserved. * |
|
5 ****************************************************************************** |
|
6 */ |
|
7 |
|
8 /** |
|
9 * \file |
|
10 * \brief C++ API: Collation Service. |
|
11 */ |
|
12 |
|
13 /** |
|
14 * File coll.h |
|
15 * |
|
16 * Created by: Helena Shih |
|
17 * |
|
18 * Modification History: |
|
19 * |
|
20 * Date Name Description |
|
21 * 02/5/97 aliu Modified createDefault to load collation data from |
|
22 * binary files when possible. Added related methods |
|
23 * createCollationFromFile, chopLocale, createPathName. |
|
24 * 02/11/97 aliu Added members addToCache, findInCache, and fgCache. |
|
25 * 02/12/97 aliu Modified to create objects from RuleBasedCollator cache. |
|
26 * Moved cache out of Collation class. |
|
27 * 02/13/97 aliu Moved several methods out of this class and into |
|
28 * RuleBasedCollator, with modifications. Modified |
|
29 * createDefault() to call new RuleBasedCollator(Locale&) |
|
30 * constructor. General clean up and documentation. |
|
31 * 02/20/97 helena Added clone, operator==, operator!=, operator=, copy |
|
32 * constructor and getDynamicClassID. |
|
33 * 03/25/97 helena Updated with platform independent data types. |
|
34 * 05/06/97 helena Added memory allocation error detection. |
|
35 * 06/20/97 helena Java class name change. |
|
36 * 09/03/97 helena Added createCollationKeyValues(). |
|
37 * 02/10/98 damiba Added compare() with length as parameter. |
|
38 * 04/23/99 stephen Removed EDecompositionMode, merged with |
|
39 * Normalizer::EMode. |
|
40 * 11/02/99 helena Collator performance enhancements. Eliminates the |
|
41 * UnicodeString construction and special case for NO_OP. |
|
42 * 11/23/99 srl More performance enhancements. Inlining of |
|
43 * critical accessors. |
|
44 * 05/15/00 helena Added version information API. |
|
45 * 01/29/01 synwee Modified into a C++ wrapper which calls C apis |
|
46 * (ucoll.h). |
|
47 */ |
|
48 |
|
49 #ifndef COLL_H |
|
50 #define COLL_H |
|
51 |
|
52 #include "unicode/utypes.h" |
|
53 |
|
54 #if !UCONFIG_NO_COLLATION |
|
55 |
|
56 #include "unicode/uobject.h" |
|
57 #include "unicode/ucol.h" |
|
58 #include "unicode/normlzr.h" |
|
59 #include "unicode/locid.h" |
|
60 #include "unicode/uniset.h" |
|
61 #include "unicode/umisc.h" |
|
62 #include "unicode/uiter.h" |
|
63 #include "unicode/stringpiece.h" |
|
64 |
|
65 U_NAMESPACE_BEGIN |
|
66 |
|
67 class StringEnumeration; |
|
68 |
|
69 #if !UCONFIG_NO_SERVICE |
|
70 /** |
|
71 * @stable ICU 2.6 |
|
72 */ |
|
73 class CollatorFactory; |
|
74 #endif |
|
75 |
|
76 /** |
|
77 * @stable ICU 2.0 |
|
78 */ |
|
79 class CollationKey; |
|
80 |
|
81 /** |
|
82 * The <code>Collator</code> class performs locale-sensitive string |
|
83 * comparison.<br> |
|
84 * You use this class to build searching and sorting routines for natural |
|
85 * language text.<br> |
|
86 * <em>Important: </em>The ICU collation service has been reimplemented |
|
87 * in order to achieve better performance and UCA compliance. |
|
88 * For details, see the |
|
89 * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> |
|
90 * collation design document</a>. |
|
91 * <p> |
|
92 * <code>Collator</code> is an abstract base class. Subclasses implement |
|
93 * specific collation strategies. One subclass, |
|
94 * <code>RuleBasedCollator</code>, is currently provided and is applicable |
|
95 * to a wide set of languages. Other subclasses may be created to handle more |
|
96 * specialized needs. |
|
97 * <p> |
|
98 * Like other locale-sensitive classes, you can use the static factory method, |
|
99 * <code>createInstance</code>, to obtain the appropriate |
|
100 * <code>Collator</code> object for a given locale. You will only need to |
|
101 * look at the subclasses of <code>Collator</code> if you need to |
|
102 * understand the details of a particular collation strategy or if you need to |
|
103 * modify that strategy. |
|
104 * <p> |
|
105 * The following example shows how to compare two strings using the |
|
106 * <code>Collator</code> for the default locale. |
|
107 * \htmlonly<blockquote>\endhtmlonly |
|
108 * <pre> |
|
109 * \code |
|
110 * // Compare two strings in the default locale |
|
111 * UErrorCode success = U_ZERO_ERROR; |
|
112 * Collator* myCollator = Collator::createInstance(success); |
|
113 * if (myCollator->compare("abc", "ABC") < 0) |
|
114 * cout << "abc is less than ABC" << endl; |
|
115 * else |
|
116 * cout << "abc is greater than or equal to ABC" << endl; |
|
117 * \endcode |
|
118 * </pre> |
|
119 * \htmlonly</blockquote>\endhtmlonly |
|
120 * <p> |
|
121 * You can set a <code>Collator</code>'s <em>strength</em> property to |
|
122 * determine the level of difference considered significant in comparisons. |
|
123 * Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>, |
|
124 * <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>. |
|
125 * The exact assignment of strengths to language features is locale-dependent. |
|
126 * For example, in Czech, "e" and "f" are considered primary differences, |
|
127 * while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary |
|
128 * differences and "e" and "e" are identical. The following shows how both case |
|
129 * and accents could be ignored for US English. |
|
130 * \htmlonly<blockquote>\endhtmlonly |
|
131 * <pre> |
|
132 * \code |
|
133 * //Get the Collator for US English and set its strength to PRIMARY |
|
134 * UErrorCode success = U_ZERO_ERROR; |
|
135 * Collator* usCollator = Collator::createInstance(Locale::US, success); |
|
136 * usCollator->setStrength(Collator::PRIMARY); |
|
137 * if (usCollator->compare("abc", "ABC") == 0) |
|
138 * cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl; |
|
139 * \endcode |
|
140 * </pre> |
|
141 * \htmlonly</blockquote>\endhtmlonly |
|
142 * <p> |
|
143 * For comparing strings exactly once, the <code>compare</code> method |
|
144 * provides the best performance. When sorting a list of strings however, it |
|
145 * is generally necessary to compare each string multiple times. In this case, |
|
146 * sort keys provide better performance. The <code>getSortKey</code> methods |
|
147 * convert a string to a series of bytes that can be compared bitwise against |
|
148 * other sort keys using <code>strcmp()</code>. Sort keys are written as |
|
149 * zero-terminated byte strings. They consist of several substrings, one for |
|
150 * each collation strength level, that are delimited by 0x01 bytes. |
|
151 * If the string code points are appended for UCOL_IDENTICAL, then they are |
|
152 * processed for correct code point order comparison and may contain 0x01 |
|
153 * bytes but not zero bytes. |
|
154 * </p> |
|
155 * <p> |
|
156 * An older set of APIs returns a <code>CollationKey</code> object that wraps |
|
157 * the sort key bytes instead of returning the bytes themselves. |
|
158 * Its use is deprecated, but it is still available for compatibility with |
|
159 * Java. |
|
160 * </p> |
|
161 * <p> |
|
162 * <strong>Note:</strong> <code>Collator</code>s with different Locale |
|
163 * and CollationStrength settings will return different sort |
|
164 * orders for the same set of strings. Locales have specific collation rules, |
|
165 * and the way in which secondary and tertiary differences are taken into |
|
166 * account, for example, will result in a different sorting order for same |
|
167 * strings. |
|
168 * </p> |
|
169 * @see RuleBasedCollator |
|
170 * @see CollationKey |
|
171 * @see CollationElementIterator |
|
172 * @see Locale |
|
173 * @see Normalizer |
|
174 * @version 2.0 11/15/01 |
|
175 */ |
|
176 |
|
177 class U_I18N_API Collator : public UObject { |
|
178 public: |
|
179 |
|
180 // Collator public enums ----------------------------------------------- |
|
181 |
|
182 /** |
|
183 * Base letter represents a primary difference. Set comparison level to |
|
184 * PRIMARY to ignore secondary and tertiary differences.<br> |
|
185 * Use this to set the strength of a Collator object.<br> |
|
186 * Example of primary difference, "abc" < "abd" |
|
187 * |
|
188 * Diacritical differences on the same base letter represent a secondary |
|
189 * difference. Set comparison level to SECONDARY to ignore tertiary |
|
190 * differences. Use this to set the strength of a Collator object.<br> |
|
191 * Example of secondary difference, "ä" >> "a". |
|
192 * |
|
193 * Uppercase and lowercase versions of the same character represent a |
|
194 * tertiary difference. Set comparison level to TERTIARY to include all |
|
195 * comparison differences. Use this to set the strength of a Collator |
|
196 * object.<br> |
|
197 * Example of tertiary difference, "abc" <<< "ABC". |
|
198 * |
|
199 * Two characters are considered "identical" when they have the same unicode |
|
200 * spellings.<br> |
|
201 * For example, "ä" == "ä". |
|
202 * |
|
203 * UCollationStrength is also used to determine the strength of sort keys |
|
204 * generated from Collator objects. |
|
205 * @stable ICU 2.0 |
|
206 */ |
|
207 enum ECollationStrength |
|
208 { |
|
209 PRIMARY = UCOL_PRIMARY, // 0: base-letter differences only; secondary and tertiary differences are ignored |
|
210 SECONDARY = UCOL_SECONDARY, // 1: also diacritical differences; tertiary differences are ignored |
|
211 TERTIARY = UCOL_TERTIARY, // 2: also uppercase/lowercase differences |
|
212 QUATERNARY = UCOL_QUATERNARY, // 3 |
|
213 IDENTICAL = UCOL_IDENTICAL // 15: strings must have the same Unicode spelling |
|
214 }; |
|
215 |
|
216 /** |
|
217 * LESS is returned if source string is compared to be less than target |
|
218 * string in the compare() method. |
|
219 * EQUAL is returned if source string is compared to be equal to target |
|
220 * string in the compare() method. |
|
221 * GREATER is returned if source string is compared to be greater than |
|
222 * target string in the compare() method. |
|
223 * @see Collator#compare |
|
224 * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h |
|
225 */ |
|
226 enum EComparisonResult |
|
227 { |
|
228 LESS = UCOL_LESS, // -1: source string compares less than target string |
|
229 EQUAL = UCOL_EQUAL, // 0: source string compares equal to target string |
|
230 GREATER = UCOL_GREATER // 1: source string compares greater than target string |
|
231 }; |
|
232 |
|
233 // Collator public destructor ----------------------------------------- |
|
234 |
|
235 /** |
|
236 * Destructor |
|
237 * @stable ICU 2.0 |
|
238 */ |
|
239 virtual ~Collator(); |
|
240 |
|
241 // Collator public methods -------------------------------------------- |
|
242 |
|
243 /** |
|
244 * Returns TRUE if "other" is the same as "this". |
|
245 * |
|
246 * The base class implementation returns TRUE if "other" has the same type/class as "this": |
|
247 * <code>typeid(*this) == typeid(other)</code>. |
|
248 * |
|
249 * Subclass implementations should do something like the following: |
|
250 * <pre> |
|
251 * if (this == &other) { return TRUE; } |
|
252 * if (!Collator::operator==(other)) { return FALSE; } // not the same class |
|
253 * |
|
254 * const MyCollator &o = (const MyCollator&)other; |
|
255 * (compare this vs. o's subclass fields) |
|
256 * </pre> |
|
257 * @param other Collator object to be compared |
|
258 * @return TRUE if other is the same as this. |
|
259 * @stable ICU 2.0 |
|
260 */ |
|
261 virtual UBool operator==(const Collator& other) const; |
|
262 |
|
263 /** |
|
264 * Returns true if "other" is not the same as "this". |
|
265 * Calls ! operator==(const Collator&) const which works for all subclasses. |
|
266 * @param other Collator object to be compared |
|
267 * @return TRUE if other is not the same as this. |
|
268 * @stable ICU 2.0 |
|
269 */ |
|
270 virtual UBool operator!=(const Collator& other) const; |
|
271 |
|
272 /** |
|
273 * Makes a copy of this object. |
|
274 * @return a copy of this object, owned by the caller |
|
275 * @stable ICU 2.0 |
|
276 */ |
|
277 virtual Collator* clone(void) const = 0; |
|
278 |
|
279 /** |
|
280 * Creates the Collator object for the current default locale. |
|
281 * The default locale is determined by Locale::getDefault. |
|
282 * The UErrorCode& err parameter is used to return status information to the user. |
|
283 * To check whether the construction succeeded or not, you should check the |
|
284 * value of U_SUCCESS(err). If you wish more detailed information, you can |
|
285 * check for informational error results which still indicate success. |
|
286 * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For |
|
287 * example, 'de_CH' was requested, but nothing was found there, so 'de' was |
|
288 * used. U_USING_DEFAULT_ERROR indicates that the default locale data was |
|
289 * used; neither the requested locale nor any of its fall back locales |
|
290 * could be found. |
|
291 * The caller owns the returned object and is responsible for deleting it. |
|
292 * |
|
293 * @param err the error code status. |
|
294 * @return the collation object of the default locale (for example, en_US). |
|
295 * @see Locale#getDefault |
|
296 * @stable ICU 2.0 |
|
297 */ |
|
298 static Collator* U_EXPORT2 createInstance(UErrorCode& err); |
|
299 |
|
300 /** |
|
301 * Gets the table-based collation object for the desired locale. The |
|
302 * resource of the desired locale will be loaded by ResourceLoader. |
|
303 * Locale::ENGLISH is the base collation table and all other languages are |
|
304 * built on top of it with additional language-specific modifications. |
|
305 * The UErrorCode& err parameter is used to return status information to the user. |
|
306 * To check whether the construction succeeded or not, you should check |
|
307 * the value of U_SUCCESS(err). If you wish more detailed information, you |
|
308 * can check for informational error results which still indicate success. |
|
309 * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For |
|
310 * example, 'de_CH' was requested, but nothing was found there, so 'de' was |
|
311 * used. U_USING_DEFAULT_ERROR indicates that the default locale data was |
|
312 * used; neither the requested locale nor any of its fall back locales |
|
313 * could be found. |
|
314 * The caller owns the returned object and is responsible for deleting it. |
|
315 * @param loc The locale ID for which to open a collator. |
|
316 * @param err the error code status. |
|
317 * @return the created table-based collation object based on the desired |
|
318 * locale. |
|
319 * @see Locale |
|
320 * @see ResourceLoader |
|
321 * @stable ICU 2.2 |
|
322 */ |
|
323 static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err); |
|
324 |
|
325 #ifdef U_USE_COLLATION_OBSOLETE_2_6 |
|
326 /** |
|
327 * Create a Collator with a specific version. |
|
328 * This is the same as createInstance(loc, err) except that getVersion() of |
|
329 * the returned object is guaranteed to be the same as the version |
|
330 * parameter. |
|
331 * This is designed to be used to open the same collator for a given |
|
332 * locale even when ICU is updated. |
|
333 * The same locale and version guarantees the same sort keys and |
|
334 * comparison results. |
|
335 * <p> |
|
336 * Note: this API will be removed in a future release. Use |
|
337 * <tt>createInstance(const Locale&, UErrorCode&) instead.</tt></p> |
|
338 * |
|
339 * @param loc The locale ID for which to open a collator. |
|
340 * @param version The requested collator version. |
|
341 * @param err A reference to a UErrorCode, |
|
342 * must not indicate a failure before calling this function. |
|
343 * @return A pointer to a Collator, or 0 if an error occurred |
|
344 * or a collator with the requested version is not available. |
|
345 * |
|
346 * @see getVersion |
|
347 * @obsolete ICU 2.6 |
|
348 */ |
|
349 static Collator *createInstance(const Locale &loc, UVersionInfo version, UErrorCode &err); |
|
350 #endif |
|
351 |
|
352 /** |
|
353 * The comparison function compares the character data stored in two |
|
354 * different strings. Returns information about whether a string is less |
|
355 * than, greater than or equal to another string. |
|
356 * @param source the source string to be compared with. |
|
357 * @param target the string that is to be compared with the source string. |
|
358 * @return Returns a byte value. GREATER if source is greater |
|
359 * than target; EQUAL if source is equal to target; LESS if source is less |
|
360 * than target |
|
361 * @deprecated ICU 2.6 use the overload with UErrorCode & |
|
362 */ |
|
363 virtual EComparisonResult compare(const UnicodeString& source, |
|
364 const UnicodeString& target) const; |
|
365 |
|
366 /** |
|
367 * The comparison function compares the character data stored in two |
|
368 * different strings. Returns information about whether a string is less |
|
369 * than, greater than or equal to another string. |
|
370 * @param source the source string to be compared with. |
|
371 * @param target the string that is to be compared with the source string. |
|
372 * @param status possible error code |
|
373 * @return Returns an enum value. UCOL_GREATER if source is greater |
|
374 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less |
|
375 * than target |
|
376 * @stable ICU 2.6 |
|
377 */ |
|
378 virtual UCollationResult compare(const UnicodeString& source, |
|
379 const UnicodeString& target, |
|
380 UErrorCode &status) const = 0; |
|
381 |
|
382 /** |
|
383 * Does the same thing as compare but limits the comparison to a specified |
|
384 * length |
|
385 * @param source the source string to be compared with. |
|
386 * @param target the string that is to be compared with the source string. |
|
387 * @param length the length the comparison is limited to |
|
388 * @return Returns a byte value. GREATER if source (up to the specified |
|
389 * length) is greater than target; EQUAL if source (up to specified |
|
390 * length) is equal to target; LESS if source (up to the specified |
|
391 * length) is less than target. |
|
392 * @deprecated ICU 2.6 use the overload with UErrorCode & |
|
393 */ |
|
394 virtual EComparisonResult compare(const UnicodeString& source, |
|
395 const UnicodeString& target, |
|
396 int32_t length) const; |
|
397 |
|
398 /** |
|
399 * Does the same thing as compare but limits the comparison to a specified |
|
400 * length |
|
401 * @param source the source string to be compared with. |
|
402 * @param target the string that is to be compared with the source string. |
|
403 * @param length the length the comparison is limited to |
|
404 * @param status possible error code |
|
405 * @return Returns an enum value. UCOL_GREATER if source (up to the specified |
|
406 * length) is greater than target; UCOL_EQUAL if source (up to specified |
|
407 * length) is equal to target; UCOL_LESS if source (up to the specified |
|
408 * length) is less than target. |
|
409 * @stable ICU 2.6 |
|
410 */ |
|
411 virtual UCollationResult compare(const UnicodeString& source, |
|
412 const UnicodeString& target, |
|
413 int32_t length, |
|
414 UErrorCode &status) const = 0; |
|
415 |
|
416 /** |
|
417 * The comparison function compares the character data stored in two |
|
418 * different string arrays. Returns information about whether a string array |
|
419 * is less than, greater than or equal to another string array. |
|
420 * <p>Example of use: |
|
421 * <pre> |
|
422 * . UChar ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC" |
|
423 * . UChar abc[] = {0x61, 0x62, 0x63, 0}; // = "abc" |
|
424 * . UErrorCode status = U_ZERO_ERROR; |
|
425 * . Collator *myCollation = |
|
426 * . Collator::createInstance(Locale::US, status); |
|
427 * . if (U_FAILURE(status)) return; |
|
428 * . myCollation->setStrength(Collator::PRIMARY); |
|
429 * . // result would be Collator::EQUAL ("abc" == "ABC") |
|
430 * . // (no primary difference between "abc" and "ABC") |
|
431 * . Collator::EComparisonResult result = |
|
432 * . myCollation->compare(abc, 3, ABC, 3); |
|
433 * . myCollation->setStrength(Collator::TERTIARY); |
|
434 * . // result would be Collator::LESS ("abc" <<< "ABC") |
|
435 * . // (with tertiary difference between "abc" and "ABC") |
|
436 * . result = myCollation->compare(abc, 3, ABC, 3); |
|
437 * </pre> |
|
438 * @param source the source string array to be compared with. |
|
439 * @param sourceLength the length of the source string array. If this value |
|
440 * is equal to -1, the string array is null-terminated. |
|
441 * @param target the string that is to be compared with the source string. |
|
442 * @param targetLength the length of the target string array. If this value |
|
443 * is equal to -1, the string array is null-terminated. |
|
444 * @return Returns a byte value. GREATER if source is greater than target; |
|
445 * EQUAL if source is equal to target; LESS if source is less than |
|
446 * target |
|
447 * @deprecated ICU 2.6 use the overload with UErrorCode & |
|
448 */ |
|
449 virtual EComparisonResult compare(const UChar* source, int32_t sourceLength, |
|
450 const UChar* target, int32_t targetLength) |
|
451 const; |
|
452 |
|
453 /** |
|
454 * The comparison function compares the character data stored in two |
|
455 * different string arrays. Returns information about whether a string array |
|
456 * is less than, greater than or equal to another string array. |
|
457 * @param source the source string array to be compared with. |
|
458 * @param sourceLength the length of the source string array. If this value |
|
459 * is equal to -1, the string array is null-terminated. |
|
460 * @param target the string that is to be compared with the source string. |
|
461 * @param targetLength the length of the target string array. If this value |
|
462 * is equal to -1, the string array is null-terminated. |
|
463 * @param status possible error code |
|
464 * @return Returns an enum value. UCOL_GREATER if source is greater |
|
465 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less |
|
466 * than target |
|
467 * @stable ICU 2.6 |
|
468 */ |
|
469 virtual UCollationResult compare(const UChar* source, int32_t sourceLength, |
|
470 const UChar* target, int32_t targetLength, |
|
471 UErrorCode &status) const = 0; |
|
472 |
|
473 /** |
|
474 * Compares two strings using the Collator. |
|
475 * Returns whether the first one compares less than/equal to/greater than |
|
476 * the second one. |
|
477 * This version takes UCharIterator input. |
|
478 * @param sIter the first ("source") string iterator |
|
479 * @param tIter the second ("target") string iterator |
|
480 * @param status ICU status |
|
481 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER |
|
482 * @stable ICU 4.2 |
|
483 */ |
|
484 virtual UCollationResult compare(UCharIterator &sIter, |
|
485 UCharIterator &tIter, |
|
486 UErrorCode &status) const; |
|
487 |
|
488 /** |
|
489 * Compares two UTF-8 strings using the Collator. |
|
490 * Returns whether the first one compares less than/equal to/greater than |
|
491 * the second one. |
|
492 * This version takes UTF-8 input. |
|
493 * Note that a StringPiece can be implicitly constructed |
|
494 * from a std::string or a NUL-terminated const char * string. |
|
495 * @param source the first UTF-8 string |
|
496 * @param target the second UTF-8 string |
|
497 * @param status ICU status |
|
498 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER |
|
499 * @stable ICU 4.2 |
|
500 */ |
|
501 virtual UCollationResult compareUTF8(const StringPiece &source, |
|
502 const StringPiece &target, |
|
503 UErrorCode &status) const; |
|
504 |
|
505 /** |
|
506 * Transforms the string into a series of characters that can be compared |
|
507 * with CollationKey::compareTo. It is not possible to restore the original |
|
508 * string from the chars in the sort key. The generated sort key handles |
|
509 * only a limited number of ignorable characters. |
|
510 * <p>Use CollationKey::equals or CollationKey::compare to compare the |
|
511 * generated sort keys. |
|
512 * If the source string is null, a null collation key will be returned. |
|
513 * @param source the source string to be transformed into a sort key. |
|
514 * @param key the collation key to be filled in |
|
515 * @param status the error code status. |
|
516 * @return the collation key of the string based on the collation rules. |
|
517 * @see CollationKey#compare |
|
518 * @stable ICU 2.0 |
|
519 */ |
|
520 virtual CollationKey& getCollationKey(const UnicodeString& source, |
|
521 CollationKey& key, |
|
522 UErrorCode& status) const = 0; |
|
523 |
|
524 /** |
|
525 * Transforms the string into a series of characters that can be compared |
|
526 * with CollationKey::compareTo. It is not possible to restore the original |
|
527 * string from the chars in the sort key. The generated sort key handles |
|
528 * only a limited number of ignorable characters. |
|
529 * <p>Use CollationKey::equals or CollationKey::compare to compare the |
|
530 * generated sort keys. |
|
531 * <p>If the source string is null, a null collation key will be returned. |
|
532 * @param source the source string to be transformed into a sort key. |
|
533 * @param sourceLength length of the source string |
|
534 * @param key the collation key to be filled in |
|
535 * @param status the error code status. |
|
536 * @return the collation key of the string based on the collation rules. |
|
537 * @see CollationKey#compare |
|
538 * @stable ICU 2.0 |
|
539 */ |
|
540 virtual CollationKey& getCollationKey(const UChar*source, |
|
541 int32_t sourceLength, |
|
542 CollationKey& key, |
|
543 UErrorCode& status) const = 0; |
|
544 /** |
|
545 * Generates the hash code for the collation object |
|
546 * @stable ICU 2.0 |
|
547 */ |
|
548 virtual int32_t hashCode(void) const = 0; |
|
549 |
|
550 /** |
|
551 * Gets the locale of the Collator |
|
552 * |
|
553 * @param type can be either requested, valid or actual locale. For more |
|
554 * information see the definition of ULocDataLocaleType in |
|
555 * uloc.h |
|
556 * @param status the error code status. |
|
557 * @return locale where the collation data lives. If the collator |
|
558 * was instantiated from rules, locale is empty. |
|
559 * @deprecated ICU 2.8 This API is under consideration for revision |
|
560 * in ICU 3.0. |
|
561 */ |
|
562 virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0; |
|
563 |
|
564 /** |
|
565 * Convenience method for comparing two strings based on the collation rules. |
|
566 * @param source the source string to be compared with. |
|
567 * @param target the target string to be compared with. |
|
568 * @return true if the first string is greater than the second one, |
|
569 * according to the collation rules. false, otherwise. |
|
570 * @see Collator#compare |
|
571 * @stable ICU 2.0 |
|
572 */ |
|
573 UBool greater(const UnicodeString& source, const UnicodeString& target) |
|
574 const; |
|
575 |
|
576 /** |
|
577 * Convenience method for comparing two strings based on the collation rules. |
|
578 * @param source the source string to be compared with. |
|
579 * @param target the target string to be compared with. |
|
580 * @return true if the first string is greater than or equal to the second |
|
581 * one, according to the collation rules. false, otherwise. |
|
582 * @see Collator#compare |
|
583 * @stable ICU 2.0 |
|
584 */ |
|
585 UBool greaterOrEqual(const UnicodeString& source, |
|
586 const UnicodeString& target) const; |
|
587 |
|
588 /** |
|
589 * Convenience method for comparing two strings based on the collation rules. |
|
590 * @param source the source string to be compared with. |
|
591 * @param target the target string to be compared with. |
|
592 * @return true if the strings are equal according to the collation rules. |
|
593 * false, otherwise. |
|
594 * @see Collator#compare |
|
595 * @stable ICU 2.0 |
|
596 */ |
|
597 UBool equals(const UnicodeString& source, const UnicodeString& target) const; |
|
598 |
|
599 /** |
|
600 * Determines the minimum strength that will be used in comparison or |
|
601 * transformation. |
|
602 * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored |
|
603 * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference |
|
604 * are ignored. |
|
605 * @return the current comparison level. |
|
606 * @see Collator#setStrength |
|
607 * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead |
|
608 */ |
|
609 virtual ECollationStrength getStrength(void) const; |
|
610 |
|
611 /** |
|
612 * Sets the minimum strength to be used in comparison or transformation. |
|
613 * <p>Example of use: |
|
614 * <pre> |
|
615 * \code |
|
616 * UErrorCode status = U_ZERO_ERROR; |
|
617 * Collator*myCollation = Collator::createInstance(Locale::US, status); |
|
618 * if (U_FAILURE(status)) return; |
|
619 * myCollation->setStrength(Collator::PRIMARY); |
|
620 * // result will be "abc" == "ABC" |
|
621 * // tertiary differences will be ignored |
|
622 * Collator::EComparisonResult result = myCollation->compare("abc", "ABC"); |
|
623 * \endcode |
|
624 * </pre> |
|
625 * @see Collator#getStrength |
|
626 * @param newStrength the new comparison level. |
|
627 * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead |
|
628 */ |
|
629 virtual void setStrength(ECollationStrength newStrength); |
|
630 |
|
631 /** |
|
632 * Retrieves the reordering codes for this collator. |
|
633 * @param dest The array to fill with the script ordering. |
|
634 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function |
|
635 * will only return the length of the result without writing any of the result string (pre-flighting). |
|
636 * @param status A reference to an error code value, which must not indicate |
|
637 * a failure before the function call. |
|
638 * @return The length of the script ordering array. |
|
639 * @see ucol_setReorderCodes |
|
640 * @see Collator#getEquivalentReorderCodes |
|
641 * @see Collator#setReorderCodes |
|
642 * @see UScriptCode |
|
643 * @see UColReorderCode |
|
644 * @stable ICU 4.8 |
|
645 */ |
|
646 virtual int32_t getReorderCodes(int32_t *dest, |
|
647 int32_t destCapacity, |
|
648 UErrorCode& status) const; |
|
649 |
|
650 /** |
|
651 * Sets the ordering of scripts for this collator. |
|
652 * |
|
653 * <p>The reordering codes are a combination of script codes and reorder codes. |
|
654 * @param reorderCodes An array of script codes in the new order. This can be NULL if the |
|
655 * length is also set to 0. An empty array will clear any reordering codes on the collator. |
|
656 * @param reorderCodesLength The length of reorderCodes. |
|
657 * @param status error code |
|
658 * @see Collator#getReorderCodes |
|
659 * @see Collator#getEquivalentReorderCodes |
|
660 * @see UScriptCode |
|
661 * @see UColReorderCode |
|
662 * @stable ICU 4.8 |
|
663 */ |
|
664 virtual void setReorderCodes(const int32_t* reorderCodes, |
|
665 int32_t reorderCodesLength, |
|
666 UErrorCode& status) ; |
|
667 |
|
668 /** |
|
669 * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder |
|
670 * codes will be grouped and must reorder together. |
|
671 * @param reorderCode The reorder code to determine equivalence for. |
|
672 * @param dest The array to fill with the script equivalence reordering codes. |
|
673 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the |
|
674 * function will only return the length of the result without writing any of the result |
|
675 * string (pre-flighting). |
|
676 * @param status A reference to an error code value, which must not indicate |
|
677 * a failure before the function call. |
|
678 * @return The length of the reordering code equivalence array. |
|
679 * @see ucol_setReorderCodes |
|
680 * @see Collator#getReorderCodes |
|
681 * @see Collator#setReorderCodes |
|
682 * @see UScriptCode |
|
683 * @see UColReorderCode |
|
684 * @stable ICU 4.8 |
|
685 */ |
|
686 static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode, |
|
687 int32_t* dest, |
|
688 int32_t destCapacity, |
|
689 UErrorCode& status); |
|
690 |
|
691 /** |
|
692 * Get name of the object for the desired Locale, in the desired language. |
|
693 * @param objectLocale must be from getAvailableLocales |
|
694 * @param displayLocale specifies the desired locale for output |
|
695 * @param name the fill-in parameter of the return value |
|
696 * @return display-able name of the object for the object locale in the |
|
697 * desired language |
|
698 * @stable ICU 2.0 |
|
699 */ |
|
700 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, |
|
701 const Locale& displayLocale, |
|
702 UnicodeString& name); |
|
703 |
|
704 /** |
|
705 * Get name of the object for the desired Locale, in the language of the |
|
706 * default locale. |
|
707 * @param objectLocale must be from getAvailableLocales |
|
708 * @param name the fill-in parameter of the return value |
|
709 * @return name of the object for the desired locale in the default language |
|
710 * @stable ICU 2.0 |
|
711 */ |
|
712 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, |
|
713 UnicodeString& name); |
|
714 |
|
715 /** |
|
716 * Get the set of Locales for which Collations are installed. |
|
717 * |
|
718 * <p>Note this does not include locales supported by registered collators. |
|
719 * If collators might have been registered, use the overload of getAvailableLocales |
|
720 * that returns a StringEnumeration.</p> |
|
721 * |
|
722 * @param count the output parameter of number of elements in the locale list |
|
723 * @return the list of available locales for which collations are installed |
|
724 * @stable ICU 2.0 |
|
725 */ |
|
726 static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); |
|
727 |
|
728 /** |
|
729 * Return a StringEnumeration over the locales available at the time of the call, |
|
730 * including registered locales. If a severe error occurs (such as out of memory |
|
731 * condition) this will return null. If there is no locale data, an empty enumeration |
|
732 * will be returned. |
|
733 * @return a StringEnumeration over the locales available at the time of the call |
|
734 * @stable ICU 2.6 |
|
735 */ |
|
736 static StringEnumeration* U_EXPORT2 getAvailableLocales(void); |
|
737 |
|
738 /** |
|
739 * Create a string enumerator of all possible keywords that are relevant to |
|
740 * collation. At this point, the only recognized keyword for this |
|
741 * service is "collation". |
|
742 * @param status input-output error code |
|
743 * @return a string enumeration over the keyword strings. The caller is |
|
744 * responsible for deleting the result. |
|
745 * @stable ICU 3.0 |
|
746 */ |
|
747 static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status); |
|
748 |
|
749 /** |
|
750 * Given a keyword, create a string enumeration of all values |
|
751 * for that keyword that are currently in use. |
|
752 * @param keyword a particular keyword as enumerated by |
|
753 * ucol_getKeywords. If any other keyword is passed in, status is set |
|
754 * to U_ILLEGAL_ARGUMENT_ERROR. |
|
755 * @param status input-output error code |
|
756 * @return a string enumeration over collation keyword values, or NULL |
|
757 * upon error. The caller is responsible for deleting the result. |
|
758 * @stable ICU 3.0 |
|
759 */ |
|
760 static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status); |
|
761 |
|
762 /** |
|
763 * Given a key and a locale, returns an array of string values in a preferred |
|
764 * order that would make a difference. These are all and only those values where |
|
765 * the open (creation) of the service with the locale formed from the input locale |
|
766 * plus input keyword and that value has different behavior than creation with the |
|
767 * input locale alone. |
|
768 * @param keyword one of the keys supported by this service. For now, only |
|
769 * "collation" is supported. |
|
770 * @param locale the locale |
|
771 * @param commonlyUsed if set to true it will return only commonly used values |
|
772 * with the given locale in preferred order. Otherwise, |
|
773 * it will return all the available values for the locale. |
|
774 * @param status ICU status |
|
775 * @return a string enumeration over keyword values for the given key and the locale. |
|
776 * @stable ICU 4.2 |
|
777 */ |
|
778 static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale, |
|
779 UBool commonlyUsed, UErrorCode& status); |
|
780 |
|
781 /** |
|
782 * Return the functionally equivalent locale for the given |
|
783 * requested locale, with respect to given keyword, for the |
|
784 * collation service. If two locales return the same result, then |
|
785 * collators instantiated for these locales will behave |
|
786 * equivalently. The converse is not always true; two collators |
|
787 * may in fact be equivalent, but return different results, due to |
|
788 * internal details. The return result has no other meaning than |
|
789 * that stated above, and implies nothing as to the relationship |
|
790 * between the two locales. This is intended for use by |
|
791 * applications that wish to cache collators, or otherwise reuse |
|
792 * collators when possible. The functional equivalent may change |
|
793 * over time. For more information, please see the <a |
|
794 * href="http://icu-project.org/userguide/locale.html#services"> |
|
795 * Locales and Services</a> section of the ICU User Guide. |
|
796 * @param keyword a particular keyword as enumerated by |
|
797 * ucol_getKeywords. |
|
798 * @param locale the requested locale |
|
799 * @param isAvailable reference to a fill-in parameter that |
|
800 * indicates whether the requested locale was 'available' to the |
|
801 * collation service. A locale is defined as 'available' if it |
|
802 * physically exists within the collation locale data. |
|
803 * @param status reference to input-output error code |
|
804 * @return the functionally equivalent collation locale, or the root |
|
805 * locale upon error. |
|
806 * @stable ICU 3.0 |
|
807 */ |
|
808 static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale, |
|
809 UBool& isAvailable, UErrorCode& status); |
|
810 |
|
811 #if !UCONFIG_NO_SERVICE |
|
812 /** |
|
813 * Register a new Collator. The collator will be adopted. |
|
814 * @param toAdopt the Collator instance to be adopted |
|
815 * @param locale the locale with which the collator will be associated |
|
816 * @param status the in/out status code, no special meanings are assigned |
|
817 * @return a registry key that can be used to unregister this collator |
|
818 * @stable ICU 2.6 |
|
819 */ |
|
820 static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status); |
|
821 |
|
822 /** |
|
823 * Register a new CollatorFactory. The factory will be adopted. |
|
824 * @param toAdopt the CollatorFactory instance to be adopted |
|
825 * @param status the in/out status code, no special meanings are assigned |
|
826 * @return a registry key that can be used to unregister this factory |
|
827 * @stable ICU 2.6 |
|
828 */ |
|
829 static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status); |
|
830 |
|
831 /** |
|
832 * Unregister a previously-registered Collator or CollatorFactory |
|
833 * using the key returned from the register call. Key becomes |
|
834 * invalid after a successful call and should not be used again. |
|
835 * The object corresponding to the key will be deleted. |
|
836 * @param key the registry key returned by a previous call to registerInstance or registerFactory |
|
837 * @param status the in/out status code, no special meanings are assigned |
|
838 * @return TRUE if the collator or factory for the key was successfully unregistered |
|
839 * @stable ICU 2.6 |
|
840 */ |
|
841 static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); |
|
842 #endif /* UCONFIG_NO_SERVICE */ |
|
843 |
|
844 /** |
|
845 * Gets the version information for a Collator. |
|
846 * @param info the version # information, the result will be filled in |
|
847 * @stable ICU 2.0 |
|
848 */ |
|
849 virtual void getVersion(UVersionInfo info) const = 0; |
|
850 |
|
851 /** |
|
852 * Returns a unique class ID POLYMORPHICALLY. Pure virtual method. |
|
853 * This method is to implement a simple version of RTTI, since not all C++ |
|
854 * compilers support genuine RTTI. Polymorphic operator==() and clone() |
|
855 * methods call this method. |
|
856 * @return The class ID for this object. All objects of a given class have |
|
857 * the same class ID. Objects of other classes have different class |
|
858 * IDs. |
|
859 * @stable ICU 2.0 |
|
860 */ |
|
861 virtual UClassID getDynamicClassID(void) const = 0; |
|
862 |
|
863 /** |
|
864 * Universal attribute setter |
|
865 * @param attr attribute type |
|
866 * @param value attribute value |
|
867 * @param status to indicate whether the operation went on smoothly or |
|
868 * there were errors |
|
869 * @stable ICU 2.2 |
|
870 */ |
|
871 virtual void setAttribute(UColAttribute attr, UColAttributeValue value, |
|
872 UErrorCode &status) = 0; |
|
873 |
|
874 /** |
|
875 * Universal attribute getter |
|
876 * @param attr attribute type |
|
877 * @param status to indicate whether the operation went on smoothly or |
|
878 * there were errors |
|
879 * @return attribute value |
|
880 * @stable ICU 2.2 |
|
881 */ |
|
882 virtual UColAttributeValue getAttribute(UColAttribute attr, |
|
883 UErrorCode &status) const = 0; |
|
884 |
|
885 /** |
|
886 * Sets the variable top to a collation element value of a string supplied. |
|
887 * @param varTop one or more (if contraction) UChars to which the variable top should be set |
|
888 * @param len length of variable top string. If -1 it is considered to be zero terminated. |
|
889 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> |
|
890 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> |
|
891 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes |
|
892 * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined |
|
893 * @stable ICU 2.0 |
|
894 */ |
|
895 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0; |
|
896 |
|
897 /** |
|
898 * Sets the variable top to a collation element value of a string supplied. |
|
899 * @param varTop a UnicodeString of size 1 or more (if contraction) of UChars to which the variable top should be set |
|
900 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> |
|
901 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> |
|
902 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes |
|
903 * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined |
|
904 * @stable ICU 2.0 |
|
905 */ |
|
906 virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) = 0; |
|
907 |
|
908 /** |
|
909 * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits. |
|
910 * Lower 16 bits are ignored. |
|
911 * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop |
|
912 * @param status error code (not changed by function) |
|
913 * @stable ICU 2.0 |
|
914 */ |
|
915 virtual void setVariableTop(uint32_t varTop, UErrorCode &status) = 0; |
|
916 |
|
917 /** |
|
918 * Gets the variable top value of a Collator. |
|
919 * @param status error code (not changed by function). If error code is set, the return value is undefined. |
|
920 * @return the variable top value. Lower 16 bits are undefined and should be ignored. |
|
921 * @stable ICU 2.0 |
|
922 */ |
|
923 virtual uint32_t getVariableTop(UErrorCode &status) const = 0; |
|
924 |
|
925 /** |
|
926 * Get an UnicodeSet that contains all the characters and sequences |
|
927 * tailored in this collator. |
|
928 * @param status error code of the operation |
|
929 * @return a pointer to a UnicodeSet object containing all the |
|
930 * code points and sequences that may sort differently than |
|
931 * in the UCA. The object must be disposed of by using delete |
|
932 * @stable ICU 2.4 |
|
933 */ |
|
934 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; |
|
935 |
|
936 /** |
|
937 * Same as clone(). |
|
938 * The base class implementation simply calls clone(). |
|
939 * @return a copy of this object, owned by the caller |
|
940 * @see clone() |
|
941 * @deprecated ICU 50 no need to have two methods for cloning |
|
942 */ |
|
943 virtual Collator* safeClone(void) const; |
|
944 |
|
945 /** |
|
946 * Get the sort key as an array of bytes from a UnicodeString. |
|
947 * Sort key byte arrays are zero-terminated and can be compared using |
|
948 * strcmp(). |
|
949 * @param source string to be processed. |
|
950 * @param result buffer to store result in. If NULL, number of bytes needed |
|
951 * will be returned. |
|
952 * @param resultLength length of the result buffer. If it is not large enough, the |
|
953 * buffer will be filled to capacity. |
|
954 * @return Number of bytes needed for storing the sort key |
|
955 * @stable ICU 2.2 |
|
956 */ |
|
957 virtual int32_t getSortKey(const UnicodeString& source, |
|
958 uint8_t* result, |
|
959 int32_t resultLength) const = 0; |
|
960 |
|
961 /** |
|
962 * Get the sort key as an array of bytes from a UChar buffer. |
|
963 * Sort key byte arrays are zero-terminated and can be compared using |
|
964 * strcmp(). |
|
965 * @param source string to be processed. |
|
966 * @param sourceLength length of string to be processed. |
|
967 * If -1, the string is 0 terminated and length will be decided by the |
|
968 * function. |
|
969 * @param result buffer to store result in. If NULL, number of bytes needed |
|
970 * will be returned. |
|
971 * @param resultLength length of the result buffer. If it is not large enough, the |
|
972 * buffer will be filled to capacity. |
|
973 * @return Number of bytes needed for storing the sort key |
|
974 * @stable ICU 2.2 |
|
975 */ |
|
976 virtual int32_t getSortKey(const UChar*source, int32_t sourceLength, |
|
977 uint8_t*result, int32_t resultLength) const = 0; |
|
978 |
|
979 /** |
|
980 * Produce a bound for a given sortkey and a number of levels. |
|
981 * Return value is always the number of bytes needed, regardless of |
|
982 * whether the result buffer was big enough or even valid.<br> |
|
983 * Resulting bounds can be used to produce a range of strings that are |
|
984 * between upper and lower bounds. For example, if bounds are produced |
|
985 * for a sortkey of string "smith", strings between upper and lower |
|
986 * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> |
|
987 * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER |
|
988 * is produced, strings matched would be as above. However, if bound |
|
989 * produced using UCOL_BOUND_UPPER_LONG is used, the above example will |
|
990 * also match "Smithsonian" and similar.<br> |
|
991 * For more on usage, see example in cintltst/capitst.c in procedure |
|
992 * TestBounds. |
|
993 * Sort keys may be compared using <TT>strcmp</TT>. |
|
994 * @param source The source sortkey. |
|
995 * @param sourceLength The length of source, or -1 if null-terminated. |
|
996 * (If an unmodified sortkey is passed, it is always null |
|
997 * terminated). |
|
998 * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which |
|
999 * produces a lower inclusive bound, UCOL_BOUND_UPPER, that |
|
1000 * produces upper bound that matches strings of the same length |
|
1001 * or UCOL_BOUND_UPPER_LONG that matches strings that have the |
|
1002 * same starting substring as the source string. |
|
1003 * @param noOfLevels Number of levels required in the resulting bound (for most |
|
1004 * uses, the recommended value is 1). See users guide for |
|
1005 * explanation on number of levels a sortkey can have. |
|
1006 * @param result A pointer to a buffer to receive the resulting sortkey. |
|
1007 * @param resultLength The maximum size of result. |
|
1008 * @param status Used for returning error code if something went wrong. If the |
|
1009 * number of levels requested is higher than the number of levels |
|
1010 * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is |
|
1011 * issued. |
|
1012 * @return The size needed to fully store the bound. |
|
1013 * @see ucol_keyHashCode |
|
1014 * @stable ICU 2.1 |
|
1015 */ |
|
1016 static int32_t U_EXPORT2 getBound(const uint8_t *source, |
|
1017 int32_t sourceLength, |
|
1018 UColBoundMode boundType, |
|
1019 uint32_t noOfLevels, |
|
1020 uint8_t *result, |
|
1021 int32_t resultLength, |
|
1022 UErrorCode &status); |
|
1023 |
|
1024 |
|
1025 protected: |
|
1026 |
|
1027 // Collator protected constructors ------------------------------------- |
|
1028 |
|
1029 /** |
|
1030 * Default constructor. |
|
1031 * Constructor is different from the old default Collator constructor. |
|
1032 * The task for determining the default collation strength and normalization |
|
1033 * mode is left to the child class. |
|
1034 * @stable ICU 2.0 |
|
1035 */ |
|
1036 Collator(); |
|
1037 |
|
1038 #ifndef U_HIDE_DEPRECATED_API |
|
1039 /** |
|
1040 * Constructor. |
|
1041 * Empty constructor, does not handle the arguments. |
|
1042 * This constructor is done for backward compatibility with 1.7 and 1.8. |
|
1043 * The task for handling the argument collation strength and normalization |
|
1044 * mode is left to the child class. |
|
1045 * @param collationStrength collation strength |
|
1046 * @param decompositionMode normalization mode |
|
1047 * @deprecated ICU 2.4. Subclasses should use the default constructor |
|
1048 * instead and handle the strength and normalization mode themselves. |
|
1049 */ |
|
1050 Collator(UCollationStrength collationStrength, |
|
1051 UNormalizationMode decompositionMode); |
|
1052 #endif /* U_HIDE_DEPRECATED_API */ |
|
1053 |
|
1054 /** |
|
1055 * Copy constructor. |
|
1056 * @param other Collator object to be copied from |
|
1057 * @stable ICU 2.0 |
|
1058 */ |
|
1059 Collator(const Collator& other); |
|
1060 |
|
1061 // Collator protected methods ----------------------------------------- |
|
1062 |
|
1063 |
|
1064 /** |
|
1065 * Used internally by registration to define the requested, valid and actual locales. |
|
1066 * @param requestedLocale the requested locale |
|
1067 * @param validLocale the valid locale |
|
1068 * @param actualLocale the actual locale |
|
1069 * @internal |
|
1070 */ |
|
1071 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); |
|
1072 |
|
1073 public: |
|
1074 #if !UCONFIG_NO_SERVICE |
|
1075 #ifndef U_HIDE_INTERNAL_API |
|
1076 /** |
|
1077 * used only by ucol_open, not for public use |
|
1078 * @internal |
|
1079 */ |
|
1080 static UCollator* createUCollator(const char* loc, UErrorCode* status); |
|
1081 #endif /* U_HIDE_INTERNAL_API */ |
|
1082 #endif |
|
1083 |
|
1084 /** Get the short definition string for a collator. This internal API harvests the collator's |
|
1085 * locale and the attribute set and produces a string that can be used for opening |
|
1086 * a collator with the same properties using the ucol_openFromShortString API. |
|
1087 * This string will be normalized. |
|
1088 * The structure and the syntax of the string is defined in the "Naming collators" |
|
1089 * section of the users guide: |
|
1090 * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators |
|
1091 * This function supports preflighting. |
|
1092 * |
|
1093 * This is internal, and intended to be used with delegate converters. |
|
1094 * |
|
1095 * @param locale a locale that will appear as the collator's locale in the resulting |
|
1096 * short string definition. If NULL, the locale will be harvested |
|
1097 * from the collator. |
|
1098 * @param buffer space to hold the resulting string |
|
1099 * @param capacity capacity of the buffer |
|
1100 * @param status for returning errors. All the preflighting errors are featured |
|
1101 * @return length of the resulting string |
|
1102 * @see ucol_openFromShortString |
|
1103 * @see ucol_normalizeShortDefinitionString |
|
1104 * @see ucol_getShortDefinitionString |
|
1105 * @internal |
|
1106 */ |
|
1107 virtual int32_t internalGetShortDefinitionString(const char *locale, |
|
1108 char *buffer, |
|
1109 int32_t capacity, |
|
1110 UErrorCode &status) const; |
|
1111 private: |
|
1112 /** |
|
1113 * Assignment operator. Private for now. |
|
1114 * @internal |
|
1115 */ |
|
1116 Collator& operator=(const Collator& other); |
|
1117 |
|
1118 friend class CFactory; |
|
1119 friend class SimpleCFactory; |
|
1120 friend class ICUCollatorFactory; |
|
1121 friend class ICUCollatorService; |
|
1122 static Collator* makeInstance(const Locale& desiredLocale, |
|
1123 UErrorCode& status); |
|
1124 |
|
1125 // Collator private data members --------------------------------------- |
|
1126 |
|
1127 /* |
|
1128 synwee : removed as attributes to be handled by child class |
|
1129 UCollationStrength strength; |
|
1130 Normalizer::EMode decmp; |
|
1131 */ |
|
1132 /* This is useless information */ |
|
1133 /* static const UVersionInfo fVersion;*/ |
|
1134 }; |
|
1135 |
|
1136 #if !UCONFIG_NO_SERVICE |
|
1137 /** |
|
1138 * A factory, used with registerFactory, that creates multiple collators and provides |
|
1139 * display names for them. A factory supports some number of locales-- these are the |
|
1140 * locales for which it can create collators. The factory can be visible, in which |
|
1141 * case the supported locales will be enumerated by getAvailableLocales, or invisible, |
|
1142 * in which case they are not. Invisible locales are still supported, they are just not |
|
1143 * listed by getAvailableLocales. |
|
1144 * <p> |
|
1145 * If standard locale display names are sufficient, Collator instances can |
|
1146 * be registered using registerInstance instead.</p> |
|
1147 * <p> |
|
1148 * Note: if the collators are to be used from C APIs, they must be instances |
|
1149 * of RuleBasedCollator.</p> |
|
1150 * |
|
1151 * @stable ICU 2.6 |
|
1152 */ |
|
1153 class U_I18N_API CollatorFactory : public UObject { |
|
1154 public: |
|
1155 |
|
1156 /** |
|
1157 * Destructor |
|
1158 * @stable ICU 3.0 |
|
1159 */ |
|
1160 virtual ~CollatorFactory(); |
|
1161 |
|
1162 /** |
|
1163 * Return true if this factory is visible. Default is true. |
|
1164 * If not visible, the locales supported by this factory will not |
|
1165 * be listed by getAvailableLocales. |
|
1166 * @return true if the factory is visible. |
|
1167 * @stable ICU 2.6 |
|
1168 */ |
|
1169 virtual UBool visible(void) const; |
|
1170 |
|
1171 /** |
|
1172 * Return a collator for the provided locale. If the locale |
|
1173 * is not supported, return NULL. |
|
1174 * @param loc the locale identifying the collator to be created. |
|
1175 * @return a new collator if the locale is supported, otherwise NULL. |
|
1176 * @stable ICU 2.6 |
|
1177 */ |
|
1178 virtual Collator* createCollator(const Locale& loc) = 0; |
|
1179 |
|
1180 /** |
|
1181 * Return the name of the collator for the objectLocale, localized for the displayLocale. |
|
1182 * If objectLocale is not supported, or the factory is not visible, set the result string |
|
1183 * to bogus. |
|
1184 * @param objectLocale the locale identifying the collator |
|
1185 * @param displayLocale the locale for which the display name of the collator should be localized |
|
1186 * @param result an output parameter for the display name, set to bogus if not supported. |
|
1187 * @return the display name |
|
1188 * @stable ICU 2.6 |
|
1189 */ |
|
1190 virtual UnicodeString& getDisplayName(const Locale& objectLocale, |
|
1191 const Locale& displayLocale, |
|
1192 UnicodeString& result); |
|
1193 |
|
1194 /** |
|
1195 * Return an array of all the locale names directly supported by this factory. |
|
1196 * The number of names is returned in count. This array is owned by the factory. |
|
1197 * Its contents must never change. |
|
1198 * @param count output parameter for the number of locales supported by the factory |
|
1199 * @param status the in/out error code |
|
1200 * @return a pointer to an array of count UnicodeStrings. |
|
1201 * @stable ICU 2.6 |
|
1202 */ |
|
1203 virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0; |
|
1204 }; |
|
1205 #endif /* UCONFIG_NO_SERVICE */ |
|
1206 |
|
1207 // Collator inline methods ----------------------------------------------- |
|
1208 |
|
1209 U_NAMESPACE_END |
|
1210 |
|
1211 #endif /* #if !UCONFIG_NO_COLLATION */ |
|
1212 |
|
1213 #endif |