1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/unicode/coleitr.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,384 @@ 1.4 +/* 1.5 + ****************************************************************************** 1.6 + * Copyright (C) 1997-2013, International Business Machines 1.7 + * Corporation and others. All Rights Reserved. 1.8 + ****************************************************************************** 1.9 + */ 1.10 + 1.11 +/** 1.12 + * \file 1.13 + * \brief C++ API: Collation Element Iterator. 1.14 + */ 1.15 + 1.16 +/** 1.17 +* File coleitr.h 1.18 +* 1.19 +* 1.20 +* 1.21 +* Created by: Helena Shih 1.22 +* 1.23 +* Modification History: 1.24 +* 1.25 +* Date Name Description 1.26 +* 1.27 +* 8/18/97 helena Added internal API documentation. 1.28 +* 08/03/98 erm Synched with 1.2 version CollationElementIterator.java 1.29 +* 12/10/99 aliu Ported Thai collation support from Java. 1.30 +* 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h) 1.31 +* 02/19/01 swquek Removed CollationElementsIterator() since it is 1.32 +* private constructor and no calls are made to it 1.33 +*/ 1.34 + 1.35 +#ifndef COLEITR_H 1.36 +#define COLEITR_H 1.37 + 1.38 +#include "unicode/utypes.h" 1.39 + 1.40 + 1.41 +#if !UCONFIG_NO_COLLATION 1.42 + 1.43 +#include "unicode/uobject.h" 1.44 +#include "unicode/tblcoll.h" 1.45 +#include "unicode/ucoleitr.h" 1.46 + 1.47 +/** 1.48 + * The UCollationElements struct. 1.49 + * For usage in C programs. 1.50 + * @stable ICU 2.0 1.51 + */ 1.52 +typedef struct UCollationElements UCollationElements; 1.53 + 1.54 +U_NAMESPACE_BEGIN 1.55 + 1.56 +/** 1.57 +* The CollationElementIterator class is used as an iterator to walk through 1.58 +* each character of an international string. Use the iterator to return the 1.59 +* ordering priority of the positioned character. The ordering priority of a 1.60 +* character, which we refer to as a key, defines how a character is collated in 1.61 +* the given collation object. 1.62 +* For example, consider the following in Spanish: 1.63 +* <pre> 1.64 +* "ca" -> the first key is key('c') and second key is key('a'). 1.65 +* "cha" -> the first key is key('ch') and second key is key('a').</pre> 1.66 +* And in German, 1.67 +* <pre> \htmlonly "æb"-> the first key is key('a'), the second key is key('e'), and 1.68 +* the third key is key('b'). \endhtmlonly </pre> 1.69 +* The key of a character, is an integer composed of primary order(short), 1.70 +* secondary order(char), and tertiary order(char). Java strictly defines the 1.71 +* size and signedness of its primitive data types. Therefore, the static 1.72 +* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return 1.73 +* int32_t to ensure the correctness of the key value. 1.74 +* <p>Example of the iterator usage: (without error checking) 1.75 +* <pre> 1.76 +* \code 1.77 +* void CollationElementIterator_Example() 1.78 +* { 1.79 +* UnicodeString str = "This is a test"; 1.80 +* UErrorCode success = U_ZERO_ERROR; 1.81 +* RuleBasedCollator* rbc = 1.82 +* (RuleBasedCollator*) RuleBasedCollator::createInstance(success); 1.83 +* CollationElementIterator* c = 1.84 +* rbc->createCollationElementIterator( str ); 1.85 +* int32_t order = c->next(success); 1.86 +* c->reset(); 1.87 +* order = c->previous(success); 1.88 +* delete c; 1.89 +* delete rbc; 1.90 +* } 1.91 +* \endcode 1.92 +* </pre> 1.93 +* <p> 1.94 +* The method next() returns the collation order of the next character based on 1.95 +* the comparison level of the collator. The method previous() returns the 1.96 +* collation order of the previous character based on the comparison level of 1.97 +* the collator. The Collation Element Iterator moves only in one direction 1.98 +* between calls to reset(), setOffset(), or setText(). That is, next() 1.99 +* and previous() can not be inter-used. Whenever previous() is to be called after 1.100 +* next() or vice versa, reset(), setOffset() or setText() has to be called first 1.101 +* to reset the status, shifting pointers to either the end or the start of 1.102 +* the string (reset() or setText()), or the specified position (setOffset()). 1.103 +* Hence at the next call of next() or previous(), the first or last collation order, 1.104 +* or collation order at the spefcifieid position will be returned. If a change of 1.105 +* direction is done without one of these calls, the result is undefined. 1.106 +* <p> 1.107 +* The result of a forward iterate (next()) and reversed result of the backward 1.108 +* iterate (previous()) on the same string are equivalent, if collation orders 1.109 +* with the value UCOL_IGNORABLE are ignored. 1.110 +* Character based on the comparison level of the collator. A collation order 1.111 +* consists of primary order, secondary order and tertiary order. The data 1.112 +* type of the collation order is <strong>t_int32</strong>. 1.113 +* 1.114 +* Note, CollationElementIterator should not be subclassed. 1.115 +* @see Collator 1.116 +* @see RuleBasedCollator 1.117 +* @version 1.8 Jan 16 2001 1.118 +*/ 1.119 +class U_I18N_API CollationElementIterator : public UObject { 1.120 +public: 1.121 + 1.122 + // CollationElementIterator public data member ------------------------------ 1.123 + 1.124 + enum { 1.125 + /** 1.126 + * NULLORDER indicates that an error has occured while processing 1.127 + * @stable ICU 2.0 1.128 + */ 1.129 + NULLORDER = (int32_t)0xffffffff 1.130 + }; 1.131 + 1.132 + // CollationElementIterator public constructor/destructor ------------------- 1.133 + 1.134 + /** 1.135 + * Copy constructor. 1.136 + * 1.137 + * @param other the object to be copied from 1.138 + * @stable ICU 2.0 1.139 + */ 1.140 + CollationElementIterator(const CollationElementIterator& other); 1.141 + 1.142 + /** 1.143 + * Destructor 1.144 + * @stable ICU 2.0 1.145 + */ 1.146 + virtual ~CollationElementIterator(); 1.147 + 1.148 + // CollationElementIterator public methods ---------------------------------- 1.149 + 1.150 + /** 1.151 + * Returns true if "other" is the same as "this" 1.152 + * 1.153 + * @param other the object to be compared 1.154 + * @return true if "other" is the same as "this" 1.155 + * @stable ICU 2.0 1.156 + */ 1.157 + UBool operator==(const CollationElementIterator& other) const; 1.158 + 1.159 + /** 1.160 + * Returns true if "other" is not the same as "this". 1.161 + * 1.162 + * @param other the object to be compared 1.163 + * @return true if "other" is not the same as "this" 1.164 + * @stable ICU 2.0 1.165 + */ 1.166 + UBool operator!=(const CollationElementIterator& other) const; 1.167 + 1.168 + /** 1.169 + * Resets the cursor to the beginning of the string. 1.170 + * @stable ICU 2.0 1.171 + */ 1.172 + void reset(void); 1.173 + 1.174 + /** 1.175 + * Gets the ordering priority of the next character in the string. 1.176 + * @param status the error code status. 1.177 + * @return the next character's ordering. otherwise returns NULLORDER if an 1.178 + * error has occured or if the end of string has been reached 1.179 + * @stable ICU 2.0 1.180 + */ 1.181 + int32_t next(UErrorCode& status); 1.182 + 1.183 + /** 1.184 + * Get the ordering priority of the previous collation element in the string. 1.185 + * @param status the error code status. 1.186 + * @return the previous element's ordering. otherwise returns NULLORDER if an 1.187 + * error has occured or if the start of string has been reached 1.188 + * @stable ICU 2.0 1.189 + */ 1.190 + int32_t previous(UErrorCode& status); 1.191 + 1.192 + /** 1.193 + * Gets the primary order of a collation order. 1.194 + * @param order the collation order 1.195 + * @return the primary order of a collation order. 1.196 + * @stable ICU 2.0 1.197 + */ 1.198 + static inline int32_t primaryOrder(int32_t order); 1.199 + 1.200 + /** 1.201 + * Gets the secondary order of a collation order. 1.202 + * @param order the collation order 1.203 + * @return the secondary order of a collation order. 1.204 + * @stable ICU 2.0 1.205 + */ 1.206 + static inline int32_t secondaryOrder(int32_t order); 1.207 + 1.208 + /** 1.209 + * Gets the tertiary order of a collation order. 1.210 + * @param order the collation order 1.211 + * @return the tertiary order of a collation order. 1.212 + * @stable ICU 2.0 1.213 + */ 1.214 + static inline int32_t tertiaryOrder(int32_t order); 1.215 + 1.216 + /** 1.217 + * Return the maximum length of any expansion sequences that end with the 1.218 + * specified comparison order. 1.219 + * @param order a collation order returned by previous or next. 1.220 + * @return maximum size of the expansion sequences ending with the collation 1.221 + * element or 1 if collation element does not occur at the end of any 1.222 + * expansion sequence 1.223 + * @stable ICU 2.0 1.224 + */ 1.225 + int32_t getMaxExpansion(int32_t order) const; 1.226 + 1.227 + /** 1.228 + * Gets the comparison order in the desired strength. Ignore the other 1.229 + * differences. 1.230 + * @param order The order value 1.231 + * @stable ICU 2.0 1.232 + */ 1.233 + int32_t strengthOrder(int32_t order) const; 1.234 + 1.235 + /** 1.236 + * Sets the source string. 1.237 + * @param str the source string. 1.238 + * @param status the error code status. 1.239 + * @stable ICU 2.0 1.240 + */ 1.241 + void setText(const UnicodeString& str, UErrorCode& status); 1.242 + 1.243 + /** 1.244 + * Sets the source string. 1.245 + * @param str the source character iterator. 1.246 + * @param status the error code status. 1.247 + * @stable ICU 2.0 1.248 + */ 1.249 + void setText(CharacterIterator& str, UErrorCode& status); 1.250 + 1.251 + /** 1.252 + * Checks if a comparison order is ignorable. 1.253 + * @param order the collation order. 1.254 + * @return TRUE if a character is ignorable, FALSE otherwise. 1.255 + * @stable ICU 2.0 1.256 + */ 1.257 + static inline UBool isIgnorable(int32_t order); 1.258 + 1.259 + /** 1.260 + * Gets the offset of the currently processed character in the source string. 1.261 + * @return the offset of the character. 1.262 + * @stable ICU 2.0 1.263 + */ 1.264 + int32_t getOffset(void) const; 1.265 + 1.266 + /** 1.267 + * Sets the offset of the currently processed character in the source string. 1.268 + * @param newOffset the new offset. 1.269 + * @param status the error code status. 1.270 + * @return the offset of the character. 1.271 + * @stable ICU 2.0 1.272 + */ 1.273 + void setOffset(int32_t newOffset, UErrorCode& status); 1.274 + 1.275 + /** 1.276 + * ICU "poor man's RTTI", returns a UClassID for the actual class. 1.277 + * 1.278 + * @stable ICU 2.2 1.279 + */ 1.280 + virtual UClassID getDynamicClassID() const; 1.281 + 1.282 + /** 1.283 + * ICU "poor man's RTTI", returns a UClassID for this class. 1.284 + * 1.285 + * @stable ICU 2.2 1.286 + */ 1.287 + static UClassID U_EXPORT2 getStaticClassID(); 1.288 + 1.289 +private: 1.290 + friend class RuleBasedCollator; 1.291 + 1.292 + /** 1.293 + * CollationElementIterator constructor. This takes the source string and the 1.294 + * collation object. The cursor will walk thru the source string based on the 1.295 + * predefined collation rules. If the source string is empty, NULLORDER will 1.296 + * be returned on the calls to next(). 1.297 + * @param sourceText the source string. 1.298 + * @param order the collation object. 1.299 + * @param status the error code status. 1.300 + */ 1.301 + CollationElementIterator(const UnicodeString& sourceText, 1.302 + const RuleBasedCollator* order, UErrorCode& status); 1.303 + 1.304 + /** 1.305 + * CollationElementIterator constructor. This takes the source string and the 1.306 + * collation object. The cursor will walk thru the source string based on the 1.307 + * predefined collation rules. If the source string is empty, NULLORDER will 1.308 + * be returned on the calls to next(). 1.309 + * @param sourceText the source string. 1.310 + * @param order the collation object. 1.311 + * @param status the error code status. 1.312 + */ 1.313 + CollationElementIterator(const CharacterIterator& sourceText, 1.314 + const RuleBasedCollator* order, UErrorCode& status); 1.315 + 1.316 + /** 1.317 + * Assignment operator 1.318 + * 1.319 + * @param other the object to be copied 1.320 + */ 1.321 + const CollationElementIterator& 1.322 + operator=(const CollationElementIterator& other); 1.323 + 1.324 + CollationElementIterator(); // default constructor not implemented 1.325 + 1.326 + // CollationElementIterator private data members ---------------------------- 1.327 + 1.328 + /** 1.329 + * Data wrapper for collation elements 1.330 + */ 1.331 + UCollationElements *m_data_; 1.332 + 1.333 + /** 1.334 + * Indicates if m_data_ belongs to this object. 1.335 + */ 1.336 + UBool isDataOwned_; 1.337 +}; 1.338 + 1.339 +// CollationElementIterator inline method defination -------------------------- 1.340 + 1.341 +/** 1.342 +* Get the primary order of a collation order. 1.343 +* @param order the collation order 1.344 +* @return the primary order of a collation order. 1.345 +*/ 1.346 +inline int32_t CollationElementIterator::primaryOrder(int32_t order) 1.347 +{ 1.348 + order &= RuleBasedCollator::PRIMARYORDERMASK; 1.349 + return (order >> RuleBasedCollator::PRIMARYORDERSHIFT); 1.350 +} 1.351 + 1.352 +/** 1.353 +* Get the secondary order of a collation order. 1.354 +* @param order the collation order 1.355 +* @return the secondary order of a collation order. 1.356 +*/ 1.357 +inline int32_t CollationElementIterator::secondaryOrder(int32_t order) 1.358 +{ 1.359 + order = order & RuleBasedCollator::SECONDARYORDERMASK; 1.360 + return (order >> RuleBasedCollator::SECONDARYORDERSHIFT); 1.361 +} 1.362 + 1.363 +/** 1.364 +* Get the tertiary order of a collation order. 1.365 +* @param order the collation order 1.366 +* @return the tertiary order of a collation order. 1.367 +*/ 1.368 +inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) 1.369 +{ 1.370 + return (order &= RuleBasedCollator::TERTIARYORDERMASK); 1.371 +} 1.372 + 1.373 +inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const 1.374 +{ 1.375 + return ucol_getMaxExpansion(m_data_, (uint32_t)order); 1.376 +} 1.377 + 1.378 +inline UBool CollationElementIterator::isIgnorable(int32_t order) 1.379 +{ 1.380 + return (primaryOrder(order) == RuleBasedCollator::PRIMIGNORABLE); 1.381 +} 1.382 + 1.383 +U_NAMESPACE_END 1.384 + 1.385 +#endif /* #if !UCONFIG_NO_COLLATION */ 1.386 + 1.387 +#endif