intl/icu/source/i18n/unicode/coleitr.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/unicode/coleitr.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,384 @@
     1.4 +/*
     1.5 + ******************************************************************************
     1.6 + *   Copyright (C) 1997-2013, International Business Machines
     1.7 + *   Corporation and others.  All Rights Reserved.
     1.8 + ******************************************************************************
     1.9 + */
    1.10 +
    1.11 +/**
    1.12 + * \file 
    1.13 + * \brief C++ API: Collation Element Iterator.
    1.14 + */
    1.15 +
    1.16 +/**
    1.17 +* File coleitr.h
    1.18 +*
    1.19 +* 
    1.20 +*
    1.21 +* Created by: Helena Shih
    1.22 +*
    1.23 +* Modification History:
    1.24 +*
    1.25 +*  Date       Name        Description
    1.26 +*
    1.27 +*  8/18/97    helena      Added internal API documentation.
    1.28 +* 08/03/98    erm         Synched with 1.2 version CollationElementIterator.java
    1.29 +* 12/10/99    aliu        Ported Thai collation support from Java.
    1.30 +* 01/25/01    swquek      Modified into a C++ wrapper calling C APIs (ucoliter.h)
    1.31 +* 02/19/01    swquek      Removed CollationElementsIterator() since it is 
    1.32 +*                         private constructor and no calls are made to it
    1.33 +*/
    1.34 +
    1.35 +#ifndef COLEITR_H
    1.36 +#define COLEITR_H
    1.37 +
    1.38 +#include "unicode/utypes.h"
    1.39 +
    1.40 + 
    1.41 +#if !UCONFIG_NO_COLLATION
    1.42 +
    1.43 +#include "unicode/uobject.h"
    1.44 +#include "unicode/tblcoll.h"
    1.45 +#include "unicode/ucoleitr.h"
    1.46 +
    1.47 +/**
    1.48 + * The UCollationElements struct, forward-declared here without its members.
    1.49 + * For usage in C programs; this header only refers to it through a pointer.
    1.50 + * @stable ICU 2.0
    1.51 + */
    1.52 +typedef struct UCollationElements UCollationElements;
    1.53 +
    1.54 +U_NAMESPACE_BEGIN
    1.55 +
    1.56 +/**
    1.57 +* The CollationElementIterator class is used as an iterator to walk through     
    1.58 +* each character of an international string. Use the iterator to return the
    1.59 +* ordering priority of the positioned character. The ordering priority of a 
    1.60 +* character, which we refer to as a key, defines how a character is collated in 
    1.61 +* the given collation object.
    1.62 +* For example, consider the following in Spanish:
    1.63 +* <pre>
    1.64 +*        "ca" -> the first key is key('c') and second key is key('a').
    1.65 +*        "cha" -> the first key is key('ch') and second key is key('a').</pre>
    1.66 +* And in German,
    1.67 +* <pre> \htmlonly       "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
    1.68 +*        the third key is key('b'). \endhtmlonly </pre>
    1.69 +* The key of a character, is an integer composed of primary order(short),
    1.70 +* secondary order(char), and tertiary order(char). Java strictly defines the 
    1.71 +* size and signedness of its primitive data types. Therefore, the static
    1.72 +* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return 
    1.73 +* int32_t to ensure the correctness of the key value.
    1.74 +* <p>Example of the iterator usage: (without error checking)
    1.75 +* <pre>
    1.76 +* \code
    1.77 +*   void CollationElementIterator_Example()
    1.78 +*   {
    1.79 +*       UnicodeString str = "This is a test";
    1.80 +*       UErrorCode success = U_ZERO_ERROR;
    1.81 +*       RuleBasedCollator* rbc =
    1.82 +*           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
    1.83 +*       CollationElementIterator* c =
    1.84 +*           rbc->createCollationElementIterator( str );
    1.85 +*       int32_t order = c->next(success);
    1.86 +*       c->reset();
    1.87 +*       order = c->previous(success);
    1.88 +*       delete c;
    1.89 +*       delete rbc;
    1.90 +*   }
    1.91 +* \endcode
    1.92 +* </pre>
    1.93 +* <p>
    1.94 +* The method next() returns the collation order of the next character based on
    1.95 +* the comparison level of the collator. The method previous() returns the
    1.96 +* collation order of the previous character based on the comparison level of
    1.97 +* the collator. The Collation Element Iterator moves only in one direction
    1.98 +* between calls to reset(), setOffset(), or setText(). That is, next() 
    1.99 +* and previous() cannot be interleaved. Whenever previous() is to be called after
   1.100 +* next() or vice versa, reset(), setOffset() or setText() has to be called first
   1.101 +* to reset the status, shifting pointers to either the end or the start of
   1.102 +* the string (reset() or setText()), or the specified position (setOffset()).
   1.103 +* Hence at the next call of next() or previous(), the first or last collation order,
   1.104 +* or collation order at the specified position will be returned. If a change of
   1.105 +* direction is done without one of these calls, the result is undefined.
   1.106 +* <p>
   1.107 +* The result of a forward iterate (next()) and reversed result of the backward
   1.108 +* iterate (previous()) on the same string are equivalent, if collation orders
   1.109 +* with the value UCOL_IGNORABLE are ignored.
   1.110 +* The collation order of a character is based on the comparison level of the
   1.111 +* collator.  A collation order consists of primary order, secondary order and
   1.112 +* tertiary order.  The data type of the collation order is <strong>int32_t</strong>.
   1.113 +*
   1.114 +* Note, CollationElementIterator should not be subclassed.
   1.115 +* @see     Collator
   1.116 +* @see     RuleBasedCollator
   1.117 +* @version 1.8 Jan 16 2001
   1.118 +*/
   1.119 +class U_I18N_API CollationElementIterator : public UObject {
   1.120 +public: 
   1.121 +
   1.122 +    // CollationElementIterator public data member ------------------------------
   1.123 +
   1.124 +    enum {
   1.125 +        /**
   1.126 +         * NULLORDER signals an error, or that next()/previous() ran out of text
   1.127 +         * @stable ICU 2.0
   1.128 +         */
   1.129 +        NULLORDER = (int32_t)0xffffffff
   1.130 +    };
   1.131 +
   1.132 +    // CollationElementIterator public constructor/destructor -------------------
   1.133 +
   1.134 +    /**
   1.135 +    * Copy constructor.
   1.136 +    *
   1.137 +    * @param other    the object to be copied from
   1.138 +    * @stable ICU 2.0
   1.139 +    */
   1.140 +    CollationElementIterator(const CollationElementIterator& other);
   1.141 +
   1.142 +    /**
   1.143 +    * Destructor
   1.144 +    * @stable ICU 2.0
   1.145 +    */
   1.146 +    virtual ~CollationElementIterator();
   1.147 +
   1.148 +    // CollationElementIterator public methods ----------------------------------
   1.149 +
   1.150 +    /**
   1.151 +    * Returns true if "other" is the same as "this"
   1.152 +    *
   1.153 +    * @param other    the object to be compared
   1.154 +    * @return         true if "other" is the same as "this"
   1.155 +    * @stable ICU 2.0
   1.156 +    */
   1.157 +    UBool operator==(const CollationElementIterator& other) const;
   1.158 +
   1.159 +    /**
   1.160 +    * Returns true if "other" is not the same as "this".
   1.161 +    *
   1.162 +    * @param other    the object to be compared
   1.163 +    * @return         true if "other" is not the same as "this"
   1.164 +    * @stable ICU 2.0
   1.165 +    */
   1.166 +    UBool operator!=(const CollationElementIterator& other) const;
   1.167 +
   1.168 +    /**
   1.169 +    * Resets the cursor to the beginning of the string.
   1.170 +    * @stable ICU 2.0
   1.171 +    */
   1.172 +    void reset(void);
   1.173 +
   1.174 +    /**
   1.175 +    * Gets the ordering priority of the next character in the string.
   1.176 +    * @param status the error code status.
   1.177 +    * @return the next character's ordering, or NULLORDER if an error has
   1.178 +    *         occurred or if the end of string has been reached
   1.179 +    * @stable ICU 2.0
   1.180 +    */
   1.181 +    int32_t next(UErrorCode& status);
   1.182 +
   1.183 +    /**
   1.184 +    * Get the ordering priority of the previous collation element in the string.
   1.185 +    * @param status the error code status.
   1.186 +    * @return the previous element's ordering, or NULLORDER if an error has
   1.187 +    *         occurred or if the start of string has been reached
   1.188 +    * @stable ICU 2.0
   1.189 +    */
   1.190 +    int32_t previous(UErrorCode& status);
   1.191 +
   1.192 +    /**
   1.193 +    * Gets the primary order of a collation order.
   1.194 +    * @param order the collation order
   1.195 +    * @return the primary order of a collation order.
   1.196 +    * @stable ICU 2.0
   1.197 +    */
   1.198 +    static inline int32_t primaryOrder(int32_t order);
   1.199 +
   1.200 +    /**
   1.201 +    * Gets the secondary order of a collation order.
   1.202 +    * @param order the collation order
   1.203 +    * @return the secondary order of a collation order.
   1.204 +    * @stable ICU 2.0
   1.205 +    */
   1.206 +    static inline int32_t secondaryOrder(int32_t order);
   1.207 +
   1.208 +    /**
   1.209 +    * Gets the tertiary order of a collation order.
   1.210 +    * @param order the collation order
   1.211 +    * @return the tertiary order of a collation order.
   1.212 +    * @stable ICU 2.0
   1.213 +    */
   1.214 +    static inline int32_t tertiaryOrder(int32_t order);
   1.215 +
   1.216 +    /**
   1.217 +    * Return the maximum length of any expansion sequences that end with the
   1.218 +    * specified comparison order.
   1.219 +    * @param order a collation order returned by previous or next.
   1.220 +    * @return maximum size of the expansion sequences ending with the collation
   1.221 +    *         element or 1 if collation element does not occur at the end of any
   1.222 +    *         expansion sequence
   1.223 +    * @stable ICU 2.0
   1.224 +    */
   1.225 +    int32_t getMaxExpansion(int32_t order) const;
   1.226 +
   1.227 +    /**
   1.228 +    * Gets the comparison order in the desired strength, ignoring the other differences.
   1.229 +    * @param order The order value
   1.230 +    * @return the comparison order in the desired strength
   1.231 +    * @stable ICU 2.0
   1.232 +    */
   1.233 +    int32_t strengthOrder(int32_t order) const;
   1.234 +
   1.235 +    /**
   1.236 +    * Sets the source string.
   1.237 +    * @param str the source string.
   1.238 +    * @param status the error code status.
   1.239 +    * @stable ICU 2.0
   1.240 +    */
   1.241 +    void setText(const UnicodeString& str, UErrorCode& status);
   1.242 +
   1.243 +    /**
   1.244 +    * Sets the source text from a character iterator.
   1.245 +    * @param str the source character iterator.
   1.246 +    * @param status the error code status.
   1.247 +    * @stable ICU 2.0
   1.248 +    */
   1.249 +    void setText(CharacterIterator& str, UErrorCode& status);
   1.250 +
   1.251 +    /**
   1.252 +    * Checks if a comparison order is ignorable.
   1.253 +    * @param order the collation order.
   1.254 +    * @return TRUE if a character is ignorable, FALSE otherwise.
   1.255 +    * @stable ICU 2.0
   1.256 +    */
   1.257 +    static inline UBool isIgnorable(int32_t order);
   1.258 +
   1.259 +    /**
   1.260 +    * Gets the offset of the currently processed character in the source string.
   1.261 +    * @return the offset of the character.
   1.262 +    * @stable ICU 2.0
   1.263 +    */
   1.264 +    int32_t getOffset(void) const;
   1.265 +
   1.266 +    /**
   1.267 +    * Sets the offset of the currently processed character in the source string.
   1.268 +    * This method has no return value.
   1.269 +    * @param newOffset the new offset.
   1.270 +    * @param status the error code status.
   1.271 +    * @stable ICU 2.0
   1.272 +    */
   1.273 +    void setOffset(int32_t newOffset, UErrorCode& status);
   1.274 +
   1.275 +    /**
   1.276 +    * ICU "poor man's RTTI", returns a UClassID for the actual class.
   1.277 +    *
   1.278 +    * @stable ICU 2.2
   1.279 +    */
   1.280 +    virtual UClassID getDynamicClassID() const;
   1.281 +
   1.282 +    /**
   1.283 +    * ICU "poor man's RTTI", returns a UClassID for this class.
   1.284 +    *
   1.285 +    * @stable ICU 2.2
   1.286 +    */
   1.287 +    static UClassID U_EXPORT2 getStaticClassID();
   1.288 +
   1.289 +private:
   1.290 +    friend class RuleBasedCollator;
   1.291 +
   1.292 +    /**
   1.293 +    * CollationElementIterator constructor. This takes the source string and the
   1.294 +    * collation object. The cursor will walk through the source string based on
   1.295 +    * the predefined collation rules. If the source string is empty, NULLORDER
   1.296 +    * will be returned on the calls to next().
   1.297 +    * @param sourceText    the source string.
   1.298 +    * @param order         the collation object.
   1.299 +    * @param status        the error code status.
   1.300 +    */
   1.301 +    CollationElementIterator(const UnicodeString& sourceText,
   1.302 +        const RuleBasedCollator* order, UErrorCode& status);
   1.303 +
   1.304 +    /**
   1.305 +    * CollationElementIterator constructor. This takes the source character
   1.306 +    * iterator and the collation object.  The cursor will walk through the source
   1.307 +    * text based on the predefined collation rules.  If the source text is empty,
   1.308 +    * NULLORDER will be returned on the calls to next().
   1.309 +    * @param sourceText    the source character iterator.
   1.310 +    * @param order         the collation object.
   1.311 +    * @param status        the error code status.
   1.312 +    */
   1.313 +    CollationElementIterator(const CharacterIterator& sourceText,
   1.314 +        const RuleBasedCollator* order, UErrorCode& status);
   1.315 +
   1.316 +    /**
   1.317 +    * Assignment operator (declared in the private section of this class)
   1.318 +    *
   1.319 +    * @param other    the object to be copied
   1.320 +    */
   1.321 +    const CollationElementIterator&
   1.322 +        operator=(const CollationElementIterator& other);
   1.323 +
   1.324 +    CollationElementIterator(); // default constructor not implemented
   1.325 +
   1.326 +    // CollationElementIterator private data members ----------------------------
   1.327 +
   1.328 +    /**
   1.329 +    * Data wrapper for collation elements
   1.330 +    */
   1.331 +    UCollationElements *m_data_;
   1.332 +
   1.333 +    /**
   1.334 +    * Indicates if m_data_ belongs to this object.
   1.335 +    */
   1.336 +    UBool isDataOwned_;
   1.337 +};
   1.338 +
   1.339 +// CollationElementIterator inline method definitions -------------------------
   1.340 +
   1.341 +/**
   1.342 +* Extracts the primary order by masking with PRIMARYORDERMASK and shifting right.
   1.343 +* @param order the collation order
   1.344 +* @return the masked collation order shifted right by PRIMARYORDERSHIFT.
   1.345 +*/
   1.346 +inline int32_t CollationElementIterator::primaryOrder(int32_t order)
   1.347 +{
   1.348 +    const int32_t primaryBits = order & RuleBasedCollator::PRIMARYORDERMASK;
   1.349 +    return primaryBits >> RuleBasedCollator::PRIMARYORDERSHIFT;
   1.350 +}
   1.351 +
   1.352 +/**
   1.353 +* Extracts the secondary order by masking with SECONDARYORDERMASK and shifting right.
   1.354 +* @param order the collation order
   1.355 +* @return the masked collation order shifted right by SECONDARYORDERSHIFT.
   1.356 +*/
   1.357 +inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
   1.358 +{
   1.359 +    const int32_t secondaryBits = order & RuleBasedCollator::SECONDARYORDERMASK;
   1.360 +    return secondaryBits >> RuleBasedCollator::SECONDARYORDERSHIFT;
   1.361 +}
   1.362 +
   1.363 +/**
   1.364 +* Extracts the tertiary order by masking with TERTIARYORDERMASK (no shift).
   1.365 +* @param order the collation order
   1.366 +* @return the collation order masked with TERTIARYORDERMASK.
   1.367 +*/
   1.368 +inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
   1.369 +{
   1.370 +    return order & RuleBasedCollator::TERTIARYORDERMASK;
   1.371 +}
   1.372 +
   1.373 +inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const
   1.374 +{   // Forwards to the C API, passing the order as an unsigned 32-bit value.
   1.375 +    return ucol_getMaxExpansion(m_data_, static_cast<uint32_t>(order));
   1.376 +}
   1.377 +
   1.378 +inline UBool CollationElementIterator::isIgnorable(int32_t order)
   1.379 +{   // Ignorable means the primary order equals PRIMIGNORABLE.
   1.380 +    return RuleBasedCollator::PRIMIGNORABLE == primaryOrder(order);
   1.381 +}
   1.382 +
   1.383 +U_NAMESPACE_END
   1.384 +
   1.385 +#endif /* #if !UCONFIG_NO_COLLATION */
   1.386 +
   1.387 +#endif

mercurial