1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/brkiter.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,648 @@ 1.4 +/* 1.5 +******************************************************************************** 1.6 +* Copyright (C) 1997-2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************** 1.9 +* 1.10 +* File brkiter.h 1.11 +* 1.12 +* Modification History: 1.13 +* 1.14 +* Date Name Description 1.15 +* 02/18/97 aliu Added typedef for TextCount. Made DONE const. 1.16 +* 05/07/97 aliu Fixed DLL declaration. 1.17 +* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK 1.18 +* 08/11/98 helena Sync-up JDK1.2. 1.19 +* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. 1.20 +******************************************************************************** 1.21 +*/ 1.22 + 1.23 +#ifndef BRKITER_H 1.24 +#define BRKITER_H 1.25 + 1.26 +#include "unicode/utypes.h" 1.27 + 1.28 +/** 1.29 + * \file 1.30 + * \brief C++ API: Break Iterator. 1.31 + */ 1.32 + 1.33 +#if UCONFIG_NO_BREAK_ITERATION 1.34 + 1.35 +U_NAMESPACE_BEGIN 1.36 + 1.37 +/* 1.38 + * Allow the declaration of APIs with pointers to BreakIterator 1.39 + * even when break iteration is removed from the build. 1.40 + */ 1.41 +class BreakIterator; 1.42 + 1.43 +U_NAMESPACE_END 1.44 + 1.45 +#else 1.46 + 1.47 +#include "unicode/uobject.h" 1.48 +#include "unicode/unistr.h" 1.49 +#include "unicode/chariter.h" 1.50 +#include "unicode/locid.h" 1.51 +#include "unicode/ubrk.h" 1.52 +#include "unicode/strenum.h" 1.53 +#include "unicode/utext.h" 1.54 +#include "unicode/umisc.h" 1.55 + 1.56 +U_NAMESPACE_BEGIN 1.57 + 1.58 +/** 1.59 + * The BreakIterator class implements methods for finding the location 1.60 + * of boundaries in text. BreakIterator is an abstract base class. 1.61 + * Instances of BreakIterator maintain a current position and scan over 1.62 + * text returning the index of characters where boundaries occur. 1.63 + * <p> 1.64 + * Line boundary analysis determines where a text string can be broken 1.65 + * when line-wrapping. The mechanism correctly handles punctuation and 1.66 + * hyphenated words. 1.67 + * <p> 1.68 + * Sentence boundary analysis allows selection with correct 1.69 + * interpretation of periods within numbers and abbreviations, and 1.70 + * trailing punctuation marks such as quotation marks and parentheses. 1.71 + * <p> 1.72 + * Word boundary analysis is used by search and replace functions, as 1.73 + * well as within text editing applications that allow the user to 1.74 + * select words with a double click. Word selection provides correct 1.75 + * interpretation of punctuation marks within and following 1.76 + * words. Characters that are not part of a word, such as symbols or 1.77 + * punctuation marks, have word-breaks on both sides. 1.78 + * <p> 1.79 + * Character boundary analysis allows users to interact with 1.80 + * characters as they expect to, for example, when moving the cursor 1.81 + * through a text string. Character boundary analysis provides correct 1.82 + * navigation of through character strings, regardless of how the 1.83 + * character is stored. For example, an accented character might be 1.84 + * stored as a base character and a diacritical mark. What users 1.85 + * consider to be a character can differ between languages. 1.86 + * <p> 1.87 + * The text boundary positions are found according to the rules 1.88 + * described in Unicode Standard Annex #29, Text Boundaries, and 1.89 + * Unicode Standard Annex #14, Line Breaking Properties. These 1.90 + * are available at http://www.unicode.org/reports/tr14/ and 1.91 + * http://www.unicode.org/reports/tr29/. 1.92 + * <p> 1.93 + * In addition to the C++ API defined in this header file, a 1.94 + * plain C API with equivalent functionality is defined in the 1.95 + * file ubrk.h 1.96 + * <p> 1.97 + * Code snippets illustrating the use of the Break Iterator APIs 1.98 + * are available in the ICU User Guide, 1.99 + * http://icu-project.org/userguide/boundaryAnalysis.html 1.100 + * and in the sample program icu/source/samples/break/break.cpp 1.101 + * 1.102 + */ 1.103 +class U_COMMON_API BreakIterator : public UObject { 1.104 +public: 1.105 + /** 1.106 + * destructor 1.107 + * @stable ICU 2.0 1.108 + */ 1.109 + virtual ~BreakIterator(); 1.110 + 1.111 + /** 1.112 + * Return true if another object is semantically equal to this 1.113 + * one. The other object should be an instance of the same subclass of 1.114 + * BreakIterator. Objects of different subclasses are considered 1.115 + * unequal. 1.116 + * <P> 1.117 + * Return true if this BreakIterator is at the same position in the 1.118 + * same text, and is the same class and type (word, line, etc.) of 1.119 + * BreakIterator, as the argument. Text is considered the same if 1.120 + * it contains the same characters, it need not be the same 1.121 + * object, and styles are not considered. 1.122 + * @stable ICU 2.0 1.123 + */ 1.124 + virtual UBool operator==(const BreakIterator&) const = 0; 1.125 + 1.126 + /** 1.127 + * Returns the complement of the result of operator== 1.128 + * @param rhs The BreakIterator to be compared for inequality 1.129 + * @return the complement of the result of operator== 1.130 + * @stable ICU 2.0 1.131 + */ 1.132 + UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); } 1.133 + 1.134 + /** 1.135 + * Return a polymorphic copy of this object. This is an abstract 1.136 + * method which subclasses implement. 1.137 + * @stable ICU 2.0 1.138 + */ 1.139 + virtual BreakIterator* clone(void) const = 0; 1.140 + 1.141 + /** 1.142 + * Return a polymorphic class ID for this object. Different subclasses 1.143 + * will return distinct unequal values. 1.144 + * @stable ICU 2.0 1.145 + */ 1.146 + virtual UClassID getDynamicClassID(void) const = 0; 1.147 + 1.148 + /** 1.149 + * Return a CharacterIterator over the text being analyzed. 1.150 + * @stable ICU 2.0 1.151 + */ 1.152 + virtual CharacterIterator& getText(void) const = 0; 1.153 + 1.154 + 1.155 + /** 1.156 + * Get a UText for the text being analyzed. 1.157 + * The returned UText is a shallow clone of the UText used internally 1.158 + * by the break iterator implementation. It can safely be used to 1.159 + * access the text without impacting any break iterator operations, 1.160 + * but the underlying text itself must not be altered. 1.161 + * 1.162 + * @param fillIn A UText to be filled in. If NULL, a new UText will be 1.163 + * allocated to hold the result. 1.164 + * @param status receives any error codes. 1.165 + * @return The current UText for this break iterator. If an input 1.166 + * UText was provided, it will always be returned. 1.167 + * @stable ICU 3.4 1.168 + */ 1.169 + virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0; 1.170 + 1.171 + /** 1.172 + * Change the text over which this operates. The text boundary is 1.173 + * reset to the start. 1.174 + * @param text The UnicodeString used to change the text. 1.175 + * @stable ICU 2.0 1.176 + */ 1.177 + virtual void setText(const UnicodeString &text) = 0; 1.178 + 1.179 + /** 1.180 + * Reset the break iterator to operate over the text represented by 1.181 + * the UText. The iterator position is reset to the start. 1.182 + * 1.183 + * This function makes a shallow clone of the supplied UText. This means 1.184 + * that the caller is free to immediately close or otherwise reuse the 1.185 + * Utext that was passed as a parameter, but that the underlying text itself 1.186 + * must not be altered while being referenced by the break iterator. 1.187 + * 1.188 + * All index positions returned by break iterator functions are 1.189 + * native indices from the UText. For example, when breaking UTF-8 1.190 + * encoded text, the break positions returned by next(), previous(), etc. 1.191 + * will be UTF-8 string indices, not UTF-16 positions. 1.192 + * 1.193 + * @param text The UText used to change the text. 1.194 + * @param status receives any error codes. 1.195 + * @stable ICU 3.4 1.196 + */ 1.197 + virtual void setText(UText *text, UErrorCode &status) = 0; 1.198 + 1.199 + /** 1.200 + * Change the text over which this operates. The text boundary is 1.201 + * reset to the start. 1.202 + * Note that setText(UText *) provides similar functionality to this function, 1.203 + * and is more efficient. 1.204 + * @param it The CharacterIterator used to change the text. 1.205 + * @stable ICU 2.0 1.206 + */ 1.207 + virtual void adoptText(CharacterIterator* it) = 0; 1.208 + 1.209 + enum { 1.210 + /** 1.211 + * DONE is returned by previous() and next() after all valid 1.212 + * boundaries have been returned. 1.213 + * @stable ICU 2.0 1.214 + */ 1.215 + DONE = (int32_t)-1 1.216 + }; 1.217 + 1.218 + /** 1.219 + * Set the iterator position to the index of the first character in the text being scanned. 1.220 + * @return The index of the first character in the text being scanned. 1.221 + * @stable ICU 2.0 1.222 + */ 1.223 + virtual int32_t first(void) = 0; 1.224 + 1.225 + /** 1.226 + * Set the iterator position to the index immediately BEYOND the last character in the text being scanned. 1.227 + * @return The index immediately BEYOND the last character in the text being scanned. 1.228 + * @stable ICU 2.0 1.229 + */ 1.230 + virtual int32_t last(void) = 0; 1.231 + 1.232 + /** 1.233 + * Set the iterator position to the boundary preceding the current boundary. 1.234 + * @return The character index of the previous text boundary or DONE if all 1.235 + * boundaries have been returned. 1.236 + * @stable ICU 2.0 1.237 + */ 1.238 + virtual int32_t previous(void) = 0; 1.239 + 1.240 + /** 1.241 + * Advance the iterator to the boundary following the current boundary. 1.242 + * @return The character index of the next text boundary or DONE if all 1.243 + * boundaries have been returned. 1.244 + * @stable ICU 2.0 1.245 + */ 1.246 + virtual int32_t next(void) = 0; 1.247 + 1.248 + /** 1.249 + * Return character index of the current interator position within the text. 1.250 + * @return The boundary most recently returned. 1.251 + * @stable ICU 2.0 1.252 + */ 1.253 + virtual int32_t current(void) const = 0; 1.254 + 1.255 + /** 1.256 + * Advance the iterator to the first boundary following the specified offset. 1.257 + * The value returned is always greater than the offset or 1.258 + * the value BreakIterator.DONE 1.259 + * @param offset the offset to begin scanning. 1.260 + * @return The first boundary after the specified offset. 1.261 + * @stable ICU 2.0 1.262 + */ 1.263 + virtual int32_t following(int32_t offset) = 0; 1.264 + 1.265 + /** 1.266 + * Set the iterator position to the first boundary preceding the specified offset. 1.267 + * The value returned is always smaller than the offset or 1.268 + * the value BreakIterator.DONE 1.269 + * @param offset the offset to begin scanning. 1.270 + * @return The first boundary before the specified offset. 1.271 + * @stable ICU 2.0 1.272 + */ 1.273 + virtual int32_t preceding(int32_t offset) = 0; 1.274 + 1.275 + /** 1.276 + * Return true if the specfied position is a boundary position. 1.277 + * As a side effect, the current position of the iterator is set 1.278 + * to the first boundary position at or following the specified offset. 1.279 + * @param offset the offset to check. 1.280 + * @return True if "offset" is a boundary position. 1.281 + * @stable ICU 2.0 1.282 + */ 1.283 + virtual UBool isBoundary(int32_t offset) = 0; 1.284 + 1.285 + /** 1.286 + * Set the iterator position to the nth boundary from the current boundary 1.287 + * @param n the number of boundaries to move by. A value of 0 1.288 + * does nothing. Negative values move to previous boundaries 1.289 + * and positive values move to later boundaries. 1.290 + * @return The new iterator position, or 1.291 + * DONE if there are fewer than |n| boundaries in the specfied direction. 1.292 + * @stable ICU 2.0 1.293 + */ 1.294 + virtual int32_t next(int32_t n) = 0; 1.295 + 1.296 + /** 1.297 + * For RuleBasedBreakIterators, return the status tag from the 1.298 + * break rule that determined the most recently 1.299 + * returned break position. 1.300 + * <p> 1.301 + * For break iterator types that do not support a rule status, 1.302 + * a default value of 0 is returned. 1.303 + * <p> 1.304 + * @return the status from the break rule that determined the most recently 1.305 + * returned break position. 1.306 + * @see RuleBaseBreakIterator::getRuleStatus() 1.307 + * @see UWordBreak 1.308 + * @draft ICU 52 1.309 + */ 1.310 + virtual int32_t getRuleStatus() const; 1.311 + 1.312 + /** 1.313 + * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) 1.314 + * that determined the most recently returned break position. 1.315 + * <p> 1.316 + * For break iterator types that do not support rule status, 1.317 + * no values are returned. 1.318 + * <p> 1.319 + * The returned status value(s) are stored into an array provided by the caller. 1.320 + * The values are stored in sorted (ascending) order. 1.321 + * If the capacity of the output array is insufficient to hold the data, 1.322 + * the output will be truncated to the available length, and a 1.323 + * U_BUFFER_OVERFLOW_ERROR will be signaled. 1.324 + * <p> 1.325 + * @see RuleBaseBreakIterator::getRuleStatusVec 1.326 + * 1.327 + * @param fillInVec an array to be filled in with the status values. 1.328 + * @param capacity the length of the supplied vector. A length of zero causes 1.329 + * the function to return the number of status values, in the 1.330 + * normal way, without attemtping to store any values. 1.331 + * @param status receives error codes. 1.332 + * @return The number of rule status values from rules that determined 1.333 + * the most recent boundary returned by the break iterator. 1.334 + * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value 1.335 + * is the total number of status values that were available, 1.336 + * not the reduced number that were actually returned. 1.337 + * @see getRuleStatus 1.338 + * @draft ICU 52 1.339 + */ 1.340 + virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); 1.341 + 1.342 + /** 1.343 + * Create BreakIterator for word-breaks using the given locale. 1.344 + * Returns an instance of a BreakIterator implementing word breaks. 1.345 + * WordBreak is useful for word selection (ex. double click) 1.346 + * @param where the locale. 1.347 + * @param status the error code 1.348 + * @return A BreakIterator for word-breaks. The UErrorCode& status 1.349 + * parameter is used to return status information to the user. 1.350 + * To check whether the construction succeeded or not, you should check 1.351 + * the value of U_SUCCESS(err). If you wish more detailed information, you 1.352 + * can check for informational error results which still indicate success. 1.353 + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For 1.354 + * example, 'de_CH' was requested, but nothing was found there, so 'de' was 1.355 + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was 1.356 + * used; neither the requested locale nor any of its fall back locales 1.357 + * could be found. 1.358 + * The caller owns the returned object and is responsible for deleting it. 1.359 + * @stable ICU 2.0 1.360 + */ 1.361 + static BreakIterator* U_EXPORT2 1.362 + createWordInstance(const Locale& where, UErrorCode& status); 1.363 + 1.364 + /** 1.365 + * Create BreakIterator for line-breaks using specified locale. 1.366 + * Returns an instance of a BreakIterator implementing line breaks. Line 1.367 + * breaks are logically possible line breaks, actual line breaks are 1.368 + * usually determined based on display width. 1.369 + * LineBreak is useful for word wrapping text. 1.370 + * @param where the locale. 1.371 + * @param status The error code. 1.372 + * @return A BreakIterator for line-breaks. The UErrorCode& status 1.373 + * parameter is used to return status information to the user. 1.374 + * To check whether the construction succeeded or not, you should check 1.375 + * the value of U_SUCCESS(err). If you wish more detailed information, you 1.376 + * can check for informational error results which still indicate success. 1.377 + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For 1.378 + * example, 'de_CH' was requested, but nothing was found there, so 'de' was 1.379 + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was 1.380 + * used; neither the requested locale nor any of its fall back locales 1.381 + * could be found. 1.382 + * The caller owns the returned object and is responsible for deleting it. 1.383 + * @stable ICU 2.0 1.384 + */ 1.385 + static BreakIterator* U_EXPORT2 1.386 + createLineInstance(const Locale& where, UErrorCode& status); 1.387 + 1.388 + /** 1.389 + * Create BreakIterator for character-breaks using specified locale 1.390 + * Returns an instance of a BreakIterator implementing character breaks. 1.391 + * Character breaks are boundaries of combining character sequences. 1.392 + * @param where the locale. 1.393 + * @param status The error code. 1.394 + * @return A BreakIterator for character-breaks. The UErrorCode& status 1.395 + * parameter is used to return status information to the user. 1.396 + * To check whether the construction succeeded or not, you should check 1.397 + * the value of U_SUCCESS(err). If you wish more detailed information, you 1.398 + * can check for informational error results which still indicate success. 1.399 + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For 1.400 + * example, 'de_CH' was requested, but nothing was found there, so 'de' was 1.401 + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was 1.402 + * used; neither the requested locale nor any of its fall back locales 1.403 + * could be found. 1.404 + * The caller owns the returned object and is responsible for deleting it. 1.405 + * @stable ICU 2.0 1.406 + */ 1.407 + static BreakIterator* U_EXPORT2 1.408 + createCharacterInstance(const Locale& where, UErrorCode& status); 1.409 + 1.410 + /** 1.411 + * Create BreakIterator for sentence-breaks using specified locale 1.412 + * Returns an instance of a BreakIterator implementing sentence breaks. 1.413 + * @param where the locale. 1.414 + * @param status The error code. 1.415 + * @return A BreakIterator for sentence-breaks. The UErrorCode& status 1.416 + * parameter is used to return status information to the user. 1.417 + * To check whether the construction succeeded or not, you should check 1.418 + * the value of U_SUCCESS(err). If you wish more detailed information, you 1.419 + * can check for informational error results which still indicate success. 1.420 + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For 1.421 + * example, 'de_CH' was requested, but nothing was found there, so 'de' was 1.422 + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was 1.423 + * used; neither the requested locale nor any of its fall back locales 1.424 + * could be found. 1.425 + * The caller owns the returned object and is responsible for deleting it. 1.426 + * @stable ICU 2.0 1.427 + */ 1.428 + static BreakIterator* U_EXPORT2 1.429 + createSentenceInstance(const Locale& where, UErrorCode& status); 1.430 + 1.431 + /** 1.432 + * Create BreakIterator for title-casing breaks using the specified locale 1.433 + * Returns an instance of a BreakIterator implementing title breaks. 1.434 + * The iterator returned locates title boundaries as described for 1.435 + * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, 1.436 + * please use Word Boundary iterator.{@link #createWordInstance } 1.437 + * 1.438 + * @param where the locale. 1.439 + * @param status The error code. 1.440 + * @return A BreakIterator for title-breaks. The UErrorCode& status 1.441 + * parameter is used to return status information to the user. 1.442 + * To check whether the construction succeeded or not, you should check 1.443 + * the value of U_SUCCESS(err). If you wish more detailed information, you 1.444 + * can check for informational error results which still indicate success. 1.445 + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For 1.446 + * example, 'de_CH' was requested, but nothing was found there, so 'de' was 1.447 + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was 1.448 + * used; neither the requested locale nor any of its fall back locales 1.449 + * could be found. 1.450 + * The caller owns the returned object and is responsible for deleting it. 1.451 + * @stable ICU 2.1 1.452 + */ 1.453 + static BreakIterator* U_EXPORT2 1.454 + createTitleInstance(const Locale& where, UErrorCode& status); 1.455 + 1.456 + /** 1.457 + * Get the set of Locales for which TextBoundaries are installed. 1.458 + * <p><b>Note:</b> this will not return locales added through the register 1.459 + * call. To see the registered locales too, use the getAvailableLocales 1.460 + * function that returns a StringEnumeration object </p> 1.461 + * @param count the output parameter of number of elements in the locale list 1.462 + * @return available locales 1.463 + * @stable ICU 2.0 1.464 + */ 1.465 + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); 1.466 + 1.467 + /** 1.468 + * Get name of the object for the desired Locale, in the desired langauge. 1.469 + * @param objectLocale must be from getAvailableLocales. 1.470 + * @param displayLocale specifies the desired locale for output. 1.471 + * @param name the fill-in parameter of the return value 1.472 + * Uses best match. 1.473 + * @return user-displayable name 1.474 + * @stable ICU 2.0 1.475 + */ 1.476 + static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, 1.477 + const Locale& displayLocale, 1.478 + UnicodeString& name); 1.479 + 1.480 + /** 1.481 + * Get name of the object for the desired Locale, in the langauge of the 1.482 + * default locale. 1.483 + * @param objectLocale must be from getMatchingLocales 1.484 + * @param name the fill-in parameter of the return value 1.485 + * @return user-displayable name 1.486 + * @stable ICU 2.0 1.487 + */ 1.488 + static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, 1.489 + UnicodeString& name); 1.490 + 1.491 + /** 1.492 + * Deprecated functionality. Use clone() instead. 1.493 + * 1.494 + * Thread safe client-buffer-based cloning operation 1.495 + * Do NOT call delete on a safeclone, since 'new' is not used to create it. 1.496 + * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. 1.497 + * If buffer is not large enough, new memory will be allocated. 1.498 + * @param BufferSize reference to size of allocated space. 1.499 + * If BufferSize == 0, a sufficient size for use in cloning will 1.500 + * be returned ('pre-flighting') 1.501 + * If BufferSize is not enough for a stack-based safe clone, 1.502 + * new memory will be allocated. 1.503 + * @param status to indicate whether the operation went on smoothly or there were errors 1.504 + * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were 1.505 + * necessary. 1.506 + * @return pointer to the new clone 1.507 + * 1.508 + * @deprecated ICU 52. Use clone() instead. 1.509 + */ 1.510 + virtual BreakIterator * createBufferClone(void *stackBuffer, 1.511 + int32_t &BufferSize, 1.512 + UErrorCode &status) = 0; 1.513 + 1.514 +#ifndef U_HIDE_DEPRECATED_API 1.515 + 1.516 + /** 1.517 + * Determine whether the BreakIterator was created in user memory by 1.518 + * createBufferClone(), and thus should not be deleted. Such objects 1.519 + * must be closed by an explicit call to the destructor (not delete). 1.520 + * @deprecated ICU 52. Always delete the BreakIterator. 1.521 + */ 1.522 + inline UBool isBufferClone(void); 1.523 + 1.524 +#endif /* U_HIDE_DEPRECATED_API */ 1.525 + 1.526 +#if !UCONFIG_NO_SERVICE 1.527 + /** 1.528 + * Register a new break iterator of the indicated kind, to use in the given locale. 1.529 + * The break iterator will be adopted. Clones of the iterator will be returned 1.530 + * if a request for a break iterator of the given kind matches or falls back to 1.531 + * this locale. 1.532 + * @param toAdopt the BreakIterator instance to be adopted 1.533 + * @param locale the Locale for which this instance is to be registered 1.534 + * @param kind the type of iterator for which this instance is to be registered 1.535 + * @param status the in/out status code, no special meanings are assigned 1.536 + * @return a registry key that can be used to unregister this instance 1.537 + * @stable ICU 2.4 1.538 + */ 1.539 + static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt, 1.540 + const Locale& locale, 1.541 + UBreakIteratorType kind, 1.542 + UErrorCode& status); 1.543 + 1.544 + /** 1.545 + * Unregister a previously-registered BreakIterator using the key returned from the 1.546 + * register call. Key becomes invalid after a successful call and should not be used again. 1.547 + * The BreakIterator corresponding to the key will be deleted. 1.548 + * @param key the registry key returned by a previous call to registerInstance 1.549 + * @param status the in/out status code, no special meanings are assigned 1.550 + * @return TRUE if the iterator for the key was successfully unregistered 1.551 + * @stable ICU 2.4 1.552 + */ 1.553 + static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); 1.554 + 1.555 + /** 1.556 + * Return a StringEnumeration over the locales available at the time of the call, 1.557 + * including registered locales. 1.558 + * @return a StringEnumeration over the locales available at the time of the call 1.559 + * @stable ICU 2.4 1.560 + */ 1.561 + static StringEnumeration* U_EXPORT2 getAvailableLocales(void); 1.562 +#endif 1.563 + 1.564 + /** 1.565 + * Returns the locale for this break iterator. Two flavors are available: valid and 1.566 + * actual locale. 1.567 + * @stable ICU 2.8 1.568 + */ 1.569 + Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; 1.570 + 1.571 +#ifndef U_HIDE_INTERNAL_API 1.572 + /** Get the locale for this break iterator object. You can choose between valid and actual locale. 1.573 + * @param type type of the locale we're looking for (valid or actual) 1.574 + * @param status error code for the operation 1.575 + * @return the locale 1.576 + * @internal 1.577 + */ 1.578 + const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; 1.579 +#endif /* U_HIDE_INTERNAL_API */ 1.580 + 1.581 + /** 1.582 + * Set the subject text string upon which the break iterator is operating 1.583 + * without changing any other aspect of the matching state. 1.584 + * The new and previous text strings must have the same content. 1.585 + * 1.586 + * This function is intended for use in environments where ICU is operating on 1.587 + * strings that may move around in memory. It provides a mechanism for notifying 1.588 + * ICU that the string has been relocated, and providing a new UText to access the 1.589 + * string in its new position. 1.590 + * 1.591 + * Note that the break iterator implementation never copies the underlying text 1.592 + * of a string being processed, but always operates directly on the original text 1.593 + * provided by the user. Refreshing simply drops the references to the old text 1.594 + * and replaces them with references to the new. 1.595 + * 1.596 + * Caution: this function is normally used only by very specialized, 1.597 + * system-level code. One example use case is with garbage collection that moves 1.598 + * the text in memory. 1.599 + * 1.600 + * @param input The new (moved) text string. 1.601 + * @param status Receives errors detected by this function. 1.602 + * @return *this 1.603 + * 1.604 + * @stable ICU 49 1.605 + */ 1.606 + virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; 1.607 + 1.608 + private: 1.609 + static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status); 1.610 + static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status); 1.611 + static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status); 1.612 + 1.613 + friend class ICUBreakIteratorFactory; 1.614 + friend class ICUBreakIteratorService; 1.615 + 1.616 +protected: 1.617 + // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API 1.618 + // or else the compiler will create a public ones. 1.619 + /** @internal */ 1.620 + BreakIterator(); 1.621 + /** @internal */ 1.622 + BreakIterator (const BreakIterator &other) : UObject(other) {} 1.623 + 1.624 +private: 1.625 + 1.626 + /** @internal */ 1.627 + char actualLocale[ULOC_FULLNAME_CAPACITY]; 1.628 + char validLocale[ULOC_FULLNAME_CAPACITY]; 1.629 + 1.630 + /** 1.631 + * The assignment operator has no real implementation. 1.632 + * It's provided to make the compiler happy. Do not call. 1.633 + */ 1.634 + BreakIterator& operator=(const BreakIterator&); 1.635 +}; 1.636 + 1.637 +#ifndef U_HIDE_DEPRECATED_API 1.638 + 1.639 +inline UBool BreakIterator::isBufferClone() 1.640 +{ 1.641 + return FALSE; 1.642 +} 1.643 + 1.644 +#endif /* U_HIDE_DEPRECATED_API */ 1.645 + 1.646 +U_NAMESPACE_END 1.647 + 1.648 +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1.649 + 1.650 +#endif // _BRKITER 1.651 +//eof