intl/icu/source/common/unicode/brkiter.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 ********************************************************************************
michael@0 3 * Copyright (C) 1997-2013, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 ********************************************************************************
michael@0 6 *
michael@0 7 * File brkiter.h
michael@0 8 *
michael@0 9 * Modification History:
michael@0 10 *
michael@0 11 * Date Name Description
michael@0 12 * 02/18/97 aliu Added typedef for TextCount. Made DONE const.
michael@0 13 * 05/07/97 aliu Fixed DLL declaration.
michael@0 14 * 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
michael@0 15 * 08/11/98 helena Sync-up JDK1.2.
michael@0 16 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
michael@0 17 ********************************************************************************
michael@0 18 */
michael@0 19
michael@0 20 #ifndef BRKITER_H
michael@0 21 #define BRKITER_H
michael@0 22
michael@0 23 #include "unicode/utypes.h"
michael@0 24
michael@0 25 /**
michael@0 26 * \file
michael@0 27 * \brief C++ API: Break Iterator.
michael@0 28 */
michael@0 29
michael@0 30 #if UCONFIG_NO_BREAK_ITERATION
michael@0 31
michael@0 32 U_NAMESPACE_BEGIN
michael@0 33
michael@0 34 /*
michael@0 35 * Allow the declaration of APIs with pointers to BreakIterator
michael@0 36 * even when break iteration is removed from the build.
michael@0 37 */
michael@0 38 class BreakIterator;
michael@0 39
michael@0 40 U_NAMESPACE_END
michael@0 41
michael@0 42 #else
michael@0 43
michael@0 44 #include "unicode/uobject.h"
michael@0 45 #include "unicode/unistr.h"
michael@0 46 #include "unicode/chariter.h"
michael@0 47 #include "unicode/locid.h"
michael@0 48 #include "unicode/ubrk.h"
michael@0 49 #include "unicode/strenum.h"
michael@0 50 #include "unicode/utext.h"
michael@0 51 #include "unicode/umisc.h"
michael@0 52
michael@0 53 U_NAMESPACE_BEGIN
michael@0 54
michael@0 55 /**
michael@0 56 * The BreakIterator class implements methods for finding the location
michael@0 57 * of boundaries in text. BreakIterator is an abstract base class.
michael@0 58 * Instances of BreakIterator maintain a current position and scan over
michael@0 59 * text returning the index of characters where boundaries occur.
michael@0 60 * <p>
michael@0 61 * Line boundary analysis determines where a text string can be broken
michael@0 62 * when line-wrapping. The mechanism correctly handles punctuation and
michael@0 63 * hyphenated words.
michael@0 64 * <p>
michael@0 65 * Sentence boundary analysis allows selection with correct
michael@0 66 * interpretation of periods within numbers and abbreviations, and
michael@0 67 * trailing punctuation marks such as quotation marks and parentheses.
michael@0 68 * <p>
michael@0 69 * Word boundary analysis is used by search and replace functions, as
michael@0 70 * well as within text editing applications that allow the user to
michael@0 71 * select words with a double click. Word selection provides correct
michael@0 72 * interpretation of punctuation marks within and following
michael@0 73 * words. Characters that are not part of a word, such as symbols or
michael@0 74 * punctuation marks, have word-breaks on both sides.
michael@0 75 * <p>
michael@0 76 * Character boundary analysis allows users to interact with
michael@0 77 * characters as they expect to, for example, when moving the cursor
michael@0 78 * through a text string. Character boundary analysis provides correct
michael@0 79 * navigation through character strings, regardless of how the
michael@0 80 * character is stored. For example, an accented character might be
michael@0 81 * stored as a base character and a diacritical mark. What users
michael@0 82 * consider to be a character can differ between languages.
michael@0 83 * <p>
michael@0 84 * The text boundary positions are found according to the rules
michael@0 85 * described in Unicode Standard Annex #29, Text Boundaries, and
michael@0 86 * Unicode Standard Annex #14, Line Breaking Properties. These
michael@0 87 * are available at http://www.unicode.org/reports/tr14/ and
michael@0 88 * http://www.unicode.org/reports/tr29/.
michael@0 89 * <p>
michael@0 90 * In addition to the C++ API defined in this header file, a
michael@0 91 * plain C API with equivalent functionality is defined in the
michael@0 92 * file ubrk.h
michael@0 93 * <p>
michael@0 94 * Code snippets illustrating the use of the Break Iterator APIs
michael@0 95 * are available in the ICU User Guide,
michael@0 96 * http://icu-project.org/userguide/boundaryAnalysis.html
michael@0 97 * and in the sample program icu/source/samples/break/break.cpp
michael@0 98 *
michael@0 99 */
michael@0 100 class U_COMMON_API BreakIterator : public UObject {
michael@0 101 public:
michael@0 102 /**
michael@0 103 * destructor
michael@0 104 * @stable ICU 2.0
michael@0 105 */
michael@0 106 virtual ~BreakIterator();
michael@0 107
michael@0 108 /**
michael@0 109 * Return true if another object is semantically equal to this
michael@0 110 * one. The other object should be an instance of the same subclass of
michael@0 111 * BreakIterator. Objects of different subclasses are considered
michael@0 112 * unequal.
michael@0 113 * <P>
michael@0 114 * Return true if this BreakIterator is at the same position in the
michael@0 115 * same text, and is the same class and type (word, line, etc.) of
michael@0 116 * BreakIterator, as the argument. Text is considered the same if
michael@0 117 * it contains the same characters, it need not be the same
michael@0 118 * object, and styles are not considered.
michael@0 119 * @stable ICU 2.0
michael@0 120 */
michael@0 121 virtual UBool operator==(const BreakIterator&) const = 0;
michael@0 122
michael@0 123 /**
michael@0 124 * Inequality test, defined purely as the logical negation of operator==.
michael@0 125 * @param rhs The BreakIterator to compare against this one.
michael@0 126 * @return the negation of whatever operator==(rhs) reports.
michael@0 127 * @stable ICU 2.0
michael@0 128 */
michael@0 129 UBool operator!=(const BreakIterator& rhs) const { return !(this->operator==(rhs)); }
michael@0 130
michael@0 131 /**
michael@0 132 * Return a polymorphic copy of this object. This is an abstract
michael@0 133 * method which subclasses implement.
michael@0 134 * @stable ICU 2.0
michael@0 135 */
michael@0 136 virtual BreakIterator* clone(void) const = 0;
michael@0 137
michael@0 138 /**
michael@0 139 * Return a polymorphic class ID for this object. Different subclasses
michael@0 140 * will return distinct unequal values.
michael@0 141 * @stable ICU 2.0
michael@0 142 */
michael@0 143 virtual UClassID getDynamicClassID(void) const = 0;
michael@0 144
michael@0 145 /**
michael@0 146 * Return a CharacterIterator over the text being analyzed.
michael@0 147 * @stable ICU 2.0
michael@0 148 */
michael@0 149 virtual CharacterIterator& getText(void) const = 0;
michael@0 150
michael@0 151
michael@0 152 /**
michael@0 153 * Get a UText for the text being analyzed.
michael@0 154 * The returned UText is a shallow clone of the UText used internally
michael@0 155 * by the break iterator implementation. It can safely be used to
michael@0 156 * access the text without impacting any break iterator operations,
michael@0 157 * but the underlying text itself must not be altered.
michael@0 158 *
michael@0 159 * @param fillIn A UText to be filled in. If NULL, a new UText will be
michael@0 160 * allocated to hold the result.
michael@0 161 * @param status receives any error codes.
michael@0 162 * @return The current UText for this break iterator. If an input
michael@0 163 * UText was provided, it will always be returned.
michael@0 164 * @stable ICU 3.4
michael@0 165 */
michael@0 166 virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
michael@0 167
michael@0 168 /**
michael@0 169 * Change the text over which this operates. The text boundary is
michael@0 170 * reset to the start.
michael@0 171 * @param text The UnicodeString used to change the text.
michael@0 172 * @stable ICU 2.0
michael@0 173 */
michael@0 174 virtual void setText(const UnicodeString &text) = 0;
michael@0 175
michael@0 176 /**
michael@0 177 * Reset the break iterator to operate over the text represented by
michael@0 178 * the UText. The iterator position is reset to the start.
michael@0 179 *
michael@0 180 * This function makes a shallow clone of the supplied UText. This means
michael@0 181 * that the caller is free to immediately close or otherwise reuse the
michael@0 182 * UText that was passed as a parameter, but that the underlying text itself
michael@0 183 * must not be altered while being referenced by the break iterator.
michael@0 184 *
michael@0 185 * All index positions returned by break iterator functions are
michael@0 186 * native indices from the UText. For example, when breaking UTF-8
michael@0 187 * encoded text, the break positions returned by next(), previous(), etc.
michael@0 188 * will be UTF-8 string indices, not UTF-16 positions.
michael@0 189 *
michael@0 190 * @param text The UText used to change the text.
michael@0 191 * @param status receives any error codes.
michael@0 192 * @stable ICU 3.4
michael@0 193 */
michael@0 194 virtual void setText(UText *text, UErrorCode &status) = 0;
michael@0 195
michael@0 196 /**
michael@0 197 * Change the text over which this operates. The text boundary is
michael@0 198 * reset to the start.
michael@0 199 * Note that setText(UText *) provides similar functionality to this function,
michael@0 200 * and is more efficient.
michael@0 201 * @param it The CharacterIterator used to change the text.
michael@0 202 * @stable ICU 2.0
michael@0 203 */
michael@0 204 virtual void adoptText(CharacterIterator* it) = 0;
michael@0 205
michael@0 206 enum {
michael@0 207 /**
michael@0 208 * DONE is returned by previous() and next() after all valid
michael@0 209 * boundaries have been returned.
michael@0 210 * @stable ICU 2.0
michael@0 211 */
michael@0 212 DONE = (int32_t)-1
michael@0 213 };
michael@0 214
michael@0 215 /**
michael@0 216 * Set the iterator position to the index of the first character in the text being scanned.
michael@0 217 * @return The index of the first character in the text being scanned.
michael@0 218 * @stable ICU 2.0
michael@0 219 */
michael@0 220 virtual int32_t first(void) = 0;
michael@0 221
michael@0 222 /**
michael@0 223 * Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
michael@0 224 * @return The index immediately BEYOND the last character in the text being scanned.
michael@0 225 * @stable ICU 2.0
michael@0 226 */
michael@0 227 virtual int32_t last(void) = 0;
michael@0 228
michael@0 229 /**
michael@0 230 * Set the iterator position to the boundary preceding the current boundary.
michael@0 231 * @return The character index of the previous text boundary or DONE if all
michael@0 232 * boundaries have been returned.
michael@0 233 * @stable ICU 2.0
michael@0 234 */
michael@0 235 virtual int32_t previous(void) = 0;
michael@0 236
michael@0 237 /**
michael@0 238 * Advance the iterator to the boundary following the current boundary.
michael@0 239 * @return The character index of the next text boundary or DONE if all
michael@0 240 * boundaries have been returned.
michael@0 241 * @stable ICU 2.0
michael@0 242 */
michael@0 243 virtual int32_t next(void) = 0;
michael@0 244
michael@0 245 /**
michael@0 246 * Return character index of the current iterator position within the text.
michael@0 247 * @return The boundary most recently returned.
michael@0 248 * @stable ICU 2.0
michael@0 249 */
michael@0 250 virtual int32_t current(void) const = 0;
michael@0 251
michael@0 252 /**
michael@0 253 * Advance the iterator to the first boundary following the specified offset.
michael@0 254 * The value returned is always greater than the offset or
michael@0 255 * the value BreakIterator::DONE
michael@0 256 * @param offset the offset to begin scanning.
michael@0 257 * @return The first boundary after the specified offset.
michael@0 258 * @stable ICU 2.0
michael@0 259 */
michael@0 260 virtual int32_t following(int32_t offset) = 0;
michael@0 261
michael@0 262 /**
michael@0 263 * Set the iterator position to the first boundary preceding the specified offset.
michael@0 264 * The value returned is always smaller than the offset or
michael@0 265 * the value BreakIterator::DONE
michael@0 266 * @param offset the offset to begin scanning.
michael@0 267 * @return The first boundary before the specified offset.
michael@0 268 * @stable ICU 2.0
michael@0 269 */
michael@0 270 virtual int32_t preceding(int32_t offset) = 0;
michael@0 271
michael@0 272 /**
michael@0 273 * Return true if the specified position is a boundary position.
michael@0 274 * As a side effect, the current position of the iterator is set
michael@0 275 * to the first boundary position at or following the specified offset.
michael@0 276 * @param offset the offset to check.
michael@0 277 * @return True if "offset" is a boundary position.
michael@0 278 * @stable ICU 2.0
michael@0 279 */
michael@0 280 virtual UBool isBoundary(int32_t offset) = 0;
michael@0 281
michael@0 282 /**
michael@0 283 * Set the iterator position to the nth boundary from the current boundary
michael@0 284 * @param n the number of boundaries to move by. A value of 0
michael@0 285 * does nothing. Negative values move to previous boundaries
michael@0 286 * and positive values move to later boundaries.
michael@0 287 * @return The new iterator position, or
michael@0 288 * DONE if there are fewer than |n| boundaries in the specified direction.
michael@0 289 * @stable ICU 2.0
michael@0 290 */
michael@0 291 virtual int32_t next(int32_t n) = 0;
michael@0 292
michael@0 293 /**
michael@0 294 * For RuleBasedBreakIterators, return the status tag from the
michael@0 295 * break rule that determined the most recently
michael@0 296 * returned break position.
michael@0 297 * <p>
michael@0 298 * For break iterator types that do not support a rule status,
michael@0 299 * a default value of 0 is returned.
michael@0 300 * <p>
michael@0 301 * @return the status from the break rule that determined the most recently
michael@0 302 * returned break position.
michael@0 303 * @see RuleBasedBreakIterator::getRuleStatus()
michael@0 304 * @see UWordBreak
michael@0 305 * @draft ICU 52
michael@0 306 */
michael@0 307 virtual int32_t getRuleStatus() const;
michael@0 308
michael@0 309 /**
michael@0 310 * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
michael@0 311 * that determined the most recently returned break position.
michael@0 312 * <p>
michael@0 313 * For break iterator types that do not support rule status,
michael@0 314 * no values are returned.
michael@0 315 * <p>
michael@0 316 * The returned status value(s) are stored into an array provided by the caller.
michael@0 317 * The values are stored in sorted (ascending) order.
michael@0 318 * If the capacity of the output array is insufficient to hold the data,
michael@0 319 * the output will be truncated to the available length, and a
michael@0 320 * U_BUFFER_OVERFLOW_ERROR will be signaled.
michael@0 321 * <p>
michael@0 322 * @see RuleBasedBreakIterator::getRuleStatusVec
michael@0 323 *
michael@0 324 * @param fillInVec an array to be filled in with the status values.
michael@0 325 * @param capacity the length of the supplied vector. A length of zero causes
michael@0 326 * the function to return the number of status values, in the
michael@0 327 * normal way, without attempting to store any values.
michael@0 328 * @param status receives error codes.
michael@0 329 * @return The number of rule status values from rules that determined
michael@0 330 * the most recent boundary returned by the break iterator.
michael@0 331 * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
michael@0 332 * is the total number of status values that were available,
michael@0 333 * not the reduced number that were actually returned.
michael@0 334 * @see getRuleStatus
michael@0 335 * @draft ICU 52
michael@0 336 */
michael@0 337 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
michael@0 338
michael@0 339 /**
michael@0 340 * Create BreakIterator for word-breaks using the given locale.
michael@0 341 * Returns an instance of a BreakIterator implementing word breaks.
michael@0 342 * WordBreak is useful for word selection (ex. double click)
michael@0 343 * @param where the locale.
michael@0 344 * @param status the error code
michael@0 345 * @return A BreakIterator for word-breaks. The UErrorCode& status
michael@0 346 * parameter is used to return status information to the user.
michael@0 347 * To check whether the construction succeeded or not, you should check
michael@0 348 * the value of U_SUCCESS(status). If you wish more detailed information, you
michael@0 349 * can check for informational error results which still indicate success.
michael@0 350 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
michael@0 351 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
michael@0 352 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
michael@0 353 * used; neither the requested locale nor any of its fall back locales
michael@0 354 * could be found.
michael@0 355 * The caller owns the returned object and is responsible for deleting it.
michael@0 356 * @stable ICU 2.0
michael@0 357 */
michael@0 358 static BreakIterator* U_EXPORT2
michael@0 359 createWordInstance(const Locale& where, UErrorCode& status);
michael@0 360
michael@0 361 /**
michael@0 362 * Create BreakIterator for line-breaks using specified locale.
michael@0 363 * Returns an instance of a BreakIterator implementing line breaks. Line
michael@0 364 * breaks are logically possible line breaks, actual line breaks are
michael@0 365 * usually determined based on display width.
michael@0 366 * LineBreak is useful for word wrapping text.
michael@0 367 * @param where the locale.
michael@0 368 * @param status The error code.
michael@0 369 * @return A BreakIterator for line-breaks. The UErrorCode& status
michael@0 370 * parameter is used to return status information to the user.
michael@0 371 * To check whether the construction succeeded or not, you should check
michael@0 372 * the value of U_SUCCESS(status). If you wish more detailed information, you
michael@0 373 * can check for informational error results which still indicate success.
michael@0 374 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
michael@0 375 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
michael@0 376 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
michael@0 377 * used; neither the requested locale nor any of its fall back locales
michael@0 378 * could be found.
michael@0 379 * The caller owns the returned object and is responsible for deleting it.
michael@0 380 * @stable ICU 2.0
michael@0 381 */
michael@0 382 static BreakIterator* U_EXPORT2
michael@0 383 createLineInstance(const Locale& where, UErrorCode& status);
michael@0 384
michael@0 385 /**
michael@0 386 * Create BreakIterator for character-breaks using specified locale
michael@0 387 * Returns an instance of a BreakIterator implementing character breaks.
michael@0 388 * Character breaks are boundaries of combining character sequences.
michael@0 389 * @param where the locale.
michael@0 390 * @param status The error code.
michael@0 391 * @return A BreakIterator for character-breaks. The UErrorCode& status
michael@0 392 * parameter is used to return status information to the user.
michael@0 393 * To check whether the construction succeeded or not, you should check
michael@0 394 * the value of U_SUCCESS(status). If you wish more detailed information, you
michael@0 395 * can check for informational error results which still indicate success.
michael@0 396 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
michael@0 397 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
michael@0 398 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
michael@0 399 * used; neither the requested locale nor any of its fall back locales
michael@0 400 * could be found.
michael@0 401 * The caller owns the returned object and is responsible for deleting it.
michael@0 402 * @stable ICU 2.0
michael@0 403 */
michael@0 404 static BreakIterator* U_EXPORT2
michael@0 405 createCharacterInstance(const Locale& where, UErrorCode& status);
michael@0 406
michael@0 407 /**
michael@0 408 * Create BreakIterator for sentence-breaks using specified locale
michael@0 409 * Returns an instance of a BreakIterator implementing sentence breaks.
michael@0 410 * @param where the locale.
michael@0 411 * @param status The error code.
michael@0 412 * @return A BreakIterator for sentence-breaks. The UErrorCode& status
michael@0 413 * parameter is used to return status information to the user.
michael@0 414 * To check whether the construction succeeded or not, you should check
michael@0 415 * the value of U_SUCCESS(status). If you wish more detailed information, you
michael@0 416 * can check for informational error results which still indicate success.
michael@0 417 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
michael@0 418 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
michael@0 419 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
michael@0 420 * used; neither the requested locale nor any of its fall back locales
michael@0 421 * could be found.
michael@0 422 * The caller owns the returned object and is responsible for deleting it.
michael@0 423 * @stable ICU 2.0
michael@0 424 */
michael@0 425 static BreakIterator* U_EXPORT2
michael@0 426 createSentenceInstance(const Locale& where, UErrorCode& status);
michael@0 427
michael@0 428 /**
michael@0 429 * Create BreakIterator for title-casing breaks using the specified locale
michael@0 430 * Returns an instance of a BreakIterator implementing title breaks.
michael@0 431 * The iterator returned locates title boundaries as described for
michael@0 432 * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
michael@0 433 * please use the word boundary iterator; see createWordInstance().
michael@0 434 *
michael@0 435 * @param where the locale.
michael@0 436 * @param status The error code.
michael@0 437 * @return A BreakIterator for title-breaks. The UErrorCode& status
michael@0 438 * parameter is used to return status information to the user.
michael@0 439 * To check whether the construction succeeded or not, you should check
michael@0 440 * the value of U_SUCCESS(status). If you wish more detailed information, you
michael@0 441 * can check for informational error results which still indicate success.
michael@0 442 * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
michael@0 443 * example, 'de_CH' was requested, but nothing was found there, so 'de' was
michael@0 444 * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
michael@0 445 * used; neither the requested locale nor any of its fall back locales
michael@0 446 * could be found.
michael@0 447 * The caller owns the returned object and is responsible for deleting it.
michael@0 448 * @stable ICU 2.1
michael@0 449 */
michael@0 450 static BreakIterator* U_EXPORT2
michael@0 451 createTitleInstance(const Locale& where, UErrorCode& status);
michael@0 452
michael@0 453 /**
michael@0 454 * Get the set of Locales for which TextBoundaries are installed.
michael@0 455 * <p><b>Note:</b> this will not return locales added through the register
michael@0 456 * call. To see the registered locales too, use the getAvailableLocales
michael@0 457 * function that returns a StringEnumeration object </p>
michael@0 458 * @param count the output parameter of number of elements in the locale list
michael@0 459 * @return available locales
michael@0 460 * @stable ICU 2.0
michael@0 461 */
michael@0 462 static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
michael@0 463
michael@0 464 /**
michael@0 465 * Get name of the object for the desired Locale, in the desired language.
michael@0 466 * @param objectLocale must be from getAvailableLocales.
michael@0 467 * @param displayLocale specifies the desired locale for output.
michael@0 468 * @param name the fill-in parameter of the return value
michael@0 469 * Uses best match.
michael@0 470 * @return user-displayable name
michael@0 471 * @stable ICU 2.0
michael@0 472 */
michael@0 473 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
michael@0 474 const Locale& displayLocale,
michael@0 475 UnicodeString& name);
michael@0 476
michael@0 477 /**
michael@0 478 * Get name of the object for the desired Locale, in the language of the
michael@0 479 * default locale.
michael@0 480 * @param objectLocale must be from getAvailableLocales
michael@0 481 * @param name the fill-in parameter of the return value
michael@0 482 * @return user-displayable name
michael@0 483 * @stable ICU 2.0
michael@0 484 */
michael@0 485 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
michael@0 486 UnicodeString& name);
michael@0 487
michael@0 488 /**
michael@0 489 * Deprecated functionality. Use clone() instead.
michael@0 490 *
michael@0 491 * Thread safe client-buffer-based cloning operation
michael@0 492 * Do NOT call delete on a safeclone, since 'new' is not used to create it.
michael@0 493 * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
michael@0 494 * If buffer is not large enough, new memory will be allocated.
michael@0 495 * @param BufferSize reference to size of allocated space.
michael@0 496 * If BufferSize == 0, a sufficient size for use in cloning will
michael@0 497 * be returned ('pre-flighting')
michael@0 498 * If BufferSize is not enough for a stack-based safe clone,
michael@0 499 * new memory will be allocated.
michael@0 500 * @param status to indicate whether the operation went on smoothly or there were errors
michael@0 501 * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
michael@0 502 * necessary.
michael@0 503 * @return pointer to the new clone
michael@0 504 *
michael@0 505 * @deprecated ICU 52. Use clone() instead.
michael@0 506 */
michael@0 507 virtual BreakIterator * createBufferClone(void *stackBuffer,
michael@0 508 int32_t &BufferSize,
michael@0 509 UErrorCode &status) = 0;
michael@0 510
michael@0 511 #ifndef U_HIDE_DEPRECATED_API
michael@0 512
michael@0 513 /**
michael@0 514 * Determine whether the BreakIterator was created in user memory by
michael@0 515 * createBufferClone(), and thus should not be deleted. Such objects
michael@0 516 * must be closed by an explicit call to the destructor (not delete).
michael@0 517 * @deprecated ICU 52. Always delete the BreakIterator.
michael@0 518 */
michael@0 519 inline UBool isBufferClone(void);
michael@0 520
michael@0 521 #endif /* U_HIDE_DEPRECATED_API */
michael@0 522
michael@0 523 #if !UCONFIG_NO_SERVICE
michael@0 524 /**
michael@0 525 * Register a new break iterator of the indicated kind, to use in the given locale.
michael@0 526 * The break iterator will be adopted. Clones of the iterator will be returned
michael@0 527 * if a request for a break iterator of the given kind matches or falls back to
michael@0 528 * this locale.
michael@0 529 * @param toAdopt the BreakIterator instance to be adopted
michael@0 530 * @param locale the Locale for which this instance is to be registered
michael@0 531 * @param kind the type of iterator for which this instance is to be registered
michael@0 532 * @param status the in/out status code, no special meanings are assigned
michael@0 533 * @return a registry key that can be used to unregister this instance
michael@0 534 * @stable ICU 2.4
michael@0 535 */
michael@0 536 static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
michael@0 537 const Locale& locale,
michael@0 538 UBreakIteratorType kind,
michael@0 539 UErrorCode& status);
michael@0 540
michael@0 541 /**
michael@0 542 * Unregister a previously-registered BreakIterator using the key returned from the
michael@0 543 * register call. Key becomes invalid after a successful call and should not be used again.
michael@0 544 * The BreakIterator corresponding to the key will be deleted.
michael@0 545 * @param key the registry key returned by a previous call to registerInstance
michael@0 546 * @param status the in/out status code, no special meanings are assigned
michael@0 547 * @return TRUE if the iterator for the key was successfully unregistered
michael@0 548 * @stable ICU 2.4
michael@0 549 */
michael@0 550 static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
michael@0 551
michael@0 552 /**
michael@0 553 * Return a StringEnumeration over the locales available at the time of the call,
michael@0 554 * including registered locales.
michael@0 555 * @return a StringEnumeration over the locales available at the time of the call
michael@0 556 * @stable ICU 2.4
michael@0 557 */
michael@0 558 static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
michael@0 559 #endif
michael@0 560
michael@0 561 /**
michael@0 562 * Returns the locale for this break iterator. Two flavors are available: valid and
michael@0 563 * actual locale.
michael@0 564 * @stable ICU 2.8
michael@0 565 */
michael@0 566 Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
michael@0 567
michael@0 568 #ifndef U_HIDE_INTERNAL_API
michael@0 569 /** Get the locale for this break iterator object. You can choose between valid and actual locale.
michael@0 570 * @param type type of the locale we're looking for (valid or actual)
michael@0 571 * @param status error code for the operation
michael@0 572 * @return the locale
michael@0 573 * @internal
michael@0 574 */
michael@0 575 const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
michael@0 576 #endif /* U_HIDE_INTERNAL_API */
michael@0 577
michael@0 578 /**
michael@0 579 * Set the subject text string upon which the break iterator is operating
michael@0 580 * without changing any other aspect of the matching state.
michael@0 581 * The new and previous text strings must have the same content.
michael@0 582 *
michael@0 583 * This function is intended for use in environments where ICU is operating on
michael@0 584 * strings that may move around in memory. It provides a mechanism for notifying
michael@0 585 * ICU that the string has been relocated, and providing a new UText to access the
michael@0 586 * string in its new position.
michael@0 587 *
michael@0 588 * Note that the break iterator implementation never copies the underlying text
michael@0 589 * of a string being processed, but always operates directly on the original text
michael@0 590 * provided by the user. Refreshing simply drops the references to the old text
michael@0 591 * and replaces them with references to the new.
michael@0 592 *
michael@0 593 * Caution: this function is normally used only by very specialized,
michael@0 594 * system-level code. One example use case is with garbage collection that moves
michael@0 595 * the text in memory.
michael@0 596 *
michael@0 597 * @param input The new (moved) text string.
michael@0 598 * @param status Receives errors detected by this function.
michael@0 599 * @return *this
michael@0 600 *
michael@0 601 * @stable ICU 49
michael@0 602 */
michael@0 603 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
michael@0 604
michael@0 605 private:
michael@0 606 static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
michael@0 607 static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
michael@0 608 static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
michael@0 609
michael@0 610 friend class ICUBreakIteratorFactory;
michael@0 611 friend class ICUBreakIteratorService;
michael@0 612
michael@0 613 protected:
michael@0 614 // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
michael@0 615 // or else the compiler will create public ones.
michael@0 616 /** @internal */
michael@0 617 BreakIterator();
michael@0 618 /**
michael@0 618 * Copy constructor, protected so that only subclasses can use it.
michael@0 618 * Copy-constructs the UObject base and carries over the source iterator's
michael@0 618 * actualLocale and validLocale IDs. With an empty body these two arrays
michael@0 618 * were left uninitialized in every copy.
michael@0 618 * @param other the iterator whose locale IDs are copied.
michael@0 618 * @internal
michael@0 618 */
michael@0 619 BreakIterator (const BreakIterator &other) : UObject(other) {
michael@0 619     // Bounded copies: stop at the source's NUL or one short of capacity,
michael@0 619     // then always terminate, so the destination is a valid C string even
michael@0 619     // if the source was not.
michael@0 619     // NOTE(review): assumes other's IDs are NUL-terminated - confirm the
michael@0 619     // default constructor initializes them.
michael@0 619     int32_t i;
michael@0 619     for (i = 0; i < ULOC_FULLNAME_CAPACITY - 1 && other.actualLocale[i] != 0; ++i) {
michael@0 619         actualLocale[i] = other.actualLocale[i];
michael@0 619     }
michael@0 619     actualLocale[i] = 0;
michael@0 619     for (i = 0; i < ULOC_FULLNAME_CAPACITY - 1 && other.validLocale[i] != 0; ++i) {
michael@0 619         validLocale[i] = other.validLocale[i];
michael@0 619     }
michael@0 619     validLocale[i] = 0;
michael@0 619 }
michael@0 620
michael@0 621 private:
michael@0 622
michael@0 623 /** @internal */
michael@0 624 char actualLocale[ULOC_FULLNAME_CAPACITY];
michael@0 625 char validLocale[ULOC_FULLNAME_CAPACITY];
michael@0 626
michael@0 627 /**
michael@0 628 * The assignment operator has no real implementation.
michael@0 629 * It's provided to make the compiler happy. Do not call.
michael@0 630 */
michael@0 631 BreakIterator& operator=(const BreakIterator&);
michael@0 632 };
michael@0 633
michael@0 634 #ifndef U_HIDE_DEPRECATED_API
michael@0 635
michael@0 636 // Deprecated query: unconditionally answers FALSE, so every iterator is reported as not being a buffer clone.
michael@0 637 inline UBool BreakIterator::isBufferClone() {
michael@0 638     return FALSE;
michael@0 639 }
michael@0 640
michael@0 641 #endif /* U_HIDE_DEPRECATED_API */
michael@0 642
michael@0 643 U_NAMESPACE_END
michael@0 644
michael@0 645 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
michael@0 646
michael@0 647 #endif // BRKITER_H
michael@0 648 //eof

mercurial