intl/icu/source/common/unicode/ucasemap.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2005-2012, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: ucasemap.h
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2005may06
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * Case mapping service object and functions using it.
michael@0 17 */
michael@0 18
michael@0 19 #ifndef __UCASEMAP_H__
michael@0 20 #define __UCASEMAP_H__
michael@0 21
michael@0 22 #include "unicode/utypes.h"
michael@0 23 #include "unicode/ustring.h"
michael@0 24 #include "unicode/localpointer.h"
michael@0 25
michael@0 26 /**
michael@0 27 * \file
michael@0 28 * \brief C API: Unicode case mapping functions using a UCaseMap service object.
michael@0 29 *
michael@0 30 * The service object takes care of memory allocations, data loading, and setup
michael@0 31 * for the attributes, as usual.
michael@0 32 *
michael@0 33 * Currently, the functionality provided here does not overlap with uchar.h
michael@0 34 * and ustring.h, except for ucasemap_toTitle().
michael@0 35 *
michael@0 36 * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
michael@0 37 */
michael@0 38
michael@0 39 /**
michael@0 40 * UCaseMap is an opaque service object for newer ICU case mapping functions.
michael@0 41 * Older functions did not use a service object.
michael@0 42 * @stable ICU 3.4
michael@0 43 */
michael@0 44 struct UCaseMap;
michael@0 45 typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
michael@0 46
michael@0 47 /**
michael@0 48 * Open a UCaseMap service object for a locale and a set of options.
michael@0 49 * The locale ID and options are preprocessed so that functions using the
michael@0 50 * service object need not process them in each call.
michael@0 51 *
michael@0 52 * @param locale ICU locale ID, used for language-dependent
michael@0 53 * upper-/lower-/title-casing according to the Unicode standard.
michael@0 54 * Usual semantics: ""=root, NULL=default locale, etc.
michael@0 55 * @param options Options bit set, used for case folding and string comparisons.
michael@0 56 * Same flags as for u_foldCase(), u_strFoldCase(),
michael@0 57 * u_strCaseCompare(), etc.
michael@0 58 * Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
michael@0 59 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 60 * which must not indicate a failure before the function call.
michael@0 61 * @return Pointer to a UCaseMap service object, if successful.
michael@0 62 *
michael@0 63 * @see U_FOLD_CASE_DEFAULT
michael@0 64 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
michael@0 65 * @see U_TITLECASE_NO_LOWERCASE
michael@0 66 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
michael@0 67 * @stable ICU 3.4
michael@0 68 */
michael@0 69 U_STABLE UCaseMap * U_EXPORT2
michael@0 70 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
michael@0 71
michael@0 72 /**
michael@0 73 * Close a UCaseMap service object.
michael@0 74 * @param csm Object to be closed.
michael@0 75 * @stable ICU 3.4
michael@0 76 */
michael@0 77 U_STABLE void U_EXPORT2
michael@0 78 ucasemap_close(UCaseMap *csm);
michael@0 79
michael@0 80 #if U_SHOW_CPLUSPLUS_API
michael@0 81
michael@0 82 U_NAMESPACE_BEGIN
michael@0 83
michael@0 84 /**
michael@0 85 * \class LocalUCaseMapPointer
michael@0 86 * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
michael@0 87 * For most methods see the LocalPointerBase base class.
michael@0 88 *
michael@0 89 * @see LocalPointerBase
michael@0 90 * @see LocalPointer
michael@0 91 * @stable ICU 4.4
michael@0 92 */
michael@0 93 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
michael@0 94
michael@0 95 U_NAMESPACE_END
michael@0 96
michael@0 97 #endif
michael@0 98
michael@0 99 /**
michael@0 100 * Get the locale ID that is used for language-dependent case mappings.
michael@0 101 * @param csm UCaseMap service object.
michael@0 102 * @return locale ID
michael@0 103 * @stable ICU 3.4
michael@0 104 */
michael@0 105 U_STABLE const char * U_EXPORT2
michael@0 106 ucasemap_getLocale(const UCaseMap *csm);
michael@0 107
michael@0 108 /**
michael@0 109 * Get the options bit set that is used for case folding and string comparisons.
michael@0 110 * @param csm UCaseMap service object.
michael@0 111 * @return options bit set
michael@0 112 * @stable ICU 3.4
michael@0 113 */
michael@0 114 U_STABLE uint32_t U_EXPORT2
michael@0 115 ucasemap_getOptions(const UCaseMap *csm);
michael@0 116
michael@0 117 /**
michael@0 118 * Set the locale ID that is used for language-dependent case mappings.
michael@0 119 *
michael@0 120 * @param csm UCaseMap service object.
michael@0 121 * @param locale Locale ID, see ucasemap_open().
michael@0 122 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 123 * which must not indicate a failure before the function call.
michael@0 124 *
michael@0 125 * @see ucasemap_open
michael@0 126 * @stable ICU 3.4
michael@0 127 */
michael@0 128 U_STABLE void U_EXPORT2
michael@0 129 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
michael@0 130
michael@0 131 /**
michael@0 132 * Set the options bit set that is used for case folding and string comparisons.
michael@0 133 *
michael@0 134 * @param csm UCaseMap service object.
michael@0 135 * @param options Options bit set, see ucasemap_open().
michael@0 136 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 137 * which must not indicate a failure before the function call.
michael@0 138 *
michael@0 139 * @see ucasemap_open
michael@0 140 * @stable ICU 3.4
michael@0 141 */
michael@0 142 U_STABLE void U_EXPORT2
michael@0 143 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
michael@0 144
michael@0 145 /**
michael@0 146 * Do not lowercase non-initial parts of words when titlecasing.
michael@0 147 * Option bit for titlecasing APIs that take an options bit set.
michael@0 148 *
michael@0 149 * By default, titlecasing will titlecase the first cased character
michael@0 150 * of a word and lowercase all other characters.
michael@0 151 * With this option, the other characters will not be modified.
michael@0 152 *
michael@0 153 * @see ucasemap_setOptions
michael@0 154 * @see ucasemap_toTitle
michael@0 155 * @see ucasemap_utf8ToTitle
michael@0 156 * @see UnicodeString::toTitle
michael@0 157 * @stable ICU 3.8
michael@0 158 */
michael@0 159 #define U_TITLECASE_NO_LOWERCASE 0x100
michael@0 160
michael@0 161 /**
michael@0 162 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
michael@0 163 * titlecase exactly the characters at breaks from the iterator.
michael@0 164 * Option bit for titlecasing APIs that take an options bit set.
michael@0 165 *
michael@0 166 * By default, titlecasing will take each break iterator index,
michael@0 167 * adjust it by looking for the next cased character, and titlecase that one.
michael@0 168 * Other characters are lowercased.
michael@0 169 *
michael@0 170 * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
michael@0 171 *
michael@0 172 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
michael@0 173 * #29, "Text Boundaries." Between each pair of word boundaries, find the first
michael@0 174 * cased character F. If F exists, map F to default_title(F); then map each
michael@0 175 * subsequent character C to default_lower(C).
michael@0 176 *
michael@0 177 * @see ucasemap_setOptions
michael@0 178 * @see ucasemap_toTitle
michael@0 179 * @see ucasemap_utf8ToTitle
michael@0 180 * @see UnicodeString::toTitle
michael@0 181 * @see U_TITLECASE_NO_LOWERCASE
michael@0 182 * @stable ICU 3.8
michael@0 183 */
michael@0 184 #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
michael@0 185
michael@0 186 #if !UCONFIG_NO_BREAK_ITERATION
michael@0 187
michael@0 188 /**
michael@0 189 * Get the break iterator that is used for titlecasing.
michael@0 190 * Do not modify the returned break iterator.
michael@0 191 * @param csm UCaseMap service object.
michael@0 192 * @return titlecasing break iterator
michael@0 193 * @stable ICU 3.8
michael@0 194 */
michael@0 195 U_STABLE const UBreakIterator * U_EXPORT2
michael@0 196 ucasemap_getBreakIterator(const UCaseMap *csm);
michael@0 197
michael@0 198 /**
michael@0 199 * Set the break iterator that is used for titlecasing.
michael@0 200 * The UCaseMap service object releases a previously set break iterator
michael@0 201 * and "adopts" this new one, taking ownership of it.
michael@0 202 * It will be released in a subsequent call to ucasemap_setBreakIterator()
michael@0 203 * or ucasemap_close().
michael@0 204 *
michael@0 205 * Break iterator operations are not thread-safe. Therefore, titlecasing
michael@0 206 * functions use non-const UCaseMap objects. It is not possible to titlecase
michael@0 207 * strings concurrently using the same UCaseMap.
michael@0 208 *
michael@0 209 * @param csm UCaseMap service object.
michael@0 210 * @param iterToAdopt Break iterator to be adopted for titlecasing.
michael@0 211 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 212 * which must not indicate a failure before the function call.
michael@0 213 *
michael@0 214 * @see ucasemap_toTitle
michael@0 215 * @see ucasemap_utf8ToTitle
michael@0 216 * @stable ICU 3.8
michael@0 217 */
michael@0 218 U_STABLE void U_EXPORT2
michael@0 219 ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
michael@0 220
michael@0 221 /**
michael@0 222 * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
michael@0 223 * except that it takes ucasemap_setOptions() into account and has performance
michael@0 224 * advantages from being able to use a UCaseMap object for multiple case mapping
michael@0 225 * operations, saving setup time.
michael@0 226 *
michael@0 227 * Casing is locale-dependent and context-sensitive.
michael@0 228 * Titlecasing uses a break iterator to find the first characters of words
michael@0 229 * that are to be titlecased. It titlecases those characters and lowercases
michael@0 230 * all others. (This can be modified with ucasemap_setOptions().)
michael@0 231 *
michael@0 232 * Note: This function takes a non-const UCaseMap pointer because it will
michael@0 233 * open a default break iterator if no break iterator was set yet,
michael@0 234 * and effectively call ucasemap_setBreakIterator();
michael@0 235 * also because the break iterator is stateful and will be modified during
michael@0 236 * the iteration.
michael@0 237 *
michael@0 238 * The titlecase break iterator can be provided to customize for arbitrary
michael@0 239 * styles, using rules and dictionaries beyond the standard iterators.
michael@0 240 * The standard titlecase iterator for the root locale implements the
michael@0 241 * algorithm of Unicode TR 21.
michael@0 242 *
michael@0 243 * This function uses only the setUText(), first(), next() and close() methods of the
michael@0 244 * provided break iterator.
michael@0 245 *
michael@0 246 * The result may be longer or shorter than the original.
michael@0 247 * The source string and the destination buffer must not overlap.
michael@0 248 *
michael@0 249 * @param csm UCaseMap service object. This pointer is non-const!
michael@0 250 * See the note above for details.
michael@0 251 * @param dest A buffer for the result string. The result will be NUL-terminated if
michael@0 252 * the buffer is large enough.
michael@0 253 * The contents are undefined in case of failure.
michael@0 254 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
michael@0 255 * dest may be NULL and the function will only return the length of the result
michael@0 256 * without writing any of the result string.
michael@0 257 * @param src The original string.
michael@0 258 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
michael@0 259 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 260 * which must not indicate a failure before the function call.
michael@0 261 * @return The length of the result string, if successful or in case of a buffer overflow;
michael@0 262 * in the overflow case the length will be greater than destCapacity.
michael@0 263 *
michael@0 264 * @see u_strToTitle
michael@0 265 * @stable ICU 3.8
michael@0 266 */
michael@0 267 U_STABLE int32_t U_EXPORT2
michael@0 268 ucasemap_toTitle(UCaseMap *csm,
michael@0 269 UChar *dest, int32_t destCapacity,
michael@0 270 const UChar *src, int32_t srcLength,
michael@0 271 UErrorCode *pErrorCode);
michael@0 272
michael@0 273 #endif
michael@0 274
michael@0 275 /**
michael@0 276 * Lowercase the characters in a UTF-8 string.
michael@0 277 * Casing is locale-dependent and context-sensitive.
michael@0 278 * The result may be longer or shorter than the original.
michael@0 279 * The source string and the destination buffer must not overlap.
michael@0 280 *
michael@0 281 * @param csm UCaseMap service object.
michael@0 282 * @param dest A buffer for the result string. The result will be NUL-terminated if
michael@0 283 * the buffer is large enough.
michael@0 284 * The contents are undefined in case of failure.
michael@0 285 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
michael@0 286 * dest may be NULL and the function will only return the length of the result
michael@0 287 * without writing any of the result string.
michael@0 288 * @param src The original string.
michael@0 289 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
michael@0 290 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 291 * which must not indicate a failure before the function call.
michael@0 292 * @return The length of the result string, if successful or in case of a buffer overflow;
michael@0 293 * in the overflow case the length will be greater than destCapacity.
michael@0 294 *
michael@0 295 * @see u_strToLower
michael@0 296 * @stable ICU 3.4
michael@0 297 */
michael@0 298 U_STABLE int32_t U_EXPORT2
michael@0 299 ucasemap_utf8ToLower(const UCaseMap *csm,
michael@0 300 char *dest, int32_t destCapacity,
michael@0 301 const char *src, int32_t srcLength,
michael@0 302 UErrorCode *pErrorCode);
michael@0 303
michael@0 304 /**
michael@0 305 * Uppercase the characters in a UTF-8 string.
michael@0 306 * Casing is locale-dependent and context-sensitive.
michael@0 307 * The result may be longer or shorter than the original.
michael@0 308 * The source string and the destination buffer must not overlap.
michael@0 309 *
michael@0 310 * @param csm UCaseMap service object.
michael@0 311 * @param dest A buffer for the result string. The result will be NUL-terminated if
michael@0 312 * the buffer is large enough.
michael@0 313 * The contents are undefined in case of failure.
michael@0 314 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
michael@0 315 * dest may be NULL and the function will only return the length of the result
michael@0 316 * without writing any of the result string.
michael@0 317 * @param src The original string.
michael@0 318 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
michael@0 319 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 320 * which must not indicate a failure before the function call.
michael@0 321 * @return The length of the result string, if successful or in case of a buffer overflow;
michael@0 322 * in the overflow case the length will be greater than destCapacity.
michael@0 323 *
michael@0 324 * @see u_strToUpper
michael@0 325 * @stable ICU 3.4
michael@0 326 */
michael@0 327 U_STABLE int32_t U_EXPORT2
michael@0 328 ucasemap_utf8ToUpper(const UCaseMap *csm,
michael@0 329 char *dest, int32_t destCapacity,
michael@0 330 const char *src, int32_t srcLength,
michael@0 331 UErrorCode *pErrorCode);
michael@0 332
michael@0 333 #if !UCONFIG_NO_BREAK_ITERATION
michael@0 334
michael@0 335 /**
michael@0 336 * Titlecase a UTF-8 string.
michael@0 337 * Casing is locale-dependent and context-sensitive.
michael@0 338 * Titlecasing uses a break iterator to find the first characters of words
michael@0 339 * that are to be titlecased. It titlecases those characters and lowercases
michael@0 340 * all others. (This can be modified with ucasemap_setOptions().)
michael@0 341 *
michael@0 342 * Note: This function takes a non-const UCaseMap pointer because it will
michael@0 343 * open a default break iterator if no break iterator was set yet,
michael@0 344 * and effectively call ucasemap_setBreakIterator();
michael@0 345 * also because the break iterator is stateful and will be modified during
michael@0 346 * the iteration.
michael@0 347 *
michael@0 348 * The titlecase break iterator can be provided to customize for arbitrary
michael@0 349 * styles, using rules and dictionaries beyond the standard iterators.
michael@0 350 * The standard titlecase iterator for the root locale implements the
michael@0 351 * algorithm of Unicode TR 21.
michael@0 352 *
michael@0 353 * This function uses only the setUText(), first(), next() and close() methods of the
michael@0 354 * provided break iterator.
michael@0 355 *
michael@0 356 * The result may be longer or shorter than the original.
michael@0 357 * The source string and the destination buffer must not overlap.
michael@0 358 *
michael@0 359 * @param csm UCaseMap service object. This pointer is non-const!
michael@0 360 * See the note above for details.
michael@0 361 * @param dest A buffer for the result string. The result will be NUL-terminated if
michael@0 362 * the buffer is large enough.
michael@0 363 * The contents are undefined in case of failure.
michael@0 364 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
michael@0 365 * dest may be NULL and the function will only return the length of the result
michael@0 366 * without writing any of the result string.
michael@0 367 * @param src The original string.
michael@0 368 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
michael@0 369 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 370 * which must not indicate a failure before the function call.
michael@0 371 * @return The length of the result string, if successful or in case of a buffer overflow;
michael@0 372 * in the overflow case the length will be greater than destCapacity.
michael@0 373 *
michael@0 374 * @see u_strToTitle
michael@0 375 * @see U_TITLECASE_NO_LOWERCASE
michael@0 376 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
michael@0 377 * @stable ICU 3.8
michael@0 378 */
michael@0 379 U_STABLE int32_t U_EXPORT2
michael@0 380 ucasemap_utf8ToTitle(UCaseMap *csm,
michael@0 381 char *dest, int32_t destCapacity,
michael@0 382 const char *src, int32_t srcLength,
michael@0 383 UErrorCode *pErrorCode);
michael@0 384
michael@0 385 #endif
michael@0 386
michael@0 387 /**
michael@0 388 * Case-folds the characters in a UTF-8 string.
michael@0 389 *
michael@0 390 * Case-folding is locale-independent and not context-sensitive,
michael@0 391 * but there is an option for whether to include or exclude mappings for dotted I
michael@0 392 * and dotless i that are marked with 'T' in CaseFolding.txt.
michael@0 393 *
michael@0 394 * The result may be longer or shorter than the original.
michael@0 395 * The source string and the destination buffer must not overlap.
michael@0 396 *
michael@0 397 * @param csm UCaseMap service object.
michael@0 398 * @param dest A buffer for the result string. The result will be NUL-terminated if
michael@0 399 * the buffer is large enough.
michael@0 400 * The contents are undefined in case of failure.
michael@0 401 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
michael@0 402 * dest may be NULL and the function will only return the length of the result
michael@0 403 * without writing any of the result string.
michael@0 404 * @param src The original string.
michael@0 405 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
michael@0 406 * @param pErrorCode Must be a valid pointer to an error code value,
michael@0 407 * which must not indicate a failure before the function call.
michael@0 408 * @return The length of the result string, if successful or in case of a buffer overflow;
michael@0 409 * in the overflow case the length will be greater than destCapacity.
michael@0 410 *
michael@0 411 * @see u_strFoldCase
michael@0 412 * @see ucasemap_setOptions
michael@0 413 * @see U_FOLD_CASE_DEFAULT
michael@0 414 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
michael@0 415 * @stable ICU 3.8
michael@0 416 */
michael@0 417 U_STABLE int32_t U_EXPORT2
michael@0 418 ucasemap_utf8FoldCase(const UCaseMap *csm,
michael@0 419 char *dest, int32_t destCapacity,
michael@0 420 const char *src, int32_t srcLength,
michael@0 421 UErrorCode *pErrorCode);
michael@0 422
michael@0 423 #endif

mercurial