intl/icu/source/common/unicode/ucasemap.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unicode/ucasemap.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,423 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2005-2012, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  ucasemap.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2005may06
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   Case mapping service object and functions using it.
    1.20 +*/
    1.21 +
    1.22 +#ifndef __UCASEMAP_H__
    1.23 +#define __UCASEMAP_H__
    1.24 +
    1.25 +#include "unicode/utypes.h"
    1.26 +#include "unicode/ustring.h"
    1.27 +#include "unicode/localpointer.h"
    1.28 +
    1.29 +/**
    1.30 + * \file
    1.31 + * \brief C API: Unicode case mapping functions using a UCaseMap service object.
    1.32 + *
    1.33 + * The service object takes care of memory allocations, data loading, and setup
    1.34 + * for the attributes, as usual.
    1.35 + *
    1.36 + * Currently, the functionality provided here does not overlap with uchar.h
    1.37 + * and ustring.h, except for ucasemap_toTitle().
    1.38 + *
    1.39 + * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
    1.40 + */
    1.41 +
    1.42 +/**
    1.43 + * UCaseMap is an opaque service object for newer ICU case mapping functions
    1.44 + * (older functions did not use one); see ucasemap_open() and ucasemap_close().
    1.45 + * @stable ICU 3.4
    1.46 + */
    1.47 +struct UCaseMap;
    1.48 +typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
    1.49 +
    1.50 +/**
    1.51 + * Open a UCaseMap service object for a locale and a set of options.
    1.52 + * The locale ID and options are preprocessed so that functions using the
    1.53 + * service object need not process them in each call.
    1.54 + *
    1.55 + * @param locale ICU locale ID, used for language-dependent
    1.56 + *               upper-/lower-/title-casing according to the Unicode standard.
    1.57 + *               Usual semantics: ""=root, NULL=default locale, etc.
    1.58 + * @param options Options bit set, used for case folding, string comparisons
    1.59 + *                and titlecasing. Same flags as for u_foldCase(), u_strFoldCase(),
    1.60 + *                u_strCaseCompare(), etc., plus the U_TITLECASE_ option bits.
    1.61 + *                Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
    1.62 + * @param pErrorCode Must be a valid pointer to an error code value,
    1.63 + *                   which must not indicate a failure before the function call.
    1.64 + * @return Pointer to a UCaseMap service object, if successful; close it with ucasemap_close().
    1.65 + *
    1.66 + * @see U_FOLD_CASE_DEFAULT
    1.67 + * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
    1.68 + * @see U_TITLECASE_NO_LOWERCASE
    1.69 + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
    1.70 + * @stable ICU 3.4
    1.71 + */
    1.72 +U_STABLE UCaseMap * U_EXPORT2
    1.73 +ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
    1.74 +
    1.75 +/**
    1.76 + * Close a UCaseMap service object; this also releases a break iterator it has adopted.
    1.77 + * @param csm Object to be closed, as returned by ucasemap_open().
    1.78 + * @stable ICU 3.4
    1.79 + */
    1.80 +U_STABLE void U_EXPORT2
    1.81 +ucasemap_close(UCaseMap *csm);
    1.82 +
    1.83 +#if U_SHOW_CPLUSPLUS_API
    1.84 +
    1.85 +U_NAMESPACE_BEGIN
    1.86 +
    1.87 +/**
    1.88 + * \class LocalUCaseMapPointer
    1.89 + * "Smart pointer" class (C++ API only), closes a UCaseMap via ucasemap_close().
    1.90 + * For most methods see the LocalPointerBase base class.
    1.91 + *
    1.92 + * @see LocalPointerBase
    1.93 + * @see LocalPointer
    1.94 + * @stable ICU 4.4
    1.95 + */
    1.96 +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
    1.97 +
    1.98 +U_NAMESPACE_END
    1.99 +
   1.100 +#endif
   1.101 +
   1.102 +/**
   1.103 + * Get the locale ID that is used for language-dependent case mappings (see ucasemap_setLocale()).
   1.104 + * @param csm UCaseMap service object.
   1.105 + * @return locale ID
   1.106 + * @stable ICU 3.4
   1.107 + */
   1.108 +U_STABLE const char * U_EXPORT2
   1.109 +ucasemap_getLocale(const UCaseMap *csm);
   1.110 +
   1.111 +/**
   1.112 + * Get the options bit set that is used for case folding, string comparisons and titlecasing.
   1.113 + * @param csm UCaseMap service object.
   1.114 + * @return options bit set
   1.115 + * @stable ICU 3.4
   1.116 + */
   1.117 +U_STABLE uint32_t U_EXPORT2
   1.118 +ucasemap_getOptions(const UCaseMap *csm);
   1.119 +
   1.120 +/**
   1.121 + * Set the locale ID that is used for language-dependent case mappings.
   1.122 + *
   1.123 + * @param csm UCaseMap service object.
   1.124 + * @param locale Locale ID as for ucasemap_open() (empty string = root, NULL = default locale).
   1.125 + * @param pErrorCode Must be a valid pointer to an error code value,
   1.126 + *                   which must not indicate a failure before the function call.
   1.127 + *
   1.128 + * @see ucasemap_open
   1.129 + * @stable ICU 3.4
   1.130 + */
   1.131 +U_STABLE void U_EXPORT2
   1.132 +ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
   1.133 +
   1.134 +/**
   1.135 + * Set the options bit set that is used for case folding, string comparisons and titlecasing.
   1.136 + *
   1.137 + * @param csm UCaseMap service object.
   1.138 + * @param options Options bit set, see ucasemap_open().
   1.139 + * @param pErrorCode Must be a valid pointer to an error code value,
   1.140 + *                   which must not indicate a failure before the function call.
   1.141 + *
   1.142 + * @see ucasemap_open
   1.143 + * @stable ICU 3.4
   1.144 + */
   1.145 +U_STABLE void U_EXPORT2
   1.146 +ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
   1.147 +
   1.148 +/**
   1.149 + * Do not lowercase non-initial parts of words when titlecasing.
   1.150 + * Option bit for titlecasing APIs that take an options bit set.
   1.151 + *
   1.152 + * By default, titlecasing will titlecase the first cased character
   1.153 + * of a word and lowercase all other characters.
   1.154 + * With this option, the other characters will not be modified.
   1.155 + *
   1.156 + * @see ucasemap_setOptions
   1.157 + * @see ucasemap_toTitle
   1.158 + * @see ucasemap_utf8ToTitle
   1.159 + * @see UnicodeString::toTitle
   1.160 + * @stable ICU 3.8
   1.161 + */
   1.162 +#define U_TITLECASE_NO_LOWERCASE 0x100
   1.163 +
   1.164 +/**
   1.165 + * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
   1.166 + * titlecase exactly the characters at breaks from the iterator.
   1.167 + * Option bit for titlecasing APIs that take an options bit set.
   1.168 + *
   1.169 + * By default, titlecasing will take each break iterator index,
   1.170 + * adjust it by looking for the next cased character, and titlecase that one.
   1.171 + * Other characters are lowercased.
   1.172 + *
   1.173 + * This default behavior follows Unicode 4 & 5 section 3.13 Default Case Operations:
   1.174 + *
   1.175 + * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
   1.176 + * #29, "Text Boundaries." Between each pair of word boundaries, find the first
   1.177 + * cased character F. If F exists, map F to default_title(F); then map each
   1.178 + * subsequent character C to default_lower(C).
   1.179 + *
   1.180 + * @see ucasemap_setOptions
   1.181 + * @see ucasemap_toTitle
   1.182 + * @see ucasemap_utf8ToTitle
   1.183 + * @see UnicodeString::toTitle
   1.184 + * @see U_TITLECASE_NO_LOWERCASE
   1.185 + * @stable ICU 3.8
   1.186 + */
   1.187 +#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
   1.188 +
   1.189 +#if !UCONFIG_NO_BREAK_ITERATION
   1.190 +
   1.191 +/**
   1.192 + * Get the break iterator that is used for titlecasing.
   1.193 + * Do not modify or close the returned break iterator; an adopted iterator is owned by the UCaseMap.
   1.194 + * @param csm UCaseMap service object.
   1.195 + * @return titlecasing break iterator
   1.196 + * @stable ICU 3.8
   1.197 + */
   1.198 +U_STABLE const UBreakIterator * U_EXPORT2
   1.199 +ucasemap_getBreakIterator(const UCaseMap *csm);
   1.200 +
   1.201 +/**
   1.202 + * Set the break iterator that is used for titlecasing.
   1.203 + * The UCaseMap service object releases a previously set break iterator
   1.204 + * and "adopts" this new one, taking ownership of it.
   1.205 + * It will be released in a subsequent call to ucasemap_setBreakIterator()
   1.206 + * or ucasemap_close().
   1.207 + *
   1.208 + * Break iterator operations are not thread-safe. Therefore, titlecasing
   1.209 + * functions use non-const UCaseMap objects. It is not possible to titlecase
   1.210 + * strings concurrently using the same UCaseMap.
   1.211 + *
   1.212 + * @param csm UCaseMap service object.
   1.213 + * @param iterToAdopt Break iterator to be adopted for titlecasing; csm takes ownership of it.
   1.214 + * @param pErrorCode Must be a valid pointer to an error code value,
   1.215 + *                   which must not indicate a failure before the function call.
   1.216 + *
   1.217 + * @see ucasemap_toTitle
   1.218 + * @see ucasemap_utf8ToTitle
   1.219 + * @stable ICU 3.8
   1.220 + */
   1.221 +U_STABLE void U_EXPORT2
   1.222 +ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
   1.223 +
   1.224 +/**
   1.225 + * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
   1.226 + * except that it takes ucasemap_setOptions() into account and has performance
   1.227 + * advantages from being able to use a UCaseMap object for multiple case mapping
   1.228 + * operations, saving setup time.
   1.229 + *
   1.230 + * Casing is locale-dependent and context-sensitive.
   1.231 + * Titlecasing uses a break iterator to find the first characters of words
   1.232 + * that are to be titlecased. It titlecases those characters and lowercases
   1.233 + * all others. (This can be modified with ucasemap_setOptions().)
   1.234 + *
   1.235 + * Note: This function takes a non-const UCaseMap pointer because it will
   1.236 + * open a default break iterator if no break iterator was set yet,
   1.237 + * and effectively call ucasemap_setBreakIterator();
   1.238 + * also because the break iterator is stateful and will be modified during
   1.239 + * the iteration.
   1.240 + *
   1.241 + * The titlecase break iterator can be provided to customize for arbitrary
   1.242 + * styles, using rules and dictionaries beyond the standard iterators.
   1.243 + * The standard titlecase iterator for the root locale implements the
   1.244 + * algorithm of Unicode TR 21.
   1.245 + *
   1.246 + * This function uses only the setUText(), first(), next() and close() methods of the
   1.247 + * provided break iterator.
   1.248 + *
   1.249 + * The result may be longer or shorter than the original.
   1.250 + * The source string and the destination buffer must not overlap.
   1.251 + *
   1.252 + * @param csm       UCaseMap service object. This pointer is non-const!
   1.253 + *                  See the note above for details.
   1.254 + * @param dest      A buffer for the result string. The result will be NUL-terminated if
   1.255 + *                  the buffer is large enough.
   1.256 + *                  The contents are undefined in case of failure.
   1.257 + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
   1.258 + *                  dest may be NULL and the function will only return the length of the result
   1.259 + *                  without writing any of the result string.
   1.260 + * @param src       The original UTF-16 string.
   1.261 + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
   1.262 + * @param pErrorCode Must be a valid pointer to an error code value,
   1.263 + *                  which must not indicate a failure before the function call.
   1.264 + * @return The length of the result string, if successful or in case of a buffer overflow;
   1.265 + *         on overflow it will be greater than destCapacity.
   1.266 + *
   1.267 + * @see u_strToTitle
   1.268 + * @stable ICU 3.8
   1.269 + */
   1.270 +U_STABLE int32_t U_EXPORT2
   1.271 +ucasemap_toTitle(UCaseMap *csm,
   1.272 +                 UChar *dest, int32_t destCapacity,
   1.273 +                 const UChar *src, int32_t srcLength,
   1.274 +                 UErrorCode *pErrorCode);
   1.275 +
   1.276 +#endif
   1.277 +
   1.278 +/**
   1.279 + * Lowercase the characters in a UTF-8 string.
   1.280 + * Casing is locale-dependent and context-sensitive.
   1.281 + * The result may be longer or shorter than the original.
   1.282 + * The source string and the destination buffer must not overlap.
   1.283 + *
   1.284 + * @param csm       UCaseMap service object.
   1.285 + * @param dest      A buffer for the result string. The result will be NUL-terminated if
   1.286 + *                  the buffer is large enough.
   1.287 + *                  The contents are undefined in case of failure.
   1.288 + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
   1.289 + *                  dest may be NULL and the function will only return the length of the result
   1.290 + *                  without writing any of the result string.
   1.291 + * @param src       The original UTF-8 string.
   1.292 + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
   1.293 + * @param pErrorCode Must be a valid pointer to an error code value,
   1.294 + *                  which must not indicate a failure before the function call.
   1.295 + * @return The length of the result string, if successful or in case of a buffer overflow;
   1.296 + *         on overflow it will be greater than destCapacity.
   1.297 + *
   1.298 + * @see u_strToLower
   1.299 + * @stable ICU 3.4
   1.300 + */
   1.301 +U_STABLE int32_t U_EXPORT2
   1.302 +ucasemap_utf8ToLower(const UCaseMap *csm,
   1.303 +                     char *dest, int32_t destCapacity,
   1.304 +                     const char *src, int32_t srcLength,
   1.305 +                     UErrorCode *pErrorCode);
   1.306 +
   1.307 +/**
   1.308 + * Uppercase the characters in a UTF-8 string.
   1.309 + * Casing is locale-dependent and context-sensitive.
   1.310 + * The result may be longer or shorter than the original.
   1.311 + * The source string and the destination buffer must not overlap.
   1.312 + *
   1.313 + * @param csm       UCaseMap service object.
   1.314 + * @param dest      A buffer for the result string. The result will be NUL-terminated if
   1.315 + *                  the buffer is large enough.
   1.316 + *                  The contents are undefined in case of failure.
   1.317 + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
   1.318 + *                  dest may be NULL and the function will only return the length of the result
   1.319 + *                  without writing any of the result string.
   1.320 + * @param src       The original UTF-8 string.
   1.321 + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
   1.322 + * @param pErrorCode Must be a valid pointer to an error code value,
   1.323 + *                  which must not indicate a failure before the function call.
   1.324 + * @return The length of the result string, if successful or in case of a buffer overflow;
   1.325 + *         on overflow it will be greater than destCapacity.
   1.326 + *
   1.327 + * @see u_strToUpper
   1.328 + * @stable ICU 3.4
   1.329 + */
   1.330 +U_STABLE int32_t U_EXPORT2
   1.331 +ucasemap_utf8ToUpper(const UCaseMap *csm,
   1.332 +                     char *dest, int32_t destCapacity,
   1.333 +                     const char *src, int32_t srcLength,
   1.334 +                     UErrorCode *pErrorCode);
   1.335 +
   1.336 +#if !UCONFIG_NO_BREAK_ITERATION
   1.337 +
   1.338 +/**
   1.339 + * Titlecase a UTF-8 string.
   1.340 + * Casing is locale-dependent and context-sensitive.
   1.341 + * Titlecasing uses a break iterator to find the first characters of words
   1.342 + * that are to be titlecased. It titlecases those characters and lowercases
   1.343 + * all others. (This can be modified with ucasemap_setOptions().)
   1.344 + *
   1.345 + * Note: This function takes a non-const UCaseMap pointer because it will
   1.346 + * open a default break iterator if no break iterator was set yet,
   1.347 + * and effectively call ucasemap_setBreakIterator();
   1.348 + * also because the break iterator is stateful and will be modified during
   1.349 + * the iteration.
   1.350 + *
   1.351 + * The titlecase break iterator can be provided to customize for arbitrary
   1.352 + * styles, using rules and dictionaries beyond the standard iterators.
   1.353 + * The standard titlecase iterator for the root locale implements the
   1.354 + * algorithm of Unicode TR 21.
   1.355 + *
   1.356 + * This function uses only the setUText(), first(), next() and close() methods of the
   1.357 + * provided break iterator.
   1.358 + *
   1.359 + * The result may be longer or shorter than the original.
   1.360 + * The source string and the destination buffer must not overlap.
   1.361 + *
   1.362 + * @param csm       UCaseMap service object. This pointer is non-const!
   1.363 + *                  See the note above for details.
   1.364 + * @param dest      A buffer for the result string. The result will be NUL-terminated if
   1.365 + *                  the buffer is large enough.
   1.366 + *                  The contents are undefined in case of failure.
   1.367 + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
   1.368 + *                  dest may be NULL and the function will only return the length of the result
   1.369 + *                  without writing any of the result string.
   1.370 + * @param src       The original UTF-8 string.
   1.371 + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
   1.372 + * @param pErrorCode Must be a valid pointer to an error code value,
   1.373 + *                  which must not indicate a failure before the function call.
   1.374 + * @return The length of the result string, if successful or in case of a buffer overflow;
   1.375 + *         on overflow it will be greater than destCapacity.
   1.376 + *
   1.377 + * @see u_strToTitle
   1.378 + * @see U_TITLECASE_NO_LOWERCASE
   1.379 + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
   1.380 + * @stable ICU 3.8
   1.381 + */
   1.382 +U_STABLE int32_t U_EXPORT2
   1.383 +ucasemap_utf8ToTitle(UCaseMap *csm,
   1.384 +                    char *dest, int32_t destCapacity,
   1.385 +                    const char *src, int32_t srcLength,
   1.386 +                    UErrorCode *pErrorCode);
   1.387 +
   1.388 +#endif
   1.389 +
   1.390 +/**
   1.391 + * Case-folds the characters in a UTF-8 string.
   1.392 + *
   1.393 + * Case-folding is locale-independent and not context-sensitive,
   1.394 + * but there is an option for whether to include or exclude mappings for dotted I
   1.395 + * and dotless i that are marked with 'T' in CaseFolding.txt.
   1.396 + *
   1.397 + * The result may be longer or shorter than the original.
   1.398 + * The source string and the destination buffer must not overlap.
   1.399 + *
   1.400 + * @param csm       UCaseMap service object.
   1.401 + * @param dest      A buffer for the result string. The result will be NUL-terminated if
   1.402 + *                  the buffer is large enough.
   1.403 + *                  The contents are undefined in case of failure.
   1.404 + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
   1.405 + *                  dest may be NULL and the function will only return the length of the result
   1.406 + *                  without writing any of the result string.
   1.407 + * @param src       The original UTF-8 string.
   1.408 + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
   1.409 + * @param pErrorCode Must be a valid pointer to an error code value,
   1.410 + *                  which must not indicate a failure before the function call.
   1.411 + * @return The length of the result string, if successful or in case of a buffer overflow;
   1.412 + *         on overflow it will be greater than destCapacity.
   1.413 + *
   1.414 + * @see u_strFoldCase
   1.415 + * @see ucasemap_setOptions
   1.416 + * @see U_FOLD_CASE_DEFAULT
   1.417 + * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
   1.418 + * @stable ICU 3.8
   1.419 + */
   1.420 +U_STABLE int32_t U_EXPORT2
   1.421 +ucasemap_utf8FoldCase(const UCaseMap *csm,
   1.422 +                      char *dest, int32_t destCapacity,
   1.423 +                      const char *src, int32_t srcLength,
   1.424 +                      UErrorCode *pErrorCode);
   1.425 +
   1.426 +#endif

mercurial