1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/unicode/ucasemap.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,423 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2005-2012, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: ucasemap.h 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2005may06 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* Case mapping service object and functions using it. 1.20 +*/ 1.21 + 1.22 +#ifndef __UCASEMAP_H__ 1.23 +#define __UCASEMAP_H__ 1.24 + 1.25 +#include "unicode/utypes.h" 1.26 +#include "unicode/ustring.h" 1.27 +#include "unicode/localpointer.h" 1.28 + 1.29 +/** 1.30 + * \file 1.31 + * \brief C API: Unicode case mapping functions using a UCaseMap service object. 1.32 + * 1.33 + * The service object takes care of memory allocations, data loading, and setup 1.34 + * for the attributes, as usual. 1.35 + * 1.36 + * Currently, the functionality provided here does not overlap with uchar.h 1.37 + * and ustring.h, except for ucasemap_toTitle(). 1.38 + * 1.39 + * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings. 1.40 + */ 1.41 + 1.42 +/** 1.43 + * UCaseMap is an opaque service object for newer ICU case mapping functions. 1.44 + * Older functions did not use a service object. 1.45 + * @stable ICU 3.4 1.46 + */ 1.47 +struct UCaseMap; 1.48 +typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */ 1.49 + 1.50 +/** 1.51 + * Open a UCaseMap service object for a locale and a set of options. 1.52 + * The locale ID and options are preprocessed so that functions using the 1.53 + * service object need not process them in each call.
1.54 + * 1.55 + * @param locale ICU locale ID, used for language-dependent 1.56 + * upper-/lower-/title-casing according to the Unicode standard. 1.57 + * Usual semantics: ""=root, NULL=default locale, etc. 1.58 + * @param options Options bit set, used for case folding and string comparisons. 1.59 + * Same flags as for u_foldCase(), u_strFoldCase(), 1.60 + * u_strCaseCompare(), etc. 1.61 + * Use 0 or U_FOLD_CASE_DEFAULT for default behavior. 1.62 + * @param pErrorCode Must be a valid pointer to an error code value, 1.63 + * which must not indicate a failure before the function call. 1.64 + * @return Pointer to a UCaseMap service object, if successful. Close it with ucasemap_close() when it is no longer needed. 1.65 + * 1.66 + * @see U_FOLD_CASE_DEFAULT 1.67 + * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.68 + * @see U_TITLECASE_NO_LOWERCASE 1.69 + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT 1.70 + * @stable ICU 3.4 1.71 + */ 1.72 +U_STABLE UCaseMap * U_EXPORT2 1.73 +ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode); 1.74 + 1.75 +/** 1.76 + * Close a UCaseMap service object. 1.77 + * @param csm Object to be closed. 1.78 + * @stable ICU 3.4 1.79 + */ 1.80 +U_STABLE void U_EXPORT2 1.81 +ucasemap_close(UCaseMap *csm); 1.82 + 1.83 +#if U_SHOW_CPLUSPLUS_API 1.84 + 1.85 +U_NAMESPACE_BEGIN 1.86 + 1.87 +/** 1.88 + * \class LocalUCaseMapPointer 1.89 + * "Smart pointer" class, closes a UCaseMap via ucasemap_close(). 1.90 + * For most methods see the LocalPointerBase base class. 1.91 + * 1.92 + * @see LocalPointerBase 1.93 + * @see LocalPointer 1.94 + * @stable ICU 4.4 1.95 + */ 1.96 +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close); 1.97 + 1.98 +U_NAMESPACE_END 1.99 + 1.100 +#endif /* U_SHOW_CPLUSPLUS_API */ 1.101 + 1.102 +/** 1.103 + * Get the locale ID that is used for language-dependent case mappings. 1.104 + * @param csm UCaseMap service object.
1.105 + * @return locale ID 1.106 + * @stable ICU 3.4 1.107 + */ 1.108 +U_STABLE const char * U_EXPORT2 1.109 +ucasemap_getLocale(const UCaseMap *csm); 1.110 + 1.111 +/** 1.112 + * Get the options bit set that is used for case folding and string comparisons. 1.113 + * @param csm UCaseMap service object. 1.114 + * @return options bit set 1.115 + * @stable ICU 3.4 1.116 + */ 1.117 +U_STABLE uint32_t U_EXPORT2 1.118 +ucasemap_getOptions(const UCaseMap *csm); 1.119 + 1.120 +/** 1.121 + * Set the locale ID that is used for language-dependent case mappings. 1.122 + * 1.123 + * @param csm UCaseMap service object. 1.124 + * @param locale Locale ID, see ucasemap_open(). 1.125 + * @param pErrorCode Must be a valid pointer to an error code value, 1.126 + * which must not indicate a failure before the function call. 1.127 + * 1.128 + * @see ucasemap_open 1.129 + * @stable ICU 3.4 1.130 + */ 1.131 +U_STABLE void U_EXPORT2 1.132 +ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode); 1.133 + 1.134 +/** 1.135 + * Set the options bit set that is used for case folding and string comparisons. 1.136 + * 1.137 + * @param csm UCaseMap service object. 1.138 + * @param options Options bit set, see ucasemap_open(). 1.139 + * @param pErrorCode Must be a valid pointer to an error code value, 1.140 + * which must not indicate a failure before the function call. 1.141 + * 1.142 + * @see ucasemap_open 1.143 + * @stable ICU 3.4 1.144 + */ 1.145 +U_STABLE void U_EXPORT2 1.146 +ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); 1.147 + 1.148 +/** 1.149 + * Do not lowercase non-initial parts of words when titlecasing. 1.150 + * Option bit for titlecasing APIs that take an options bit set. 1.151 + * 1.152 + * By default, titlecasing will titlecase the first cased character 1.153 + * of a word and lowercase all other characters. 1.154 + * With this option, the other characters will not be modified.
1.155 + * 1.156 + * @see ucasemap_setOptions 1.157 + * @see ucasemap_toTitle 1.158 + * @see ucasemap_utf8ToTitle 1.159 + * @see UnicodeString::toTitle 1.160 + * @stable ICU 3.8 1.161 + */ 1.162 +#define U_TITLECASE_NO_LOWERCASE 0x100 1.163 + 1.164 +/** 1.165 + * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; 1.166 + * titlecase exactly the characters at breaks from the iterator. 1.167 + * Option bit for titlecasing APIs that take an options bit set. 1.168 + * 1.169 + * By default, titlecasing will take each break iterator index, 1.170 + * adjust it by looking for the next cased character, and titlecase that one. 1.171 + * Other characters are lowercased. 1.172 + * 1.173 + * This follows Unicode 4 & 5 section 3.13 Default Case Operations: 1.174 + * 1.175 + * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 1.176 + * #29, "Text Boundaries." Between each pair of word boundaries, find the first 1.177 + * cased character F. If F exists, map F to default_title(F); then map each 1.178 + * subsequent character C to default_lower(C). 1.179 + * 1.180 + * @see ucasemap_setOptions 1.181 + * @see ucasemap_toTitle 1.182 + * @see ucasemap_utf8ToTitle 1.183 + * @see UnicodeString::toTitle 1.184 + * @see U_TITLECASE_NO_LOWERCASE 1.185 + * @stable ICU 3.8 1.186 + */ 1.187 +#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 1.188 + 1.189 +#if !UCONFIG_NO_BREAK_ITERATION 1.190 + 1.191 +/** 1.192 + * Get the break iterator that is used for titlecasing. 1.193 + * Do not modify the returned break iterator. 1.194 + * @param csm UCaseMap service object. 1.195 + * @return titlecasing break iterator 1.196 + * @stable ICU 3.8 1.197 + */ 1.198 +U_STABLE const UBreakIterator * U_EXPORT2 1.199 +ucasemap_getBreakIterator(const UCaseMap *csm); 1.200 + 1.201 +/** 1.202 + * Set the break iterator that is used for titlecasing.
1.203 + * The UCaseMap service object releases a previously set break iterator 1.204 + * and "adopts" this new one, taking ownership of it. 1.205 + * It will be released in a subsequent call to ucasemap_setBreakIterator() 1.206 + * or ucasemap_close(). 1.207 + * 1.208 + * Break iterator operations are not thread-safe. Therefore, titlecasing 1.209 + * functions use non-const UCaseMap objects. It is not possible to titlecase 1.210 + * strings concurrently using the same UCaseMap. 1.211 + * 1.212 + * @param csm UCaseMap service object. 1.213 + * @param iterToAdopt Break iterator to be adopted for titlecasing. 1.214 + * @param pErrorCode Must be a valid pointer to an error code value, 1.215 + * which must not indicate a failure before the function call. 1.216 + * 1.217 + * @see ucasemap_toTitle 1.218 + * @see ucasemap_utf8ToTitle 1.219 + * @stable ICU 3.8 1.220 + */ 1.221 +U_STABLE void U_EXPORT2 1.222 +ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode); 1.223 + 1.224 +/** 1.225 + * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(), 1.226 + * except that it takes ucasemap_setOptions() into account and has performance 1.227 + * advantages from being able to use a UCaseMap object for multiple case mapping 1.228 + * operations, saving setup time. 1.229 + * 1.230 + * Casing is locale-dependent and context-sensitive. 1.231 + * Titlecasing uses a break iterator to find the first characters of words 1.232 + * that are to be titlecased. It titlecases those characters and lowercases 1.233 + * all others. (This can be modified with ucasemap_setOptions().) 1.234 + * 1.235 + * Note: This function takes a non-const UCaseMap pointer because it will 1.236 + * open a default break iterator if no break iterator was set yet, 1.237 + * and effectively call ucasemap_setBreakIterator(); 1.238 + * also because the break iterator is stateful and will be modified during 1.239 + * the iteration.
1.240 + * 1.241 + * The titlecase break iterator can be provided to customize for arbitrary 1.242 + * styles, using rules and dictionaries beyond the standard iterators. 1.243 + * The standard titlecase iterator for the root locale implements the 1.244 + * algorithm of Unicode TR 21. 1.245 + * 1.246 + * This function uses only the setUText(), first(), next() and close() methods of the 1.247 + * provided break iterator. 1.248 + * 1.249 + * The result may be longer or shorter than the original. 1.250 + * The source string and the destination buffer must not overlap. 1.251 + * 1.252 + * @param csm UCaseMap service object. This pointer is non-const! 1.253 + * See the note above for details. 1.254 + * @param dest A buffer for the result string. The result will be NUL-terminated if 1.255 + * the buffer is large enough. 1.256 + * The contents is undefined in case of failure. 1.257 + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then 1.258 + * dest may be NULL and the function will only return the length of the result 1.259 + * without writing any of the result string. 1.260 + * @param src The original string. 1.261 + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. 1.262 + * @param pErrorCode Must be a valid pointer to an error code value, 1.263 + * which must not indicate a failure before the function call. 1.264 + * @return The length of the result string, if successful - or in case of a buffer overflow, 1.265 + * in which case it will be greater than destCapacity. 1.266 + * 1.267 + * @see u_strToTitle 1.268 + * @stable ICU 3.8 1.269 + */ 1.270 +U_STABLE int32_t U_EXPORT2 1.271 +ucasemap_toTitle(UCaseMap *csm, 1.272 + UChar *dest, int32_t destCapacity, 1.273 + const UChar *src, int32_t srcLength, 1.274 + UErrorCode *pErrorCode); 1.275 + 1.276 +#endif /* !UCONFIG_NO_BREAK_ITERATION */ 1.277 + 1.278 +/** 1.279 + * Lowercase the characters in a UTF-8 string. 1.280 + * Casing is locale-dependent and context-sensitive.
1.281 + * The result may be longer or shorter than the original. 1.282 + * The source string and the destination buffer must not overlap. 1.283 + * 1.284 + * @param csm UCaseMap service object. 1.285 + * @param dest A buffer for the result string. The result will be NUL-terminated if 1.286 + * the buffer is large enough. 1.287 + * The contents is undefined in case of failure. 1.288 + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then 1.289 + * dest may be NULL and the function will only return the length of the result 1.290 + * without writing any of the result string. 1.291 + * @param src The original string. 1.292 + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. 1.293 + * @param pErrorCode Must be a valid pointer to an error code value, 1.294 + * which must not indicate a failure before the function call. 1.295 + * @return The length of the result string, if successful - or in case of a buffer overflow, 1.296 + * in which case it will be greater than destCapacity. 1.297 + * 1.298 + * @see u_strToLower 1.299 + * @stable ICU 3.4 1.300 + */ 1.301 +U_STABLE int32_t U_EXPORT2 1.302 +ucasemap_utf8ToLower(const UCaseMap *csm, 1.303 + char *dest, int32_t destCapacity, 1.304 + const char *src, int32_t srcLength, 1.305 + UErrorCode *pErrorCode); 1.306 + 1.307 +/** 1.308 + * Uppercase the characters in a UTF-8 string. 1.309 + * Casing is locale-dependent and context-sensitive. 1.310 + * The result may be longer or shorter than the original. 1.311 + * The source string and the destination buffer must not overlap. 1.312 + * 1.313 + * @param csm UCaseMap service object. 1.314 + * @param dest A buffer for the result string. The result will be NUL-terminated if 1.315 + * the buffer is large enough. 1.316 + * The contents is undefined in case of failure. 1.317 + * @param destCapacity The size of the buffer (number of bytes).
If it is 0, then 1.318 + * dest may be NULL and the function will only return the length of the result 1.319 + * without writing any of the result string. 1.320 + * @param src The original string. 1.321 + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. 1.322 + * @param pErrorCode Must be a valid pointer to an error code value, 1.323 + * which must not indicate a failure before the function call. 1.324 + * @return The length of the result string, if successful - or in case of a buffer overflow, 1.325 + * in which case it will be greater than destCapacity. 1.326 + * 1.327 + * @see u_strToUpper 1.328 + * @stable ICU 3.4 1.329 + */ 1.330 +U_STABLE int32_t U_EXPORT2 1.331 +ucasemap_utf8ToUpper(const UCaseMap *csm, 1.332 + char *dest, int32_t destCapacity, 1.333 + const char *src, int32_t srcLength, 1.334 + UErrorCode *pErrorCode); 1.335 + 1.336 +#if !UCONFIG_NO_BREAK_ITERATION 1.337 + 1.338 +/** 1.339 + * Titlecase a UTF-8 string. 1.340 + * Casing is locale-dependent and context-sensitive. 1.341 + * Titlecasing uses a break iterator to find the first characters of words 1.342 + * that are to be titlecased. It titlecases those characters and lowercases 1.343 + * all others. (This can be modified with ucasemap_setOptions().) 1.344 + * 1.345 + * Note: This function takes a non-const UCaseMap pointer because it will 1.346 + * open a default break iterator if no break iterator was set yet, 1.347 + * and effectively call ucasemap_setBreakIterator(); 1.348 + * also because the break iterator is stateful and will be modified during 1.349 + * the iteration. 1.350 + * 1.351 + * The titlecase break iterator can be provided to customize for arbitrary 1.352 + * styles, using rules and dictionaries beyond the standard iterators. 1.353 + * The standard titlecase iterator for the root locale implements the 1.354 + * algorithm of Unicode TR 21.
1.355 + * 1.356 + * This function uses only the setUText(), first(), next() and close() methods of the 1.357 + * provided break iterator. 1.358 + * 1.359 + * The result may be longer or shorter than the original. 1.360 + * The source string and the destination buffer must not overlap. 1.361 + * 1.362 + * @param csm UCaseMap service object. This pointer is non-const! 1.363 + * See the note above for details. 1.364 + * @param dest A buffer for the result string. The result will be NUL-terminated if 1.365 + * the buffer is large enough. 1.366 + * The contents is undefined in case of failure. 1.367 + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then 1.368 + * dest may be NULL and the function will only return the length of the result 1.369 + * without writing any of the result string. 1.370 + * @param src The original string. 1.371 + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. 1.372 + * @param pErrorCode Must be a valid pointer to an error code value, 1.373 + * which must not indicate a failure before the function call. 1.374 + * @return The length of the result string, if successful - or in case of a buffer overflow, 1.375 + * in which case it will be greater than destCapacity. 1.376 + * 1.377 + * @see u_strToTitle 1.378 + * @see U_TITLECASE_NO_LOWERCASE 1.379 + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT 1.380 + * @stable ICU 3.8 1.381 + */ 1.382 +U_STABLE int32_t U_EXPORT2 1.383 +ucasemap_utf8ToTitle(UCaseMap *csm, 1.384 + char *dest, int32_t destCapacity, 1.385 + const char *src, int32_t srcLength, 1.386 + UErrorCode *pErrorCode); 1.387 + 1.388 +#endif /* !UCONFIG_NO_BREAK_ITERATION */ 1.389 + 1.390 +/** 1.391 + * Case-folds the characters in a UTF-8 string. 1.392 + * 1.393 + * Case-folding is locale-independent and not context-sensitive, 1.394 + * but there is an option for whether to include or exclude mappings for dotted I 1.395 + * and dotless i that are marked with 'T' in CaseFolding.txt.
1.396 + * 1.397 + * The result may be longer or shorter than the original. 1.398 + * The source string and the destination buffer must not overlap. 1.399 + * 1.400 + * @param csm UCaseMap service object. 1.401 + * @param dest A buffer for the result string. The result will be NUL-terminated if 1.402 + * the buffer is large enough. 1.403 + * The contents is undefined in case of failure. 1.404 + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then 1.405 + * dest may be NULL and the function will only return the length of the result 1.406 + * without writing any of the result string. 1.407 + * @param src The original string. 1.408 + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. 1.409 + * @param pErrorCode Must be a valid pointer to an error code value, 1.410 + * which must not indicate a failure before the function call. 1.411 + * @return The length of the result string, if successful - or in case of a buffer overflow, 1.412 + * in which case it will be greater than destCapacity. 1.413 + * 1.414 + * @see u_strFoldCase 1.415 + * @see ucasemap_setOptions 1.416 + * @see U_FOLD_CASE_DEFAULT 1.417 + * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I 1.418 + * @stable ICU 3.8 1.419 + */ 1.420 +U_STABLE int32_t U_EXPORT2 1.421 +ucasemap_utf8FoldCase(const UCaseMap *csm, 1.422 + char *dest, int32_t destCapacity, 1.423 + const char *src, int32_t srcLength, 1.424 + UErrorCode *pErrorCode); 1.425 + 1.426 +#endif /* __UCASEMAP_H__ */