intl/icu/source/common/unicode/ucasemap.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2005-2012, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  ucasemap.h
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2005may06
    14 *   created by: Markus W. Scherer
    15 *
    16 *   Case mapping service object and functions using it.
    17 */
    19 #ifndef __UCASEMAP_H__
    20 #define __UCASEMAP_H__
    22 #include "unicode/utypes.h"
    23 #include "unicode/ustring.h"
    24 #include "unicode/localpointer.h"
    26 /**
    27  * \file
    28  * \brief C API: Unicode case mapping functions using a UCaseMap service object.
    29  *
    30  * The service object takes care of memory allocations, data loading, and setup
    31  * for the attributes, as usual.
    32  *
    33  * Currently, the functionality provided here does not overlap with uchar.h
    34  * and ustring.h, except for ucasemap_toTitle().
    35  *
    36  * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
    37  */
    39 /**
    40  * UCaseMap is an opaque service object for newer ICU case mapping functions.
    41  * Older functions did not use a service object.
    42  * @stable ICU 3.4
    43  */
    44 struct UCaseMap;
    45 typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
    47 /**
    48  * Open a UCaseMap service object for a locale and a set of options.
    49  * The locale ID and options are preprocessed so that functions using the
    50  * service object need not process them in each call.
    51  *
    52  * @param locale ICU locale ID, used for language-dependent
    53  *               upper-/lower-/title-casing according to the Unicode standard.
    54  *               Usual semantics: ""=root, NULL=default locale, etc.
    55  * @param options Options bit set, used for case folding and string comparisons.
    56  *                Same flags as for u_foldCase(), u_strFoldCase(),
    57  *                u_strCaseCompare(), etc.
    58  *                Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
    59  * @param pErrorCode Must be a valid pointer to an error code value,
    60  *                   which must not indicate a failure before the function call.
    61  * @return Pointer to a UCaseMap service object, if successful.
    62  *
    63  * @see U_FOLD_CASE_DEFAULT
    64  * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
    65  * @see U_TITLECASE_NO_LOWERCASE
    66  * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
    67  * @stable ICU 3.4
    68  */
    69 U_STABLE UCaseMap * U_EXPORT2
    70 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
    72 /**
    73  * Close a UCaseMap service object.
    74  * @param csm Object to be closed.
    75  * @stable ICU 3.4
    76  */
    77 U_STABLE void U_EXPORT2
    78 ucasemap_close(UCaseMap *csm);
    80 #if U_SHOW_CPLUSPLUS_API
    82 U_NAMESPACE_BEGIN
    84 /**
    85  * \class LocalUCaseMapPointer
    86  * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
    87  * For most methods see the LocalPointerBase base class.
    88  *
    89  * @see LocalPointerBase
    90  * @see LocalPointer
    91  * @stable ICU 4.4
    92  */
    93 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
    95 U_NAMESPACE_END
    97 #endif
    99 /**
   100  * Get the locale ID that is used for language-dependent case mappings.
   101  * @param csm UCaseMap service object.
   102  * @return locale ID
   103  * @stable ICU 3.4
   104  */
   105 U_STABLE const char * U_EXPORT2
   106 ucasemap_getLocale(const UCaseMap *csm);
   108 /**
   109  * Get the options bit set that is used for case folding and string comparisons.
   110  * @param csm UCaseMap service object.
   111  * @return options bit set
   112  * @stable ICU 3.4
   113  */
   114 U_STABLE uint32_t U_EXPORT2
   115 ucasemap_getOptions(const UCaseMap *csm);
   117 /**
   118  * Set the locale ID that is used for language-dependent case mappings.
   119  *
   120  * @param csm UCaseMap service object.
   121  * @param locale Locale ID, see ucasemap_open().
   122  * @param pErrorCode Must be a valid pointer to an error code value,
   123  *                   which must not indicate a failure before the function call.
   124  *
   125  * @see ucasemap_open
   126  * @stable ICU 3.4
   127  */
   128 U_STABLE void U_EXPORT2
   129 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
   131 /**
   132  * Set the options bit set that is used for case folding and string comparisons.
   133  *
   134  * @param csm UCaseMap service object.
   135  * @param options Options bit set, see ucasemap_open().
   136  * @param pErrorCode Must be a valid pointer to an error code value,
   137  *                   which must not indicate a failure before the function call.
   138  *
   139  * @see ucasemap_open
   140  * @stable ICU 3.4
   141  */
   142 U_STABLE void U_EXPORT2
   143 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
   145 /**
   146  * Do not lowercase non-initial parts of words when titlecasing.
   147  * Option bit for titlecasing APIs that take an options bit set.
   148  *
   149  * By default, titlecasing will titlecase the first cased character
   150  * of a word and lowercase all other characters.
   151  * With this option, the other characters will not be modified.
   152  *
   153  * @see ucasemap_setOptions
   154  * @see ucasemap_toTitle
   155  * @see ucasemap_utf8ToTitle
   156  * @see UnicodeString::toTitle
   157  * @stable ICU 3.8
   158  */
   159 #define U_TITLECASE_NO_LOWERCASE 0x100
   161 /**
   162  * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
   163  * titlecase exactly the characters at breaks from the iterator.
   164  * Option bit for titlecasing APIs that take an options bit set.
   165  *
   166  * By default, titlecasing will take each break iterator index,
   167  * adjust it by looking for the next cased character, and titlecase that one.
   168  * Other characters are lowercased.
   169  *
   170  * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
   171  *
   172  * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
   173  * #29, "Text Boundaries." Between each pair of word boundaries, find the first
   174  * cased character F. If F exists, map F to default_title(F); then map each
   175  * subsequent character C to default_lower(C).
   176  *
   177  * @see ucasemap_setOptions
   178  * @see ucasemap_toTitle
   179  * @see ucasemap_utf8ToTitle
   180  * @see UnicodeString::toTitle
   181  * @see U_TITLECASE_NO_LOWERCASE
   182  * @stable ICU 3.8
   183  */
   184 #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
   186 #if !UCONFIG_NO_BREAK_ITERATION
   188 /**
   189  * Get the break iterator that is used for titlecasing.
   190  * Do not modify the returned break iterator.
   191  * @param csm UCaseMap service object.
   192  * @return titlecasing break iterator
   193  * @stable ICU 3.8
   194  */
   195 U_STABLE const UBreakIterator * U_EXPORT2
   196 ucasemap_getBreakIterator(const UCaseMap *csm);
   198 /**
   199  * Set the break iterator that is used for titlecasing.
   200  * The UCaseMap service object releases a previously set break iterator
   201  * and "adopts" this new one, taking ownership of it.
   202  * It will be released in a subsequent call to ucasemap_setBreakIterator()
   203  * or ucasemap_close().
   204  *
   205  * Break iterator operations are not thread-safe. Therefore, titlecasing
   206  * functions use non-const UCaseMap objects. It is not possible to titlecase
   207  * strings concurrently using the same UCaseMap.
   208  *
   209  * @param csm UCaseMap service object.
   210  * @param iterToAdopt Break iterator to be adopted for titlecasing.
   211  * @param pErrorCode Must be a valid pointer to an error code value,
   212  *                   which must not indicate a failure before the function call.
   213  *
   214  * @see ucasemap_toTitle
   215  * @see ucasemap_utf8ToTitle
   216  * @stable ICU 3.8
   217  */
   218 U_STABLE void U_EXPORT2
   219 ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
   221 /**
   222  * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
   223  * except that it takes ucasemap_setOptions() into account and has performance
   224  * advantages from being able to use a UCaseMap object for multiple case mapping
   225  * operations, saving setup time.
   226  *
   227  * Casing is locale-dependent and context-sensitive.
   228  * Titlecasing uses a break iterator to find the first characters of words
   229  * that are to be titlecased. It titlecases those characters and lowercases
   230  * all others. (This can be modified with ucasemap_setOptions().)
   231  *
   232  * Note: This function takes a non-const UCaseMap pointer because it will
   233  * open a default break iterator if no break iterator was set yet,
   234  * and effectively call ucasemap_setBreakIterator();
   235  * also because the break iterator is stateful and will be modified during
   236  * the iteration.
   237  *
   238  * The titlecase break iterator can be provided to customize for arbitrary
   239  * styles, using rules and dictionaries beyond the standard iterators.
   240  * The standard titlecase iterator for the root locale implements the
   241  * algorithm of Unicode TR 21.
   242  *
   243  * This function uses only the setUText(), first(), next() and close() methods of the
   244  * provided break iterator.
   245  *
   246  * The result may be longer or shorter than the original.
   247  * The source string and the destination buffer must not overlap.
   248  *
   249  * @param csm       UCaseMap service object. This pointer is non-const!
   250  *                  See the note above for details.
   251  * @param dest      A buffer for the result string. The result will be NUL-terminated if
   252  *                  the buffer is large enough.
   253  *                  The contents are undefined in case of failure.
   254  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
   255  *                  dest may be NULL and the function will only return the length of the result
   256  *                  without writing any of the result string.
   257  * @param src       The original string.
   258  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
   259  * @param pErrorCode Must be a valid pointer to an error code value,
   260  *                  which must not indicate a failure before the function call.
   261  * @return The length of the result string. It is returned on success and also on buffer
   262  *         overflow, in which case it is greater than destCapacity.
   263  *
   264  * @see u_strToTitle
   265  * @stable ICU 3.8
   266  */
   267 U_STABLE int32_t U_EXPORT2
   268 ucasemap_toTitle(UCaseMap *csm,
   269                  UChar *dest, int32_t destCapacity,
   270                  const UChar *src, int32_t srcLength,
   271                  UErrorCode *pErrorCode);
   273 #endif
   275 /**
   276  * Lowercase the characters in a UTF-8 string.
   277  * Casing is locale-dependent and context-sensitive.
   278  * The result may be longer or shorter than the original.
   279  * The source string and the destination buffer must not overlap.
   280  *
   281  * @param csm       UCaseMap service object.
   282  * @param dest      A buffer for the result string. The result will be NUL-terminated if
   283  *                  the buffer is large enough.
   284  *                  The contents are undefined in case of failure.
   285  * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
   286  *                  dest may be NULL and the function will only return the length of the result
   287  *                  without writing any of the result string.
   288  * @param src       The original string.
   289  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
   290  * @param pErrorCode Must be a valid pointer to an error code value,
   291  *                  which must not indicate a failure before the function call.
   292  * @return The length of the result string. It is returned on success and also on buffer
   293  *         overflow, in which case it is greater than destCapacity.
   294  *
   295  * @see u_strToLower
   296  * @stable ICU 3.4
   297  */
   298 U_STABLE int32_t U_EXPORT2
   299 ucasemap_utf8ToLower(const UCaseMap *csm,
   300                      char *dest, int32_t destCapacity,
   301                      const char *src, int32_t srcLength,
   302                      UErrorCode *pErrorCode);
   304 /**
   305  * Uppercase the characters in a UTF-8 string.
   306  * Casing is locale-dependent and context-sensitive.
   307  * The result may be longer or shorter than the original.
   308  * The source string and the destination buffer must not overlap.
   309  *
   310  * @param csm       UCaseMap service object.
   311  * @param dest      A buffer for the result string. The result will be NUL-terminated if
   312  *                  the buffer is large enough.
   313  *                  The contents are undefined in case of failure.
   314  * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
   315  *                  dest may be NULL and the function will only return the length of the result
   316  *                  without writing any of the result string.
   317  * @param src       The original string.
   318  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
   319  * @param pErrorCode Must be a valid pointer to an error code value,
   320  *                  which must not indicate a failure before the function call.
   321  * @return The length of the result string. It is returned on success and also on buffer
   322  *         overflow, in which case it is greater than destCapacity.
   323  *
   324  * @see u_strToUpper
   325  * @stable ICU 3.4
   326  */
   327 U_STABLE int32_t U_EXPORT2
   328 ucasemap_utf8ToUpper(const UCaseMap *csm,
   329                      char *dest, int32_t destCapacity,
   330                      const char *src, int32_t srcLength,
   331                      UErrorCode *pErrorCode);
   333 #if !UCONFIG_NO_BREAK_ITERATION
   335 /**
   336  * Titlecase a UTF-8 string.
   337  * Casing is locale-dependent and context-sensitive.
   338  * Titlecasing uses a break iterator to find the first characters of words
   339  * that are to be titlecased. It titlecases those characters and lowercases
   340  * all others. (This can be modified with ucasemap_setOptions().)
   341  *
   342  * Note: This function takes a non-const UCaseMap pointer because it will
   343  * open a default break iterator if no break iterator was set yet,
   344  * and effectively call ucasemap_setBreakIterator();
   345  * also because the break iterator is stateful and will be modified during
   346  * the iteration.
   347  *
   348  * The titlecase break iterator can be provided to customize for arbitrary
   349  * styles, using rules and dictionaries beyond the standard iterators.
   350  * The standard titlecase iterator for the root locale implements the
   351  * algorithm of Unicode TR 21.
   352  *
   353  * This function uses only the setUText(), first(), next() and close() methods of the
   354  * provided break iterator.
   355  *
   356  * The result may be longer or shorter than the original.
   357  * The source string and the destination buffer must not overlap.
   358  *
   359  * @param csm       UCaseMap service object. This pointer is non-const!
   360  *                  See the note above for details.
   361  * @param dest      A buffer for the result string. The result will be NUL-terminated if
   362  *                  the buffer is large enough.
   363  *                  The contents are undefined in case of failure.
   364  * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
   365  *                  dest may be NULL and the function will only return the length of the result
   366  *                  without writing any of the result string.
   367  * @param src       The original string.
   368  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
   369  * @param pErrorCode Must be a valid pointer to an error code value,
   370  *                  which must not indicate a failure before the function call.
   371  * @return The length of the result string. It is returned on success and also on buffer
   372  *         overflow, in which case it is greater than destCapacity.
   373  *
   374  * @see u_strToTitle
   375  * @see U_TITLECASE_NO_LOWERCASE
   376  * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
   377  * @stable ICU 3.8
   378  */
   379 U_STABLE int32_t U_EXPORT2
   380 ucasemap_utf8ToTitle(UCaseMap *csm,
   381                     char *dest, int32_t destCapacity,
   382                     const char *src, int32_t srcLength,
   383                     UErrorCode *pErrorCode);
   385 #endif
   387 /**
   388  * Case-folds the characters in a UTF-8 string.
   389  *
   390  * Case-folding is locale-independent and not context-sensitive,
   391  * but there is an option for whether to include or exclude mappings for dotted I
   392  * and dotless i that are marked with 'T' in CaseFolding.txt.
   393  *
   394  * The result may be longer or shorter than the original.
   395  * The source string and the destination buffer must not overlap.
   396  *
   397  * @param csm       UCaseMap service object.
   398  * @param dest      A buffer for the result string. The result will be NUL-terminated if
   399  *                  the buffer is large enough.
   400  *                  The contents are undefined in case of failure.
   401  * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
   402  *                  dest may be NULL and the function will only return the length of the result
   403  *                  without writing any of the result string.
   404  * @param src       The original string.
   405  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
   406  * @param pErrorCode Must be a valid pointer to an error code value,
   407  *                  which must not indicate a failure before the function call.
   408  * @return The length of the result string. It is returned on success and also on buffer
   409  *         overflow, in which case it is greater than destCapacity.
   410  *
   411  * @see u_strFoldCase
   412  * @see ucasemap_setOptions
   413  * @see U_FOLD_CASE_DEFAULT
   414  * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
   415  * @stable ICU 3.8
   416  */
   417 U_STABLE int32_t U_EXPORT2
   418 ucasemap_utf8FoldCase(const UCaseMap *csm,
   419                       char *dest, int32_t destCapacity,
   420                       const char *src, int32_t srcLength,
   421                       UErrorCode *pErrorCode);
   423 #endif

mercurial