diff -r 000000000000 -r 6474c204b198 intl/icu/source/i18n/unicode/utrans.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/intl/icu/source/i18n/unicode/utrans.h Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,607 @@ +/* +******************************************************************************* +* Copyright (C) 1997-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* Date Name Description +* 06/21/00 aliu Creation. +******************************************************************************* +*/ + +#ifndef UTRANS_H +#define UTRANS_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_TRANSLITERATION + +#include "unicode/localpointer.h" +#include "unicode/urep.h" +#include "unicode/parseerr.h" +#include "unicode/uenum.h" + +/******************************************************************** + * General Notes + ******************************************************************** + */ +/** + * \file + * \brief C API: Transliterator + * + *
After a transliteration operation, some of the indices in this + * structure will be modified. See the field descriptions for + * details. + * + *
contextStart <= start <= limit <= contextLimit + * + *
Note: All index values in this structure must be at code point + * boundaries. That is, none of them may occur between two code units + * of a surrogate pair. If any index does split a surrogate pair, + * results are unspecified. + * + * @stable ICU 2.0 + */ +typedef struct UTransPosition { + + /** + * Beginning index, inclusive, of the context to be considered for + * a transliteration operation. The transliterator will ignore + * anything before this index. INPUT/OUTPUT parameter: This parameter + * is updated by a transliteration operation to reflect the maximum + * amount of antecontext needed by a transliterator. + * @stable ICU 2.4 + */ + int32_t contextStart; + + /** + * Ending index, exclusive, of the context to be considered for a + * transliteration operation. The transliterator will ignore + * anything at or after this index. INPUT/OUTPUT parameter: This + * parameter is updated to reflect changes in the length of the + * text, but points to the same logical position in the text. + * @stable ICU 2.4 + */ + int32_t contextLimit; + + /** + * Beginning index, inclusive, of the text to be transliteratd. + * INPUT/OUTPUT parameter: This parameter is advanced past + * characters that have already been transliterated by a + * transliteration operation. + * @stable ICU 2.4 + */ + int32_t start; + + /** + * Ending index, exclusive, of the text to be transliteratd. + * INPUT/OUTPUT parameter: This parameter is updated to reflect + * changes in the length of the text, but points to the same + * logical position in the text. + * @stable ICU 2.4 + */ + int32_t limit; + +} UTransPosition; + +/******************************************************************** + * General API + ********************************************************************/ + +/** + * Open a custom transliterator, given a custom rules string + * OR + * a system transliterator, given its ID. + * Any non-NULL result from this function should later be closed with + * utrans_close(). + * + * @param id a valid transliterator ID + * @param idLength the length of the ID string, or -1 if NUL-terminated + * @param dir the desired direction + * @param rules the transliterator rules. See the C++ header rbt.h for + * rules syntax. If NULL then a system transliterator matching + * the ID is returned. + * @param rulesLength the length of the rules, or -1 if the rules + * are NUL-terminated. + * @param parseError a pointer to a UParseError struct to receive the details + * of any parsing errors. This parameter may be NULL if no + * parsing error details are desired. + * @param pErrorCode a pointer to the UErrorCode + * @return a transliterator pointer that may be passed to other + * utrans_xxx() functions, or NULL if the open call fails. + * @stable ICU 2.8 + */ +U_STABLE UTransliterator* U_EXPORT2 +utrans_openU(const UChar *id, + int32_t idLength, + UTransDirection dir, + const UChar *rules, + int32_t rulesLength, + UParseError *parseError, + UErrorCode *pErrorCode); + +/** + * Open an inverse of an existing transliterator. For this to work, + * the inverse must be registered with the system. For example, if + * the Transliterator "A-B" is opened, and then its inverse is opened, + * the result is the Transliterator "B-A", if such a transliterator is + * registered with the system. Otherwise the result is NULL and a + * failing UErrorCode is set. Any non-NULL result from this function + * should later be closed with utrans_close(). + * + * @param trans the transliterator to open the inverse of. + * @param status a pointer to the UErrorCode + * @return a pointer to a newly-opened transliterator that is the + * inverse of trans, or NULL if the open call fails. + * @stable ICU 2.0 + */ +U_STABLE UTransliterator* U_EXPORT2 +utrans_openInverse(const UTransliterator* trans, + UErrorCode* status); + +/** + * Create a copy of a transliterator. Any non-NULL result from this + * function should later be closed with utrans_close(). + * + * @param trans the transliterator to be copied. + * @param status a pointer to the UErrorCode + * @return a transliterator pointer that may be passed to other + * utrans_xxx() functions, or NULL if the clone call fails. + * @stable ICU 2.0 + */ +U_STABLE UTransliterator* U_EXPORT2 +utrans_clone(const UTransliterator* trans, + UErrorCode* status); + +/** + * Close a transliterator. Any non-NULL pointer returned by + * utrans_openXxx() or utrans_clone() should eventually be closed. + * @param trans the transliterator to be closed. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +utrans_close(UTransliterator* trans); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUTransliteratorPointer + * "Smart pointer" class, closes a UTransliterator via utrans_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUTransliteratorPointer, UTransliterator, utrans_close); + +U_NAMESPACE_END + +#endif + +/** + * Return the programmatic identifier for this transliterator. + * If this identifier is passed to utrans_openU(), it will open + * a transliterator equivalent to this one, if the ID has been + * registered. + * + * @param trans the transliterator to return the ID of. + * @param resultLength pointer to an output variable receiving the length + * of the ID string; can be NULL + * @return the NUL-terminated ID string. This pointer remains + * valid until utrans_close() is called on this transliterator. + * + * @stable ICU 2.8 + */ +U_STABLE const UChar * U_EXPORT2 +utrans_getUnicodeID(const UTransliterator *trans, + int32_t *resultLength); + +/** + * Register an open transliterator with the system. When + * utrans_open() is called with an ID string that is equal to that + * returned by utrans_getID(adoptedTrans,...), then + * utrans_clone(adoptedTrans,...) is returned. + * + *
NOTE: After this call the system owns the adoptedTrans and will
+ * close it. The user must not call utrans_close() on adoptedTrans.
+ *
+ * @param adoptedTrans a transliterator, typically the result of
+ * utrans_openRules(), to be registered with the system.
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+utrans_register(UTransliterator* adoptedTrans,
+ UErrorCode* status);
+
+/**
+ * Unregister a transliterator from the system. After this call the
+ * system will no longer recognize the given ID when passed to
+ * utrans_open(). If the ID is invalid then nothing is done.
+ *
+ * @param id an ID to unregister
+ * @param idLength the length of id, or -1 if id is zero-terminated
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+utrans_unregisterID(const UChar* id, int32_t idLength);
+
+/**
+ * Set the filter used by a transliterator. A filter can be used to
+ * make the transliterator pass certain characters through untouched.
+ * The filter is expressed using a UnicodeSet pattern. If the
+ * filterPattern is NULL or the empty string, then the transliterator
+ * will be reset to use no filter.
+ *
+ * @param trans the transliterator
+ * @param filterPattern a pattern string, in the form accepted by
+ * UnicodeSet, specifying which characters to apply the
+ * transliteration to. May be NULL or the empty string to indicate no
+ * filter.
+ * @param filterPatternLen the length of filterPattern, or -1 if
+ * filterPattern is zero-terminated
+ * @param status a pointer to the UErrorCode
+ * @see UnicodeSet
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+utrans_setFilter(UTransliterator* trans,
+ const UChar* filterPattern,
+ int32_t filterPatternLen,
+ UErrorCode* status);
+
+/**
+ * Return the number of system transliterators.
+ * It is recommended to use utrans_openIDs() instead.
+ *
+ * @return the number of system transliterators.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+utrans_countAvailableIDs(void);
+
+/**
+ * Return a UEnumeration for the available transliterators.
+ *
+ * @param pErrorCode Pointer to the UErrorCode in/out parameter.
+ * @return UEnumeration for the available transliterators.
+ * Close with uenum_close().
+ *
+ * @stable ICU 2.8
+ */
+U_STABLE UEnumeration * U_EXPORT2
+utrans_openIDs(UErrorCode *pErrorCode);
+
+/********************************************************************
+ * Transliteration API
+ ********************************************************************/
+
+/**
+ * Transliterate a segment of a UReplaceable string. The string is
+ * passed in as a UReplaceable pointer rep and a UReplaceableCallbacks
+ * function pointer struct repFunc. Functions in the repFunc struct
+ * will be called in order to modify the rep string.
+ *
+ * @param trans the transliterator
+ * @param rep a pointer to the string. This will be passed to the
+ * repFunc functions.
+ * @param repFunc a set of function pointers that will be used to
+ * modify the string pointed to by rep.
+ * @param start the beginning index, inclusive; 0 <= start <=
+ * limit
.
+ * @param limit pointer to the ending index, exclusive; start <=
+ * limit <= repFunc->length(rep)
. Upon return, *limit will
+ * contain the new limit index. The text previously occupying
+ * [start, limit)
has been transliterated, possibly to a
+ * string of a different length, at [start,
+ *
new-limit)
, where new-limit
+ * is the return value.
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+utrans_trans(const UTransliterator* trans,
+ UReplaceable* rep,
+ UReplaceableCallbacks* repFunc,
+ int32_t start,
+ int32_t* limit,
+ UErrorCode* status);
+
+/**
+ * Transliterate the portion of the UReplaceable text buffer that can
+ * be transliterated unambiguosly. This method is typically called
+ * after new text has been inserted, e.g. as a result of a keyboard
+ * event. The transliterator will try to transliterate characters of
+ * rep
between index.cursor
and
+ * index.limit
. Characters before
+ * index.cursor
will not be changed.
+ *
+ *
Upon return, values in index
will be updated.
+ * index.start
will be advanced to the first
+ * character that future calls to this method will read.
+ * index.cursor
and index.limit
will
+ * be adjusted to delimit the range of text that future calls to
+ * this method may change.
+ *
+ *
Typical usage of this method begins with an initial call
+ * with index.start
and index.limit
+ * set to indicate the portion of text
to be
+ * transliterated, and index.cursor == index.start
.
+ * Thereafter, index
can be used without
+ * modification in future calls, provided that all changes to
+ * text
are made via this method.
+ *
+ *
This method assumes that future calls may be made that will
+ * insert new text into the buffer. As a result, it only performs
+ * unambiguous transliterations. After the last call to this method,
+ * there may be untransliterated text that is waiting for more input
+ * to resolve an ambiguity. In order to perform these pending
+ * transliterations, clients should call utrans_trans() with a start
+ * of index.start and a limit of index.end after the last call to this
+ * method has been made.
+ *
+ * @param trans the transliterator
+ * @param rep a pointer to the string. This will be passed to the
+ * repFunc functions.
+ * @param repFunc a set of function pointers that will be used to
+ * modify the string pointed to by rep.
+ * @param pos a struct containing the start and limit indices of the
+ * text to be read and the text to be transliterated
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+utrans_transIncremental(const UTransliterator* trans,
+ UReplaceable* rep,
+ UReplaceableCallbacks* repFunc,
+ UTransPosition* pos,
+ UErrorCode* status);
+
+/**
+ * Transliterate a segment of a UChar* string. The string is passed
+ * in in a UChar* buffer. The string is modified in place. If the
+ * result is longer than textCapacity, it is truncated. The actual
+ * length of the result is returned in *textLength, if textLength is
+ * non-NULL. *textLength may be greater than textCapacity, but only
+ * textCapacity UChars will be written to *text, including the zero
+ * terminator.
+ *
+ * @param trans the transliterator
+ * @param text a pointer to a buffer containing the text to be
+ * transliterated on input and the result text on output.
+ * @param textLength a pointer to the length of the string in text.
+ * If the length is -1 then the string is assumed to be
+ * zero-terminated. Upon return, the new length is stored in
+ * *textLength. If textLength is NULL then the string is assumed to
+ * be zero-terminated.
+ * @param textCapacity a pointer to the length of the text buffer.
+ * Upon return,
+ * @param start the beginning index, inclusive; 0 <= start <=
+ * limit
.
+ * @param limit pointer to the ending index, exclusive; start <=
+ * limit <= repFunc->length(rep)
. Upon return, *limit will
+ * contain the new limit index. The text previously occupying
+ * [start, limit)
has been transliterated, possibly to a
+ * string of a different length, at [start,
+ *
new-limit)
, where new-limit
+ * is the return value.
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+utrans_transUChars(const UTransliterator* trans,
+ UChar* text,
+ int32_t* textLength,
+ int32_t textCapacity,
+ int32_t start,
+ int32_t* limit,
+ UErrorCode* status);
+
+/**
+ * Transliterate the portion of the UChar* text buffer that can be
+ * transliterated unambiguosly. See utrans_transIncremental(). The
+ * string is passed in in a UChar* buffer. The string is modified in
+ * place. If the result is longer than textCapacity, it is truncated.
+ * The actual length of the result is returned in *textLength, if
+ * textLength is non-NULL. *textLength may be greater than
+ * textCapacity, but only textCapacity UChars will be written to
+ * *text, including the zero terminator. See utrans_transIncremental()
+ * for usage details.
+ *
+ * @param trans the transliterator
+ * @param text a pointer to a buffer containing the text to be
+ * transliterated on input and the result text on output.
+ * @param textLength a pointer to the length of the string in text.
+ * If the length is -1 then the string is assumed to be
+ * zero-terminated. Upon return, the new length is stored in
+ * *textLength. If textLength is NULL then the string is assumed to
+ * be zero-terminated.
+ * @param textCapacity the length of the text buffer
+ * @param pos a struct containing the start and limit indices of the
+ * text to be read and the text to be transliterated
+ * @param status a pointer to the UErrorCode
+ * @see utrans_transIncremental
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+utrans_transIncrementalUChars(const UTransliterator* trans,
+ UChar* text,
+ int32_t* textLength,
+ int32_t textCapacity,
+ UTransPosition* pos,
+ UErrorCode* status);
+
+/* deprecated API ----------------------------------------------------------- */
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/* see utrans.h documentation for why these functions are deprecated */
+
+/**
+ * Deprecated, use utrans_openU() instead.
+ * Open a custom transliterator, given a custom rules string
+ * OR
+ * a system transliterator, given its ID.
+ * Any non-NULL result from this function should later be closed with
+ * utrans_close().
+ *
+ * @param id a valid ID, as returned by utrans_getAvailableID()
+ * @param dir the desired direction
+ * @param rules the transliterator rules. See the C++ header rbt.h
+ * for rules syntax. If NULL then a system transliterator matching
+ * the ID is returned.
+ * @param rulesLength the length of the rules, or -1 if the rules
+ * are zero-terminated.
+ * @param parseError a pointer to a UParseError struct to receive the
+ * details of any parsing errors. This parameter may be NULL if no
+ * parsing error details are desired.
+ * @param status a pointer to the UErrorCode
+ * @return a transliterator pointer that may be passed to other
+ * utrans_xxx() functions, or NULL if the open call fails.
+ * @deprecated ICU 2.8 Use utrans_openU() instead, see utrans.h
+ */
+U_DEPRECATED UTransliterator* U_EXPORT2
+utrans_open(const char* id,
+ UTransDirection dir,
+ const UChar* rules, /* may be Null */
+ int32_t rulesLength, /* -1 if null-terminated */
+ UParseError* parseError, /* may be Null */
+ UErrorCode* status);
+
+/**
+ * Deprecated, use utrans_getUnicodeID() instead.
+ * Return the programmatic identifier for this transliterator.
+ * If this identifier is passed to utrans_open(), it will open
+ * a transliterator equivalent to this one, if the ID has been
+ * registered.
+ * @param trans the transliterator to return the ID of.
+ * @param buf the buffer in which to receive the ID. This may be
+ * NULL, in which case no characters are copied.
+ * @param bufCapacity the capacity of the buffer. Ignored if buf is
+ * NULL.
+ * @return the actual length of the ID, not including
+ * zero-termination. This may be greater than bufCapacity.
+ * @deprecated ICU 2.8 Use utrans_getUnicodeID() instead, see utrans.h
+ */
+U_DEPRECATED int32_t U_EXPORT2
+utrans_getID(const UTransliterator* trans,
+ char* buf,
+ int32_t bufCapacity);
+
+/**
+ * Deprecated, use utrans_unregisterID() instead.
+ * Unregister a transliterator from the system. After this call the
+ * system will no longer recognize the given ID when passed to
+ * utrans_open(). If the id is invalid then nothing is done.
+ *
+ * @param id a zero-terminated ID
+ * @deprecated ICU 2.8 Use utrans_unregisterID() instead, see utrans.h
+ */
+U_DEPRECATED void U_EXPORT2
+utrans_unregister(const char* id);
+
+/**
+ * Deprecated, use utrans_openIDs() instead.
+ * Return the ID of the index-th system transliterator. The result
+ * is placed in the given buffer. If the given buffer is too small,
+ * the initial substring is copied to buf. The result in buf is
+ * always zero-terminated.
+ *
+ * @param index the number of the transliterator to return. Must
+ * satisfy 0 <= index < utrans_countAvailableIDs(). If index is out
+ * of range then it is treated as if it were 0.
+ * @param buf the buffer in which to receive the ID. This may be
+ * NULL, in which case no characters are copied.
+ * @param bufCapacity the capacity of the buffer. Ignored if buf is
+ * NULL.
+ * @return the actual length of the index-th ID, not including
+ * zero-termination. This may be greater than bufCapacity.
+ * @deprecated ICU 2.8 Use utrans_openIDs() instead, see utrans.h
+ */
+U_DEPRECATED int32_t U_EXPORT2
+utrans_getAvailableID(int32_t index,
+ char* buf,
+ int32_t bufCapacity);
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif /* #if !UCONFIG_NO_TRANSLITERATION */
+
+#endif