michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 1999-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * Date Name Description michael@0: * 11/17/99 aliu Creation. michael@0: ********************************************************************** michael@0: */ michael@0: #ifndef TRANSLIT_H michael@0: #define TRANSLIT_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: /** michael@0: * \file michael@0: * \brief C++ API: Transforms text from one format to another. michael@0: */ michael@0: michael@0: #if !UCONFIG_NO_TRANSLITERATION michael@0: michael@0: #include "unicode/uobject.h" michael@0: #include "unicode/unistr.h" michael@0: #include "unicode/parseerr.h" michael@0: #include "unicode/utrans.h" // UTransPosition, UTransDirection michael@0: #include "unicode/strenum.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class UnicodeFilter; michael@0: class UnicodeSet; michael@0: class CompoundTransliterator; michael@0: class TransliteratorParser; michael@0: class NormalizationTransliterator; michael@0: class TransliteratorIDParser; michael@0: michael@0: /** michael@0: * michael@0: * Transliterator is an abstract class that michael@0: * transliterates text from one format to another. The most common michael@0: * kind of transliterator is a script, or alphabet, transliterator. michael@0: * For example, a Russian to Latin transliterator changes Russian text michael@0: * written in Cyrillic characters to phonetically equivalent Latin michael@0: * characters. It does not translate Russian to English! michael@0: * Transliteration, unlike translation, operates on characters, without michael@0: * reference to the meanings of words and sentences. michael@0: * michael@0: *

Although script conversion is its most common use, a michael@0: * transliterator can actually perform a more general class of tasks. michael@0: * In fact, Transliterator defines a very general API michael@0: * which specifies only that a segment of the input text is replaced michael@0: * by new text. The particulars of this conversion are determined michael@0: * entirely by subclasses of Transliterator. michael@0: * michael@0: *

Transliterators are stateless michael@0: * michael@0: *

Transliterator objects are stateless; they michael@0: * retain no information between calls to michael@0: * transliterate(). (However, this does not michael@0: * mean that threads may share transliterators without synchronizing michael@0: * them. Transliterators are not immutable, so they must be michael@0: * synchronized when shared between threads.) This might seem to michael@0: * limit the complexity of the transliteration operation. In michael@0: * practice, subclasses perform complex transliterations by delaying michael@0: * the replacement of text until it is known that no other michael@0: * replacements are possible. In other words, although the michael@0: * Transliterator objects are stateless, the source text michael@0: * itself embodies all the needed information, and delayed operation michael@0: * allows arbitrary complexity. michael@0: * michael@0: *

Batch transliteration michael@0: * michael@0: *

The simplest way to perform transliteration is all at once, on a michael@0: * string of existing text. This is referred to as batch michael@0: * transliteration. For example, given a string input michael@0: * and a transliterator t, the call michael@0: * michael@0: * \htmlonly

\endhtmlonlyString result = t.transliterate(input); michael@0: * \htmlonly
\endhtmlonly michael@0: * michael@0: * will transliterate it and return the result. Other methods allow michael@0: * the client to specify a substring to be transliterated and to use michael@0: * {@link Replaceable } objects instead of strings, in order to michael@0: * preserve out-of-band information (such as text styles). michael@0: * michael@0: *

Keyboard transliteration michael@0: * michael@0: *

Somewhat more involved is keyboard, or incremental michael@0: * transliteration. This is the transliteration of text that is michael@0: * arriving from some source (typically the user's keyboard) one michael@0: * character at a time, or in some other piecemeal fashion. michael@0: * michael@0: *

In keyboard transliteration, a Replaceable buffer michael@0: * stores the text. As text is inserted, as much as possible is michael@0: * transliterated on the fly. This means a GUI that displays the michael@0: * contents of the buffer may show text being modified as each new michael@0: * character arrives. michael@0: * michael@0: *

Consider the simple RuleBasedTransliterator: michael@0: * michael@0: * \htmlonly

\endhtmlonly michael@0: * th>{theta}
michael@0: * t>{tau} michael@0: *
\htmlonly
\endhtmlonly michael@0: * michael@0: * When the user types 't', nothing will happen, since the michael@0: * transliterator is waiting to see if the next character is 'h'. To michael@0: * remedy this, we introduce the notion of a cursor, marked by a '|' michael@0: * in the output string: michael@0: * michael@0: * \htmlonly
\endhtmlonly michael@0: * t>|{tau}
michael@0: * {tau}h>{theta} michael@0: *
\htmlonly
\endhtmlonly michael@0: * michael@0: * Now when the user types 't', tau appears, and if the next character michael@0: * is 'h', the tau changes to a theta. This is accomplished by michael@0: * maintaining a cursor position (independent of the insertion point, michael@0: * and invisible in the GUI) across calls to michael@0: * transliterate(). Typically, the cursor will michael@0: * be coincident with the insertion point, but in a case like the one michael@0: * above, it will precede the insertion point. michael@0: * michael@0: *

Keyboard transliteration methods maintain a set of three indices michael@0: * that are updated with each call to michael@0: * transliterate(), including the cursor, start, michael@0: * and limit. Since these indices are changed by the method, they are michael@0: * passed in an int[] array. The START index michael@0: * marks the beginning of the substring that the transliterator will michael@0: * look at. It is advanced as text becomes committed (but it is not michael@0: * the committed index; that's the CURSOR). The michael@0: * CURSOR index, described above, marks the point at michael@0: * which the transliterator last stopped, either because it reached michael@0: * the end, or because it required more characters to disambiguate michael@0: * between possible inputs. The CURSOR can also be michael@0: * explicitly set by rules in a RuleBasedTransliterator. michael@0: * Any characters before the CURSOR index are frozen; michael@0: * future keyboard transliteration calls within this input sequence michael@0: * will not change them. New text is inserted at the michael@0: * LIMIT index, which marks the end of the substring that michael@0: * the transliterator looks at. michael@0: * michael@0: *

Because keyboard transliteration assumes that more characters michael@0: * are to arrive, it is conservative in its operation. It only michael@0: * transliterates when it can do so unambiguously. Otherwise it waits michael@0: * for more characters to arrive. When the client code knows that no michael@0: * more characters are forthcoming, perhaps because the user has michael@0: * performed some input termination operation, then it should call michael@0: * finishTransliteration() to complete any michael@0: * pending transliterations. michael@0: * michael@0: *

Inverses michael@0: * michael@0: *

Pairs of transliterators may be inverses of one another. For michael@0: * example, if transliterator A transliterates characters by michael@0: * incrementing their Unicode value (so "abc" -> "def"), and michael@0: * transliterator B decrements character values, then A michael@0: * is an inverse of B and vice versa. If we compose A michael@0: * with B in a compound transliterator, the result is the michael@0: * identity transliterator, that is, a transliterator that does not michael@0: * change its input text. michael@0: * michael@0: * The Transliterator method createInverse() michael@0: * returns a transliterator's inverse, if one exists, or michael@0: * null otherwise. However, the result of michael@0: * createInverse() usually will not be a true michael@0: * mathematical inverse. This is because true inverse transliterators michael@0: * are difficult to formulate. For example, consider two michael@0: * transliterators: AB, which transliterates the character 'A' michael@0: * to 'B', and BA, which transliterates 'B' to 'A'. It might michael@0: * seem that these are exact inverses, since michael@0: * michael@0: * \htmlonly

\endhtmlonly"A" x AB -> "B"
michael@0: * "B" x BA -> "A"\htmlonly
\endhtmlonly michael@0: * michael@0: * where 'x' represents transliteration. However, michael@0: * michael@0: * \htmlonly
\endhtmlonly"ABCD" x AB -> "BBCD"
michael@0: * "BBCD" x BA -> "AACD"\htmlonly
\endhtmlonly michael@0: * michael@0: * so AB composed with BA is not the michael@0: * identity. Nonetheless, BA may be usefully considered to be michael@0: * AB's inverse, and it is on this basis that michael@0: * AB.createInverse() could legitimately return michael@0: * BA. michael@0: * michael@0: *

IDs and display names michael@0: * michael@0: *

A transliterator is designated by a short identifier string or michael@0: * ID. IDs follow the format source-destination, michael@0: * where source describes the entity being replaced, and michael@0: * destination describes the entity replacing michael@0: * source. The entities may be the names of scripts, michael@0: * particular sequences of characters, or whatever else it is that the michael@0: * transliterator converts to or from. For example, a transliterator michael@0: * from Russian to Latin might be named "Russian-Latin". A michael@0: * transliterator from keyboard escape sequences to Latin-1 characters michael@0: * might be named "KeyboardEscape-Latin1". By convention, system michael@0: * entity names are in English, with the initial letters of words michael@0: * capitalized; user entity names may follow any format so long as michael@0: * they do not contain dashes. michael@0: * michael@0: *

In addition to programmatic IDs, transliterator objects have michael@0: * display names for presentation in user interfaces, returned by michael@0: * {@link #getDisplayName }. michael@0: * michael@0: *

Factory methods and registration michael@0: * michael@0: *

In general, client code should use the factory method michael@0: * {@link #createInstance } to obtain an instance of a michael@0: * transliterator given its ID. Valid IDs may be enumerated using michael@0: * getAvailableIDs(). Since transliterators are mutable, michael@0: * multiple calls to {@link #createInstance } with the same ID will michael@0: * return distinct objects. michael@0: * michael@0: *

In addition to the system transliterators registered at startup, michael@0: * user transliterators may be registered by calling michael@0: * registerInstance() at run time. A registered instance michael@0: * acts as a template; future calls to {@link #createInstance } with the ID michael@0: * of the registered object return clones of that object. Thus any michael@0: * object passed to registerInstance() must implement michael@0: * clone() properly. To register a transliterator subclass michael@0: * without instantiating it (until it is needed), users may call michael@0: * {@link #registerFactory }. In this case, the objects are michael@0: * instantiated by invoking the zero-argument public constructor of michael@0: * the class. michael@0: * michael@0: *

Subclassing michael@0: * michael@0: * Subclasses must implement the abstract method michael@0: * handleTransliterate().

Subclasses should override michael@0: * the transliterate() method taking a michael@0: * Replaceable and the transliterate() michael@0: * method taking a String and StringBuffer michael@0: * if the performance of these methods can be improved over the michael@0: * performance obtained by the default implementations in this class. michael@0: * michael@0: * @author Alan Liu michael@0: * @stable ICU 2.0 michael@0: */ michael@0: class U_I18N_API Transliterator : public UObject { michael@0: michael@0: private: michael@0: michael@0: /** michael@0: * Programmatic name, e.g., "Latin-Arabic". michael@0: */ michael@0: UnicodeString ID; michael@0: michael@0: /** michael@0: * This transliterator's filter. Any character for which michael@0: * filter.contains() returns false will not be michael@0: * altered by this transliterator. If filter is michael@0: * null then no filtering is applied. michael@0: */ michael@0: UnicodeFilter* filter; michael@0: michael@0: int32_t maximumContextLength; michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * A context integer or pointer for a factory function, passed by michael@0: * value. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: union Token { michael@0: /** michael@0: * This token, interpreted as a 32-bit integer. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: int32_t integer; michael@0: /** michael@0: * This token, interpreted as a native pointer. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: void* pointer; michael@0: }; michael@0: michael@0: #ifndef U_HIDE_INTERNAL_API michael@0: /** michael@0: * Return a token containing an integer. michael@0: * @return a token containing an integer. michael@0: * @internal michael@0: */ michael@0: inline static Token integerToken(int32_t); michael@0: michael@0: /** michael@0: * Return a token containing a pointer. michael@0: * @return a token containing a pointer. 
michael@0: * @internal michael@0: */ michael@0: inline static Token pointerToken(void*); michael@0: #endif /* U_HIDE_INTERNAL_API */ michael@0: michael@0: /** michael@0: * A function that creates and returns a Transliterator. When michael@0: * invoked, it will be passed the ID string that is being michael@0: * instantiated, together with the context pointer that was passed michael@0: * in when the factory function was first registered. Many michael@0: * factory functions will ignore both parameters, however, michael@0: * functions that are registered to more than one ID may use the michael@0: * ID or the context parameter to parameterize the transliterator michael@0: * they create. michael@0: * @param ID the string identifier for this transliterator michael@0: * @param context a context pointer that will be stored and michael@0: * later passed to the factory function when an ID matching michael@0: * the registration ID is being instantiated with this factory. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context); michael@0: michael@0: protected: michael@0: michael@0: /** michael@0: * Default constructor. michael@0: * @param ID the string identifier for this transliterator michael@0: * @param adoptedFilter the filter. Any character for which michael@0: * filter.contains() returns false will not be michael@0: * altered by this transliterator. If filter is michael@0: * null then no filtering is applied. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter); michael@0: michael@0: /** michael@0: * Copy constructor. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: Transliterator(const Transliterator&); michael@0: michael@0: /** michael@0: * Assignment operator. 
michael@0: * @stable ICU 2.4 michael@0: */ michael@0: Transliterator& operator=(const Transliterator&); michael@0: michael@0: /** michael@0: * Create a transliterator from a basic ID. This is an ID michael@0: * containing only the forward direction source, target, and michael@0: * variant. michael@0: * @param id a basic ID of the form S-T or S-T/V. michael@0: * @param canon canonical ID to assign to the object, or michael@0: * NULL to leave the ID unchanged michael@0: * @return a newly created Transliterator or null if the ID is michael@0: * invalid. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: static Transliterator* createBasicInstance(const UnicodeString& id, michael@0: const UnicodeString* canon); michael@0: michael@0: friend class TransliteratorParser; // for parseID() michael@0: friend class TransliteratorIDParser; // for createBasicInstance() michael@0: friend class TransliteratorAlias; // for setID() michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * Destructor. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual ~Transliterator(); michael@0: michael@0: /** michael@0: * Implements Cloneable. michael@0: * All subclasses are encouraged to implement this method if it is michael@0: * possible and reasonable to do so. Subclasses that are to be michael@0: * registered with the system using registerInstance() michael@0: * are required to implement this method. If a subclass does not michael@0: * implement clone() properly and is registered with the system michael@0: * using registerInstance(), then the default clone() implementation michael@0: * will return null, and calls to createInstance() will fail. michael@0: * michael@0: * @return a copy of the object. michael@0: * @see #registerInstance michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual Transliterator* clone() const; michael@0: michael@0: /** michael@0: * Transliterates a segment of a string, with optional filtering. 
michael@0: * michael@0: * @param text the string to be transliterated michael@0: * @param start the beginning index, inclusive; 0 <= start michael@0: * <= limit. michael@0: * @param limit the ending index, exclusive; start <= limit michael@0: * <= text.length(). michael@0: * @return The new limit index. The text previously occupying [start, michael@0: * limit) has been transliterated, possibly to a string of a different michael@0: * length, at [start, michael@0: * new-limit), where michael@0: * new-limit is the return value. If the input offsets are out of bounds, michael@0: * the returned value is -1 and the input string remains unchanged. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual int32_t transliterate(Replaceable& text, michael@0: int32_t start, int32_t limit) const; michael@0: michael@0: /** michael@0: * Transliterates an entire string in place. Convenience method. michael@0: * @param text the string to be transliterated michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual void transliterate(Replaceable& text) const; michael@0: michael@0: /** michael@0: * Transliterates the portion of the text buffer that can be michael@0: * transliterated unambiguously after new text has been inserted, michael@0: * typically as a result of a keyboard event. The new text in michael@0: * insertion will be inserted into text michael@0: * at index.limit, advancing michael@0: * index.limit by insertion.length(). michael@0: * Then the transliterator will try to transliterate characters of michael@0: * text between index.cursor and michael@0: * index.limit. Characters before michael@0: * index.cursor will not be changed. michael@0: * michael@0: *

Upon return, values in index will be updated. michael@0: * index.start will be advanced to the first michael@0: * character that future calls to this method will read. michael@0: * index.cursor and index.limit will michael@0: * be adjusted to delimit the range of text that future calls to michael@0: * this method may change. michael@0: * michael@0: *

Typical usage of this method begins with an initial call michael@0: * with index.start and index.limit michael@0: * set to indicate the portion of text to be michael@0: * transliterated, and index.cursor == index.start. michael@0: * Thereafter, index can be used without michael@0: * modification in future calls, provided that all changes to michael@0: * text are made via this method. michael@0: * michael@0: *

This method assumes that future calls may be made that will michael@0: * insert new text into the buffer. As a result, it only performs michael@0: * unambiguous transliterations. After the last call to this michael@0: * method, there may be untransliterated text that is waiting for michael@0: * more input to resolve an ambiguity. In order to perform these michael@0: * pending transliterations, clients should call {@link michael@0: * #finishTransliteration } after the last call to this michael@0: * method has been made. michael@0: * michael@0: * @param text the buffer holding transliterated and untransliterated text michael@0: * @param index an array of three integers. michael@0: * michael@0: *

michael@0: * michael@0: * @param insertion text to be inserted and possibly michael@0: * transliterated into the translation buffer at michael@0: * index.limit. If null then no text michael@0: * is inserted. michael@0: * @param status Output param to be filled in with a success or an error. michael@0: * @see #handleTransliterate michael@0: * @exception IllegalArgumentException if index michael@0: * is invalid michael@0: * @see UTransPosition michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual void transliterate(Replaceable& text, UTransPosition& index, michael@0: const UnicodeString& insertion, michael@0: UErrorCode& status) const; michael@0: michael@0: /** michael@0: * Transliterates the portion of the text buffer that can be michael@0: * transliterated unambiguously after a new character has been michael@0: * inserted, typically as a result of a keyboard event. This is a michael@0: * convenience method. michael@0: * @param text the buffer holding transliterated and michael@0: * untransliterated text michael@0: * @param index an array of three integers. michael@0: * @param insertion text to be inserted and possibly michael@0: * transliterated into the translation buffer at michael@0: * index.limit. michael@0: * @param status Output param to be filled in with a success or an error. michael@0: * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual void transliterate(Replaceable& text, UTransPosition& index, michael@0: UChar32 insertion, michael@0: UErrorCode& status) const; michael@0: michael@0: /** michael@0: * Transliterates the portion of the text buffer that can be michael@0: * transliterated unambiguously. This is a convenience method; see michael@0: * {@link michael@0: * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const } michael@0: * for details. 
michael@0: * @param text the buffer holding transliterated and michael@0: * untransliterated text michael@0: * @param index an array of three integers. See {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }. michael@0: * @param status Output param to be filled in with a success or an error. michael@0: * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual void transliterate(Replaceable& text, UTransPosition& index, michael@0: UErrorCode& status) const; michael@0: michael@0: /** michael@0: * Finishes any pending transliterations that were waiting for michael@0: * more characters. Clients should call this method as the last michael@0: * call after a sequence of one or more calls to michael@0: * transliterate(). michael@0: * @param text the buffer holding transliterated and michael@0: * untransliterated text. michael@0: * @param index the array of indices previously passed to {@link michael@0: * #transliterate } michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual void finishTransliteration(Replaceable& text, michael@0: UTransPosition& index) const; michael@0: michael@0: private: michael@0: michael@0: /** michael@0: * This internal method does incremental transliteration. If the michael@0: * 'insertion' is non-null then we append it to 'text' before michael@0: * proceeding. This method calls through to the pure virtual michael@0: * framework method handleTransliterate() to do the actual michael@0: * work. michael@0: * @param text the buffer holding transliterated and michael@0: * untransliterated text michael@0: * @param index an array of three integers. See {@link michael@0: * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }. michael@0: * @param insertion text to be inserted and possibly michael@0: * transliterated into the translation buffer at michael@0: * index.limit. michael@0: * @param status Output param to be filled in with a success or an error. 
michael@0: */ michael@0: void _transliterate(Replaceable& text, michael@0: UTransPosition& index, michael@0: const UnicodeString* insertion, michael@0: UErrorCode &status) const; michael@0: michael@0: protected: michael@0: michael@0: /** michael@0: * Abstract method that concrete subclasses define to implement michael@0: * their transliteration algorithm. This method handles both michael@0: * incremental and non-incremental transliteration. Let michael@0: * originalStart refer to the value of michael@0: * pos.start upon entry. michael@0: * michael@0: * michael@0: * michael@0: *

Implementations of this method should also obey the michael@0: * following invariants:

michael@0: * michael@0: * michael@0: * michael@0: *

Subclasses may safely assume that all characters in michael@0: * [pos.start, pos.limit) are filtered. michael@0: * In other words, the filter has already been applied by the time michael@0: * this method is called. See michael@0: * filteredTransliterate(). michael@0: * michael@0: *

This method is not for public consumption. Calling michael@0: * this method directly will transliterate michael@0: * [pos.start, pos.limit) without michael@0: * applying the filter. End user code should call michael@0: * transliterate() instead of this method. Subclass code michael@0: * and wrapping transliterators should call michael@0: * filteredTransliterate() instead of this method.

michael@0: * michael@0: * @param text the buffer holding transliterated and michael@0: * untransliterated text michael@0: * michael@0: * @param pos the indices indicating the start, limit, context michael@0: * start, and context limit of the text. michael@0: * michael@0: * @param incremental if true, assume more text may be inserted at michael@0: * pos.limit and act accordingly. Otherwise, michael@0: * transliterate all text between pos.start and michael@0: * pos.limit and move pos.start up to michael@0: * pos.limit. michael@0: * michael@0: * @see #transliterate michael@0: * @stable ICU 2.4 michael@0: */ michael@0: virtual void handleTransliterate(Replaceable& text, michael@0: UTransPosition& pos, michael@0: UBool incremental) const = 0; michael@0: michael@0: public: michael@0: /** michael@0: * Transliterate a substring of text, as specified by index, taking filters michael@0: * into account. This method is for subclasses that need to delegate to michael@0: * another transliterator, such as CompoundTransliterator. michael@0: * @param text the text to be transliterated michael@0: * @param index the position indices michael@0: * @param incremental if TRUE, then assume more characters may be inserted michael@0: * at index.limit, and postpone processing to accommodate future incoming michael@0: * characters michael@0: * @stable ICU 2.4 michael@0: */ michael@0: virtual void filteredTransliterate(Replaceable& text, michael@0: UTransPosition& index, michael@0: UBool incremental) const; michael@0: michael@0: private: michael@0: michael@0: /** michael@0: * Top-level transliteration method, handling filtering, incremental and michael@0: * non-incremental transliteration, and rollback. All transliteration michael@0: * public API methods eventually call this method with a rollback argument michael@0: * of TRUE. Other entities may call this method but rollback should be michael@0: * FALSE. michael@0: * michael@0: *

If this transliterator has a filter, break up the input text into runs michael@0: * of unfiltered characters. Pass each run to michael@0: * subclass.handleTransliterate(). michael@0: * michael@0: *

In incremental mode, if rollback is TRUE, perform a special michael@0: * incremental procedure in which several passes are made over the input michael@0: * text, adding one character at a time, and committing successful michael@0: * transliterations as they occur. Unsuccessful transliterations are rolled michael@0: * back and retried with additional characters to give correct results. michael@0: * michael@0: * @param text the text to be transliterated michael@0: * @param index the position indices michael@0: * @param incremental if TRUE, then assume more characters may be inserted michael@0: * at index.limit, and postpone processing to accommodate future incoming michael@0: * characters michael@0: * @param rollback if TRUE and if incremental is TRUE, then perform special michael@0: * incremental processing, as described above, and undo partial michael@0: * transliterations where necessary. If incremental is FALSE then this michael@0: * parameter is ignored. michael@0: */ michael@0: virtual void filteredTransliterate(Replaceable& text, michael@0: UTransPosition& index, michael@0: UBool incremental, michael@0: UBool rollback) const; michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * Returns the length of the longest context required by this transliterator. michael@0: * This is preceding context. The default implementation supplied michael@0: * by Transliterator returns zero; subclasses michael@0: * that use preceding context should override this method to return the michael@0: * correct value. For example, if a transliterator translates "ddd" (where michael@0: * d is any digit) to "555" when preceded by "(ddd)", then the preceding michael@0: * context length is 5, the length of "(ddd)". 
michael@0: * michael@0: * @return The maximum number of preceding context characters this michael@0: * transliterator needs to examine michael@0: * @stable ICU 2.0 michael@0: */ michael@0: int32_t getMaximumContextLength(void) const; michael@0: michael@0: protected: michael@0: michael@0: /** michael@0: * Method for subclasses to use to set the maximum context length. michael@0: * @param maxContextLength the new value to be set. michael@0: * @see #getMaximumContextLength michael@0: * @stable ICU 2.4 michael@0: */ michael@0: void setMaximumContextLength(int32_t maxContextLength); michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * Returns a programmatic identifier for this transliterator. michael@0: * If this identifier is passed to createInstance(), it michael@0: * will return this object, if it has been registered. michael@0: * @return a programmatic identifier for this transliterator. michael@0: * @see #registerInstance michael@0: * @see #registerFactory michael@0: * @see #getAvailableIDs michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual const UnicodeString& getID(void) const; michael@0: michael@0: /** michael@0: * Returns a name for this transliterator that is appropriate for michael@0: * display to the user in the default locale. See {@link michael@0: * #getDisplayName } for details. michael@0: * @param ID the string identifier for this transliterator michael@0: * @param result Output param to receive the display name michael@0: * @return A reference to 'result'. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID, michael@0: UnicodeString& result); michael@0: michael@0: /** michael@0: * Returns a name for this transliterator that is appropriate for michael@0: * display to the user in the given locale. This name is taken michael@0: * from the locale resource data in the standard manner of the michael@0: * java.text package. michael@0: * michael@0: *

If no localized names exist in the system resource bundles, michael@0: * a name is synthesized using a localized michael@0: * MessageFormat pattern from the resource data. The michael@0: * arguments to this pattern are an integer followed by one or two michael@0: * strings. The integer is the number of strings, either 1 or 2. michael@0: * The strings are formed by splitting the ID for this michael@0: * transliterator at the first '-'. If there is no '-', then the michael@0: * entire ID forms the only string. michael@0: * @param ID the string identifier for this transliterator michael@0: * @param inLocale the Locale in which the display name should be michael@0: * localized. michael@0: * @param result Output param to receive the display name michael@0: * @return A reference to 'result'. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID, michael@0: const Locale& inLocale, michael@0: UnicodeString& result); michael@0: michael@0: /** michael@0: * Returns the filter used by this transliterator, or NULL michael@0: * if this transliterator uses no filter. michael@0: * @return the filter used by this transliterator, or NULL michael@0: * if this transliterator uses no filter. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: const UnicodeFilter* getFilter(void) const; michael@0: michael@0: /** michael@0: * Returns the filter used by this transliterator, or NULL if this michael@0: * transliterator uses no filter. The caller must eventually delete the michael@0: * result. After this call, this transliterator's filter is set to michael@0: * NULL. michael@0: * @return the filter used by this transliterator, or NULL if this michael@0: * transliterator uses no filter. michael@0: * @stable ICU 2.4 michael@0: */ michael@0: UnicodeFilter* orphanFilter(void); michael@0: michael@0: /** michael@0: * Changes the filter used by this transliterator. 
If the filter michael@0: * is set to null then no filtering will occur. michael@0: * michael@0: *

Callers must take care if a transliterator is in use by michael@0: * multiple threads. The filter should not be changed by one michael@0: * thread while another thread may be transliterating. michael@0: * @param adoptedFilter the new filter to be adopted. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: void adoptFilter(UnicodeFilter* adoptedFilter); michael@0: michael@0: /** michael@0: * Returns this transliterator's inverse. See the class michael@0: * documentation for details. This implementation simply inverts michael@0: * the two entities in the ID and attempts to retrieve the michael@0: * resulting transliterator. That is, if getID() michael@0: * returns "A-B", then this method will return the result of michael@0: * createInstance("B-A"), or null if that michael@0: * call fails. michael@0: * michael@0: *

Subclasses with knowledge of their inverse may wish to michael@0: * override this method. michael@0: * michael@0: * @param status Output param to be filled in with a success or an error. michael@0: * @return a transliterator that is an inverse, not necessarily michael@0: * exact, of this transliterator, or null if no such michael@0: * transliterator is registered. michael@0: * @see #registerInstance michael@0: * @stable ICU 2.0 michael@0: */ michael@0: Transliterator* createInverse(UErrorCode& status) const; michael@0: michael@0: /** michael@0: * Returns a Transliterator object given its ID. michael@0: * The ID must be either a system transliterator ID or an ID registered michael@0: * using registerInstance(). michael@0: * michael@0: * @param ID a valid ID, as enumerated by getAvailableIDs() michael@0: * @param dir either FORWARD or REVERSE. michael@0: * @param parseError Struct to receive information on position michael@0: * of error if an error is encountered michael@0: * @param status Output param to be filled in with a success or an error. michael@0: * @return A Transliterator object with the given ID michael@0: * @see #registerInstance michael@0: * @see #getAvailableIDs michael@0: * @see #getID michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID, michael@0: UTransDirection dir, michael@0: UParseError& parseError, michael@0: UErrorCode& status); michael@0: michael@0: /** michael@0: * Returns a Transliterator object given its ID. michael@0: * The ID must be either a system transliterator ID or an ID registered michael@0: * using registerInstance(). michael@0: * @param ID a valid ID, as enumerated by getAvailableIDs() michael@0: * @param dir either FORWARD or REVERSE. michael@0: * @param status Output param to be filled in with a success or an error. 
michael@0: * @return A Transliterator object with the given ID michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID, michael@0: UTransDirection dir, michael@0: UErrorCode& status); michael@0: michael@0: /** michael@0: * Returns a Transliterator object constructed from michael@0: * the given rule string. This will be a RuleBasedTransliterator, michael@0: * if the rule string contains only rules, or a michael@0: * CompoundTransliterator, if it contains ID blocks, or a michael@0: * NullTransliterator, if it contains ID blocks which parse as michael@0: * empty for the given direction. michael@0: * @param ID the id for the transliterator. michael@0: * @param rules rules, separated by ';' michael@0: * @param dir either FORWARD or REVERSE. michael@0: * @param parseError Struct to receive information on position michael@0: * of error if an error is encountered michael@0: * @param status Output param set to success/failure code. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID, michael@0: const UnicodeString& rules, michael@0: UTransDirection dir, michael@0: UParseError& parseError, michael@0: UErrorCode& status); michael@0: michael@0: /** michael@0: * Create a rule string that can be passed to createFromRules() michael@0: * to recreate this transliterator. michael@0: * @param result the string to receive the rules. Previous michael@0: * contents will be deleted. michael@0: * @param escapeUnprintable if TRUE then convert unprintable michael@0: * characters to their hex escape representations, \\uxxxx or michael@0: * \\Uxxxxxxxx. Unprintable characters are those other than michael@0: * U+000A, U+0020..U+007E. 
michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual UnicodeString& toRules(UnicodeString& result, michael@0: UBool escapeUnprintable) const; michael@0: michael@0: /** michael@0: * Return the number of elements that make up this transliterator. michael@0: * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek" michael@0: * were created, the return value of this method would be 3. michael@0: * michael@0: *

If this transliterator is not composed of other michael@0: * transliterators, then this method returns 1. michael@0: * @return the number of transliterators that compose this michael@0: * transliterator, or 1 if this transliterator is not composed of michael@0: * multiple transliterators michael@0: * @stable ICU 3.0 michael@0: */ michael@0: int32_t countElements() const; michael@0: michael@0: /** michael@0: * Return an element that makes up this transliterator. For michael@0: * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek" michael@0: * were created, the return value of this method would be one michael@0: * of the three transliterator objects that make up that michael@0: * transliterator: [NFD, Jamo-Latin, Latin-Greek]. michael@0: * michael@0: *

If this transliterator is not composed of other michael@0: * transliterators, then this method will return a reference to michael@0: * this transliterator when given the index 0. michael@0: * @param index a value from 0..countElements()-1 indicating the michael@0: * transliterator to return michael@0: * @param ec input-output error code michael@0: * @return one of the transliterators that makes up this michael@0: * transliterator, if this transliterator is made up of multiple michael@0: * transliterators, otherwise a reference to this object if given michael@0: * an index of 0 michael@0: * @stable ICU 3.0 michael@0: */ michael@0: const Transliterator& getElement(int32_t index, UErrorCode& ec) const; michael@0: michael@0: /** michael@0: * Returns the set of all characters that may be modified in the michael@0: * input text by this Transliterator. This incorporates this michael@0: * object's current filter; if the filter is changed, the return michael@0: * value of this function will change. The default implementation michael@0: * returns an empty set. Some subclasses may override {@link michael@0: * #handleGetSourceSet } to return a more precise result. The michael@0: * return result is approximate in any case and is intended for michael@0: * use by tests, tools, or utilities. michael@0: * @param result receives result set; previous contents lost michael@0: * @return a reference to result michael@0: * @see #getTargetSet michael@0: * @see #handleGetSourceSet michael@0: * @stable ICU 2.4 michael@0: */ michael@0: UnicodeSet& getSourceSet(UnicodeSet& result) const; michael@0: michael@0: /** michael@0: * Framework method that returns the set of all characters that michael@0: * may be modified in the input text by this Transliterator, michael@0: * ignoring the effect of this object's filter. The base class michael@0: * implementation returns the empty set. Subclasses that wish to michael@0: * implement this should override this method. 
michael@0: * @return the set of characters that this transliterator may michael@0: * modify. The set may be modified, so subclasses should return a michael@0: * newly-created object. michael@0: * @param result receives result set; previous contents lost michael@0: * @see #getSourceSet michael@0: * @see #getTargetSet michael@0: * @stable ICU 2.4 michael@0: */ michael@0: virtual void handleGetSourceSet(UnicodeSet& result) const; michael@0: michael@0: /** michael@0: * Returns the set of all characters that may be generated as michael@0: * replacement text by this transliterator. The default michael@0: * implementation returns the empty set. Some subclasses may michael@0: * override this method to return a more precise result. The michael@0: * return result is approximate in any case and is intended for michael@0: * use by tests, tools, or utilities requiring such michael@0: * meta-information. michael@0: * @param result receives result set; previous contents lost michael@0: * @return a reference to result michael@0: * @see #getTargetSet michael@0: * @stable ICU 2.4 michael@0: */ michael@0: virtual UnicodeSet& getTargetSet(UnicodeSet& result) const; michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * Registers a factory function that creates transliterators of michael@0: * a given ID. michael@0: * @param id the ID being registered michael@0: * @param factory a function pointer that will be copied and michael@0: * called later when the given ID is passed to createInstance() michael@0: * @param context a context pointer that will be stored and michael@0: * later passed to the factory function when an ID matching michael@0: * the registration ID is being instantiated with this factory. 
michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static void U_EXPORT2 registerFactory(const UnicodeString& id, michael@0: Factory factory, michael@0: Token context); michael@0: michael@0: /** michael@0: * Registers an instance obj of a subclass of michael@0: * Transliterator with the system. When michael@0: * createInstance() is called with an ID string that is michael@0: * equal to obj->getID(), then obj->clone() is michael@0: * returned. michael@0: * michael@0: * After this call the Transliterator class owns the adoptedObj michael@0: * and will delete it. michael@0: * michael@0: * @param adoptedObj an instance of subclass of michael@0: * Transliterator that defines clone() michael@0: * @see #createInstance michael@0: * @see #registerFactory michael@0: * @see #unregister michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static void U_EXPORT2 registerInstance(Transliterator* adoptedObj); michael@0: michael@0: /** michael@0: * Registers an ID string as an alias of another ID string. michael@0: * That is, after calling this function, createInstance(aliasID) michael@0: * will return the same thing as createInstance(realID). michael@0: * This is generally used to create shorter, more mnemonic aliases michael@0: * for long compound IDs. michael@0: * michael@0: * @param aliasID The new ID being registered. michael@0: * @param realID The ID that the new ID is to be an alias for. michael@0: * This can be a compound ID and can include filters and should michael@0: * refer to transliterators that have already been registered with michael@0: * the framework, although this isn't checked. 
michael@0: * @stable ICU 3.6 michael@0: */ michael@0: static void U_EXPORT2 registerAlias(const UnicodeString& aliasID, michael@0: const UnicodeString& realID); michael@0: michael@0: protected: michael@0: michael@0: #ifndef U_HIDE_INTERNAL_API michael@0: /** michael@0: * @internal michael@0: * @param id the ID being registered michael@0: * @param factory a function pointer that will be copied and michael@0: * called later when the given ID is passed to createInstance() michael@0: * @param context a context pointer that will be stored and michael@0: * later passed to the factory function when an ID matching michael@0: * the registration ID is being instantiated with this factory. michael@0: */ michael@0: static void _registerFactory(const UnicodeString& id, michael@0: Factory factory, michael@0: Token context); michael@0: michael@0: /** michael@0: * @internal michael@0: */ michael@0: static void _registerInstance(Transliterator* adoptedObj); michael@0: michael@0: /** michael@0: * @internal michael@0: */ michael@0: static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID); michael@0: michael@0: /** michael@0: * Register two targets as being inverses of one another. For michael@0: * example, calling registerSpecialInverse("NFC", "NFD", true) causes michael@0: * Transliterator to form the following inverse relationships: michael@0: * michael@0: *

NFC => NFD
michael@0:      * Any-NFC => Any-NFD
michael@0:      * NFD => NFC
michael@0:      * Any-NFD => Any-NFC
michael@0: * michael@0: * (Without the special inverse registration, the inverse of NFC michael@0: * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but michael@0: * that the presence or absence of "Any-" is preserved. michael@0: * michael@0: *

The relationship is symmetrical; registering (a, b) is michael@0: * equivalent to registering (b, a). michael@0: * michael@0: *

The relevant IDs must still be registered separately as michael@0: * factories or classes. michael@0: * michael@0: *

Only the targets are specified. Special inverses always michael@0: * have the form Any-Target1 <=> Any-Target2. The target should michael@0: * have canonical casing (the casing desired to be produced when michael@0: * an inverse is formed) and should contain no whitespace or other michael@0: * extraneous characters. michael@0: * michael@0: * @param target the target against which to register the inverse michael@0: * @param inverseTarget the inverse of target, that is michael@0: * Any-target.getInverse() => Any-inverseTarget michael@0: * @param bidirectional if true, register the reverse relation michael@0: * as well, that is, Any-inverseTarget.getInverse() => Any-target michael@0: * @internal michael@0: */ michael@0: static void _registerSpecialInverse(const UnicodeString& target, michael@0: const UnicodeString& inverseTarget, michael@0: UBool bidirectional); michael@0: #endif /* U_HIDE_INTERNAL_API */ michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * Unregisters a transliterator or class. This may be either michael@0: * a system transliterator or a user transliterator or class. michael@0: * Any attempt to construct an unregistered transliterator based michael@0: * on its ID will fail. michael@0: * michael@0: * @param ID the ID of the transliterator or class michael@0: * @return the Object that was registered with michael@0: * ID, or null if none was michael@0: * @see #registerInstance michael@0: * @see #registerFactory michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static void U_EXPORT2 unregister(const UnicodeString& ID); michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * Return a StringEnumeration over the IDs available at the time of the michael@0: * call, including user-registered IDs. michael@0: * @param ec input-output error code michael@0: * @return a newly-created StringEnumeration over the transliterators michael@0: * available at the time of the call. 
The caller should delete this object michael@0: * when done using it. michael@0: * @stable ICU 3.0 michael@0: */ michael@0: static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec); michael@0: michael@0: /** michael@0: * Return the number of registered source specifiers. michael@0: * @return the number of registered source specifiers. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static int32_t U_EXPORT2 countAvailableSources(void); michael@0: michael@0: /** michael@0: * Return a registered source specifier. michael@0: * @param index which specifier to return, from 0 to n-1, where michael@0: * n = countAvailableSources() michael@0: * @param result fill-in parameter to receive the source specifier. michael@0: * If index is out of range, result will be empty. michael@0: * @return reference to result michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index, michael@0: UnicodeString& result); michael@0: michael@0: /** michael@0: * Return the number of registered target specifiers for a given michael@0: * source specifier. michael@0: * @param source the given source specifier. michael@0: * @return the number of registered target specifiers for a given michael@0: * source specifier. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source); michael@0: michael@0: /** michael@0: * Return a registered target specifier for a given source. michael@0: * @param index which specifier to return, from 0 to n-1, where michael@0: * n = countAvailableTargets(source) michael@0: * @param source the source specifier michael@0: * @param result fill-in parameter to receive the target specifier. michael@0: * If source is invalid or if index is out of range, result will michael@0: * be empty. 
michael@0: * @return reference to result michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index, michael@0: const UnicodeString& source, michael@0: UnicodeString& result); michael@0: michael@0: /** michael@0: * Return the number of registered variant specifiers for a given michael@0: * source-target pair. michael@0: * @param source the source specifier. michael@0: * @param target the target specifier. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source, michael@0: const UnicodeString& target); michael@0: michael@0: /** michael@0: * Return a registered variant specifier for a given source-target michael@0: * pair. michael@0: * @param index which specifier to return, from 0 to n-1, where michael@0: * n = countAvailableVariants(source, target) michael@0: * @param source the source specifier michael@0: * @param target the target specifier michael@0: * @param result fill-in parameter to receive the variant michael@0: * specifier. If source is invalid or if target is invalid or if michael@0: * index is out of range, result will be empty. 
michael@0: * @return reference to result michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index, michael@0: const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: UnicodeString& result); michael@0: michael@0: protected: michael@0: michael@0: #ifndef U_HIDE_INTERNAL_API michael@0: /** michael@0: * Non-mutexed internal method michael@0: * @internal michael@0: */ michael@0: static int32_t _countAvailableSources(void); michael@0: michael@0: /** michael@0: * Non-mutexed internal method michael@0: * @internal michael@0: */ michael@0: static UnicodeString& _getAvailableSource(int32_t index, michael@0: UnicodeString& result); michael@0: michael@0: /** michael@0: * Non-mutexed internal method michael@0: * @internal michael@0: */ michael@0: static int32_t _countAvailableTargets(const UnicodeString& source); michael@0: michael@0: /** michael@0: * Non-mutexed internal method michael@0: * @internal michael@0: */ michael@0: static UnicodeString& _getAvailableTarget(int32_t index, michael@0: const UnicodeString& source, michael@0: UnicodeString& result); michael@0: michael@0: /** michael@0: * Non-mutexed internal method michael@0: * @internal michael@0: */ michael@0: static int32_t _countAvailableVariants(const UnicodeString& source, michael@0: const UnicodeString& target); michael@0: michael@0: /** michael@0: * Non-mutexed internal method michael@0: * @internal michael@0: */ michael@0: static UnicodeString& _getAvailableVariant(int32_t index, michael@0: const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: UnicodeString& result); michael@0: #endif /* U_HIDE_INTERNAL_API */ michael@0: michael@0: protected: michael@0: michael@0: /** michael@0: * Set the ID of this transliterator. Subclasses shouldn't do michael@0: * this, unless the underlying script behavior has changed. michael@0: * @param id the new id to be set. 
michael@0: * @stable ICU 2.4 michael@0: */ michael@0: void setID(const UnicodeString& id); michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * Return the class ID for this class. This is useful only for michael@0: * comparing to a return value from getDynamicClassID(). michael@0: * Note that Transliterator is an abstract base class, and therefore michael@0: * no fully constructed object will have a dynamic michael@0: * UClassID that equals the UClassID returned from michael@0: * Transliterator::getStaticClassID(). michael@0: * @return The class ID for class Transliterator. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: static UClassID U_EXPORT2 getStaticClassID(void); michael@0: michael@0: /** michael@0: * Returns a unique class ID polymorphically. This method michael@0: * is to implement a simple version of RTTI, since not all C++ michael@0: * compilers support genuine RTTI. Polymorphic operator==() and michael@0: * clone() methods call this method. michael@0: * michael@0: *

Concrete subclasses of Transliterator must use the michael@0: * UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from michael@0: * uobject.h to provide the RTTI functions. michael@0: * michael@0: * @return The class ID for this object. All objects of a given michael@0: * class have the same class ID. Objects of other classes have michael@0: * different class IDs. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual UClassID getDynamicClassID(void) const = 0; michael@0: michael@0: private: michael@0: static UBool initializeRegistry(UErrorCode &status); michael@0: michael@0: public: michael@0: #ifndef U_HIDE_OBSOLETE_API michael@0: /** michael@0: * Return the number of IDs currently registered with the system. michael@0: * To retrieve the actual IDs, call getAvailableID(i) with michael@0: * i from 0 to countAvailableIDs() - 1. michael@0: * @return the number of IDs currently registered with the system. michael@0: * @obsolete ICU 3.4 use getAvailableIDs() instead michael@0: */ michael@0: static int32_t U_EXPORT2 countAvailableIDs(void); michael@0: michael@0: /** michael@0: * Return the index-th available ID. index must be between 0 michael@0: * and countAvailableIDs() - 1, inclusive. If index is out of michael@0: * range, the result of getAvailableID(0) is returned. michael@0: * @param index the given ID index. michael@0: * @return the index-th available ID. index must be between 0 michael@0: * and countAvailableIDs() - 1, inclusive. If index is out of michael@0: * range, the result of getAvailableID(0) is returned. 
michael@0: * @obsolete ICU 3.4 use getAvailableIDs() instead; this function michael@0: * is not thread safe, since it returns a reference to storage that michael@0: * may become invalid if another thread calls unregister michael@0: */ michael@0: static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index); michael@0: #endif /* U_HIDE_OBSOLETE_API */ michael@0: }; michael@0: michael@0: /* Inline accessor: returns the maximumContextLength data member. */ inline int32_t Transliterator::getMaximumContextLength(void) const { michael@0: return maximumContextLength; michael@0: } michael@0: michael@0: /* Assigns id to the ID member, then appends a NUL code unit and truncates by one, so the final length equals the length after the assignment. */ inline void Transliterator::setID(const UnicodeString& id) { michael@0: ID = id; michael@0: // NUL-terminate the ID string, which is a non-aliased copy. michael@0: ID.append((UChar)0); michael@0: ID.truncate(ID.length()-1); michael@0: } michael@0: michael@0: #ifndef U_HIDE_INTERNAL_API michael@0: /* Returns a Token whose integer member is set to i. */ inline Transliterator::Token Transliterator::integerToken(int32_t i) { michael@0: Token t; michael@0: t.integer = i; michael@0: return t; michael@0: } michael@0: michael@0: /* Returns a Token whose pointer member is set to p. */ inline Transliterator::Token Transliterator::pointerToken(void* p) { michael@0: Token t; michael@0: t.pointer = p; michael@0: return t; michael@0: } michael@0: #endif /* U_HIDE_INTERNAL_API */ michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_TRANSLITERATION */ michael@0: michael@0: #endif