michael@0: /*
michael@0: **********************************************************************
michael@0: * Copyright (C) 1999-2013, International Business Machines
michael@0: * Corporation and others. All Rights Reserved.
michael@0: **********************************************************************
michael@0: * Date Name Description
michael@0: * 11/17/99 aliu Creation.
michael@0: **********************************************************************
michael@0: */
michael@0: #ifndef TRANSLIT_H
michael@0: #define TRANSLIT_H
michael@0:
michael@0: #include "unicode/utypes.h"
michael@0:
michael@0: /**
michael@0: * \file
michael@0: * \brief C++ API: Transforms text from one format to another.
michael@0: */
michael@0:
michael@0: #if !UCONFIG_NO_TRANSLITERATION
michael@0:
michael@0: #include "unicode/uobject.h"
michael@0: #include "unicode/unistr.h"
michael@0: #include "unicode/parseerr.h"
michael@0: #include "unicode/utrans.h" // UTransPosition, UTransDirection
michael@0: #include "unicode/strenum.h"
michael@0:
michael@0: U_NAMESPACE_BEGIN
michael@0:
michael@0: class UnicodeFilter;
michael@0: class UnicodeSet;
michael@0: class CompoundTransliterator;
michael@0: class TransliteratorParser;
michael@0: class NormalizationTransliterator;
michael@0: class TransliteratorIDParser;
michael@0:
michael@0: /**
michael@0: *
michael@0: * Transliterator
is an abstract class that
michael@0: * transliterates text from one format to another. The most common
michael@0: * kind of transliterator is a script, or alphabet, transliterator.
michael@0: * For example, a Russian to Latin transliterator changes Russian text
michael@0: * written in Cyrillic characters to phonetically equivalent Latin
michael@0: * characters. It does not translate Russian to English!
michael@0: * Transliteration, unlike translation, operates on characters, without
michael@0: * reference to the meanings of words and sentences.
michael@0: *
michael@0: *
Although script conversion is its most common use, a
michael@0: * transliterator can actually perform a more general class of tasks.
michael@0: * In fact, Transliterator
defines a very general API
michael@0: * which specifies only that a segment of the input text is replaced
michael@0: * by new text. The particulars of this conversion are determined
michael@0: * entirely by subclasses of Transliterator
.
michael@0: *
michael@0: *
Transliterators are stateless michael@0: * michael@0: *
Transliterator
objects are stateless; they
michael@0: * retain no information between calls to
michael@0: * transliterate()
. (However, this does not
michael@0: * mean that threads may share transliterators without synchronizing
michael@0: * them. Transliterators are not immutable, so they must be
michael@0: * synchronized when shared between threads.) This might seem to
michael@0: * limit the complexity of the transliteration operation. In
michael@0: * practice, subclasses perform complex transliterations by delaying
michael@0: * the replacement of text until it is known that no other
michael@0: * replacements are possible. In other words, although the
michael@0: * Transliterator
objects are stateless, the source text
michael@0: * itself embodies all the needed information, and delayed operation
michael@0: * allows arbitrary complexity.
michael@0: *
michael@0: *
Batch transliteration michael@0: * michael@0: *
The simplest way to perform transliteration is all at once, on a
michael@0: * string of existing text. This is referred to as batch
michael@0: * transliteration. For example, given a string input
michael@0: * and a transliterator t
, the call
michael@0: *
michael@0: * \htmlonly
\endhtmlonlyString result = t.transliterate(input);
michael@0: *
\htmlonly
\endhtmlonly
michael@0: *
michael@0: * will transliterate it and return the result. Other methods allow
michael@0: * the client to specify a substring to be transliterated and to use
michael@0: * {@link Replaceable } objects instead of strings, in order to
michael@0: * preserve out-of-band information (such as text styles).
michael@0: *
michael@0: * Keyboard transliteration
michael@0: *
michael@0: * Somewhat more involved is keyboard, or incremental
michael@0: * transliteration. This is the transliteration of text that is
michael@0: * arriving from some source (typically the user's keyboard) one
michael@0: * character at a time, or in some other piecemeal fashion.
michael@0: *
michael@0: * In keyboard transliteration, a Replaceable buffer
michael@0: * stores the text. As text is inserted, as much as possible is
michael@0: * transliterated on the fly. This means a GUI that displays the
michael@0: * contents of the buffer may show text being modified as each new
michael@0: * character arrives.
michael@0: *
michael@0: *
Consider the simple RuleBasedTransliterator
:
michael@0: *
michael@0: * \htmlonly
\endhtmlonly
michael@0: * th>{theta}
michael@0: * t>{tau}
michael@0: *
\htmlonly
\endhtmlonly
michael@0: *
michael@0: * When the user types 't', nothing will happen, since the
michael@0: * transliterator is waiting to see if the next character is 'h'. To
michael@0: * remedy this, we introduce the notion of a cursor, marked by a '|'
michael@0: * in the output string:
michael@0: *
michael@0: * \htmlonly\endhtmlonly
michael@0: * t>|{tau}
michael@0: * {tau}h>{theta}
michael@0: *
\htmlonly
\endhtmlonly
michael@0: *
michael@0: * Now when the user types 't', tau appears, and if the next character
michael@0: * is 'h', the tau changes to a theta. This is accomplished by
michael@0: * maintaining a cursor position (independent of the insertion point,
michael@0: * and invisible in the GUI) across calls to
michael@0: * transliterate()
. Typically, the cursor will
michael@0: * be coincident with the insertion point, but in a case like the one
michael@0: * above, it will precede the insertion point.
michael@0: *
michael@0: * Keyboard transliteration methods maintain a set of three indices
michael@0: * that are updated with each call to
michael@0: * transliterate()
, including the cursor, start,
michael@0: * and limit. Since these indices are changed by the method, they are
michael@0: * passed in an int[]
array. The START
index
michael@0: * marks the beginning of the substring that the transliterator will
michael@0: * look at. It is advanced as text becomes committed (but it is not
michael@0: * the committed index; that's the CURSOR
). The
michael@0: * CURSOR
index, described above, marks the point at
michael@0: * which the transliterator last stopped, either because it reached
michael@0: * the end, or because it required more characters to disambiguate
michael@0: * between possible inputs. The CURSOR
can also be
michael@0: * explicitly set by rules in a RuleBasedTransliterator
.
michael@0: * Any characters before the CURSOR
index are frozen;
michael@0: * future keyboard transliteration calls within this input sequence
michael@0: * will not change them. New text is inserted at the
michael@0: * LIMIT
index, which marks the end of the substring that
michael@0: * the transliterator looks at.
michael@0: *
michael@0: *
Because keyboard transliteration assumes that more characters
michael@0: * are to arrive, it is conservative in its operation. It only
michael@0: * transliterates when it can do so unambiguously. Otherwise it waits
michael@0: * for more characters to arrive. When the client code knows that no
michael@0: * more characters are forthcoming, perhaps because the user has
michael@0: * performed some input termination operation, then it should call
michael@0: * finishTransliteration()
to complete any
michael@0: * pending transliterations.
michael@0: *
michael@0: *
Inverses michael@0: * michael@0: *
Pairs of transliterators may be inverses of one another. For
michael@0: * example, if transliterator A transliterates characters by
michael@0: * incrementing their Unicode value (so "abc" -> "def"), and
michael@0: * transliterator B decrements character values, then A
michael@0: * is an inverse of B and vice versa. If we compose A
michael@0: * with B in a compound transliterator, the result is the
michael@0: * identity transliterator, that is, a transliterator that does not
michael@0: * change its input text.
michael@0: *
michael@0: * The Transliterator
method getInverse()
michael@0: * returns a transliterator's inverse, if one exists, or
michael@0: * null
otherwise. However, the result of
michael@0: * getInverse()
usually will not be a true
michael@0: * mathematical inverse. This is because true inverse transliterators
michael@0: * are difficult to formulate. For example, consider two
michael@0: * transliterators: AB, which transliterates the character 'A'
michael@0: * to 'B', and BA, which transliterates 'B' to 'A'. It might
michael@0: * seem that these are exact inverses, since
michael@0: *
michael@0: * \htmlonly\endhtmlonly"A" x AB -> "B"
michael@0: * "B" x BA -> "A"\htmlonly\endhtmlonly
michael@0: *
michael@0: * where 'x' represents transliteration. However,
michael@0: *
michael@0: * \htmlonly\endhtmlonly"ABCD" x AB -> "BBCD"
michael@0: * "BBCD" x BA -> "AACD"\htmlonly\endhtmlonly
michael@0: *
michael@0: * so AB composed with BA is not the
michael@0: * identity. Nonetheless, BA may be usefully considered to be
michael@0: * AB's inverse, and it is on this basis that
michael@0: * AB.getInverse() could legitimately return
michael@0: * BA.
michael@0: *
michael@0: * IDs and display names
michael@0: *
michael@0: * A transliterator is designated by a short identifier string or
michael@0: * ID. IDs follow the format source-destination,
michael@0: * where source describes the entity being replaced, and
michael@0: * destination describes the entity replacing
michael@0: * source. The entities may be the names of scripts,
michael@0: * particular sequences of characters, or whatever else it is that the
michael@0: * transliterator converts to or from. For example, a transliterator
michael@0: * from Russian to Latin might be named "Russian-Latin". A
michael@0: * transliterator from keyboard escape sequences to Latin-1 characters
michael@0: * might be named "KeyboardEscape-Latin1". By convention, system
michael@0: * entity names are in English, with the initial letters of words
michael@0: * capitalized; user entity names may follow any format so long as
michael@0: * they do not contain dashes.
michael@0: *
michael@0: * In addition to programmatic IDs, transliterator objects have
michael@0: * display names for presentation in user interfaces, returned by
michael@0: * {@link #getDisplayName }.
michael@0: *
michael@0: * Factory methods and registration
michael@0: *
michael@0: * In general, client code should use the factory method
michael@0: * {@link #createInstance } to obtain an instance of a
michael@0: * transliterator given its ID. Valid IDs may be enumerated using
michael@0: * getAvailableIDs()
. Since transliterators are mutable,
michael@0: * multiple calls to {@link #createInstance } with the same ID will
michael@0: * return distinct objects.
michael@0: *
michael@0: *
In addition to the system transliterators registered at startup,
michael@0: * user transliterators may be registered by calling
michael@0: * registerInstance()
at run time. A registered instance
michael@0: * acts as a template; future calls to {@link #createInstance } with the ID
michael@0: * of the registered object return clones of that object. Thus any
michael@0: * object passed to registerInstance() must implement
michael@0: * clone() properly. To register a transliterator subclass
michael@0: * without instantiating it (until it is needed), users may call
michael@0: * {@link #registerFactory }. In this case, the objects are
michael@0: * instantiated by invoking the zero-argument public constructor of
michael@0: * the class.
michael@0: *
michael@0: *
Subclassing
michael@0: *
michael@0: * Subclasses must implement the abstract method
michael@0: * handleTransliterate()
.
Subclasses should override
michael@0: * the transliterate()
method taking a
michael@0: * Replaceable
and the transliterate()
michael@0: * method taking a String
and StringBuffer
michael@0: * if the performance of these methods can be improved over the
michael@0: * performance obtained by the default implementations in this class.
michael@0: *
michael@0: * @author Alan Liu
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: class U_I18N_API Transliterator : public UObject {
michael@0:
michael@0: private:
michael@0:
michael@0: /**
michael@0: * Programmatic name, e.g., "Latin-Arabic".
michael@0: */
michael@0: UnicodeString ID;
michael@0:
michael@0: /**
michael@0: * This transliterator's filter. Any character for which
michael@0: * filter.contains() returns false will not be
michael@0: * altered by this transliterator. If filter is
michael@0: * null then no filtering is applied.
michael@0: */
michael@0: UnicodeFilter* filter;
michael@0:
michael@0: int32_t maximumContextLength;
michael@0:
michael@0: public:
michael@0:
michael@0: /**
michael@0: * A context value for a factory function, passed by value: either a
michael@0: * 32-bit integer or a native pointer. Both members share the same
michael@0: * storage, so a reader must access the member that was last written.
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: union Token {
michael@0: /**
michael@0: * This token, interpreted as a 32-bit integer. Only meaningful
michael@0: * when the token was stored through this member.
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: int32_t integer;
michael@0: /**
michael@0: * This token, interpreted as a native pointer. Only meaningful
michael@0: * when the token was stored through this member.
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: void* pointer;
michael@0: };
michael@0:
michael@0: #ifndef U_HIDE_INTERNAL_API
michael@0: /**
michael@0: * Return a token containing an integer.
michael@0: * @return a token containing an integer.
michael@0: * @internal
michael@0: */
michael@0: inline static Token integerToken(int32_t);
michael@0:
michael@0: /**
michael@0: * Return a token containing a pointer.
michael@0: * @return a token containing a pointer.
michael@0: * @internal
michael@0: */
michael@0: inline static Token pointerToken(void*);
michael@0: #endif /* U_HIDE_INTERNAL_API */
michael@0:
michael@0: /**
michael@0: * A function that creates and returns a Transliterator. When
michael@0: * invoked, it will be passed the ID string that is being
michael@0: * instantiated, together with the context pointer that was passed
michael@0: * in when the factory function was first registered. Many
michael@0: * factory functions will ignore both parameters, however,
michael@0: * functions that are registered to more than one ID may use the
michael@0: * ID or the context parameter to parameterize the transliterator
michael@0: * they create.
michael@0: * @param ID the string identifier for this transliterator
michael@0: * @param context a context pointer that will be stored and
michael@0: * later passed to the factory function when an ID matching
michael@0: * the registration ID is being instantiated with this factory.
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
michael@0:
michael@0: protected:
michael@0:
michael@0: /**
michael@0: * Default constructor.
michael@0: * @param ID the string identifier for this transliterator
michael@0: * @param adoptedFilter the filter. Any character for which
michael@0: * filter.contains() returns false will not be
michael@0: * altered by this transliterator. If filter is
michael@0: * null then no filtering is applied.
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
michael@0:
michael@0: /**
michael@0: * Copy constructor.
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: Transliterator(const Transliterator&);
michael@0:
michael@0: /**
michael@0: * Assignment operator.
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: Transliterator& operator=(const Transliterator&);
michael@0:
michael@0: /**
michael@0: * Create a transliterator from a basic ID. This is an ID
michael@0: * containing only the forward direction source, target, and
michael@0: * variant.
michael@0: * @param id a basic ID of the form S-T or S-T/V.
michael@0: * @param canon canonical ID to assign to the object, or
michael@0: * NULL to leave the ID unchanged
michael@0: * @return a newly created Transliterator or null if the ID is
michael@0: * invalid.
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: static Transliterator* createBasicInstance(const UnicodeString& id,
michael@0: const UnicodeString* canon);
michael@0:
michael@0: friend class TransliteratorParser; // for parseID()
michael@0: friend class TransliteratorIDParser; // for createBasicInstance()
michael@0: friend class TransliteratorAlias; // for setID()
michael@0:
michael@0: public:
michael@0:
michael@0: /**
michael@0: * Destructor.
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: virtual ~Transliterator();
michael@0:
michael@0: /**
michael@0: * Implements Cloneable.
michael@0: * All subclasses are encouraged to implement this method if it is
michael@0: * possible and reasonable to do so. Subclasses that are to be
michael@0: * registered with the system using registerInstance()
michael@0: * are required to implement this method. If a subclass does not
michael@0: * implement clone() properly and is registered with the system
michael@0: * using registerInstance(), then the default clone() implementation
michael@0: * will return null, and calls to createInstance() will fail.
michael@0: *
michael@0: * @return a copy of the object.
michael@0: * @see #registerInstance
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: virtual Transliterator* clone() const;
michael@0:
michael@0: /**
michael@0: * Transliterates a segment of a string, with optional filtering.
michael@0: *
michael@0: * @param text the string to be transliterated
michael@0: * @param start the beginning index, inclusive; 0 <= start
michael@0: * <= limit
.
michael@0: * @param limit the ending index, exclusive; start <= limit
michael@0: * <= text.length()
.
michael@0: * @return The new limit index. The text previously occupying [start,
michael@0: * limit)
has been transliterated, possibly to a string of a different
michael@0: * length, at [start,
new-limit)
, where
michael@0: * new-limit is the return value. If the input offsets are out of bounds,
michael@0: * the returned value is -1 and the input string remains unchanged.
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: virtual int32_t transliterate(Replaceable& text,
michael@0: int32_t start, int32_t limit) const;
michael@0:
michael@0: /**
michael@0: * Transliterates an entire string in place. Convenience method.
michael@0: * @param text the string to be transliterated
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: virtual void transliterate(Replaceable& text) const;
michael@0:
michael@0: /**
michael@0: * Transliterates the portion of the text buffer that can be
michael@0: * transliterated unambiguously after new text has been inserted,
michael@0: * typically as a result of a keyboard event. The new text in
michael@0: * insertion
will be inserted into text
michael@0: * at index.limit
, advancing
michael@0: * index.limit
by insertion.length()
.
michael@0: * Then the transliterator will try to transliterate characters of
michael@0: * text
between index.cursor
and
michael@0: * index.limit
. Characters before
michael@0: * index.cursor
will not be changed.
michael@0: *
michael@0: *
Upon return, values in index
will be updated.
michael@0: * index.start
will be advanced to the first
michael@0: * character that future calls to this method will read.
michael@0: * index.cursor
and index.limit
will
michael@0: * be adjusted to delimit the range of text that future calls to
michael@0: * this method may change.
michael@0: *
michael@0: *
Typical usage of this method begins with an initial call
michael@0: * with index.start
and index.limit
michael@0: * set to indicate the portion of text
to be
michael@0: * transliterated, and index.cursor == index.start
.
michael@0: * Thereafter, index
can be used without
michael@0: * modification in future calls, provided that all changes to
michael@0: * text
are made via this method.
michael@0: *
michael@0: * This method assumes that future calls may be made that will
michael@0: * insert new text into the buffer. As a result, it only performs
michael@0: * unambiguous transliterations. After the last call to this
michael@0: * method, there may be untransliterated text that is waiting for
michael@0: * more input to resolve an ambiguity. In order to perform these
michael@0: * pending transliterations, clients should call {@link
michael@0: * #finishTransliteration } after the last call to this
michael@0: * method has been made.
michael@0: *
michael@0: * @param text the buffer holding transliterated and untransliterated text
michael@0: * @param index an array of three integers.
michael@0: *
michael@0: * - index.start: the beginning index,
michael@0: * inclusive; 0 <= index.start <= index.limit.
michael@0: *
michael@0: * - index.limit: the ending index, exclusive;
michael@0: * index.start <= index.limit <= text.length().
michael@0: * insertion is inserted at
michael@0: * index.limit.
michael@0: *
michael@0: * - index.cursor: the next character to be
michael@0: * considered for transliteration; index.start <=
michael@0: * index.cursor <= index.limit. Characters before
michael@0: * index.cursor will not be changed by future calls
michael@0: * to this method.
michael@0: *
michael@0: * @param insertion text to be inserted and possibly
michael@0: * transliterated into the translation buffer at
michael@0: * index.limit. If null then no text
michael@0: * is inserted.
michael@0: * @param status Output param to be filled in with a success or an error.
michael@0: * @see #handleTransliterate
michael@0: * @exception IllegalArgumentException if index
michael@0: * is invalid
michael@0: * @see UTransPosition
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: virtual void transliterate(Replaceable& text, UTransPosition& index,
michael@0: const UnicodeString& insertion,
michael@0: UErrorCode& status) const;
michael@0:
michael@0: /**
michael@0: * Transliterates the portion of the text buffer that can be
michael@0: * transliterated unambiguously after a new character has been
michael@0: * inserted, typically as a result of a keyboard event. This is a
michael@0: * convenience method.
michael@0: * @param text the buffer holding transliterated and
michael@0: * untransliterated text
michael@0: * @param index an array of three integers.
michael@0: * @param insertion text to be inserted and possibly
michael@0: * transliterated into the translation buffer at
michael@0: * index.limit
.
michael@0: * @param status Output param to be filled in with a success or an error.
michael@0: * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: virtual void transliterate(Replaceable& text, UTransPosition& index,
michael@0: UChar32 insertion,
michael@0: UErrorCode& status) const;
michael@0:
michael@0: /**
michael@0: * Transliterates the portion of the text buffer that can be
michael@0: * transliterated unambiguously. This is a convenience method; see
michael@0: * {@link
michael@0: * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }
michael@0: * for details.
michael@0: * @param text the buffer holding transliterated and
michael@0: * untransliterated text
michael@0: * @param index an array of three integers. See {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
michael@0: * @param status Output param to be filled in with a success or an error.
michael@0: * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: virtual void transliterate(Replaceable& text, UTransPosition& index,
michael@0: UErrorCode& status) const;
michael@0:
michael@0: /**
michael@0: * Finishes any pending transliterations that were waiting for
michael@0: * more characters. Clients should call this method as the last
michael@0: * call after a sequence of one or more calls to
michael@0: * transliterate()
.
michael@0: * @param text the buffer holding transliterated and
michael@0: * untransliterated text.
michael@0: * @param index the array of indices previously passed to {@link
michael@0: * #transliterate }
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: virtual void finishTransliteration(Replaceable& text,
michael@0: UTransPosition& index) const;
michael@0:
michael@0: private:
michael@0:
michael@0: /**
michael@0: * This internal method does incremental transliteration. If the
michael@0: * 'insertion' is non-null then we append it to 'text' before
michael@0: * proceeding. This method calls through to the pure virtual
michael@0: * framework method handleTransliterate() to do the actual
michael@0: * work.
michael@0: * @param text the buffer holding transliterated and
michael@0: * untransliterated text
michael@0: * @param index an array of three integers. See {@link
michael@0: * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
michael@0: * @param insertion text to be inserted and possibly
michael@0: * transliterated into the translation buffer at
michael@0: * index.limit.
michael@0: * @param status Output param to be filled in with a success or an error.
michael@0: */
michael@0: void _transliterate(Replaceable& text,
michael@0: UTransPosition& index,
michael@0: const UnicodeString* insertion,
michael@0: UErrorCode &status) const;
michael@0:
michael@0: protected:
michael@0:
michael@0: /**
michael@0: * Abstract method that concrete subclasses define to implement
michael@0: * their transliteration algorithm. This method handles both
michael@0: * incremental and non-incremental transliteration. Let
michael@0: * originalStart
refer to the value of
michael@0: * pos.start
upon entry.
michael@0: *
michael@0: * - If incremental is false, then this method
michael@0: * should transliterate all characters between
michael@0: * pos.start and pos.limit. Upon return
michael@0: * pos.start must == pos.limit.
michael@0: *
michael@0: * - If incremental is true, then this method
michael@0: * should transliterate all characters between
michael@0: * pos.start and pos.limit that can be
michael@0: * unambiguously transliterated, regardless of future insertions
michael@0: * of text at pos.limit. Upon return,
michael@0: * pos.start should be in the range
michael@0: * [originalStart, pos.limit).
michael@0: * pos.start should be positioned such that
michael@0: * characters [originalStart,
michael@0: * pos.start) will not be changed in the future by this
michael@0: * transliterator and characters [pos.start,
michael@0: * pos.limit) are unchanged.
michael@0: *
michael@0: * Implementations of this method should also obey the
michael@0: * following invariants:
michael@0: *
michael@0: * - pos.limit and pos.contextLimit
michael@0: * should be updated to reflect changes in length of the text
michael@0: * between pos.start and pos.limit. The
michael@0: * difference pos.contextLimit - pos.limit should
michael@0: * not change.
michael@0: *
michael@0: * - pos.contextStart should not change.
michael@0: *
michael@0: * - Neither pos.start nor
michael@0: * pos.limit should be less than
michael@0: * originalStart.
michael@0: *
michael@0: * - Text before originalStart and text after
michael@0: * pos.limit should not change.
michael@0: *
michael@0: * - Text before pos.contextStart and text after
michael@0: * pos.contextLimit should be ignored.
michael@0: *
michael@0: * Subclasses may safely assume that all characters in
michael@0: * [pos.start, pos.limit) are filtered.
michael@0: * In other words, the filter has already been applied by the time
michael@0: * this method is called. See
michael@0: * filteredTransliterate().
michael@0: *
michael@0: *
This method is not for public consumption. Calling
michael@0: * this method directly will transliterate
michael@0: * [pos.start
, pos.limit
) without
michael@0: * applying the filter. End user code should call
michael@0: * transliterate()
instead of this method. Subclass code
michael@0: * and wrapping transliterators should call
michael@0: * filteredTransliterate()
instead of this method.
michael@0: *
michael@0: * @param text the buffer holding transliterated and
michael@0: * untransliterated text
michael@0: *
michael@0: * @param pos the indices indicating the start, limit, context
michael@0: * start, and context limit of the text.
michael@0: *
michael@0: * @param incremental if true, assume more text may be inserted at
michael@0: * pos.limit
and act accordingly. Otherwise,
michael@0: * transliterate all text between pos.start
and
michael@0: * pos.limit
and move pos.start
up to
michael@0: * pos.limit
.
michael@0: *
michael@0: * @see #transliterate
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: virtual void handleTransliterate(Replaceable& text,
michael@0: UTransPosition& pos,
michael@0: UBool incremental) const = 0;
michael@0:
michael@0: public:
michael@0: /**
michael@0: * Transliterate a substring of text, as specified by index, taking filters
michael@0: * into account. This method is for subclasses that need to delegate to
michael@0: * another transliterator, such as CompoundTransliterator.
michael@0: * @param text the text to be transliterated
michael@0: * @param index the position indices
michael@0: * @param incremental if TRUE, then assume more characters may be inserted
michael@0: * at index.limit, and postpone processing to accommodate future incoming
michael@0: * characters
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: virtual void filteredTransliterate(Replaceable& text,
michael@0: UTransPosition& index,
michael@0: UBool incremental) const;
michael@0:
michael@0: private:
michael@0:
michael@0: /**
michael@0: * Top-level transliteration method, handling filtering, incremental and
michael@0: * non-incremental transliteration, and rollback. All transliteration
michael@0: * public API methods eventually call this method with a rollback argument
michael@0: * of TRUE. Other entities may call this method but rollback should be
michael@0: * FALSE.
michael@0: *
michael@0: * If this transliterator has a filter, break up the input text into runs
michael@0: * of unfiltered characters. Pass each run to
michael@0: * subclass.handleTransliterate().
michael@0: *
michael@0: * In incremental mode, if rollback is TRUE, perform a special
michael@0: * incremental procedure in which several passes are made over the input
michael@0: * text, adding one character at a time, and committing successful
michael@0: * transliterations as they occur. Unsuccessful transliterations are rolled
michael@0: * back and retried with additional characters to give correct results.
michael@0: *
michael@0: * @param text the text to be transliterated
michael@0: * @param index the position indices
michael@0: * @param incremental if TRUE, then assume more characters may be inserted
michael@0: * at index.limit, and postpone processing to accommodate future incoming
michael@0: * characters
michael@0: * @param rollback if TRUE and if incremental is TRUE, then perform special
michael@0: * incremental processing, as described above, and undo partial
michael@0: * transliterations where necessary. If incremental is FALSE then this
michael@0: * parameter is ignored.
michael@0: */
michael@0: virtual void filteredTransliterate(Replaceable& text,
michael@0: UTransPosition& index,
michael@0: UBool incremental,
michael@0: UBool rollback) const;
michael@0:
michael@0: public:
michael@0:
michael@0: /**
michael@0: * Returns the length of the longest context required by this transliterator.
michael@0: * This is preceding context. The default implementation supplied
michael@0: * by Transliterator
returns zero; subclasses
michael@0: * that use preceding context should override this method to return the
michael@0: * correct value. For example, if a transliterator translates "ddd" (where
michael@0: * d is any digit) to "555" when preceded by "(ddd)", then the preceding
michael@0: * context length is 5, the length of "(ddd)".
michael@0: *
michael@0: * @return The maximum number of preceding context characters this
michael@0: * transliterator needs to examine
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: int32_t getMaximumContextLength(void) const;
michael@0:
michael@0: protected:
michael@0:
michael@0: /**
michael@0: * Method for subclasses to use to set the maximum context length
michael@0: * (see getMaximumContextLength() for what the value means).
michael@0: * @param maxContextLength the new value to be set.
michael@0: * @see #getMaximumContextLength
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: void setMaximumContextLength(int32_t maxContextLength);
michael@0:
michael@0: public:
michael@0:
michael@0: /**
michael@0: * Returns a programmatic identifier for this transliterator.
michael@0: * If this identifier is passed to createInstance(), it
michael@0: * will return this object, if it has been registered.
michael@0: * @return a programmatic identifier for this transliterator.
michael@0: * @see #registerInstance
michael@0: * @see #registerFactory
michael@0: * @see #getAvailableIDs
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: virtual const UnicodeString& getID(void) const;
michael@0:
michael@0: /**
michael@0: * Returns a name for the transliterator with the given ID that is
michael@0: * appropriate for display to the user in the default locale. See the
michael@0: * overload getDisplayName(const UnicodeString&, const Locale&,
michael@0: * UnicodeString&) for details on how the name is obtained.
michael@0: * @param ID the string identifier for this transliterator
michael@0: * @param result Output param to receive the display name
michael@0: * @return A reference to 'result'.
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
michael@0: UnicodeString& result);
michael@0:
michael@0: /**
michael@0: * Returns a name for the transliterator with the given ID that is
michael@0: * appropriate for display to the user in the given locale. This name
michael@0: * is taken from the locale resource data in the standard manner of the
michael@0: * java.text package.
michael@0: * NOTE(review): the java.text wording appears to be inherited from the
michael@0: * Java version of this documentation; confirm what applies to C++.
michael@0: *
michael@0: * If no localized names exist in the system resource bundles,
michael@0: * a name is synthesized using a localized
michael@0: * MessageFormat pattern from the resource data. The
michael@0: * arguments to this pattern are an integer followed by one or two
michael@0: * strings. The integer is the number of strings, either 1 or 2.
michael@0: * The strings are formed by splitting the ID for this
michael@0: * transliterator at the first '-'. If there is no '-', then the
michael@0: * entire ID forms the only string.
michael@0: * @param ID the string identifier for this transliterator
michael@0: * @param inLocale the Locale in which the display name should be
michael@0: * localized.
michael@0: * @param result Output param to receive the display name
michael@0: * @return A reference to 'result'.
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
michael@0: const Locale& inLocale,
michael@0: UnicodeString& result);
michael@0:
michael@0: /**
michael@0: * Returns the filter used by this transliterator, or NULL
michael@0: * if this transliterator uses no filter. The filter is returned as a
michael@0: * pointer to const; use orphanFilter() or adoptFilter() to take or
michael@0: * replace it.
michael@0: * @return the filter used by this transliterator, or NULL
michael@0: * if this transliterator uses no filter.
michael@0: * @see #orphanFilter
michael@0: * @see #adoptFilter
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: const UnicodeFilter* getFilter(void) const;
michael@0:
michael@0: /**
michael@0: * Detaches and returns the filter used by this transliterator, or
michael@0: * returns NULL if this transliterator uses no filter. The caller must
michael@0: * eventually delete the result. After this call, this transliterator's
michael@0: * filter is set to NULL.
michael@0: * @return the filter that was used by this transliterator, or NULL if
michael@0: * this transliterator used no filter.
michael@0: * @see #getFilter
michael@0: * @see #adoptFilter
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: UnicodeFilter* orphanFilter(void);
michael@0:
michael@0: /**
michael@0: * Changes the filter used by this transliterator. If the filter
michael@0: * is set to NULL then no filtering will occur.
michael@0: *
michael@0: * Callers must take care if a transliterator is in use by
michael@0: * multiple threads. The filter should not be changed by one
michael@0: * thread while another thread may be transliterating.
michael@0: * @param adoptedFilter the new filter to be adopted.
michael@0: * @see #getFilter
michael@0: * @see #orphanFilter
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: void adoptFilter(UnicodeFilter* adoptedFilter);
michael@0:
michael@0: /**
michael@0: * Returns this transliterator's inverse. See the class
michael@0: * documentation for details. This implementation simply inverts
michael@0: * the two entities in the ID and attempts to retrieve the
michael@0: * resulting transliterator. That is, if getID()
michael@0: * returns "A-B", then this method will return the result of
michael@0: * createInstance("B-A"), or NULL if that call fails.
michael@0: *
michael@0: * Subclasses with knowledge of their inverse may wish to
michael@0: * override this method.
michael@0: * NOTE(review): this declaration is not virtual, so "override" here
michael@0: * cannot mean a virtual override; confirm the intended mechanism.
michael@0: *
michael@0: * @param status Output param to be filled in with a success or an error.
michael@0: * @return a transliterator that is an inverse, not necessarily
michael@0: * exact, of this transliterator, or NULL if no such
michael@0: * transliterator is registered.
michael@0: * @see #registerInstance
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: Transliterator* createInverse(UErrorCode& status) const;
michael@0:
michael@0: /**
michael@0: * Returns a Transliterator object given its ID.
michael@0: * The ID must be either a system transliterator ID or an ID registered
michael@0: * using registerInstance().
michael@0: *
michael@0: * @param ID a valid ID, as enumerated by getAvailableIDs()
michael@0: * @param dir either FORWARD or REVERSE.
michael@0: * @param parseError Struct to receive information on position
michael@0: * of error if an error is encountered
michael@0: * @param status Output param to be filled in with a success or an error.
michael@0: * @return A Transliterator object with the given ID
michael@0: * @see #registerInstance
michael@0: * @see #getAvailableIDs
michael@0: * @see #getID
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
michael@0: UTransDirection dir,
michael@0: UParseError& parseError,
michael@0: UErrorCode& status);
michael@0:
michael@0: /**
michael@0: * Returns a Transliterator object given its ID.
michael@0: * The ID must be either a system transliterator ID or an ID registered
michael@0: * using registerInstance(). This overload takes no UParseError
michael@0: * argument, so parse-error position details are not reported to the
michael@0: * caller.
michael@0: * @param ID a valid ID, as enumerated by getAvailableIDs()
michael@0: * @param dir either FORWARD or REVERSE.
michael@0: * @param status Output param to be filled in with a success or an error.
michael@0: * @return A Transliterator object with the given ID
michael@0: * @see #registerInstance
michael@0: * @see #getAvailableIDs
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
michael@0: UTransDirection dir,
michael@0: UErrorCode& status);
michael@0:
michael@0: /**
michael@0: * Returns a Transliterator object constructed from
michael@0: * the given rule string. This will be a RuleBasedTransliterator,
michael@0: * if the rule string contains only rules, or a
michael@0: * CompoundTransliterator, if it contains ID blocks, or a
michael@0: * NullTransliterator, if it contains ID blocks which parse as
michael@0: * empty for the given direction.
michael@0: * @param ID the id for the transliterator.
michael@0: * @param rules rules, separated by ';'
michael@0: * @param dir either FORWARD or REVERSE.
michael@0: * @param parseError Struct to receive information on position
michael@0: * of error if an error is encountered
michael@0: * @param status Output param set to success/failure code.
michael@0: * @return the Transliterator constructed from the rules.
michael@0: * NOTE(review): the value returned on failure is not stated here;
michael@0: * confirm against the implementation.
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
michael@0: const UnicodeString& rules,
michael@0: UTransDirection dir,
michael@0: UParseError& parseError,
michael@0: UErrorCode& status);
michael@0:
michael@0: /**
michael@0: * Create a rule string that can be passed to createFromRules()
michael@0: * to recreate this transliterator.
michael@0: * @param result the string to receive the rules. Previous
michael@0: * contents will be deleted.
michael@0: * @param escapeUnprintable if TRUE then convert unprintable
michael@0: * characters to their hex escape representations, \\uxxxx or
michael@0: * \\Uxxxxxxxx. Unprintable characters are those other than
michael@0: * U+000A, U+0020..U+007E.
michael@0: * @return NOTE(review): presumably a reference to result; confirm
michael@0: * against the implementation.
michael@0: * @see #createFromRules
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: virtual UnicodeString& toRules(UnicodeString& result,
michael@0: UBool escapeUnprintable) const;
michael@0:
michael@0: /**
michael@0: * Return the number of elements that make up this transliterator.
michael@0: * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
michael@0: * were created, the return value of this method would be 3.
michael@0: *
michael@0: * If this transliterator is not composed of other
michael@0: * transliterators, then this method returns 1.
michael@0: * @return the number of transliterators that compose this
michael@0: * transliterator, or 1 if this transliterator is not composed of
michael@0: * multiple transliterators
michael@0: * @stable ICU 3.0
michael@0: */
michael@0: int32_t countElements() const;
michael@0:
michael@0: /**
michael@0: * Return an element that makes up this transliterator. For
michael@0: * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
michael@0: * were created, the return value of this method would be one
michael@0: * of the three transliterator objects that make up that
michael@0: * transliterator: [NFD, Jamo-Latin, Latin-Greek].
michael@0: *
michael@0: * If this transliterator is not composed of other
michael@0: * transliterators, then this method will return a reference to
michael@0: * this transliterator when given the index 0.
michael@0: * @param index a value from 0..countElements()-1 indicating the
michael@0: * transliterator to return
michael@0: * @param ec input-output error code
michael@0: * @return one of the transliterators that makes up this
michael@0: * transliterator, if this transliterator is made up of multiple
michael@0: * transliterators, otherwise a reference to this object if given
michael@0: * an index of 0
michael@0: * @stable ICU 3.0
michael@0: */
michael@0: const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
michael@0:
michael@0: /**
michael@0: * Returns the set of all characters that may be modified in the
michael@0: * input text by this Transliterator. This incorporates this
michael@0: * object's current filter; if the filter is changed, the return
michael@0: * value of this function will change. The default implementation
michael@0: * returns an empty set. Some subclasses may override
michael@0: * handleGetSourceSet() to return a more precise result. The
michael@0: * return result is approximate in any case and is intended for
michael@0: * use by tests, tools, or utilities.
michael@0: * @param result receives result set; previous contents lost
michael@0: * @return a reference to result
michael@0: * @see #getTargetSet
michael@0: * @see #handleGetSourceSet
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: UnicodeSet& getSourceSet(UnicodeSet& result) const;
michael@0:
michael@0: /**
michael@0: * Framework method that computes the set of all characters that
michael@0: * may be modified in the input text by this Transliterator,
michael@0: * ignoring the effect of this object's filter. The base class
michael@0: * implementation produces the empty set. Subclasses that wish to
michael@0: * implement this should override this method.
michael@0: *
michael@0: * The function returns void; the set is delivered through the
michael@0: * result parameter.
michael@0: * @param result receives the set of characters that this
michael@0: * transliterator may modify; previous contents lost
michael@0: * @see #getSourceSet
michael@0: * @see #getTargetSet
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: virtual void handleGetSourceSet(UnicodeSet& result) const;
michael@0:
michael@0: /**
michael@0: * Returns the set of all characters that may be generated as
michael@0: * replacement text by this transliterator. The default
michael@0: * implementation returns the empty set. Some subclasses may
michael@0: * override this method to return a more precise result. The
michael@0: * return result is approximate in any case and is intended for
michael@0: * use by tests, tools, or utilities requiring such
michael@0: * meta-information.
michael@0: * @param result receives result set; previous contents lost
michael@0: * @return a reference to result
michael@0: * @see #getSourceSet
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
michael@0:
michael@0: public:
michael@0:
michael@0: /**
michael@0: * Registers a factory function that creates transliterators of
michael@0: * a given ID.
michael@0: * @param id the ID being registered
michael@0: * @param factory a function pointer that will be copied and
michael@0: * called later when the given ID is passed to createInstance()
michael@0: * @param context a context pointer that will be stored and
michael@0: * later passed to the factory function when an ID matching
michael@0: * the registration ID is being instantiated with this factory.
michael@0: * @see #registerInstance
michael@0: * @see #unregister
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static void U_EXPORT2 registerFactory(const UnicodeString& id,
michael@0: Factory factory,
michael@0: Token context);
michael@0:
michael@0: /**
michael@0: * Registers an instance adoptedObj of a subclass of
michael@0: * Transliterator with the system. When
michael@0: * createInstance() is called with an ID string that is
michael@0: * equal to adoptedObj->getID(), then adoptedObj->clone() is
michael@0: * returned.
michael@0: *
michael@0: * After this call the Transliterator class owns the adoptedObj
michael@0: * and will delete it.
michael@0: *
michael@0: * @param adoptedObj an instance of a subclass of
michael@0: * Transliterator that defines clone()
michael@0: * @see #createInstance
michael@0: * @see #registerFactory
michael@0: * @see #unregister
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
michael@0:
michael@0: /**
michael@0: * Registers an ID string as an alias of another ID string.
michael@0: * That is, after calling this function, createInstance(aliasID)
michael@0: * will return the same thing as createInstance(realID).
michael@0: * This is generally used to create shorter, more mnemonic aliases
michael@0: * for long compound IDs.
michael@0: *
michael@0: * @param aliasID The new ID being registered.
michael@0: * @param realID The ID that the new ID is to be an alias for.
michael@0: * This can be a compound ID and can include filters. It should
michael@0: * refer to transliterators that have already been registered with
michael@0: * the framework, although this isn't checked.
michael@0: * @see #createInstance
michael@0: * @stable ICU 3.6
michael@0: */
michael@0: static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
michael@0: const UnicodeString& realID);
michael@0:
michael@0: protected:
michael@0:
michael@0: #ifndef U_HIDE_INTERNAL_API
michael@0: /**
michael@0: * Internal registration of a factory function for a given ID.
michael@0: * NOTE(review): presumably the worker behind the public
michael@0: * registerFactory(); confirm against the implementation.
michael@0: * @internal
michael@0: * @param id the ID being registered
michael@0: * @param factory a function pointer that will be copied and
michael@0: * called later when the given ID is passed to createInstance()
michael@0: * @param context a context pointer that will be stored and
michael@0: * later passed to the factory function when an ID matching
michael@0: * the registration ID is being instantiated with this factory.
michael@0: */
michael@0: static void _registerFactory(const UnicodeString& id,
michael@0: Factory factory,
michael@0: Token context);
michael@0:
michael@0: /**
michael@0: * Internal registration of a transliterator instance.
michael@0: * NOTE(review): presumably the worker behind the public
michael@0: * registerInstance(), with the same ownership transfer of
michael@0: * adoptedObj; confirm against the implementation.
michael@0: * @param adoptedObj the instance being registered
michael@0: * @internal
michael@0: */
michael@0: static void _registerInstance(Transliterator* adoptedObj);
michael@0:
michael@0: /**
michael@0: * Internal registration of an alias ID for another ID.
michael@0: * NOTE(review): presumably the worker behind the public
michael@0: * registerAlias(); confirm against the implementation.
michael@0: * @param aliasID the new ID being registered
michael@0: * @param realID the ID that aliasID is to be an alias for
michael@0: * @internal
michael@0: */
michael@0: static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
michael@0:
michael@0: /**
michael@0: * Register two targets as being inverses of one another. For
michael@0: * example, calling _registerSpecialInverse("NFC", "NFD", true) causes
michael@0: * Transliterator to form the following inverse relationships:
michael@0: *
michael@0: * <pre>NFC => NFD
michael@0: * Any-NFC => Any-NFD
michael@0: * NFD => NFC
michael@0: * Any-NFD => Any-NFC</pre>
michael@0: *
michael@0: * (Without the special inverse registration, the inverse of NFC
michael@0: * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but
michael@0: * that the presence or absence of "Any-" is preserved.
michael@0: *
michael@0: * The relationship is symmetrical; registering (a, b) is
michael@0: * equivalent to registering (b, a).
michael@0: *
michael@0: * The relevant IDs must still be registered separately as
michael@0: * factories or classes.
michael@0: *
michael@0: * Only the targets are specified. Special inverses always
michael@0: * have the form Any-Target1 <=> Any-Target2. The target should
michael@0: * have canonical casing (the casing desired to be produced when
michael@0: * an inverse is formed) and should contain no whitespace or other
michael@0: * extraneous characters.
michael@0: *
michael@0: * @param target the target against which to register the inverse
michael@0: * @param inverseTarget the inverse of target, that is
michael@0: * Any-target.getInverse() => Any-inverseTarget
michael@0: * @param bidirectional if true, register the reverse relation
michael@0: * as well, that is, Any-inverseTarget.getInverse() => Any-target
michael@0: * @internal
michael@0: */
michael@0: static void _registerSpecialInverse(const UnicodeString& target,
michael@0: const UnicodeString& inverseTarget,
michael@0: UBool bidirectional);
michael@0: #endif /* U_HIDE_INTERNAL_API */
michael@0:
michael@0: public:
michael@0:
michael@0: /**
michael@0: * Unregisters a transliterator or class. This may be either
michael@0: * a system transliterator or a user transliterator or class.
michael@0: * Any attempt to construct an unregistered transliterator based
michael@0: * on its ID will fail.
michael@0: *
michael@0: * This function returns void: nothing is handed back to the caller
michael@0: * about the entry that was registered with ID.
michael@0: *
michael@0: * @param ID the ID of the transliterator or class
michael@0: * @see #registerInstance
michael@0: * @see #registerFactory
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static void U_EXPORT2 unregister(const UnicodeString& ID);
michael@0:
michael@0: public:
michael@0:
michael@0: /**
michael@0: * Return a StringEnumeration over the IDs available at the time of the
michael@0: * call, including user-registered IDs.
michael@0: * @param ec input-output error code
michael@0: * @return a newly-created StringEnumeration over the transliterators
michael@0: * available at the time of the call. The caller owns this object and
michael@0: * should delete it when done using it.
michael@0: * @stable ICU 3.0
michael@0: */
michael@0: static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);
michael@0:
michael@0: /**
michael@0: * Return the number of registered source specifiers.
michael@0: * @return the number of registered source specifiers.
michael@0: * @see #getAvailableSource
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static int32_t U_EXPORT2 countAvailableSources(void);
michael@0:
michael@0: /**
michael@0: * Return a registered source specifier.
michael@0: * @param index which specifier to return, from 0 to n-1, where
michael@0: * n = countAvailableSources()
michael@0: * @param result fill-in parameter to receive the source specifier.
michael@0: * If index is out of range, result will be empty.
michael@0: * @return reference to result
michael@0: * @see #countAvailableSources
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
michael@0: UnicodeString& result);
michael@0:
michael@0: /**
michael@0: * Return the number of registered target specifiers for a given
michael@0: * source specifier.
michael@0: * @param source the given source specifier.
michael@0: * @return the number of registered target specifiers for a given
michael@0: * source specifier.
michael@0: * @see #getAvailableTarget
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
michael@0:
michael@0: /**
michael@0: * Return a registered target specifier for a given source.
michael@0: * @param index which specifier to return, from 0 to n-1, where
michael@0: * n = countAvailableTargets(source)
michael@0: * @param source the source specifier
michael@0: * @param result fill-in parameter to receive the target specifier.
michael@0: * If source is invalid or if index is out of range, result will
michael@0: * be empty.
michael@0: * @return reference to result
michael@0: * @see #countAvailableTargets
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
michael@0: const UnicodeString& source,
michael@0: UnicodeString& result);
michael@0:
michael@0: /**
michael@0: * Return the number of registered variant specifiers for a given
michael@0: * source-target pair.
michael@0: * @param source the source specifier.
michael@0: * @param target the target specifier.
michael@0: * @return the number of registered variant specifiers for the given
michael@0: * source-target pair.
michael@0: * @see #getAvailableVariant
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
michael@0: const UnicodeString& target);
michael@0:
michael@0: /**
michael@0: * Return a registered variant specifier for a given source-target
michael@0: * pair.
michael@0: * @param index which specifier to return, from 0 to n-1, where
michael@0: * n = countAvailableVariants(source, target)
michael@0: * @param source the source specifier
michael@0: * @param target the target specifier
michael@0: * @param result fill-in parameter to receive the variant
michael@0: * specifier. If source is invalid or if target is invalid or if
michael@0: * index is out of range, result will be empty.
michael@0: * @return reference to result
michael@0: * @see #countAvailableVariants
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
michael@0: const UnicodeString& source,
michael@0: const UnicodeString& target,
michael@0: UnicodeString& result);
michael@0:
michael@0: protected:
michael@0:
michael@0: #ifndef U_HIDE_INTERNAL_API
michael@0: /**
michael@0: * Non-mutexed internal method.
michael@0: * NOTE(review): presumably the unlocked counterpart of
michael@0: * countAvailableSources(); confirm against the implementation.
michael@0: * @internal
michael@0: */
michael@0: static int32_t _countAvailableSources(void);
michael@0:
michael@0: /**
michael@0: * Non-mutexed internal method.
michael@0: * NOTE(review): presumably the unlocked counterpart of
michael@0: * getAvailableSource(), with the same parameters; confirm against
michael@0: * the implementation.
michael@0: * @internal
michael@0: */
michael@0: static UnicodeString& _getAvailableSource(int32_t index,
michael@0: UnicodeString& result);
michael@0:
michael@0: /**
michael@0: * Non-mutexed internal method.
michael@0: * NOTE(review): presumably the unlocked counterpart of
michael@0: * countAvailableTargets(); confirm against the implementation.
michael@0: * @internal
michael@0: */
michael@0: static int32_t _countAvailableTargets(const UnicodeString& source);
michael@0:
michael@0: /**
michael@0: * Non-mutexed internal method.
michael@0: * NOTE(review): presumably the unlocked counterpart of
michael@0: * getAvailableTarget(), with the same parameters; confirm against
michael@0: * the implementation.
michael@0: * @internal
michael@0: */
michael@0: static UnicodeString& _getAvailableTarget(int32_t index,
michael@0: const UnicodeString& source,
michael@0: UnicodeString& result);
michael@0:
michael@0: /**
michael@0: * Non-mutexed internal method.
michael@0: * NOTE(review): presumably the unlocked counterpart of
michael@0: * countAvailableVariants(); confirm against the implementation.
michael@0: * @internal
michael@0: */
michael@0: static int32_t _countAvailableVariants(const UnicodeString& source,
michael@0: const UnicodeString& target);
michael@0:
michael@0: /**
michael@0: * Non-mutexed internal method.
michael@0: * NOTE(review): presumably the unlocked counterpart of
michael@0: * getAvailableVariant(), with the same parameters; confirm against
michael@0: * the implementation.
michael@0: * @internal
michael@0: */
michael@0: static UnicodeString& _getAvailableVariant(int32_t index,
michael@0: const UnicodeString& source,
michael@0: const UnicodeString& target,
michael@0: UnicodeString& result);
michael@0: #endif /* U_HIDE_INTERNAL_API */
michael@0:
michael@0: protected:
michael@0:
michael@0: /**
michael@0: * Set the ID of this transliterator. Subclasses shouldn't do
michael@0: * this, unless the underlying script behavior has changed.
michael@0: * @param id the new ID to be set.
michael@0: * @see #getID
michael@0: * @stable ICU 2.4
michael@0: */
michael@0: void setID(const UnicodeString& id);
michael@0:
michael@0: public:
michael@0:
michael@0: /**
michael@0: * Return the class ID for this class. This is useful only for
michael@0: * comparing to a return value from getDynamicClassID().
michael@0: * Note that Transliterator is an abstract base class, and therefore
michael@0: * no fully constructed object will have a dynamic
michael@0: * UClassID that equals the UClassID returned from
michael@0: * Transliterator::getStaticClassID().
michael@0: * @return The class ID for class Transliterator.
michael@0: * @stable ICU 2.0
michael@0: */
michael@0: static UClassID U_EXPORT2 getStaticClassID(void);
michael@0:
michael@0: /**
michael@0: * Returns a unique class ID polymorphically. This method
michael@0: * is to implement a simple version of RTTI, since not all C++
michael@0: * compilers support genuine RTTI. Polymorphic operator==() and
michael@0: * clone() methods call this method.
michael@0: *
michael@0: *
Concrete subclasses of Transliterator must use the michael@0: * UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from michael@0: * uobject.h to provide the RTTI functions. michael@0: * michael@0: * @return The class ID for this object. All objects of a given michael@0: * class have the same class ID. Objects of other classes have michael@0: * different class IDs. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: virtual UClassID getDynamicClassID(void) const = 0; michael@0: michael@0: private: michael@0: static UBool initializeRegistry(UErrorCode &status); michael@0: michael@0: public: michael@0: #ifndef U_HIDE_OBSOLETE_API michael@0: /** michael@0: * Return the number of IDs currently registered with the system. michael@0: * To retrieve the actual IDs, call getAvailableID(i) with michael@0: * i from 0 to countAvailableIDs() - 1. michael@0: * @return the number of IDs currently registered with the system. michael@0: * @obsolete ICU 3.4 use getAvailableIDs() instead michael@0: */ michael@0: static int32_t U_EXPORT2 countAvailableIDs(void); michael@0: michael@0: /** michael@0: * Return the index-th available ID. index must be between 0 michael@0: * and countAvailableIDs() - 1, inclusive. If index is out of michael@0: * range, the result of getAvailableID(0) is returned. michael@0: * @param index the given ID index. michael@0: * @return the index-th available ID. index must be between 0 michael@0: * and countAvailableIDs() - 1, inclusive. If index is out of michael@0: * range, the result of getAvailableID(0) is returned. 
michael@0: * @obsolete ICU 3.4 use getAvailableIDs() instead; this function michael@0: * is not thread safe, since it returns a reference to storage that michael@0: * may become invalid if another thread calls unregister michael@0: */ michael@0: static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index); michael@0: #endif /* U_HIDE_OBSOLETE_API */ michael@0: }; michael@0: michael@0: inline int32_t Transliterator::getMaximumContextLength(void) const { michael@0: return maximumContextLength; michael@0: } michael@0: michael@0: inline void Transliterator::setID(const UnicodeString& id) { michael@0: ID = id; michael@0: // NUL-terminate the ID string, which is a non-aliased copy. michael@0: ID.append((UChar)0); michael@0: ID.truncate(ID.length()-1); michael@0: } michael@0: michael@0: #ifndef U_HIDE_INTERNAL_API michael@0: inline Transliterator::Token Transliterator::integerToken(int32_t i) { michael@0: Token t; michael@0: t.integer = i; michael@0: return t; michael@0: } michael@0: michael@0: inline Transliterator::Token Transliterator::pointerToken(void* p) { michael@0: Token t; michael@0: t.pointer = p; michael@0: return t; michael@0: } michael@0: #endif /* U_HIDE_INTERNAL_API */ michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_TRANSLITERATION */ michael@0: michael@0: #endif