intl/icu/source/i18n/unicode/translit.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 **********************************************************************
     3 * Copyright (C) 1999-2013, International Business Machines
     4 * Corporation and others. All Rights Reserved.
     5 **********************************************************************
     6 *   Date        Name        Description
     7 *   11/17/99    aliu        Creation.
     8 **********************************************************************
     9 */
    10 #ifndef TRANSLIT_H
    11 #define TRANSLIT_H
    13 #include "unicode/utypes.h"
    15 /**
    16  * \file 
    17  * \brief C++ API: Transforms text from one format to another.
    18  */
    20 #if !UCONFIG_NO_TRANSLITERATION
    22 #include "unicode/uobject.h"
    23 #include "unicode/unistr.h"
    24 #include "unicode/parseerr.h"
    25 #include "unicode/utrans.h" // UTransPosition, UTransDirection
    26 #include "unicode/strenum.h"
    28 U_NAMESPACE_BEGIN
    30 class UnicodeFilter;
    31 class UnicodeSet;
    32 class CompoundTransliterator;
    33 class TransliteratorParser;
    34 class NormalizationTransliterator;
    35 class TransliteratorIDParser;
    37 /**
    38  *
    39  * <code>Transliterator</code> is an abstract class that
    40  * transliterates text from one format to another.  The most common
    41  * kind of transliterator is a script, or alphabet, transliterator.
    42  * For example, a Russian to Latin transliterator changes Russian text
    43  * written in Cyrillic characters to phonetically equivalent Latin
    44  * characters.  It does not <em>translate</em> Russian to English!
    45  * Transliteration, unlike translation, operates on characters, without
    46  * reference to the meanings of words and sentences.
    47  *
    48  * <p>Although script conversion is its most common use, a
    49  * transliterator can actually perform a more general class of tasks.
    50  * In fact, <code>Transliterator</code> defines a very general API
    51  * which specifies only that a segment of the input text is replaced
    52  * by new text.  The particulars of this conversion are determined
    53  * entirely by subclasses of <code>Transliterator</code>.
    54  *
    55  * <p><b>Transliterators are stateless</b>
    56  *
    57  * <p><code>Transliterator</code> objects are <em>stateless</em>; they
    58  * retain no information between calls to
    59  * <code>transliterate()</code>.  (However, this does <em>not</em>
    60  * mean that threads may share transliterators without synchronizing
    61  * them.  Transliterators are not immutable, so they must be
    62  * synchronized when shared between threads.)  This might seem to
    63  * limit the complexity of the transliteration operation.  In
    64  * practice, subclasses perform complex transliterations by delaying
    65  * the replacement of text until it is known that no other
    66  * replacements are possible.  In other words, although the
    67  * <code>Transliterator</code> objects are stateless, the source text
    68  * itself embodies all the needed information, and delayed operation
    69  * allows arbitrary complexity.
    70  *
    71  * <p><b>Batch transliteration</b>
    72  *
    73  * <p>The simplest way to perform transliteration is all at once, on a
    74  * string of existing text.  This is referred to as <em>batch</em>
    75  * transliteration.  For example, given a string <code>input</code>
    76  * and a transliterator <code>t</code>, the call
    77  *
    78  * \htmlonly<blockquote>\endhtmlonly<code>String result = t.transliterate(input);
    79  * </code>\htmlonly</blockquote>\endhtmlonly
    80  *
    81  * will transliterate it and return the result.  Other methods allow
    82  * the client to specify a substring to be transliterated and to use
    83  * {@link Replaceable } objects instead of strings, in order to
    84  * preserve out-of-band information (such as text styles).
    85  *
    86  * <p><b>Keyboard transliteration</b>
    87  *
    88  * <p>Somewhat more involved is <em>keyboard</em>, or incremental
    89  * transliteration.  This is the transliteration of text that is
    90  * arriving from some source (typically the user's keyboard) one
    91  * character at a time, or in some other piecemeal fashion.
    92  *
    93  * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
    94  * stores the text.  As text is inserted, as much as possible is
    95  * transliterated on the fly.  This means a GUI that displays the
    96  * contents of the buffer may show text being modified as each new
    97  * character arrives.
    98  *
    99  * <p>Consider the simple <code>RuleBasedTransliterator</code>:
   100  *
   101  * \htmlonly<blockquote>\endhtmlonly<code>
   102  * th&gt;{theta}<br>
   103  * t&gt;{tau}
   104  * </code>\htmlonly</blockquote>\endhtmlonly
   105  *
   106  * When the user types 't', nothing will happen, since the
   107  * transliterator is waiting to see if the next character is 'h'.  To
   108  * remedy this, we introduce the notion of a cursor, marked by a '|'
   109  * in the output string:
   110  *
   111  * \htmlonly<blockquote>\endhtmlonly<code>
   112  * t&gt;|{tau}<br>
   113  * {tau}h&gt;{theta}
   114  * </code>\htmlonly</blockquote>\endhtmlonly
   115  *
   116  * Now when the user types 't', tau appears, and if the next character
   117  * is 'h', the tau changes to a theta.  This is accomplished by
   118  * maintaining a cursor position (independent of the insertion point,
   119  * and invisible in the GUI) across calls to
   120  * <code>transliterate()</code>.  Typically, the cursor will
   121  * be coincident with the insertion point, but in a case like the one
   122  * above, it will precede the insertion point.
   123  *
   124  * <p>Keyboard transliteration methods maintain a set of three indices
   125  * that are updated with each call to
   126  * <code>transliterate()</code>, including the cursor, start,
   127  * and limit.  Since these indices are changed by the method, they are
   128  * passed in an <code>int[]</code> array. The <code>START</code> index
   129  * marks the beginning of the substring that the transliterator will
   130  * look at.  It is advanced as text becomes committed (but it is not
   131  * the committed index; that's the <code>CURSOR</code>).  The
   132  * <code>CURSOR</code> index, described above, marks the point at
   133  * which the transliterator last stopped, either because it reached
   134  * the end, or because it required more characters to disambiguate
   135  * between possible inputs.  The <code>CURSOR</code> can also be
   136  * explicitly set by rules in a <code>RuleBasedTransliterator</code>.
   137  * Any characters before the <code>CURSOR</code> index are frozen;
   138  * future keyboard transliteration calls within this input sequence
   139  * will not change them.  New text is inserted at the
   140  * <code>LIMIT</code> index, which marks the end of the substring that
   141  * the transliterator looks at.
   142  *
   143  * <p>Because keyboard transliteration assumes that more characters
   144  * are to arrive, it is conservative in its operation.  It only
   145  * transliterates when it can do so unambiguously.  Otherwise it waits
   146  * for more characters to arrive.  When the client code knows that no
   147  * more characters are forthcoming, perhaps because the user has
   148  * performed some input termination operation, then it should call
   149  * <code>finishTransliteration()</code> to complete any
   150  * pending transliterations.
   151  *
   152  * <p><b>Inverses</b>
   153  *
   154  * <p>Pairs of transliterators may be inverses of one another.  For
   155  * example, if transliterator <b>A</b> transliterates characters by
   156  * incrementing their Unicode value (so "abc" -> "def"), and
   157  * transliterator <b>B</b> decrements character values, then <b>A</b>
   158  * is an inverse of <b>B</b> and vice versa.  If we compose <b>A</b>
   159  * with <b>B</b> in a compound transliterator, the result is the
   160  * identity transliterator, that is, a transliterator that does not
   161  * change its input text.
   162  *
   163  * The <code>Transliterator</code> method <code>getInverse()</code>
   164  * returns a transliterator's inverse, if one exists, or
   165  * <code>null</code> otherwise.  However, the result of
   166  * <code>getInverse()</code> usually will <em>not</em> be a true
   167  * mathematical inverse.  This is because true inverse transliterators
   168  * are difficult to formulate.  For example, consider two
   169  * transliterators: <b>AB</b>, which transliterates the character 'A'
   170  * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'.  It might
   171  * seem that these are exact inverses, since
   172  *
   173  * \htmlonly<blockquote>\endhtmlonly"A" x <b>AB</b> -> "B"<br>
   174  * "B" x <b>BA</b> -> "A"\htmlonly</blockquote>\endhtmlonly
   175  *
   176  * where 'x' represents transliteration.  However,
   177  *
   178  * \htmlonly<blockquote>\endhtmlonly"ABCD" x <b>AB</b> -> "BBCD"<br>
   179  * "BBCD" x <b>BA</b> -> "AACD"\htmlonly</blockquote>\endhtmlonly
   180  *
   181  * so <b>AB</b> composed with <b>BA</b> is not the
   182  * identity. Nonetheless, <b>BA</b> may be usefully considered to be
   183  * <b>AB</b>'s inverse, and it is on this basis that
   184  * <b>AB</b><code>.getInverse()</code> could legitimately return
   185  * <b>BA</b>.
   186  *
   187  * <p><b>IDs and display names</b>
   188  *
   189  * <p>A transliterator is designated by a short identifier string or
   190  * <em>ID</em>.  IDs follow the format <em>source-destination</em>,
   191  * where <em>source</em> describes the entity being replaced, and
   192  * <em>destination</em> describes the entity replacing
   193  * <em>source</em>.  The entities may be the names of scripts,
   194  * particular sequences of characters, or whatever else it is that the
   195  * transliterator converts to or from.  For example, a transliterator
   196  * from Russian to Latin might be named "Russian-Latin".  A
   197  * transliterator from keyboard escape sequences to Latin-1 characters
   198  * might be named "KeyboardEscape-Latin1".  By convention, system
   199  * entity names are in English, with the initial letters of words
   200  * capitalized; user entity names may follow any format so long as
   201  * they do not contain dashes.
   202  *
   203  * <p>In addition to programmatic IDs, transliterator objects have
   204  * display names for presentation in user interfaces, returned by
   205  * {@link #getDisplayName }.
   206  *
   207  * <p><b>Factory methods and registration</b>
   208  *
   209  * <p>In general, client code should use the factory method
   210  * {@link #createInstance } to obtain an instance of a
   211  * transliterator given its ID.  Valid IDs may be enumerated using
   212  * <code>getAvailableIDs()</code>.  Since transliterators are mutable,
   213  * multiple calls to {@link #createInstance } with the same ID will
   214  * return distinct objects.
   215  *
   216  * <p>In addition to the system transliterators registered at startup,
   217  * user transliterators may be registered by calling
   218  * <code>registerInstance()</code> at run time.  A registered instance
   219  * acts as a template; future calls to {@link #createInstance } with the ID
   220  * of the registered object return clones of that object.  Thus any
   221  * object passed to <tt>registerInstance()</tt> must implement
   222  * <tt>clone()</tt> properly.  To register a transliterator subclass
   223  * without instantiating it (until it is needed), users may call
   224  * {@link #registerFactory }.  In this case, the objects are
   225  * instantiated by invoking the zero-argument public constructor of
   226  * the class.
   227  *
   228  * <p><b>Subclassing</b>
   229  *
   230  * Subclasses must implement the abstract method
   231  * <code>handleTransliterate()</code>.  <p>Subclasses should override
   232  * the <code>transliterate()</code> method taking a
   233  * <code>Replaceable</code> and the <code>transliterate()</code>
   234  * method taking a <code>String</code> and <code>StringBuffer</code>
   235  * if the performance of these methods can be improved over the
   236  * performance obtained by the default implementations in this class.
   237  *
   238  * @author Alan Liu
   239  * @stable ICU 2.0
   240  */
   241 class U_I18N_API Transliterator : public UObject {
   243 private:
   245     /**
   246      * Programmatic name, e.g., "Latin-Arabic".
   247      */
   248     UnicodeString ID;
   250     /**
   251      * This transliterator's filter.  Any character for which
   252      * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
   253      * altered by this transliterator.  If <tt>filter</tt> is
   254      * <tt>null</tt> then no filtering is applied.
   255      */
   256     UnicodeFilter* filter;
   258     int32_t maximumContextLength;
   260  public:
   262     /**
   263      * A context value for a factory function, passed by value.  Being a
   264      * union, it holds either a 32-bit integer or a native pointer.
   265      * @stable ICU 2.4
   266      */
   267     union Token {
   268         /**
   269          * This token, interpreted as a 32-bit integer.
   270          * @stable ICU 2.4
   271          */
   272         int32_t integer;
   273         /**
   274          * This token, interpreted as a native pointer.
   275          * @stable ICU 2.4
   276          */
   277         void*   pointer;
   278     };
   280 #ifndef U_HIDE_INTERNAL_API
   281     /**
   282      * Return a token containing an integer.
   283      * @return a token containing an integer.
   284      * @internal
   285      */
   286     inline static Token integerToken(int32_t);
   288     /**
   289      * Return a token containing a pointer.
   290      * @return a token containing a pointer.
   291      * @internal
   292      */
   293     inline static Token pointerToken(void*);
   294 #endif  /* U_HIDE_INTERNAL_API */
   296     /**
   297      * A function that creates and returns a Transliterator.  When
   298      * invoked, it will be passed the ID string that is being
   299      * instantiated, together with the context pointer that was passed
   300      * in when the factory function was first registered.  Many
   301      * factory functions will ignore both parameters, however,
   302      * functions that are registered to more than one ID may use the
   303      * ID or the context parameter to parameterize the transliterator
   304      * they create.
   305      * @param ID      the string identifier for this transliterator
   306      * @param context a context pointer that will be stored and
   307      *                later passed to the factory function when an ID matching
   308      *                the registration ID is being instantiated with this factory.
   309      * @stable ICU 2.4
   310      */
   311     typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
   313 protected:
   315     /**
   316      * Default constructor.
   317      * @param ID the string identifier for this transliterator
   318      * @param adoptedFilter the filter.  Any character for which
   319      * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
   320      * altered by this transliterator.  If <tt>filter</tt> is
   321      * <tt>null</tt> then no filtering is applied.
   322      * @stable ICU 2.4
   323      */
   324     Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
   326     /**
   327      * Copy constructor.
   328      * @stable ICU 2.4
   329      */
   330     Transliterator(const Transliterator&);
   332     /**
   333      * Assignment operator.
   334      * @stable ICU 2.4
   335      */
   336     Transliterator& operator=(const Transliterator&);
   338     /**
   339      * Create a transliterator from a basic ID.  This is an ID
   340      * containing only the forward direction source, target, and
   341      * variant.
   342      * @param id a basic ID of the form S-T or S-T/V.
   343      * @param canon canonical ID to assign to the object, or
   344      * NULL to leave the ID unchanged
   345      * @return a newly created Transliterator or null if the ID is
   346      * invalid.
   347      * @stable ICU 2.4
   348      */
   349     static Transliterator* createBasicInstance(const UnicodeString& id,
   350                                                const UnicodeString* canon);
   352     friend class TransliteratorParser; // for parseID()
   353     friend class TransliteratorIDParser; // for createBasicInstance()
   354     friend class TransliteratorAlias; // for setID()
   356 public:
   358     /**
   359      * Destructor.
   360      * @stable ICU 2.0
   361      */
   362     virtual ~Transliterator();
   364     /**
   365      * Implements Cloneable.
   366      * All subclasses are encouraged to implement this method if it is
   367      * possible and reasonable to do so.  Subclasses that are to be
   368      * registered with the system using <tt>registerInstance()</tt>
   369      * are required to implement this method.  If a subclass does not
   370      * implement clone() properly and is registered with the system
   371      * using registerInstance(), then the default clone() implementation
   372      * will return null, and calls to createInstance() will fail.
   373      *
   374      * @return a copy of the object.
   375      * @see #registerInstance
   376      * @stable ICU 2.0
   377      */
   378     virtual Transliterator* clone() const;
   380     /**
   381      * Transliterates a segment of a string, with optional filtering.
   382      *
   383      * @param text the string to be transliterated
   384      * @param start the beginning index, inclusive; <code>0 <= start
   385      * <= limit</code>.
   386      * @param limit the ending index, exclusive; <code>start <= limit
   387      * <= text.length()</code>.
   388      * @return The new limit index.  The text previously occupying <code>[start,
   389      * limit)</code> has been transliterated, possibly to a string of a different
   390      * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
   391      * <em>new-limit</em> is the return value. If the input offsets are out of bounds,
   392      * the returned value is -1 and the input string remains unchanged.
   393      * @stable ICU 2.0
   394      */
   395     virtual int32_t transliterate(Replaceable& text,
   396                                   int32_t start, int32_t limit) const;
   398     /**
   399      * Transliterates an entire string in place. Convenience method.
   400      * @param text the string to be transliterated
   401      * @stable ICU 2.0
   402      */
   403     virtual void transliterate(Replaceable& text) const;
   405     /**
   406      * Transliterates the portion of the text buffer that can be
   407      * transliterated unambiguously after new text has been inserted,
   408      * typically as a result of a keyboard event.  The new text in
   409      * <code>insertion</code> will be inserted into <code>text</code>
   410      * at <code>index.limit</code>, advancing
   411      * <code>index.limit</code> by <code>insertion.length()</code>.
   412      * Then the transliterator will try to transliterate characters of
   413      * <code>text</code> between <code>index.cursor</code> and
   414      * <code>index.limit</code>.  Characters before
   415      * <code>index.cursor</code> will not be changed.
   416      *
   417      * <p>Upon return, values in <code>index</code> will be updated.
   418      * <code>index.start</code> will be advanced to the first
   419      * character that future calls to this method will read.
   420      * <code>index.cursor</code> and <code>index.limit</code> will
   421      * be adjusted to delimit the range of text that future calls to
   422      * this method may change.
   423      *
   424      * <p>Typical usage of this method begins with an initial call
   425      * with <code>index.start</code> and <code>index.limit</code>
   426      * set to indicate the portion of <code>text</code> to be
   427      * transliterated, and <code>index.cursor == index.start</code>.
   428      * Thereafter, <code>index</code> can be used without
   429      * modification in future calls, provided that all changes to
   430      * <code>text</code> are made via this method.
   431      *
   432      * <p>This method assumes that future calls may be made that will
   433      * insert new text into the buffer.  As a result, it only performs
   434      * unambiguous transliterations.  After the last call to this
   435      * method, there may be untransliterated text that is waiting for
   436      * more input to resolve an ambiguity.  In order to perform these
   437      * pending transliterations, clients should call {@link
   438      * #finishTransliteration } after the last call to this
   439      * method has been made.
   440      *
   441      * @param text the buffer holding transliterated and untransliterated text
   442      * @param index an array of three integers.
   443      *
   444      * <ul><li><code>index.start</code>: the beginning index,
   445      * inclusive; <code>0 <= index.start <= index.limit</code>.
   446      *
   447      * <li><code>index.limit</code>: the ending index, exclusive;
   448      * <code>index.start <= index.limit <= text.length()</code>.
   449      * <code>insertion</code> is inserted at
   450      * <code>index.limit</code>.
   451      *
   452      * <li><code>index.cursor</code>: the next character to be
   453      * considered for transliteration; <code>index.start <=
   454      * index.cursor <= index.limit</code>.  Characters before
   455      * <code>index.cursor</code> will not be changed by future calls
   456      * to this method.</ul>
   457      *
   458      * @param insertion text to be inserted and possibly
   459      * transliterated into the translation buffer at
   460      * <code>index.limit</code>.  If <code>null</code> then no text
   461      * is inserted.
   462      * @param status    Output param to be filled in with a success or an error.
   463      * @see #handleTransliterate
   464      * @exception IllegalArgumentException if <code>index</code>
   465      * is invalid
   466      * @see UTransPosition
   467      * @stable ICU 2.0
   468      */
   469     virtual void transliterate(Replaceable& text, UTransPosition& index,
   470                                const UnicodeString& insertion,
   471                                UErrorCode& status) const;
   473     /**
   474      * Transliterates the portion of the text buffer that can be
   475      * transliterated unambiguously after a new character has been
   476      * inserted, typically as a result of a keyboard event.  This is a
   477      * convenience method.
   478      * @param text the buffer holding transliterated and
   479      * untransliterated text
   480      * @param index an array of three integers.
   481      * @param insertion text to be inserted and possibly
   482      * transliterated into the translation buffer at
   483      * <code>index.limit</code>.
   484      * @param status    Output param to be filled in with a success or an error.
   485      * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
   486      * @stable ICU 2.0
   487      */
   488     virtual void transliterate(Replaceable& text, UTransPosition& index,
   489                                UChar32 insertion,
   490                                UErrorCode& status) const;
   492     /**
   493      * Transliterates the portion of the text buffer that can be
   494      * transliterated unambiguously.  This is a convenience method; see
   495      * {@link
   496      * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }
   497      * for details.
   498      * @param text the buffer holding transliterated and
   499      * untransliterated text
   500      * @param index an array of three integers.  See {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
   501      * @param status    Output param to be filled in with a success or an error.
   502      * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
   503      * @stable ICU 2.0
   504      */
   505     virtual void transliterate(Replaceable& text, UTransPosition& index,
   506                                UErrorCode& status) const;
   508     /**
   509      * Finishes any pending transliterations that were waiting for
   510      * more characters.  Clients should call this method as the last
   511      * call after a sequence of one or more calls to
   512      * <code>transliterate()</code>.
   513      * @param text the buffer holding transliterated and
   514      * untransliterated text.
   515      * @param index the array of indices previously passed to {@link
   516      * #transliterate }
   517      * @stable ICU 2.0
   518      */
   519     virtual void finishTransliteration(Replaceable& text,
   520                                        UTransPosition& index) const;
   522 private:
   524     /**
   525      * This internal method does incremental transliteration.  If the
   526      * 'insertion' is non-null then we append it to 'text' before
   527      * proceeding.  This method calls through to the pure virtual
   528      * framework method handleTransliterate() to do the actual
   529      * work.
   530      * @param text the buffer holding transliterated and
   531      * untransliterated text
   532      * @param index an array of three integers.  See {@link
   533      * #transliterate(Replaceable, int[], String)}.
   534      * @param insertion text to be inserted and possibly
   535      * transliterated into the translation buffer at
   536      * <code>index.limit</code>.
   537      * @param status    Output param to be filled in with a success or an error.
   538      */
   539     void _transliterate(Replaceable& text,
   540                         UTransPosition& index,
   541                         const UnicodeString* insertion,
   542                         UErrorCode &status) const;
   544 protected:
   546     /**
   547      * Abstract method that concrete subclasses define to implement
   548      * their transliteration algorithm.  This method handles both
   549      * incremental and non-incremental transliteration.  Let
   550      * <code>originalStart</code> refer to the value of
   551      * <code>pos.start</code> upon entry.
   552      *
   553      * <ul>
   554      *  <li>If <code>incremental</code> is false, then this method
   555      *  should transliterate all characters between
   556      *  <code>pos.start</code> and <code>pos.limit</code>. Upon return
   557      *  <code>pos.start</code> must == <code> pos.limit</code>.</li>
   558      *
   559      *  <li>If <code>incremental</code> is true, then this method
   560      *  should transliterate all characters between
   561      *  <code>pos.start</code> and <code>pos.limit</code> that can be
   562      *  unambiguously transliterated, regardless of future insertions
   563      *  of text at <code>pos.limit</code>.  Upon return,
   564      *  <code>pos.start</code> should be in the range
   565      *  [<code>originalStart</code>, <code>pos.limit</code>).
   566      *  <code>pos.start</code> should be positioned such that
   567      *  characters [<code>originalStart</code>, <code>
   568      *  pos.start</code>) will not be changed in the future by this
   569      *  transliterator and characters [<code>pos.start</code>,
   570      *  <code>pos.limit</code>) are unchanged.</li>
   571      * </ul>
   572      *
   573      * <p>Implementations of this method should also obey the
   574      * following invariants:</p>
   575      *
   576      * <ul>
   577      *  <li> <code>pos.limit</code> and <code>pos.contextLimit</code>
   578      *  should be updated to reflect changes in length of the text
   579      *  between <code>pos.start</code> and <code>pos.limit</code>. The
   580      *  difference <code> pos.contextLimit - pos.limit</code> should
   581      *  not change.</li>
   582      *
   583      *  <li><code>pos.contextStart</code> should not change.</li>
   584      *
   585      *  <li>Upon return, neither <code>pos.start</code> nor
   586      *  <code>pos.limit</code> should be less than
   587      *  <code>originalStart</code>.</li>
   588      *
   589      *  <li>Text before <code>originalStart</code> and text after
   590      *  <code>pos.limit</code> should not change.</li>
   591      *
   592      *  <li>Text before <code>pos.contextStart</code> and text after
   593      *  <code> pos.contextLimit</code> should be ignored.</li>
   594      * </ul>
   595      *
   596      * <p>Subclasses may safely assume that all characters in
   597      * [<code>pos.start</code>, <code>pos.limit</code>) are filtered.
   598      * In other words, the filter has already been applied by the time
   599      * this method is called.  See
   600      * <code>filteredTransliterate()</code>.
   601      *
   602      * <p>This method is <b>not</b> for public consumption.  Calling
   603      * this method directly will transliterate
   604      * [<code>pos.start</code>, <code>pos.limit</code>) without
   605      * applying the filter. End user code should call <code>
   606      * transliterate()</code> instead of this method. Subclass code
   607      * and wrapping transliterators should call
   608      * <code>filteredTransliterate()</code> instead of this method.<p>
   609      *
   610      * @param text the buffer holding transliterated and
   611      * untransliterated text
   612      *
   613      * @param pos the indices indicating the start, limit, context
   614      * start, and context limit of the text.
   615      *
   616      * @param incremental if true, assume more text may be inserted at
   617      * <code>pos.limit</code> and act accordingly.  Otherwise,
   618      * transliterate all text between <code>pos.start</code> and
   619      * <code>pos.limit</code> and move <code>pos.start</code> up to
   620      * <code>pos.limit</code>.
   621      *
   622      * @see #transliterate
   623      * @stable ICU 2.4
   624      */
   625     virtual void handleTransliterate(Replaceable& text,
   626                                      UTransPosition& pos,
   627                                      UBool incremental) const = 0;
   629 public:
   630     /**
   631      * Transliterate a substring of text, as specified by index, taking filters
   632      * into account.  This method is for subclasses that need to delegate to
   633      * another transliterator, such as CompoundTransliterator.
   634      * @param text the text to be transliterated
   635      * @param index the position indices
   636      * @param incremental if TRUE, then assume more characters may be inserted
   637      * at index.limit, and postpone processing to accommodate future incoming
   638      * characters
   639      * @stable ICU 2.4
   640      */
   641     virtual void filteredTransliterate(Replaceable& text,
   642                                        UTransPosition& index,
   643                                        UBool incremental) const;
   645 private:
   647     /**
   648      * Top-level transliteration method, handling filtering, incremental and
   649      * non-incremental transliteration, and rollback.  All transliteration
   650      * public API methods eventually call this method with a rollback argument
   651      * of TRUE.  Other entities may call this method but rollback should be
   652      * FALSE.
   653      *
   654      * <p>If this transliterator has a filter, break up the input text into runs
   655      * of unfiltered characters.  Pass each run to
   656      * subclass.handleTransliterate().
   657      *
   658      * <p>In incremental mode, if rollback is TRUE, perform a special
   659      * incremental procedure in which several passes are made over the input
   660      * text, adding one character at a time, and committing successful
   661      * transliterations as they occur.  Unsuccessful transliterations are rolled
   662      * back and retried with additional characters to give correct results.
   663      *
   664      * @param text the text to be transliterated
   665      * @param index the position indices
   666      * @param incremental if TRUE, then assume more characters may be inserted
   667      * at index.limit, and postpone processing to accommodate future incoming
   668      * characters
   669      * @param rollback if TRUE and if incremental is TRUE, then perform special
   670      * incremental processing, as described above, and undo partial
   671      * transliterations where necessary.  If incremental is FALSE then this
   672      * parameter is ignored.
   673      */
   674     virtual void filteredTransliterate(Replaceable& text,
   675                                        UTransPosition& index,
   676                                        UBool incremental,
   677                                        UBool rollback) const;
   679 public:
   681     /**
   682      * Returns the length of the longest context required by this transliterator.
   683      * This is <em>preceding</em> context.  The default implementation supplied
   684      * by <code>Transliterator</code> returns zero; subclasses
   685      * that use preceding context should override this method to return the
   686      * correct value.  For example, if a transliterator translates "ddd" (where
   687      * d is any digit) to "555" when preceded by "(ddd)", then the preceding
   688      * context length is 5, the length of "(ddd)".
   689      *
   690      * @return The maximum number of preceding context characters this
   691      * transliterator needs to examine
   692      * @stable ICU 2.0
   693      */
   694     int32_t getMaximumContextLength(void) const;
   696 protected:
   698     /**
   699      * Method for subclasses to use to set the maximum context length.
   700      * @param maxContextLength the new value to be set.
   701      * @see #getMaximumContextLength
   702      * @stable ICU 2.4
   703      */
   704     void setMaximumContextLength(int32_t maxContextLength);
   706 public:
   708     /**
   709      * Returns a programmatic identifier for this transliterator.
   710      * If this identifier is passed to <code>createInstance()</code>, it
   711      * will return this object, if it has been registered.
   712      * @return a programmatic identifier for this transliterator.
   713      * @see #registerInstance
   714      * @see #registerFactory
   715      * @see #getAvailableIDs
   716      * @stable ICU 2.0
   717      */
   718     virtual const UnicodeString& getID(void) const;
   720     /**
   721      * Returns a name for this transliterator that is appropriate for
   722      * display to the user in the default locale.  See {@link
   723      * #getDisplayName } for details.
   724      * @param ID     the string identifier for this transliterator
   725      * @param result Output param to receive the display name
   726      * @return       A reference to 'result'.
   727      * @stable ICU 2.0
   728      */
   729     static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
   730                                          UnicodeString& result);
   732     /**
   733      * Returns a name for this transliterator that is appropriate for
   734      * display to the user in the given locale.  This name is taken
   735      * from the locale resource data in the standard manner of the
   736      * <code>java.text</code> package.
   737      *
   738      * <p>If no localized names exist in the system resource bundles,
   739      * a name is synthesized using a localized
   740      * <code>MessageFormat</code> pattern from the resource data.  The
   741      * arguments to this pattern are an integer followed by one or two
   742      * strings.  The integer is the number of strings, either 1 or 2.
   743      * The strings are formed by splitting the ID for this
   744      * transliterator at the first '-'.  If there is no '-', then the
   745      * entire ID forms the only string.
   746      * @param ID       the string identifier for this transliterator
   747      * @param inLocale the Locale in which the display name should be
   748      *                 localized.
   749      * @param result   Output param to receive the display name
   750      * @return         A reference to 'result'.
   751      * @stable ICU 2.0
   752      */
   753     static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
   754                                          const Locale& inLocale,
   755                                          UnicodeString& result);
   757     /**
   758      * Returns the filter used by this transliterator, or <tt>NULL</tt>
   759      * if this transliterator uses no filter.
   760      * @return the filter used by this transliterator, or <tt>NULL</tt>
   761      *         if this transliterator uses no filter.
   762      * @stable ICU 2.0
   763      */
   764     const UnicodeFilter* getFilter(void) const;
   766     /**
   767      * Returns the filter used by this transliterator, or <tt>NULL</tt> if this
   768      * transliterator uses no filter.  The caller must eventually delete the
   769      * result.  After this call, this transliterator's filter is set to
   770      * <tt>NULL</tt>.
   771      * @return the filter used by this transliterator, or <tt>NULL</tt> if this
   772      *         transliterator uses no filter.
   773      * @stable ICU 2.4
   774      */
   775     UnicodeFilter* orphanFilter(void);
   777     /**
   778      * Changes the filter used by this transliterator.  If the filter
   779      * is set to <tt>null</tt> then no filtering will occur.
   780      *
   781      * <p>Callers must take care if a transliterator is in use by
   782      * multiple threads.  The filter should not be changed by one
   783      * thread while another thread may be transliterating.
   784      * @param adoptedFilter the new filter to be adopted.
   785      * @stable ICU 2.0
   786      */
   787     void adoptFilter(UnicodeFilter* adoptedFilter);
   789     /**
   790      * Returns this transliterator's inverse.  See the class
   791      * documentation for details.  This implementation simply inverts
   792      * the two entities in the ID and attempts to retrieve the
   793      * resulting transliterator.  That is, if <code>getID()</code>
   794      * returns "A-B", then this method will return the result of
   795      * <code>createInstance("B-A")</code>, or <code>null</code> if that
   796      * call fails.
   797      *
   798      * <p>Subclasses with knowledge of their inverse may wish to
   799      * override this method.
   800      *
   801      * @param status Output param to be filled in with a success or an error.
   802      * @return a transliterator that is an inverse, not necessarily
   803      * exact, of this transliterator, or <code>null</code> if no such
   804      * transliterator is registered.
   805      * @see #registerInstance
   806      * @stable ICU 2.0
   807      */
   808     Transliterator* createInverse(UErrorCode& status) const;
   810     /**
   811      * Returns a <code>Transliterator</code> object given its ID.
   812      * The ID must be either a system transliterator ID or an ID registered
   813      * using <code>registerInstance()</code>.
   814      *
   815      * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
   816      * @param dir        either FORWARD or REVERSE.
   817      * @param parseError Struct to receive information on position
   818      *                   of error if an error is encountered
   819      * @param status     Output param to be filled in with a success or an error.
   820      * @return A <code>Transliterator</code> object with the given ID
   821      * @see #registerInstance
   822      * @see #getAvailableIDs
   823      * @see #getID
   824      * @stable ICU 2.0
   825      */
   826     static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
   827                                           UTransDirection dir,
   828                                           UParseError& parseError,
   829                                           UErrorCode& status);
   831     /**
   832      * Returns a <code>Transliterator</code> object given its ID.
   834      * The ID must be either a system transliterator ID or an ID registered
   834      * using <code>registerInstance()</code>.
   835      * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
   836      * @param dir        either FORWARD or REVERSE.
   837      * @param status     Output param to be filled in with a success or an error.
   838      * @return A <code>Transliterator</code> object with the given ID
   839      * @stable ICU 2.0
   840      */
   841     static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
   842                                           UTransDirection dir,
   843                                           UErrorCode& status);
   845     /**
   846      * Returns a <code>Transliterator</code> object constructed from
   847      * the given rule string.  This will be a RuleBasedTransliterator,
   848      * if the rule string contains only rules, or a
   849      * CompoundTransliterator, if it contains ID blocks, or a
   850      * NullTransliterator, if it contains ID blocks which parse as
   851      * empty for the given direction.
   852      * @param ID            the id for the transliterator.
   853      * @param rules         rules, separated by ';'
   854      * @param dir           either FORWARD or REVERSE.
   855      * @param parseError    Struct to receive information on position
   856      *                      of error if an error is encountered
   857      * @param status        Output param set to success/failure code.
   858      * @stable ICU 2.0
   859      */
   860     static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
   861                                            const UnicodeString& rules,
   862                                            UTransDirection dir,
   863                                            UParseError& parseError,
   864                                            UErrorCode& status);
   866     /**
   867      * Create a rule string that can be passed to createFromRules()
   868      * to recreate this transliterator.
   869      * @param result the string to receive the rules.  Previous
   870      * contents will be deleted.
   871      * @param escapeUnprintable if TRUE then convert unprintable
   872      * characters to their hex escape representations, \\uxxxx or
   873      * \\Uxxxxxxxx.  Unprintable characters are those other than
   874      * U+000A, U+0020..U+007E.
   875      * @stable ICU 2.0
   876      */
   877     virtual UnicodeString& toRules(UnicodeString& result,
   878                                    UBool escapeUnprintable) const;
   880     /**
   881      * Return the number of elements that make up this transliterator.
   882      * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
   883      * were created, the return value of this method would be 3.
   884      *
   885      * <p>If this transliterator is not composed of other
   886      * transliterators, then this method returns 1.
   887      * @return the number of transliterators that compose this
   888      * transliterator, or 1 if this transliterator is not composed of
   889      * multiple transliterators
   890      * @stable ICU 3.0
   891      */
   892     int32_t countElements() const;
   894     /**
   895      * Return an element that makes up this transliterator.  For
   896      * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
   897      * were created, the return value of this method would be one
   898      * of the three transliterator objects that make up that
   899      * transliterator: [NFD, Jamo-Latin, Latin-Greek].
   900      *
   901      * <p>If this transliterator is not composed of other
   902      * transliterators, then this method will return a reference to
   903      * this transliterator when given the index 0.
   904      * @param index a value from 0..countElements()-1 indicating the
   905      * transliterator to return
   906      * @param ec input-output error code
   907      * @return one of the transliterators that makes up this
   908      * transliterator, if this transliterator is made up of multiple
   909      * transliterators, otherwise a reference to this object if given
   910      * an index of 0
   911      * @stable ICU 3.0
   912      */
   913     const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
   915     /**
   916      * Returns the set of all characters that may be modified in the
   917      * input text by this Transliterator.  This incorporates this
   918      * object's current filter; if the filter is changed, the return
   919      * value of this function will change.  The default implementation
   920      * returns an empty set.  Some subclasses may override {@link
   921      * #handleGetSourceSet } to return a more precise result.  The
   922      * return result is approximate in any case and is intended for
   923      * use by tests, tools, or utilities.
   924      * @param result receives result set; previous contents lost
   925      * @return a reference to result
   926      * @see #getTargetSet
   927      * @see #handleGetSourceSet
   928      * @stable ICU 2.4
   929      */
   930     UnicodeSet& getSourceSet(UnicodeSet& result) const;
   932     /**
   933      * Framework method that returns the set of all characters that
   934      * may be modified in the input text by this Transliterator,
   935      * ignoring the effect of this object's filter.  The base class
   936      * implementation returns the empty set.  Subclasses that wish to
   937      * implement this should override this method.
   938      * @return the set of characters that this transliterator may
   939      * modify.  The set may be modified, so subclasses should return a
   940      * newly-created object.
   941      * @param result receives result set; previous contents lost
   942      * @see #getSourceSet
   943      * @see #getTargetSet
   944      * @stable ICU 2.4
   945      */
   946     virtual void handleGetSourceSet(UnicodeSet& result) const;
   948     /**
   949      * Returns the set of all characters that may be generated as
   950      * replacement text by this transliterator.  The default
   951      * implementation returns the empty set.  Some subclasses may
   952      * override this method to return a more precise result.  The
   953      * return result is approximate in any case and is intended for
   954      * use by tests, tools, or utilities requiring such
   955      * meta-information.
   956      * @param result receives result set; previous contents lost
   957      * @return a reference to result
   958      * @see #getSourceSet
   959      * @stable ICU 2.4
   960      */
   961     virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
   963 public:
   965     /**
   966      * Registers a factory function that creates transliterators of
   967      * a given ID.
   968      * @param id the ID being registered
   969      * @param factory a function pointer that will be copied and
   970      * called later when the given ID is passed to createInstance()
   971      * @param context a context pointer that will be stored and
   972      * later passed to the factory function when an ID matching
   973      * the registration ID is being instantiated with this factory.
   974      * @stable ICU 2.0
   975      */
   976     static void U_EXPORT2 registerFactory(const UnicodeString& id,
   977                                 Factory factory,
   978                                 Token context);
   980     /**
   981      * Registers an instance <tt>obj</tt> of a subclass of
   982      * <code>Transliterator</code> with the system.  When
   983      * <tt>createInstance()</tt> is called with an ID string that is
   984      * equal to <tt>obj->getID()</tt>, then <tt>obj->clone()</tt> is
   985      * returned.
   986      *
   987      * After this call the Transliterator class owns the adoptedObj
   988      * and will delete it.
   989      *
   990      * @param adoptedObj an instance of subclass of
   991      * <code>Transliterator</code> that defines <tt>clone()</tt>
   992      * @see #createInstance
   993      * @see #registerFactory
   994      * @see #unregister
   995      * @stable ICU 2.0
   996      */
   997     static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
   999     /**
  1000      * Registers an ID string as an alias of another ID string.
  1001      * That is, after calling this function, <tt>createInstance(aliasID)</tt>
  1002      * will return the same thing as <tt>createInstance(realID)</tt>.
  1003      * This is generally used to create shorter, more mnemonic aliases
  1004      * for long compound IDs.
  1006      * @param aliasID The new ID being registered.
  1007      * @param realID The ID that the new ID is to be an alias for.
  1008      * This can be a compound ID and can include filters and should
  1009      * refer to transliterators that have already been registered with
  1010      * the framework, although this isn't checked.
  1011      * @stable ICU 3.6
  1012      */
  1013      static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
  1014                                          const UnicodeString& realID);
  1016 protected:
  1018 #ifndef U_HIDE_INTERNAL_API
  1019     /**
  1020      * @internal
  1021      * @param id the ID being registered
  1022      * @param factory a function pointer that will be copied and
  1023      * called later when the given ID is passed to createInstance()
  1024      * @param context a context pointer that will be stored and
  1025      * later passed to the factory function when an ID matching
  1026      * the registration ID is being instantiated with this factory.
  1027      */
  1028     static void _registerFactory(const UnicodeString& id,
  1029                                  Factory factory,
  1030                                  Token context);
  1032     /**
  1033      * @internal
  1034      */
  1035     static void _registerInstance(Transliterator* adoptedObj);
  1037     /**
  1038      * @internal
  1039      */
  1040     static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
  1042     /**
  1043      * Register two targets as being inverses of one another.  For
  1044      * example, calling registerSpecialInverse("NFC", "NFD", true) causes
  1045      * Transliterator to form the following inverse relationships:
  1047      * <pre>NFC => NFD
  1048      * Any-NFC => Any-NFD
  1049      * NFD => NFC
  1050      * Any-NFD => Any-NFC</pre>
  1052      * (Without the special inverse registration, the inverse of NFC
  1053      * would be NFC-Any.)  Note that NFD is shorthand for Any-NFD, but
  1054      * that the presence or absence of "Any-" is preserved.
  1056      * <p>The relationship is symmetrical; registering (a, b) is
  1057      * equivalent to registering (b, a).
  1059      * <p>The relevant IDs must still be registered separately as
  1060      * factories or classes.
  1062      * <p>Only the targets are specified.  Special inverses always
  1063      * have the form Any-Target1 <=> Any-Target2.  The target should
  1064      * have canonical casing (the casing desired to be produced when
  1065      * an inverse is formed) and should contain no whitespace or other
  1066      * extraneous characters.
  1068      * @param target the target against which to register the inverse
  1069      * @param inverseTarget the inverse of target, that is
  1070      * Any-target.getInverse() => Any-inverseTarget
  1071      * @param bidirectional if true, register the reverse relation
  1072      * as well, that is, Any-inverseTarget.getInverse() => Any-target
  1073      * @internal
  1074      */
  1075     static void _registerSpecialInverse(const UnicodeString& target,
  1076                                         const UnicodeString& inverseTarget,
  1077                                         UBool bidirectional);
  1078 #endif  /* U_HIDE_INTERNAL_API */
  1080 public:
  1082     /**
  1083      * Unregisters a transliterator or class.  This may be either
  1084      * a system transliterator or a user transliterator or class.
  1085      * Any attempt to construct an unregistered transliterator based
  1086      * on its ID will fail.
  1088      * @param ID the ID of the transliterator or class
  1089      * @return the <code>Object</code> that was registered with
  1090      * <code>ID</code>, or <code>null</code> if none was
  1091      * @see #registerInstance
  1092      * @see #registerFactory
  1093      * @stable ICU 2.0
  1094      */
  1095     static void U_EXPORT2 unregister(const UnicodeString& ID);
  1097 public:
  1099     /**
  1100      * Return a StringEnumeration over the IDs available at the time of the
  1101      * call, including user-registered IDs.
  1102      * @param ec input-output error code
  1103      * @return a newly-created StringEnumeration over the transliterators
  1104      * available at the time of the call. The caller should delete this object
  1105      * when done using it.
  1106      * @stable ICU 3.0
  1107      */
  1108     static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);
  1110     /**
  1111      * Return the number of registered source specifiers.
  1112      * @return the number of registered source specifiers.
  1113      * @stable ICU 2.0
  1114      */
  1115     static int32_t U_EXPORT2 countAvailableSources(void);
  1117     /**
  1118      * Return a registered source specifier.
  1119      * @param index which specifier to return, from 0 to n-1, where
  1120      * n = countAvailableSources()
  1121      * @param result fill-in parameter to receive the source specifier.
  1122      * If index is out of range, result will be empty.
  1123      * @return reference to result
  1124      * @stable ICU 2.0
  1125      */
  1126     static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
  1127                                              UnicodeString& result);
  1129     /**
  1130      * Return the number of registered target specifiers for a given
  1131      * source specifier.
  1132      * @param source the given source specifier.
  1133      * @return the number of registered target specifiers for a given
  1134      *         source specifier.
  1135      * @stable ICU 2.0
  1136      */
  1137     static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
  1139     /**
  1140      * Return a registered target specifier for a given source.
  1141      * @param index which specifier to return, from 0 to n-1, where
  1142      * n = countAvailableTargets(source)
  1143      * @param source the source specifier
  1144      * @param result fill-in parameter to receive the target specifier.
  1145      * If source is invalid or if index is out of range, result will
  1146      * be empty.
  1147      * @return reference to result
  1148      * @stable ICU 2.0
  1149      */
  1150     static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
  1151                                              const UnicodeString& source,
  1152                                              UnicodeString& result);
  1154     /**
  1155      * Return the number of registered variant specifiers for a given
  1156      * source-target pair.
  1157      * @param source    the source specifiers.
  1158      * @param target    the target specifiers.
  1159      * @stable ICU 2.0
  1160      */
  1161     static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
  1162                                           const UnicodeString& target);
  1164     /**
  1165      * Return a registered variant specifier for a given source-target
  1166      * pair.
  1167      * @param index which specifier to return, from 0 to n-1, where
  1168      * n = countAvailableVariants(source, target)
  1169      * @param source the source specifier
  1170      * @param target the target specifier
  1171      * @param result fill-in parameter to receive the variant
  1172      * specifier.  If source is invalid or if target is invalid or if
  1173      * index is out of range, result will be empty.
  1174      * @return reference to result
  1175      * @stable ICU 2.0
  1176      */
  1177     static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
  1178                                               const UnicodeString& source,
  1179                                               const UnicodeString& target,
  1180                                               UnicodeString& result);
  1182 protected:
  1184 #ifndef U_HIDE_INTERNAL_API
  1185     /**
  1186      * Non-mutexed internal method
  1187      * @internal
  1188      */
  1189     static int32_t _countAvailableSources(void);
  1191     /**
  1192      * Non-mutexed internal method
  1193      * @internal
  1194      */
  1195     static UnicodeString& _getAvailableSource(int32_t index,
  1196                                               UnicodeString& result);
  1198     /**
  1199      * Non-mutexed internal method
  1200      * @internal
  1201      */
  1202     static int32_t _countAvailableTargets(const UnicodeString& source);
  1204     /**
  1205      * Non-mutexed internal method
  1206      * @internal
  1207      */
  1208     static UnicodeString& _getAvailableTarget(int32_t index,
  1209                                               const UnicodeString& source,
  1210                                               UnicodeString& result);
  1212     /**
  1213      * Non-mutexed internal method
  1214      * @internal
  1215      */
  1216     static int32_t _countAvailableVariants(const UnicodeString& source,
  1217                                            const UnicodeString& target);
  1219     /**
  1220      * Non-mutexed internal method
  1221      * @internal
  1222      */
  1223     static UnicodeString& _getAvailableVariant(int32_t index,
  1224                                                const UnicodeString& source,
  1225                                                const UnicodeString& target,
  1226                                                UnicodeString& result);
  1227 #endif  /* U_HIDE_INTERNAL_API */
  1229 protected:
  1231     /**
  1232      * Set the ID of this transliterator.  Subclasses shouldn't do
  1233      * this, unless the underlying script behavior has changed.
  1234      * @param id the new id to be set.
  1235      * @stable ICU 2.4
  1236      */
  1237     void setID(const UnicodeString& id);
  1239 public:
  1241     /**
  1242      * Return the class ID for this class.  This is useful only for
  1243      * comparing to a return value from getDynamicClassID().
  1244      * Note that Transliterator is an abstract base class, and therefore
  1245      * no fully constructed object will have a dynamic
  1246      * UClassID that equals the UClassID returned from
  1247      * Transliterator::getStaticClassID().
  1248      * @return       The class ID for class Transliterator.
  1249      * @stable ICU 2.0
  1250      */
  1251     static UClassID U_EXPORT2 getStaticClassID(void);
  1253     /**
  1254      * Returns a unique class ID <b>polymorphically</b>.  This method
  1255      * is to implement a simple version of RTTI, since not all C++
  1256      * compilers support genuine RTTI.  Polymorphic operator==() and
  1257      * clone() methods call this method.
  1259      * <p>Concrete subclasses of Transliterator must use the
  1260      *    UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from
  1261      *    uobject.h to provide the RTTI functions.
  1263      * @return The class ID for this object. All objects of a given
  1264      * class have the same class ID.  Objects of other classes have
  1265      * different class IDs.
  1266      * @stable ICU 2.0
  1267      */
  1268     virtual UClassID getDynamicClassID(void) const = 0;
  1270 private:
  1271     static UBool initializeRegistry(UErrorCode &status);
  1273 public:
  1274 #ifndef U_HIDE_OBSOLETE_API
  1275     /**
  1276      * Return the number of IDs currently registered with the system.
  1277      * To retrieve the actual IDs, call getAvailableID(i) with
  1278      * i from 0 to countAvailableIDs() - 1.
  1279      * @return the number of IDs currently registered with the system.
  1280      * @obsolete ICU 3.4 use getAvailableIDs() instead
  1281      */
  1282     static int32_t U_EXPORT2 countAvailableIDs(void);
  1284     /**
  1285      * Return the index-th available ID.  index must be between 0
  1286      * and countAvailableIDs() - 1, inclusive.  If index is out of
  1287      * range, the result of getAvailableID(0) is returned.
  1288      * @param index the given ID index.
  1289      * @return      the index-th available ID.  index must be between 0
  1290      *              and countAvailableIDs() - 1, inclusive.  If index is out of
  1291      *              range, the result of getAvailableID(0) is returned.
  1292      * @obsolete ICU 3.4 use getAvailableIDs() instead; this function
  1293      * is not thread safe, since it returns a reference to storage that
  1294      * may become invalid if another thread calls unregister
  1295      */
  1296     static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
  1297 #endif  /* U_HIDE_OBSOLETE_API */
  1298 };
  1300 inline int32_t Transliterator::getMaximumContextLength(void) const {
           // Accessor: hands back the maximumContextLength member unchanged.
  1301     return maximumContextLength;
  1302 }
  1304 inline void Transliterator::setID(const UnicodeString& id) {
           // Replace this object's ID with a copy of the caller's string.
  1305     ID = id;
  1306     // NUL-terminate the ID string, which is a non-aliased copy.
           // Append a NUL and then drop the logical length by one again;
           // presumably this leaves the terminator in the buffer just past
           // the end of the string -- confirm against UnicodeString semantics.
  1307     ID.append((UChar)0);
  1308     ID.truncate(ID.length()-1);
  1309 }
  1311 #ifndef U_HIDE_INTERNAL_API
  1312 inline Transliterator::Token Transliterator::integerToken(int32_t i) {
           // Build a Token whose integer member holds i and return it by value.
  1313     Token t;
  1314     t.integer = i;
  1315     return t;
  1316 }
  1318 inline Transliterator::Token Transliterator::pointerToken(void* p) {
           // Build a Token whose pointer member holds p and return it by value.
  1319     Token t;
  1320     t.pointer = p;
  1321     return t;
  1322 }
  1323 #endif  /* U_HIDE_INTERNAL_API */
  1325 U_NAMESPACE_END
  1327 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
  1329 #endif

mercurial