1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/unicode/translit.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1329 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 1999-2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* Date Name Description 1.10 +* 11/17/99 aliu Creation. 1.11 +********************************************************************** 1.12 +*/ 1.13 +#ifndef TRANSLIT_H 1.14 +#define TRANSLIT_H 1.15 + 1.16 +#include "unicode/utypes.h" 1.17 + 1.18 +/** 1.19 + * \file 1.20 + * \brief C++ API: Transforms text from one format to another. 1.21 + */ 1.22 + 1.23 +#if !UCONFIG_NO_TRANSLITERATION 1.24 + 1.25 +#include "unicode/uobject.h" 1.26 +#include "unicode/unistr.h" 1.27 +#include "unicode/parseerr.h" 1.28 +#include "unicode/utrans.h" // UTransPosition, UTransDirection 1.29 +#include "unicode/strenum.h" 1.30 + 1.31 +U_NAMESPACE_BEGIN 1.32 + 1.33 +class UnicodeFilter; 1.34 +class UnicodeSet; 1.35 +class CompoundTransliterator; 1.36 +class TransliteratorParser; 1.37 +class NormalizationTransliterator; 1.38 +class TransliteratorIDParser; 1.39 + 1.40 +/** 1.41 + * 1.42 + * <code>Transliterator</code> is an abstract class that 1.43 + * transliterates text from one format to another. The most common 1.44 + * kind of transliterator is a script, or alphabet, transliterator. 1.45 + * For example, a Russian to Latin transliterator changes Russian text 1.46 + * written in Cyrillic characters to phonetically equivalent Latin 1.47 + * characters. It does not <em>translate</em> Russian to English! 1.48 + * Transliteration, unlike translation, operates on characters, without 1.49 + * reference to the meanings of words and sentences.
1.50 + * 1.51 + * <p>Although script conversion is its most common use, a 1.52 + * transliterator can actually perform a more general class of tasks. 1.53 + * In fact, <code>Transliterator</code> defines a very general API 1.54 + * which specifies only that a segment of the input text is replaced 1.55 + * by new text. The particulars of this conversion are determined 1.56 + * entirely by subclasses of <code>Transliterator</code>. 1.57 + * 1.58 + * <p><b>Transliterators are stateless</b> 1.59 + * 1.60 + * <p><code>Transliterator</code> objects are <em>stateless</em>; they 1.61 + * retain no information between calls to 1.62 + * <code>transliterate()</code>. (However, this does <em>not</em> 1.63 + * mean that threads may share transliterators without synchronizing 1.64 + * them. Transliterators are not immutable, so they must be 1.65 + * synchronized when shared between threads.) This might seem to 1.66 + * limit the complexity of the transliteration operation. In 1.67 + * practice, subclasses perform complex transliterations by delaying 1.68 + * the replacement of text until it is known that no other 1.69 + * replacements are possible. In other words, although the 1.70 + * <code>Transliterator</code> objects are stateless, the source text 1.71 + * itself embodies all the needed information, and delayed operation 1.72 + * allows arbitrary complexity. 1.73 + * 1.74 + * <p><b>Batch transliteration</b> 1.75 + * 1.76 + * <p>The simplest way to perform transliteration is all at once, on a 1.77 + * string of existing text. This is referred to as <em>batch</em> 1.78 + * transliteration. For example, given a string <code>input</code> 1.79 + * and a transliterator <code>t</code>, the call 1.80 + * 1.81 + * \htmlonly<blockquote>\endhtmlonly<code>String result = t.transliterate(input); 1.82 + * </code>\htmlonly</blockquote>\endhtmlonly 1.83 + * 1.84 + * will transliterate it and return the result. 
Other methods allow 1.85 + * the client to specify a substring to be transliterated and to use 1.86 + * {@link Replaceable } objects instead of strings, in order to 1.87 + * preserve out-of-band information (such as text styles). 1.88 + * 1.89 + * <p><b>Keyboard transliteration</b> 1.90 + * 1.91 + * <p>Somewhat more involved is <em>keyboard</em>, or incremental 1.92 + * transliteration. This is the transliteration of text that is 1.93 + * arriving from some source (typically the user's keyboard) one 1.94 + * character at a time, or in some other piecemeal fashion. 1.95 + * 1.96 + * <p>In keyboard transliteration, a <code>Replaceable</code> buffer 1.97 + * stores the text. As text is inserted, as much as possible is 1.98 + * transliterated on the fly. This means a GUI that displays the 1.99 + * contents of the buffer may show text being modified as each new 1.100 + * character arrives. 1.101 + * 1.102 + * <p>Consider the simple <code>RuleBasedTransliterator</code>: 1.103 + * 1.104 + * \htmlonly<blockquote>\endhtmlonly<code> 1.105 + * th>{theta}<br> 1.106 + * t>{tau} 1.107 + * </code>\htmlonly</blockquote>\endhtmlonly 1.108 + * 1.109 + * When the user types 't', nothing will happen, since the 1.110 + * transliterator is waiting to see if the next character is 'h'. To 1.111 + * remedy this, we introduce the notion of a cursor, marked by a '|' 1.112 + * in the output string: 1.113 + * 1.114 + * \htmlonly<blockquote>\endhtmlonly<code> 1.115 + * t>|{tau}<br> 1.116 + * {tau}h>{theta} 1.117 + * </code>\htmlonly</blockquote>\endhtmlonly 1.118 + * 1.119 + * Now when the user types 't', tau appears, and if the next character 1.120 + * is 'h', the tau changes to a theta. This is accomplished by 1.121 + * maintaining a cursor position (independent of the insertion point, 1.122 + * and invisible in the GUI) across calls to 1.123 + * <code>transliterate()</code>. 
Typically, the cursor will 1.124 + * be coincident with the insertion point, but in a case like the one 1.125 + * above, it will precede the insertion point. 1.126 + * 1.127 + * <p>Keyboard transliteration methods maintain a set of three indices 1.128 + * that are updated with each call to 1.129 + * <code>transliterate()</code>, including the cursor, start, 1.130 + * and limit. Since these indices are changed by the method, they are 1.131 + * passed in an <code>int[]</code> array. The <code>START</code> index 1.132 + * marks the beginning of the substring that the transliterator will 1.133 + * look at. It is advanced as text becomes committed (but it is not 1.134 + * the committed index; that's the <code>CURSOR</code>). The 1.135 + * <code>CURSOR</code> index, described above, marks the point at 1.136 + * which the transliterator last stopped, either because it reached 1.137 + * the end, or because it required more characters to disambiguate 1.138 + * between possible inputs. The <code>CURSOR</code> can also be 1.139 + * explicitly set by rules in a <code>RuleBasedTransliterator</code>. 1.140 + * Any characters before the <code>CURSOR</code> index are frozen; 1.141 + * future keyboard transliteration calls within this input sequence 1.142 + * will not change them. New text is inserted at the 1.143 + * <code>LIMIT</code> index, which marks the end of the substring that 1.144 + * the transliterator looks at. 1.145 + * 1.146 + * <p>Because keyboard transliteration assumes that more characters 1.147 + * are to arrive, it is conservative in its operation. It only 1.148 + * transliterates when it can do so unambiguously. Otherwise it waits 1.149 + * for more characters to arrive. When the client code knows that no 1.150 + * more characters are forthcoming, perhaps because the user has 1.151 + * performed some input termination operation, then it should call 1.152 + * <code>finishTransliteration()</code> to complete any 1.153 + * pending transliterations. 
1.154 + * 1.155 + * <p><b>Inverses</b> 1.156 + * 1.157 + * <p>Pairs of transliterators may be inverses of one another. For 1.158 + * example, if transliterator <b>A</b> transliterates characters by 1.159 + * incrementing their Unicode value (so "abc" -> "def"), and 1.160 + * transliterator <b>B</b> decrements character values, then <b>A</b> 1.161 + * is an inverse of <b>B</b> and vice versa. If we compose <b>A</b> 1.162 + * with <b>B</b> in a compound transliterator, the result is the 1.163 + * identity transliterator, that is, a transliterator that does not 1.164 + * change its input text. 1.165 + * 1.166 + * The <code>Transliterator</code> method <code>getInverse()</code> 1.167 + * returns a transliterator's inverse, if one exists, or 1.168 + * <code>null</code> otherwise. However, the result of 1.169 + * <code>getInverse()</code> usually will <em>not</em> be a true 1.170 + * mathematical inverse. This is because true inverse transliterators 1.171 + * are difficult to formulate. For example, consider two 1.172 + * transliterators: <b>AB</b>, which transliterates the character 'A' 1.173 + * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'. It might 1.174 + * seem that these are exact inverses, since 1.175 + * 1.176 + * \htmlonly<blockquote>\endhtmlonly"A" x <b>AB</b> -> "B"<br> 1.177 + * "B" x <b>BA</b> -> "A"\htmlonly</blockquote>\endhtmlonly 1.178 + * 1.179 + * where 'x' represents transliteration. However, 1.180 + * 1.181 + * \htmlonly<blockquote>\endhtmlonly"ABCD" x <b>AB</b> -> "BBCD"<br> 1.182 + * "BBCD" x <b>BA</b> -> "AACD"\htmlonly</blockquote>\endhtmlonly 1.183 + * 1.184 + * so <b>AB</b> composed with <b>BA</b> is not the 1.185 + * identity. Nonetheless, <b>BA</b> may be usefully considered to be 1.186 + * <b>AB</b>'s inverse, and it is on this basis that 1.187 + * <b>AB</b><code>.getInverse()</code> could legitimately return 1.188 + * <b>BA</b>.
1.189 + * 1.190 + * <p><b>IDs and display names</b> 1.191 + * 1.192 + * <p>A transliterator is designated by a short identifier string or 1.193 + * <em>ID</em>. IDs follow the format <em>source-destination</em>, 1.194 + * where <em>source</em> describes the entity being replaced, and 1.195 + * <em>destination</em> describes the entity replacing 1.196 + * <em>source</em>. The entities may be the names of scripts, 1.197 + * particular sequences of characters, or whatever else it is that the 1.198 + * transliterator converts to or from. For example, a transliterator 1.199 + * from Russian to Latin might be named "Russian-Latin". A 1.200 + * transliterator from keyboard escape sequences to Latin-1 characters 1.201 + * might be named "KeyboardEscape-Latin1". By convention, system 1.202 + * entity names are in English, with the initial letters of words 1.203 + * capitalized; user entity names may follow any format so long as 1.204 + * they do not contain dashes. 1.205 + * 1.206 + * <p>In addition to programmatic IDs, transliterator objects have 1.207 + * display names for presentation in user interfaces, returned by 1.208 + * {@link #getDisplayName }. 1.209 + * 1.210 + * <p><b>Factory methods and registration</b> 1.211 + * 1.212 + * <p>In general, client code should use the factory method 1.213 + * {@link #createInstance } to obtain an instance of a 1.214 + * transliterator given its ID. Valid IDs may be enumerated using 1.215 + * <code>getAvailableIDs()</code>. Since transliterators are mutable, 1.216 + * multiple calls to {@link #createInstance } with the same ID will 1.217 + * return distinct objects. 1.218 + * 1.219 + * <p>In addition to the system transliterators registered at startup, 1.220 + * user transliterators may be registered by calling 1.221 + * <code>registerInstance()</code> at run time. A registered instance 1.222 + * acts as a template; future calls to {@link #createInstance } with the ID 1.223 + * of the registered object return clones of that object.
Thus any 1.224 + * object passed to <tt>registerInstance()</tt> must implement 1.225 + * <tt>clone()</tt> properly. To register a transliterator subclass 1.226 + * without instantiating it (until it is needed), users may call 1.227 + * {@link #registerFactory }. In this case, the objects are 1.228 + * instantiated by invoking the zero-argument public constructor of 1.229 + * the class. 1.230 + * 1.231 + * <p><b>Subclassing</b> 1.232 + * 1.233 + * Subclasses must implement the abstract method 1.234 + * <code>handleTransliterate()</code>. <p>Subclasses should override 1.235 + * the <code>transliterate()</code> method taking a 1.236 + * <code>Replaceable</code> and the <code>transliterate()</code> 1.237 + * method taking a <code>String</code> and <code>StringBuffer</code> 1.238 + * if the performance of these methods can be improved over the 1.239 + * performance obtained by the default implementations in this class. 1.240 + * 1.241 + * @author Alan Liu 1.242 + * @stable ICU 2.0 1.243 + */ 1.244 +class U_I18N_API Transliterator : public UObject { 1.245 + 1.246 +private: 1.247 + 1.248 + /** 1.249 + * Programmatic name, e.g., "Latin-Arabic". 1.250 + */ 1.251 + UnicodeString ID; 1.252 + 1.253 + /** 1.254 + * This transliterator's filter. Any character for which 1.255 + * <tt>filter.contains()</tt> returns <tt>false</tt> will not be 1.256 + * altered by this transliterator. If <tt>filter</tt> is 1.257 + * <tt>null</tt> then no filtering is applied. 1.258 + */ 1.259 + UnicodeFilter* filter; 1.260 + 1.261 + int32_t maximumContextLength; 1.262 + 1.263 + public: 1.264 + 1.265 + /** 1.266 + * A context integer or pointer for a factory function, passed by 1.267 + * value. 1.268 + * @stable ICU 2.4 1.269 + */ 1.270 + union Token { 1.271 + /** 1.272 + * This token, interpreted as a 32-bit integer. 1.273 + * @stable ICU 2.4 1.274 + */ 1.275 + int32_t integer; 1.276 + /** 1.277 + * This token, interpreted as a native pointer.
1.278 + * @stable ICU 2.4 1.279 + */ 1.280 + void* pointer; 1.281 + }; 1.282 + 1.283 +#ifndef U_HIDE_INTERNAL_API 1.284 + /** 1.285 + * Return a token containing an integer. 1.286 + * @return a token containing an integer. 1.287 + * @internal 1.288 + */ 1.289 + inline static Token integerToken(int32_t); 1.290 + 1.291 + /** 1.292 + * Return a token containing a pointer. 1.293 + * @return a token containing a pointer. 1.294 + * @internal 1.295 + */ 1.296 + inline static Token pointerToken(void*); 1.297 +#endif /* U_HIDE_INTERNAL_API */ 1.298 + 1.299 + /** 1.300 + * A function that creates and returns a Transliterator. When 1.301 + * invoked, it will be passed the ID string that is being 1.302 + * instantiated, together with the context pointer that was passed 1.303 + * in when the factory function was first registered. Many 1.304 + * factory functions will ignore both parameters, however, 1.305 + * functions that are registered to more than one ID may use the 1.306 + * ID or the context parameter to parameterize the transliterator 1.307 + * they create. 1.308 + * @param ID the string identifier for this transliterator 1.309 + * @param context a context pointer that will be stored and 1.310 + * later passed to the factory function when an ID matching 1.311 + * the registration ID is being instantiated with this factory. 1.312 + * @stable ICU 2.4 1.313 + */ 1.314 + typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context); 1.315 + 1.316 +protected: 1.317 + 1.318 + /** 1.319 + * Default constructor. 1.320 + * @param ID the string identifier for this transliterator 1.321 + * @param adoptedFilter the filter. Any character for which 1.322 + * <tt>filter.contains()</tt> returns <tt>false</tt> will not be 1.323 + * altered by this transliterator. If <tt>filter</tt> is 1.324 + * <tt>null</tt> then no filtering is applied. 
1.325 + * @stable ICU 2.4 1.326 + */ 1.327 + Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter); 1.328 + 1.329 + /** 1.330 + * Copy constructor. 1.331 + * @stable ICU 2.4 1.332 + */ 1.333 + Transliterator(const Transliterator&); 1.334 + 1.335 + /** 1.336 + * Assignment operator. 1.337 + * @stable ICU 2.4 1.338 + */ 1.339 + Transliterator& operator=(const Transliterator&); 1.340 + 1.341 + /** 1.342 + * Create a transliterator from a basic ID. This is an ID 1.343 + * containing only the forward direction source, target, and 1.344 + * variant. 1.345 + * @param id a basic ID of the form S-T or S-T/V. 1.346 + * @param canon canonical ID to assign to the object, or 1.347 + * NULL to leave the ID unchanged 1.348 + * @return a newly created Transliterator or null if the ID is 1.349 + * invalid. 1.350 + * @stable ICU 2.4 1.351 + */ 1.352 + static Transliterator* createBasicInstance(const UnicodeString& id, 1.353 + const UnicodeString* canon); 1.354 + 1.355 + friend class TransliteratorParser; // for parseID() 1.356 + friend class TransliteratorIDParser; // for createBasicInstance() 1.357 + friend class TransliteratorAlias; // for setID() 1.358 + 1.359 +public: 1.360 + 1.361 + /** 1.362 + * Destructor. 1.363 + * @stable ICU 2.0 1.364 + */ 1.365 + virtual ~Transliterator(); 1.366 + 1.367 + /** 1.368 + * Implements Cloneable. 1.369 + * All subclasses are encouraged to implement this method if it is 1.370 + * possible and reasonable to do so. Subclasses that are to be 1.371 + * registered with the system using <tt>registerInstance()</tt> 1.372 + * are required to implement this method. If a subclass does not 1.373 + * implement clone() properly and is registered with the system 1.374 + * using registerInstance(), then the default clone() implementation 1.375 + * will return null, and calls to createInstance() will fail. 1.376 + * 1.377 + * @return a copy of the object. 
1.378 + * @see #registerInstance 1.379 + * @stable ICU 2.0 1.380 + */ 1.381 + virtual Transliterator* clone() const; 1.382 + 1.383 + /** 1.384 + * Transliterates a segment of a string, with optional filtering. 1.385 + * 1.386 + * @param text the string to be transliterated 1.387 + * @param start the beginning index, inclusive; <code>0 <= start 1.388 + * <= limit</code>. 1.389 + * @param limit the ending index, exclusive; <code>start <= limit 1.390 + * <= text.length()</code>. 1.391 + * @return The new limit index. The text previously occupying <code>[start, 1.392 + * limit)</code> has been transliterated, possibly to a string of a different 1.393 + * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where 1.394 + * <em>new-limit</em> is the return value. If the input offsets are out of bounds, 1.395 + * the returned value is -1 and the input string remains unchanged. 1.396 + * @stable ICU 2.0 1.397 + */ 1.398 + virtual int32_t transliterate(Replaceable& text, 1.399 + int32_t start, int32_t limit) const; 1.400 + 1.401 + /** 1.402 + * Transliterates an entire string in place. Convenience method. 1.403 + * @param text the string to be transliterated 1.404 + * @stable ICU 2.0 1.405 + */ 1.406 + virtual void transliterate(Replaceable& text) const; 1.407 + 1.408 + /** 1.409 + * Transliterates the portion of the text buffer that can be 1.410 + * transliterated unambiguously after new text has been inserted, 1.411 + * typically as a result of a keyboard event. The new text in 1.412 + * <code>insertion</code> will be inserted into <code>text</code> 1.413 + * at <code>index.limit</code>, advancing 1.414 + * <code>index.limit</code> by <code>insertion.length()</code>. 1.415 + * Then the transliterator will try to transliterate characters of 1.416 + * <code>text</code> between <code>index.cursor</code> and 1.417 + * <code>index.limit</code>. Characters before 1.418 + * <code>index.cursor</code> will not be changed.
1.419 + * 1.420 + * <p>Upon return, values in <code>index</code> will be updated. 1.421 + * <code>index.start</code> will be advanced to the first 1.422 + * character that future calls to this method will read. 1.423 + * <code>index.cursor</code> and <code>index.limit</code> will 1.424 + * be adjusted to delimit the range of text that future calls to 1.425 + * this method may change. 1.426 + * 1.427 + * <p>Typical usage of this method begins with an initial call 1.428 + * with <code>index.start</code> and <code>index.limit</code> 1.429 + * set to indicate the portion of <code>text</code> to be 1.430 + * transliterated, and <code>index.cursor == index.start</code>. 1.431 + * Thereafter, <code>index</code> can be used without 1.432 + * modification in future calls, provided that all changes to 1.433 + * <code>text</code> are made via this method. 1.434 + * 1.435 + * <p>This method assumes that future calls may be made that will 1.436 + * insert new text into the buffer. As a result, it only performs 1.437 + * unambiguous transliterations. After the last call to this 1.438 + * method, there may be untransliterated text that is waiting for 1.439 + * more input to resolve an ambiguity. In order to perform these 1.440 + * pending transliterations, clients should call {@link 1.441 + * #finishTransliteration } after the last call to this 1.442 + * method has been made. 1.443 + * 1.444 + * @param text the buffer holding transliterated and untransliterated text 1.445 + * @param index an array of three integers. 1.446 + * 1.447 + * <ul><li><code>index.start</code>: the beginning index, 1.448 + * inclusive; <code>0 <= index.start <= index.limit</code>. 1.449 + * 1.450 + * <li><code>index.limit</code>: the ending index, exclusive; 1.451 + * <code>index.start <= index.limit <= text.length()</code>. 1.452 + * <code>insertion</code> is inserted at 1.453 + * <code>index.limit</code>. 
1.454 + * 1.455 + * <li><code>index.cursor</code>: the next character to be 1.456 + * considered for transliteration; <code>index.start <= 1.457 + * index.cursor <= index.limit</code>. Characters before 1.458 + * <code>index.cursor</code> will not be changed by future calls 1.459 + * to this method.</ul> 1.460 + * 1.461 + * @param insertion text to be inserted and possibly 1.462 + * transliterated into the translation buffer at 1.463 + * <code>index.limit</code>. If <code>null</code> then no text 1.464 + * is inserted. 1.465 + * @param status Output param to be filled in with a success or an error. 1.466 + * @see #handleTransliterate 1.467 + * @exception IllegalArgumentException if <code>index</code> 1.468 + * is invalid 1.469 + * @see UTransPosition 1.470 + * @stable ICU 2.0 1.471 + */ 1.472 + virtual void transliterate(Replaceable& text, UTransPosition& index, 1.473 + const UnicodeString& insertion, 1.474 + UErrorCode& status) const; 1.475 + 1.476 + /** 1.477 + * Transliterates the portion of the text buffer that can be 1.478 + * transliterated unambiguously after a new character has been 1.479 + * inserted, typically as a result of a keyboard event. This is a 1.480 + * convenience method. 1.481 + * @param text the buffer holding transliterated and 1.482 + * untransliterated text 1.483 + * @param index an array of three integers. 1.484 + * @param insertion text to be inserted and possibly 1.485 + * transliterated into the translation buffer at 1.486 + * <code>index.limit</code>. 1.487 + * @param status Output param to be filled in with a success or an error. 1.488 + * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const 1.489 + * @stable ICU 2.0 1.490 + */ 1.491 + virtual void transliterate(Replaceable& text, UTransPosition& index, 1.492 + UChar32 insertion, 1.493 + UErrorCode& status) const; 1.494 + 1.495 + /** 1.496 + * Transliterates the portion of the text buffer that can be 1.497 + * transliterated unambiguosly.
This is a convenience method; see 1.498 + * {@link 1.499 + * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const } 1.500 + * for details. 1.501 + * @param text the buffer holding transliterated and 1.502 + * untransliterated text 1.503 + * @param index an array of three integers. See {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString*, UErrorCode&) const }. 1.504 + * @param status Output param to filled in with a success or an error. 1.505 + * @see #transliterate(Replaceable, int[], String) 1.506 + * @stable ICU 2.0 1.507 + */ 1.508 + virtual void transliterate(Replaceable& text, UTransPosition& index, 1.509 + UErrorCode& status) const; 1.510 + 1.511 + /** 1.512 + * Finishes any pending transliterations that were waiting for 1.513 + * more characters. Clients should call this method as the last 1.514 + * call after a sequence of one or more calls to 1.515 + * <code>transliterate()</code>. 1.516 + * @param text the buffer holding transliterated and 1.517 + * untransliterated text. 1.518 + * @param index the array of indices previously passed to {@link 1.519 + * #transliterate } 1.520 + * @stable ICU 2.0 1.521 + */ 1.522 + virtual void finishTransliteration(Replaceable& text, 1.523 + UTransPosition& index) const; 1.524 + 1.525 +private: 1.526 + 1.527 + /** 1.528 + * This internal method does incremental transliteration. If the 1.529 + * 'insertion' is non-null then we append it to 'text' before 1.530 + * proceeding. This method calls through to the pure virtual 1.531 + * framework method handleTransliterate() to do the actual 1.532 + * work. 1.533 + * @param text the buffer holding transliterated and 1.534 + * untransliterated text 1.535 + * @param index an array of three integers. See {@link 1.536 + * #transliterate(Replaceable, int[], String)}. 1.537 + * @param insertion text to be inserted and possibly 1.538 + * transliterated into the translation buffer at 1.539 + * <code>index.limit</code>. 
1.540 + * @param status Output param to filled in with a success or an error. 1.541 + */ 1.542 + void _transliterate(Replaceable& text, 1.543 + UTransPosition& index, 1.544 + const UnicodeString* insertion, 1.545 + UErrorCode &status) const; 1.546 + 1.547 +protected: 1.548 + 1.549 + /** 1.550 + * Abstract method that concrete subclasses define to implement 1.551 + * their transliteration algorithm. This method handles both 1.552 + * incremental and non-incremental transliteration. Let 1.553 + * <code>originalStart</code> refer to the value of 1.554 + * <code>pos.start</code> upon entry. 1.555 + * 1.556 + * <ul> 1.557 + * <li>If <code>incremental</code> is false, then this method 1.558 + * should transliterate all characters between 1.559 + * <code>pos.start</code> and <code>pos.limit</code>. Upon return 1.560 + * <code>pos.start</code> must == <code> pos.limit</code>.</li> 1.561 + * 1.562 + * <li>If <code>incremental</code> is true, then this method 1.563 + * should transliterate all characters between 1.564 + * <code>pos.start</code> and <code>pos.limit</code> that can be 1.565 + * unambiguously transliterated, regardless of future insertions 1.566 + * of text at <code>pos.limit</code>. Upon return, 1.567 + * <code>pos.start</code> should be in the range 1.568 + * [<code>originalStart</code>, <code>pos.limit</code>). 
1.569 + * <code>pos.start</code> should be positioned such that 1.570 + * characters [<code>originalStart</code>, <code> 1.571 + * pos.start</code>) will not be changed in the future by this 1.572 + * transliterator and characters [<code>pos.start</code>, 1.573 + * <code>pos.limit</code>) are unchanged.</li> 1.574 + * </ul> 1.575 + * 1.576 + * <p>Implementations of this method should also obey the 1.577 + * following invariants:</p> 1.578 + * 1.579 + * <ul> 1.580 + * <li> <code>pos.limit</code> and <code>pos.contextLimit</code> 1.581 + * should be updated to reflect changes in length of the text 1.582 + * between <code>pos.start</code> and <code>pos.limit</code>. The 1.583 + * difference <code> pos.contextLimit - pos.limit</code> should 1.584 + * not change.</li> 1.585 + * 1.586 + * <li><code>pos.contextStart</code> should not change.</li> 1.587 + * 1.588 + * <li>Upon return, neither <code>pos.start</code> nor 1.589 + * <code>pos.limit</code> should be less than 1.590 + * <code>originalStart</code>.</li> 1.591 + * 1.592 + * <li>Text before <code>originalStart</code> and text after 1.593 + * <code>pos.limit</code> should not change.</li> 1.594 + * 1.595 + * <li>Text before <code>pos.contextStart</code> and text after 1.596 + * <code> pos.contextLimit</code> should be ignored.</li> 1.597 + * </ul> 1.598 + * 1.599 + * <p>Subclasses may safely assume that all characters in 1.600 + * [<code>pos.start</code>, <code>pos.limit</code>) are filtered. 1.601 + * In other words, the filter has already been applied by the time 1.602 + * this method is called. See 1.603 + * <code>filteredTransliterate()</code>. 1.604 + * 1.605 + * <p>This method is <b>not</b> for public consumption. Calling 1.606 + * this method directly will transliterate 1.607 + * [<code>pos.start</code>, <code>pos.limit</code>) without 1.608 + * applying the filter. End user code should call <code> 1.609 + * transliterate()</code> instead of this method. 
Subclass code 1.610 + * and wrapping transliterators should call 1.611 + * <code>filteredTransliterate()</code> instead of this method.<p> 1.612 + * 1.613 + * @param text the buffer holding transliterated and 1.614 + * untransliterated text 1.615 + * 1.616 + * @param pos the indices indicating the start, limit, context 1.617 + * start, and context limit of the text. 1.618 + * 1.619 + * @param incremental if true, assume more text may be inserted at 1.620 + * <code>pos.limit</code> and act accordingly. Otherwise, 1.621 + * transliterate all text between <code>pos.start</code> and 1.622 + * <code>pos.limit</code> and move <code>pos.start</code> up to 1.623 + * <code>pos.limit</code>. 1.624 + * 1.625 + * @see #transliterate 1.626 + * @stable ICU 2.4 1.627 + */ 1.628 + virtual void handleTransliterate(Replaceable& text, 1.629 + UTransPosition& pos, 1.630 + UBool incremental) const = 0; 1.631 + 1.632 +public: 1.633 + /** 1.634 + * Transliterate a substring of text, as specified by index, taking filters 1.635 + * into account. This method is for subclasses that need to delegate to 1.636 + * another transliterator, such as CompoundTransliterator. 1.637 + * @param text the text to be transliterated 1.638 + * @param index the position indices 1.639 + * @param incremental if TRUE, then assume more characters may be inserted 1.640 + * at index.limit, and postpone processing to accommodate future incoming 1.641 + * characters 1.642 + * @stable ICU 2.4 1.643 + */ 1.644 + virtual void filteredTransliterate(Replaceable& text, 1.645 + UTransPosition& index, 1.646 + UBool incremental) const; 1.647 + 1.648 +private: 1.649 + 1.650 + /** 1.651 + * Top-level transliteration method, handling filtering, incremental and 1.652 + * non-incremental transliteration, and rollback. All transliteration 1.653 + * public API methods eventually call this method with a rollback argument 1.654 + * of TRUE. Other entities may call this method but rollback should be 1.655 + * FALSE.
1.656 + * 1.657 + * <p>If this transliterator has a filter, break up the input text into runs 1.658 + * of unfiltered characters. Pass each run to 1.659 + * subclass.handleTransliterate(). 1.660 + * 1.661 + * <p>In incremental mode, if rollback is TRUE, perform a special 1.662 + * incremental procedure in which several passes are made over the input 1.663 + * text, adding one character at a time, and committing successful 1.664 + * transliterations as they occur. Unsuccessful transliterations are rolled 1.665 + * back and retried with additional characters to give correct results. 1.666 + * 1.667 + * @param text the text to be transliterated 1.668 + * @param index the position indices 1.669 + * @param incremental if TRUE, then assume more characters may be inserted 1.670 + * at index.limit, and postpone processing to accommodate future incoming 1.671 + * characters 1.672 + * @param rollback if TRUE and if incremental is TRUE, then perform special 1.673 + * incremental processing, as described above, and undo partial 1.674 + * transliterations where necessary. If incremental is FALSE then this 1.675 + * parameter is ignored. 1.676 + */ 1.677 + virtual void filteredTransliterate(Replaceable& text, 1.678 + UTransPosition& index, 1.679 + UBool incremental, 1.680 + UBool rollback) const; 1.681 + 1.682 +public: 1.683 + 1.684 + /** 1.685 + * Returns the length of the longest context required by this transliterator. 1.686 + * This is <em>preceding</em> context. The default implementation supplied 1.687 + * by <code>Transliterator</code> returns zero; subclasses 1.688 + * that use preceding context should override this method to return the 1.689 + * correct value. For example, if a transliterator translates "ddd" (where 1.690 + * d is any digit) to "555" when preceded by "(ddd)", then the preceding 1.691 + * context length is 5, the length of "(ddd)".
1.692 + * 1.693 + * @return The maximum number of preceding context characters this 1.694 + * transliterator needs to examine 1.695 + * @stable ICU 2.0 1.696 + */ 1.697 + int32_t getMaximumContextLength(void) const; 1.698 + 1.699 +protected: 1.700 + 1.701 + /** 1.702 + * Method for subclasses to use to set the maximum context length. 1.703 + * @param maxContextLength the new value to be set. 1.704 + * @see #getMaximumContextLength 1.705 + * @stable ICU 2.4 1.706 + */ 1.707 + void setMaximumContextLength(int32_t maxContextLength); 1.708 + 1.709 +public: 1.710 + 1.711 + /** 1.712 + * Returns a programmatic identifier for this transliterator. 1.713 + * If this identifier is passed to <code>createInstance()</code>, it 1.714 + * will return this object, if it has been registered. 1.715 + * @return a programmatic identifier for this transliterator. 1.716 + * @see #registerInstance 1.717 + * @see #registerFactory 1.718 + * @see #getAvailableIDs 1.719 + * @stable ICU 2.0 1.720 + */ 1.721 + virtual const UnicodeString& getID(void) const; 1.722 + 1.723 + /** 1.724 + * Returns a name for this transliterator that is appropriate for 1.725 + * display to the user in the default locale. See the 1.726 + * getDisplayName() overload that takes a Locale for details. 1.727 + * @param ID the string identifier for this transliterator 1.728 + * @param result Output param to receive the display name 1.729 + * @return A reference to 'result'. 1.730 + * @stable ICU 2.0 1.731 + */ 1.732 + static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID, 1.733 + UnicodeString& result); 1.734 + 1.735 + /** 1.736 + * Returns a name for this transliterator that is appropriate for 1.737 + * display to the user in the given locale. This name is taken 1.738 + * from the locale resource data in the standard manner of the 1.739 + * <code>java.text</code> package.
1.740 + * 1.741 + * <p>If no localized names exist in the system resource bundles, 1.742 + * a name is synthesized using a localized 1.743 + * <code>MessageFormat</code> pattern from the resource data. The 1.744 + * arguments to this pattern are an integer followed by one or two 1.745 + * strings. The integer is the number of strings, either 1 or 2. 1.746 + * The strings are formed by splitting the ID for this 1.747 + * transliterator at the first '-'. If there is no '-', then the 1.748 + * entire ID forms the only string. 1.749 + * @param ID the string identifier for this transliterator 1.750 + * @param inLocale the Locale in which the display name should be 1.751 + * localized. 1.752 + * @param result Output param to receive the display name 1.753 + * @return A reference to 'result'. 1.754 + * @stable ICU 2.0 1.755 + */ 1.756 + static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID, 1.757 + const Locale& inLocale, 1.758 + UnicodeString& result); 1.759 + 1.760 + /** 1.761 + * Returns the filter used by this transliterator, or <tt>NULL</tt> 1.762 + * if this transliterator uses no filter. 1.763 + * @return the filter used by this transliterator, or <tt>NULL</tt> 1.764 + * if this transliterator uses no filter. 1.765 + * @stable ICU 2.0 1.766 + */ 1.767 + const UnicodeFilter* getFilter(void) const; 1.768 + 1.769 + /** 1.770 + * Returns the filter used by this transliterator, or <tt>NULL</tt> if this 1.771 + * transliterator uses no filter. The caller must eventually delete the 1.772 + * result. After this call, this transliterator's filter is set to 1.773 + * <tt>NULL</tt>. 1.774 + * @return the filter used by this transliterator, or <tt>NULL</tt> if this 1.775 + * transliterator uses no filter. 1.776 + * @stable ICU 2.4 1.777 + */ 1.778 + UnicodeFilter* orphanFilter(void); 1.779 + 1.780 + /** 1.781 + * Changes the filter used by this transliterator. If the filter 1.782 + * is set to <tt>null</tt> then no filtering will occur.
1.783 + * 1.784 + * <p>Callers must take care if a transliterator is in use by 1.785 + * multiple threads. The filter should not be changed by one 1.786 + * thread while another thread may be transliterating. 1.787 + * @param adoptedFilter the new filter to be adopted. 1.788 + * @stable ICU 2.0 1.789 + */ 1.790 + void adoptFilter(UnicodeFilter* adoptedFilter); 1.791 + 1.792 + /** 1.793 + * Returns this transliterator's inverse. See the class 1.794 + * documentation for details. This implementation simply inverts 1.795 + * the two entities in the ID and attempts to retrieve the 1.796 + * resulting transliterator. That is, if <code>getID()</code> 1.797 + * returns "A-B", then this method will return the result of 1.798 + * <code>createInstance("B-A")</code>, or <code>null</code> if that 1.799 + * call fails. 1.800 + * 1.801 + * <p>Subclasses with knowledge of their inverse may wish to 1.802 + * override this method. 1.803 + * 1.804 + * @param status Output param to be filled in with a success or an error. 1.805 + * @return a transliterator that is an inverse, not necessarily 1.806 + * exact, of this transliterator, or <code>null</code> if no such 1.807 + * transliterator is registered. 1.808 + * @see #registerInstance 1.809 + * @stable ICU 2.0 1.810 + */ 1.811 + Transliterator* createInverse(UErrorCode& status) const; 1.812 + 1.813 + /** 1.814 + * Returns a <code>Transliterator</code> object given its ID. 1.815 + * The ID must be either a system transliterator ID or an ID registered 1.816 + * using <code>registerInstance()</code>. 1.817 + * 1.818 + * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> 1.819 + * @param dir either FORWARD or REVERSE. 1.820 + * @param parseError Struct to receive information on position 1.821 + * of error if an error is encountered 1.822 + * @param status Output param to be filled in with a success or an error.
1.823 + * @return A <code>Transliterator</code> object with the given ID 1.824 + * @see #registerInstance 1.825 + * @see #getAvailableIDs 1.826 + * @see #getID 1.827 + * @stable ICU 2.0 1.828 + */ 1.829 + static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID, 1.830 + UTransDirection dir, 1.831 + UParseError& parseError, 1.832 + UErrorCode& status); 1.833 + 1.834 + /** 1.835 + * Returns a <code>Transliterator</code> object given its ID. 1.836 + * The ID must be either a system transliterator ID or an ID registered 1.837 + * using <code>registerInstance()</code>. 1.838 + * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> 1.839 + * @param dir either FORWARD or REVERSE. 1.840 + * @param status Output param to be filled in with a success or an error. 1.841 + * @return A <code>Transliterator</code> object with the given ID 1.842 + * @stable ICU 2.0 1.843 + */ 1.844 + static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID, 1.845 + UTransDirection dir, 1.846 + UErrorCode& status); 1.847 + 1.848 + /** 1.849 + * Returns a <code>Transliterator</code> object constructed from 1.850 + * the given rule string. This will be a RuleBasedTransliterator, 1.851 + * if the rule string contains only rules, or a 1.852 + * CompoundTransliterator, if it contains ID blocks, or a 1.853 + * NullTransliterator, if it contains ID blocks which parse as 1.854 + * empty for the given direction. 1.855 + * @param ID the id for the transliterator. 1.856 + * @param rules rules, separated by ';' 1.857 + * @param dir either FORWARD or REVERSE. 1.858 + * @param parseError Struct to receive information on position 1.859 + * of error if an error is encountered 1.860 + * @param status Output param set to success/failure code.
1.861 + * @stable ICU 2.0 1.862 + */ 1.863 + static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID, 1.864 + const UnicodeString& rules, 1.865 + UTransDirection dir, 1.866 + UParseError& parseError, 1.867 + UErrorCode& status); 1.868 + 1.869 + /** 1.870 + * Create a rule string that can be passed to createFromRules() 1.871 + * to recreate this transliterator. 1.872 + * @param result the string to receive the rules. Previous 1.873 + * contents will be deleted. 1.874 + * @param escapeUnprintable if TRUE then convert unprintable 1.875 + * characters to their hex escape representations, \\uxxxx or 1.876 + * \\Uxxxxxxxx. Unprintable characters are those other than 1.877 + * U+000A, U+0020..U+007E. 1.878 + * @stable ICU 2.0 1.879 + */ 1.880 + virtual UnicodeString& toRules(UnicodeString& result, 1.881 + UBool escapeUnprintable) const; 1.882 + 1.883 + /** 1.884 + * Return the number of elements that make up this transliterator. 1.885 + * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek" 1.886 + * were created, the return value of this method would be 3. 1.887 + * 1.888 + * <p>If this transliterator is not composed of other 1.889 + * transliterators, then this method returns 1. 1.890 + * @return the number of transliterators that compose this 1.891 + * transliterator, or 1 if this transliterator is not composed of 1.892 + * multiple transliterators 1.893 + * @stable ICU 3.0 1.894 + */ 1.895 + int32_t countElements() const; 1.896 + 1.897 + /** 1.898 + * Return an element that makes up this transliterator. For 1.899 + * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek" 1.900 + * were created, the return value of this method would be one 1.901 + * of the three transliterator objects that make up that 1.902 + * transliterator: [NFD, Jamo-Latin, Latin-Greek].
1.903 + * 1.904 + * <p>If this transliterator is not composed of other 1.905 + * transliterators, then this method will return a reference to 1.906 + * this transliterator when given the index 0. 1.907 + * @param index a value from 0..countElements()-1 indicating the 1.908 + * transliterator to return 1.909 + * @param ec input-output error code 1.910 + * @return one of the transliterators that makes up this 1.911 + * transliterator, if this transliterator is made up of multiple 1.912 + * transliterators, otherwise a reference to this object if given 1.913 + * an index of 0 1.914 + * @stable ICU 3.0 1.915 + */ 1.916 + const Transliterator& getElement(int32_t index, UErrorCode& ec) const; 1.917 + 1.918 + /** 1.919 + * Returns the set of all characters that may be modified in the 1.920 + * input text by this Transliterator. This incorporates this 1.921 + * object's current filter; if the filter is changed, the return 1.922 + * value of this function will change. The default implementation 1.923 + * returns an empty set. Some subclasses may override {@link 1.924 + * #handleGetSourceSet } to return a more precise result. The 1.925 + * return result is approximate in any case and is intended for 1.926 + * use by tests, tools, or utilities. 1.927 + * @param result receives result set; previous contents lost 1.928 + * @return a reference to result 1.929 + * @see #getTargetSet 1.930 + * @see #handleGetSourceSet 1.931 + * @stable ICU 2.4 1.932 + */ 1.933 + UnicodeSet& getSourceSet(UnicodeSet& result) const; 1.934 + 1.935 + /** 1.936 + * Framework method that returns the set of all characters that 1.937 + * may be modified in the input text by this Transliterator, 1.938 + * ignoring the effect of this object's filter. The base class 1.939 + * implementation returns the empty set. Subclasses that wish to 1.940 + * implement this should override this method. 1.941 + * Note: this method returns void; the set of characters that this 1.942 + * transliterator may modify
is delivered through the <code>result</code> 1.943 + * output parameter, not as a return value. 1.944 + * @param result receives result set; previous contents lost 1.945 + * @see #getSourceSet 1.946 + * @see #getTargetSet 1.947 + * @stable ICU 2.4 1.948 + */ 1.949 + virtual void handleGetSourceSet(UnicodeSet& result) const; 1.950 + 1.951 + /** 1.952 + * Returns the set of all characters that may be generated as 1.953 + * replacement text by this transliterator. The default 1.954 + * implementation returns the empty set. Some subclasses may 1.955 + * override this method to return a more precise result. The 1.956 + * return result is approximate in any case and is intended for 1.957 + * use by tests, tools, or utilities requiring such 1.958 + * meta-information. 1.959 + * @param result receives result set; previous contents lost 1.960 + * @return a reference to result 1.961 + * @see #getSourceSet 1.962 + * @stable ICU 2.4 1.963 + */ 1.964 + virtual UnicodeSet& getTargetSet(UnicodeSet& result) const; 1.965 + 1.966 +public: 1.967 + 1.968 + /** 1.969 + * Registers a factory function that creates transliterators of 1.970 + * a given ID. 1.971 + * @param id the ID being registered 1.972 + * @param factory a function pointer that will be copied and 1.973 + * called later when the given ID is passed to createInstance() 1.974 + * @param context a context pointer that will be stored and 1.975 + * later passed to the factory function when an ID matching 1.976 + * the registration ID is being instantiated with this factory. 1.977 + * @stable ICU 2.0 1.978 + */ 1.979 + static void U_EXPORT2 registerFactory(const UnicodeString& id, 1.980 + Factory factory, 1.981 + Token context); 1.982 + 1.983 + /** 1.984 + * Registers an instance <tt>obj</tt> of a subclass of 1.985 + * <code>Transliterator</code> with the system. When 1.986 + * <tt>createInstance()</tt> is called with an ID string that is 1.987 + * equal to <tt>obj->getID()</tt>, then <tt>obj->clone()</tt> is 1.988 + * returned.
1.989 + * 1.990 + * After this call the Transliterator class owns the adoptedObj 1.991 + * and will delete it. 1.992 + * 1.993 + * @param adoptedObj an instance of subclass of 1.994 + * <code>Transliterator</code> that defines <tt>clone()</tt> 1.995 + * @see #createInstance 1.996 + * @see #registerFactory 1.997 + * @see #unregister 1.998 + * @stable ICU 2.0 1.999 + */ 1.1000 + static void U_EXPORT2 registerInstance(Transliterator* adoptedObj); 1.1001 + 1.1002 + /** 1.1003 + * Registers an ID string as an alias of another ID string. 1.1004 + * That is, after calling this function, <tt>createInstance(aliasID)</tt> 1.1005 + * will return the same thing as <tt>createInstance(realID)</tt>. 1.1006 + * This is generally used to create shorter, more mnemonic aliases 1.1007 + * for long compound IDs. 1.1008 + * 1.1009 + * @param aliasID The new ID being registered. 1.1010 + * @param realID The ID that the new ID is to be an alias for. 1.1011 + * This can be a compound ID and can include filters and should 1.1012 + * refer to transliterators that have already been registered with 1.1013 + * the framework, although this isn't checked. 1.1014 + * @stable ICU 3.6 1.1015 + */ 1.1016 + static void U_EXPORT2 registerAlias(const UnicodeString& aliasID, 1.1017 + const UnicodeString& realID); 1.1018 + 1.1019 +protected: 1.1020 + 1.1021 +#ifndef U_HIDE_INTERNAL_API 1.1022 + /** 1.1023 + * @internal 1.1024 + * @param id the ID being registered 1.1025 + * @param factory a function pointer that will be copied and 1.1026 + * called later when the given ID is passed to createInstance() 1.1027 + * @param context a context pointer that will be stored and 1.1028 + * later passed to the factory function when an ID matching 1.1029 + * the registration ID is being instantiated with this factory.
1.1030 + */ 1.1031 + static void _registerFactory(const UnicodeString& id, 1.1032 + Factory factory, 1.1033 + Token context); 1.1034 + 1.1035 + /** 1.1036 + * @internal 1.1037 + */ 1.1038 + static void _registerInstance(Transliterator* adoptedObj); 1.1039 + 1.1040 + /** 1.1041 + * @internal 1.1042 + */ 1.1043 + static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID); 1.1044 + 1.1045 + /** 1.1046 + * Register two targets as being inverses of one another. For 1.1047 + * example, calling _registerSpecialInverse("NFC", "NFD", true) causes 1.1048 + * Transliterator to form the following inverse relationships: 1.1049 + * 1.1050 + * <pre>NFC => NFD 1.1051 + * Any-NFC => Any-NFD 1.1052 + * NFD => NFC 1.1053 + * Any-NFD => Any-NFC</pre> 1.1054 + * 1.1055 + * (Without the special inverse registration, the inverse of NFC 1.1056 + * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but 1.1057 + * that the presence or absence of "Any-" is preserved. 1.1058 + * 1.1059 + * <p>The relationship is symmetrical; registering (a, b) is 1.1060 + * equivalent to registering (b, a). 1.1061 + * 1.1062 + * <p>The relevant IDs must still be registered separately as 1.1063 + * factories or classes. 1.1064 + * 1.1065 + * <p>Only the targets are specified. Special inverses always 1.1066 + * have the form Any-Target1 <=> Any-Target2. The target should 1.1067 + * have canonical casing (the casing desired to be produced when 1.1068 + * an inverse is formed) and should contain no whitespace or other 1.1069 + * extraneous characters.
1.1070 + * 1.1071 + * @param target the target against which to register the inverse 1.1072 + * @param inverseTarget the inverse of target, that is 1.1073 + * Any-target.getInverse() => Any-inverseTarget 1.1074 + * @param bidirectional if true, register the reverse relation 1.1075 + * as well, that is, Any-inverseTarget.getInverse() => Any-target 1.1076 + * @internal 1.1077 + */ 1.1078 + static void _registerSpecialInverse(const UnicodeString& target, 1.1079 + const UnicodeString& inverseTarget, 1.1080 + UBool bidirectional); 1.1081 +#endif /* U_HIDE_INTERNAL_API */ 1.1082 + 1.1083 +public: 1.1084 + 1.1085 + /** 1.1086 + * Unregisters a transliterator or class. This may be either 1.1087 + * a system transliterator or a user transliterator or class. 1.1088 + * Any attempt to construct an unregistered transliterator based 1.1089 + * on its ID will fail. 1.1090 + * 1.1091 + * @param ID the ID of the transliterator or class 1.1092 + * Note: this function returns void; the object that was registered 1.1093 + * with <code>ID</code>, if any, is not returned to the caller. 1.1094 + * @see #registerInstance 1.1095 + * @see #registerFactory 1.1096 + * @stable ICU 2.0 1.1097 + */ 1.1098 + static void U_EXPORT2 unregister(const UnicodeString& ID); 1.1099 + 1.1100 +public: 1.1101 + 1.1102 + /** 1.1103 + * Return a StringEnumeration over the IDs available at the time of the 1.1104 + * call, including user-registered IDs. 1.1105 + * @param ec input-output error code 1.1106 + * @return a newly-created StringEnumeration over the transliterators 1.1107 + * available at the time of the call. The caller should delete this object 1.1108 + * when done using it. 1.1109 + * @stable ICU 3.0 1.1110 + */ 1.1111 + static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec); 1.1112 + 1.1113 + /** 1.1114 + * Return the number of registered source specifiers. 1.1115 + * @return the number of registered source specifiers.
1.1116 + * @stable ICU 2.0 1.1117 + */ 1.1118 + static int32_t U_EXPORT2 countAvailableSources(void); 1.1119 + 1.1120 + /** 1.1121 + * Return a registered source specifier. 1.1122 + * @param index which specifier to return, from 0 to n-1, where 1.1123 + * n = countAvailableSources() 1.1124 + * @param result fill-in parameter to receive the source specifier. 1.1125 + * If index is out of range, result will be empty. 1.1126 + * @return reference to result 1.1127 + * @stable ICU 2.0 1.1128 + */ 1.1129 + static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index, 1.1130 + UnicodeString& result); 1.1131 + 1.1132 + /** 1.1133 + * Return the number of registered target specifiers for a given 1.1134 + * source specifier. 1.1135 + * @param source the given source specifier. 1.1136 + * @return the number of registered target specifiers for a given 1.1137 + * source specifier. 1.1138 + * @stable ICU 2.0 1.1139 + */ 1.1140 + static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source); 1.1141 + 1.1142 + /** 1.1143 + * Return a registered target specifier for a given source. 1.1144 + * @param index which specifier to return, from 0 to n-1, where 1.1145 + * n = countAvailableTargets(source) 1.1146 + * @param source the source specifier 1.1147 + * @param result fill-in parameter to receive the target specifier. 1.1148 + * If source is invalid or if index is out of range, result will 1.1149 + * be empty. 1.1150 + * @return reference to result 1.1151 + * @stable ICU 2.0 1.1152 + */ 1.1153 + static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index, 1.1154 + const UnicodeString& source, 1.1155 + UnicodeString& result); 1.1156 + 1.1157 + /** 1.1158 + * Return the number of registered variant specifiers for a given 1.1159 + * source-target pair. 1.1160 + * @param source the source specifiers. 1.1161 + * @param target the target specifiers.
1.1162 + * @stable ICU 2.0 1.1163 + */ 1.1164 + static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source, 1.1165 + const UnicodeString& target); 1.1166 + 1.1167 + /** 1.1168 + * Return a registered variant specifier for a given source-target 1.1169 + * pair. 1.1170 + * @param index which specifier to return, from 0 to n-1, where 1.1171 + * n = countAvailableVariants(source, target) 1.1172 + * @param source the source specifier 1.1173 + * @param target the target specifier 1.1174 + * @param result fill-in parameter to receive the variant 1.1175 + * specifier. If source is invalid or if target is invalid or if 1.1176 + * index is out of range, result will be empty. 1.1177 + * @return reference to result 1.1178 + * @stable ICU 2.0 1.1179 + */ 1.1180 + static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index, 1.1181 + const UnicodeString& source, 1.1182 + const UnicodeString& target, 1.1183 + UnicodeString& result); 1.1184 + 1.1185 +protected: 1.1186 + 1.1187 +#ifndef U_HIDE_INTERNAL_API 1.1188 + /** 1.1189 + * Non-mutexed internal method 1.1190 + * @internal 1.1191 + */ 1.1192 + static int32_t _countAvailableSources(void); 1.1193 + 1.1194 + /** 1.1195 + * Non-mutexed internal method 1.1196 + * @internal 1.1197 + */ 1.1198 + static UnicodeString& _getAvailableSource(int32_t index, 1.1199 + UnicodeString& result); 1.1200 + 1.1201 + /** 1.1202 + * Non-mutexed internal method 1.1203 + * @internal 1.1204 + */ 1.1205 + static int32_t _countAvailableTargets(const UnicodeString& source); 1.1206 + 1.1207 + /** 1.1208 + * Non-mutexed internal method 1.1209 + * @internal 1.1210 + */ 1.1211 + static UnicodeString& _getAvailableTarget(int32_t index, 1.1212 + const UnicodeString& source, 1.1213 + UnicodeString& result); 1.1214 + 1.1215 + /** 1.1216 + * Non-mutexed internal method 1.1217 + * @internal 1.1218 + */ 1.1219 + static int32_t _countAvailableVariants(const UnicodeString& source, 1.1220 + const UnicodeString& target); 1.1221 + 1.1222 + /** 1.1223
+ * Non-mutexed internal method 1.1224 + * @internal 1.1225 + */ 1.1226 + static UnicodeString& _getAvailableVariant(int32_t index, 1.1227 + const UnicodeString& source, 1.1228 + const UnicodeString& target, 1.1229 + UnicodeString& result); 1.1230 +#endif /* U_HIDE_INTERNAL_API */ 1.1231 + 1.1232 +protected: 1.1233 + 1.1234 + /** 1.1235 + * Set the ID of this transliterator. Subclasses shouldn't do 1.1236 + * this, unless the underlying script behavior has changed. 1.1237 + * @param id the new id to be set. 1.1238 + * @stable ICU 2.4 1.1239 + */ 1.1240 + void setID(const UnicodeString& id); 1.1241 + 1.1242 +public: 1.1243 + 1.1244 + /** 1.1245 + * Return the class ID for this class. This is useful only for 1.1246 + * comparing to a return value from getDynamicClassID(). 1.1247 + * Note that Transliterator is an abstract base class, and therefore 1.1248 + * no fully constructed object will have a dynamic 1.1249 + * UClassID that equals the UClassID returned from 1.1250 + * Transliterator::getStaticClassID(). 1.1251 + * @return The class ID for class Transliterator. 1.1252 + * @stable ICU 2.0 1.1253 + */ 1.1254 + static UClassID U_EXPORT2 getStaticClassID(void); 1.1255 + 1.1256 + /** 1.1257 + * Returns a unique class ID <b>polymorphically</b>. This method 1.1258 + * is to implement a simple version of RTTI, since not all C++ 1.1259 + * compilers support genuine RTTI. Polymorphic operator==() and 1.1260 + * clone() methods call this method. 1.1261 + * 1.1262 + * <p>Concrete subclasses of Transliterator must use the 1.1263 + * UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from 1.1264 + * uobject.h to provide the RTTI functions. 1.1265 + * 1.1266 + * @return The class ID for this object. All objects of a given 1.1267 + * class have the same class ID. Objects of other classes have 1.1268 + * different class IDs.
1.1269 + * @stable ICU 2.0 1.1270 + */ 1.1271 + virtual UClassID getDynamicClassID(void) const = 0; 1.1272 + 1.1273 +private: 1.1274 + static UBool initializeRegistry(UErrorCode &status); 1.1275 + 1.1276 +public: 1.1277 +#ifndef U_HIDE_OBSOLETE_API 1.1278 + /** 1.1279 + * Return the number of IDs currently registered with the system. 1.1280 + * To retrieve the actual IDs, call getAvailableID(i) with 1.1281 + * i from 0 to countAvailableIDs() - 1. 1.1282 + * @return the number of IDs currently registered with the system. 1.1283 + * @obsolete ICU 3.4 use getAvailableIDs() instead 1.1284 + */ 1.1285 + static int32_t U_EXPORT2 countAvailableIDs(void); 1.1286 + 1.1287 + /** 1.1288 + * Return the index-th available ID. index must be between 0 1.1289 + * and countAvailableIDs() - 1, inclusive. If index is out of 1.1290 + * range, the result of getAvailableID(0) is returned. 1.1291 + * @param index the given ID index. 1.1292 + * @return the index-th available ID. index must be between 0 1.1293 + * and countAvailableIDs() - 1, inclusive. If index is out of 1.1294 + * range, the result of getAvailableID(0) is returned. 1.1295 + * @obsolete ICU 3.4 use getAvailableIDs() instead; this function 1.1296 + * is not thread safe, since it returns a reference to storage that 1.1297 + * may become invalid if another thread calls unregister 1.1298 + */ 1.1299 + static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index); 1.1300 +#endif /* U_HIDE_OBSOLETE_API */ 1.1301 +}; 1.1302 + /* Inline accessor: returns the maximumContextLength member. */ 1.1303 +inline int32_t Transliterator::getMaximumContextLength(void) const { 1.1304 + return maximumContextLength; 1.1305 +} 1.1306 + /* Assigns id to the ID member, then NUL-terminates the stored copy. */ 1.1307 +inline void Transliterator::setID(const UnicodeString& id) { 1.1308 + ID = id; 1.1309 + // NUL-terminate the ID string, which is a non-aliased copy: append a NUL, then truncate by one so the NUL is not counted in the length.
1.1310 + ID.append((UChar)0); 1.1311 + ID.truncate(ID.length()-1); 1.1312 +} 1.1313 + 1.1314 +#ifndef U_HIDE_INTERNAL_API 1.1315 +/* Builds a Token whose integer member is set to i. */ inline Transliterator::Token Transliterator::integerToken(int32_t i) { 1.1316 + Token t; 1.1317 + t.integer = i; 1.1318 + return t; 1.1319 +} 1.1320 + /* Builds a Token whose pointer member is set to p. */ 1.1321 +inline Transliterator::Token Transliterator::pointerToken(void* p) { 1.1322 + Token t; 1.1323 + t.pointer = p; 1.1324 + return t; 1.1325 +} 1.1326 +#endif /* U_HIDE_INTERNAL_API */ 1.1327 + 1.1328 +U_NAMESPACE_END 1.1329 + 1.1330 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1.1331 + 1.1332 +#endif