|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (c) 2001-2008, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * Date Name Description |
|
7 * 08/10/2001 aliu Creation. |
|
8 ********************************************************************** |
|
9 */ |
|
10 #ifndef _TRANSREG_H |
|
11 #define _TRANSREG_H |
|
12 |
|
13 #include "unicode/utypes.h" |
|
14 |
|
15 #if !UCONFIG_NO_TRANSLITERATION |
|
16 |
|
17 #include "unicode/uobject.h" |
|
18 #include "unicode/translit.h" |
|
19 #include "hash.h" |
|
20 #include "uvector.h" |
|
21 |
|
22 U_NAMESPACE_BEGIN |
|
23 |
|
// Forward declarations for the types this header refers to by pointer or
// reference.  TransliteratorParser and UnicodeSet are declared here as well
// so that the header does not depend on the headers it includes happening
// to declare them.  UnicodeString is additionally stored by value in
// TransliteratorAlias, so its full definition still has to come from the
// included headers.
class TransliteratorEntry;
class TransliteratorParser;
class TransliteratorSpec;
class UnicodeSet;
class UnicodeString;
|
27 |
|
28 //------------------------------------------------------------------ |
|
29 // TransliteratorAlias |
|
30 //------------------------------------------------------------------ |
|
31 |
|
32 /** |
|
33 * A TransliteratorAlias object is returned by get() if the given ID |
|
34 * actually translates into something else. The caller then invokes |
|
35 * the create() method on the alias to create the actual |
|
36 * transliterator, and deletes the alias. |
|
37 * |
|
38 * Why all the shenanigans? To prevent circular calls between |
|
39 * the registry code and the transliterator code that deadlocks. |
|
40 */ |
|
41 class TransliteratorAlias : public UMemory { |
|
42 public: |
|
43 /** |
|
44 * Construct a simple alias (type == SIMPLE) |
|
45 * @param aliasID the given id. |
|
46 */ |
|
47 TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter); |
|
48 |
|
49 /** |
|
50 * Construct a compound RBT alias (type == COMPOUND) |
|
51 */ |
|
52 TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, |
|
53 UVector* adoptedTransliterators, |
|
54 const UnicodeSet* compoundFilter); |
|
55 |
|
56 /** |
|
57 * Construct a rules alias (type = RULES) |
|
58 */ |
|
59 TransliteratorAlias(const UnicodeString& theID, |
|
60 const UnicodeString& rules, |
|
61 UTransDirection dir); |
|
62 |
|
63 ~TransliteratorAlias(); |
|
64 |
|
65 /** |
|
66 * The whole point of create() is that the caller must invoke |
|
67 * it when the registry mutex is NOT held, to prevent deadlock. |
|
68 * It may only be called once. |
|
69 * |
|
70 * Note: Only call create() if isRuleBased() returns FALSE. |
|
71 * |
|
72 * This method must be called *outside* of the TransliteratorRegistry |
|
73 * mutex. |
|
74 */ |
|
75 Transliterator* create(UParseError&, UErrorCode&); |
|
76 |
|
77 /** |
|
78 * Return TRUE if this alias is rule-based. If so, the caller |
|
79 * must call parse() on it, then call TransliteratorRegistry::reget(). |
|
80 */ |
|
81 UBool isRuleBased() const; |
|
82 |
|
83 /** |
|
84 * If isRuleBased() returns TRUE, then the caller must call this |
|
85 * method, followed by TransliteratorRegistry::reget(). The latter |
|
86 * method must be called inside the TransliteratorRegistry mutex. |
|
87 * |
|
88 * Note: Only call parse() if isRuleBased() returns TRUE. |
|
89 * |
|
90 * This method must be called *outside* of the TransliteratorRegistry |
|
91 * mutex, because it can instantiate Transliterators embedded in |
|
92 * the rules via the "&Latin-Arabic()" syntax. |
|
93 */ |
|
94 void parse(TransliteratorParser& parser, |
|
95 UParseError& pe, UErrorCode& ec) const; |
|
96 |
|
97 private: |
|
98 // We actually come in three flavors: |
|
99 // 1. Simple alias |
|
100 // Here aliasID is the alias string. Everything else is |
|
101 // null, zero, empty. |
|
102 // 2. CompoundRBT |
|
103 // Here ID is the ID, aliasID is the idBlock, trans is the |
|
104 // contained RBT, and idSplitPoint is the offet in aliasID |
|
105 // where the contained RBT goes. compoundFilter is the |
|
106 // compound filter, and it is _not_ owned. |
|
107 // 3. Rules |
|
108 // Here ID is the ID, aliasID is the rules string. |
|
109 // idSplitPoint is the UTransDirection. |
|
110 UnicodeString ID; |
|
111 UnicodeString aliasesOrRules; |
|
112 UVector* transes; // owned |
|
113 const UnicodeSet* compoundFilter; // alias |
|
114 UTransDirection direction; |
|
115 enum { SIMPLE, COMPOUND, RULES } type; |
|
116 |
|
117 TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class |
|
118 TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class |
|
119 }; |
|
120 |
|
121 |
|
122 /** |
|
123 * A registry of system transliterators. This is the data structure |
|
124 * that implements the mapping between transliterator IDs and the data |
|
125 * or function pointers used to create the corresponding |
|
126 * transliterators. There is one instance of the registry that is |
|
127 * created statically. |
|
128 * |
|
129 * The registry consists of a dynamic component -- a hashtable -- and |
|
130 * a static component -- locale resource bundles. The dynamic store |
|
131 * is semantically overlaid on the static store, so the static mapping |
|
132 * can be dynamically overridden. |
|
133 * |
|
134 * This is an internal class that is only used by Transliterator. |
|
135 * Transliterator maintains one static instance of this class and |
|
136 * delegates all registry-related operations to it. |
|
137 * |
|
138 * @author Alan Liu |
|
139 */ |
|
140 class TransliteratorRegistry : public UMemory { |
|
141 |
|
142 public: |
|
143 |
|
144 /** |
|
145 * Contructor |
|
146 * @param status Output param set to success/failure code. |
|
147 */ |
|
148 TransliteratorRegistry(UErrorCode& status); |
|
149 |
|
150 /** |
|
151 * Nonvirtual destructor -- this class is not subclassable. |
|
152 */ |
|
153 ~TransliteratorRegistry(); |
|
154 |
|
155 //------------------------------------------------------------------ |
|
156 // Basic public API |
|
157 //------------------------------------------------------------------ |
|
158 |
|
159 /** |
|
160 * Given a simple ID (forward direction, no inline filter, not |
|
161 * compound) attempt to instantiate it from the registry. Return |
|
162 * 0 on failure. |
|
163 * |
|
164 * Return a non-NULL aliasReturn value if the ID points to an alias. |
|
165 * We cannot instantiate it ourselves because the alias may contain |
|
166 * filters or compounds, which we do not understand. Caller should |
|
167 * make aliasReturn NULL before calling. |
|
168 * @param ID the given ID |
|
169 * @param aliasReturn output param to receive TransliteratorAlias; |
|
170 * should be NULL on entry |
|
171 * @param parseError Struct to recieve information on position |
|
172 * of error if an error is encountered |
|
173 * @param status Output param set to success/failure code. |
|
174 */ |
|
175 Transliterator* get(const UnicodeString& ID, |
|
176 TransliteratorAlias*& aliasReturn, |
|
177 UErrorCode& status); |
|
178 |
|
179 /** |
|
180 * The caller must call this after calling get(), if [a] calling get() |
|
181 * returns an alias, and [b] the alias is rule based. In that |
|
182 * situation the caller must call alias->parse() to do the parsing |
|
183 * OUTSIDE THE REGISTRY MUTEX, then call this method to retry |
|
184 * instantiating the transliterator. |
|
185 * |
|
186 * Note: Another alias might be returned by this method. |
|
187 * |
|
188 * This method (like all public methods of this class) must be called |
|
189 * from within the TransliteratorRegistry mutex. |
|
190 * |
|
191 * @param aliasReturn output param to receive TransliteratorAlias; |
|
192 * should be NULL on entry |
|
193 */ |
|
194 Transliterator* reget(const UnicodeString& ID, |
|
195 TransliteratorParser& parser, |
|
196 TransliteratorAlias*& aliasReturn, |
|
197 UErrorCode& status); |
|
198 |
|
199 /** |
|
200 * Register a prototype (adopted). This adds an entry to the |
|
201 * dynamic store, or replaces an existing entry. Any entry in the |
|
202 * underlying static locale resource store is masked. |
|
203 */ |
|
204 void put(Transliterator* adoptedProto, |
|
205 UBool visible, |
|
206 UErrorCode& ec); |
|
207 |
|
208 /** |
|
209 * Register an ID and a factory function pointer. This adds an |
|
210 * entry to the dynamic store, or replaces an existing entry. Any |
|
211 * entry in the underlying static locale resource store is masked. |
|
212 */ |
|
213 void put(const UnicodeString& ID, |
|
214 Transliterator::Factory factory, |
|
215 Transliterator::Token context, |
|
216 UBool visible, |
|
217 UErrorCode& ec); |
|
218 |
|
219 /** |
|
220 * Register an ID and a resource name. This adds an entry to the |
|
221 * dynamic store, or replaces an existing entry. Any entry in the |
|
222 * underlying static locale resource store is masked. |
|
223 */ |
|
224 void put(const UnicodeString& ID, |
|
225 const UnicodeString& resourceName, |
|
226 UTransDirection dir, |
|
227 UBool readonlyResourceAlias, |
|
228 UBool visible, |
|
229 UErrorCode& ec); |
|
230 |
|
231 /** |
|
232 * Register an ID and an alias ID. This adds an entry to the |
|
233 * dynamic store, or replaces an existing entry. Any entry in the |
|
234 * underlying static locale resource store is masked. |
|
235 */ |
|
236 void put(const UnicodeString& ID, |
|
237 const UnicodeString& alias, |
|
238 UBool readonlyAliasAlias, |
|
239 UBool visible, |
|
240 UErrorCode& ec); |
|
241 |
|
242 /** |
|
243 * Unregister an ID. This removes an entry from the dynamic store |
|
244 * if there is one. The static locale resource store is |
|
245 * unaffected. |
|
246 * @param ID the given ID. |
|
247 */ |
|
248 void remove(const UnicodeString& ID); |
|
249 |
|
250 //------------------------------------------------------------------ |
|
251 // Public ID and spec management |
|
252 //------------------------------------------------------------------ |
|
253 |
|
254 /** |
|
255 * Return a StringEnumeration over the IDs currently registered |
|
256 * with the system. |
|
257 * @internal |
|
258 */ |
|
259 StringEnumeration* getAvailableIDs() const; |
|
260 |
|
261 /** |
|
262 * == OBSOLETE - remove in ICU 3.4 == |
|
263 * Return the number of IDs currently registered with the system. |
|
264 * To retrieve the actual IDs, call getAvailableID(i) with |
|
265 * i from 0 to countAvailableIDs() - 1. |
|
266 * @return the number of IDs currently registered with the system. |
|
267 * @internal |
|
268 */ |
|
269 int32_t countAvailableIDs(void) const; |
|
270 |
|
271 /** |
|
272 * == OBSOLETE - remove in ICU 3.4 == |
|
273 * Return the index-th available ID. index must be between 0 |
|
274 * and countAvailableIDs() - 1, inclusive. If index is out of |
|
275 * range, the result of getAvailableID(0) is returned. |
|
276 * @param index the given index. |
|
277 * @return the index-th available ID. index must be between 0 |
|
278 * and countAvailableIDs() - 1, inclusive. If index is out of |
|
279 * range, the result of getAvailableID(0) is returned. |
|
280 * @internal |
|
281 */ |
|
282 const UnicodeString& getAvailableID(int32_t index) const; |
|
283 |
|
284 /** |
|
285 * Return the number of registered source specifiers. |
|
286 * @return the number of registered source specifiers. |
|
287 */ |
|
288 int32_t countAvailableSources(void) const; |
|
289 |
|
290 /** |
|
291 * Return a registered source specifier. |
|
292 * @param index which specifier to return, from 0 to n-1, where |
|
293 * n = countAvailableSources() |
|
294 * @param result fill-in paramter to receive the source specifier. |
|
295 * If index is out of range, result will be empty. |
|
296 * @return reference to result |
|
297 */ |
|
298 UnicodeString& getAvailableSource(int32_t index, |
|
299 UnicodeString& result) const; |
|
300 |
|
301 /** |
|
302 * Return the number of registered target specifiers for a given |
|
303 * source specifier. |
|
304 * @param source the given source specifier. |
|
305 * @return the number of registered target specifiers for a given |
|
306 * source specifier. |
|
307 */ |
|
308 int32_t countAvailableTargets(const UnicodeString& source) const; |
|
309 |
|
310 /** |
|
311 * Return a registered target specifier for a given source. |
|
312 * @param index which specifier to return, from 0 to n-1, where |
|
313 * n = countAvailableTargets(source) |
|
314 * @param source the source specifier |
|
315 * @param result fill-in paramter to receive the target specifier. |
|
316 * If source is invalid or if index is out of range, result will |
|
317 * be empty. |
|
318 * @return reference to result |
|
319 */ |
|
320 UnicodeString& getAvailableTarget(int32_t index, |
|
321 const UnicodeString& source, |
|
322 UnicodeString& result) const; |
|
323 |
|
324 /** |
|
325 * Return the number of registered variant specifiers for a given |
|
326 * source-target pair. There is always at least one variant: If |
|
327 * just source-target is registered, then the single variant |
|
328 * NO_VARIANT is returned. If source-target/variant is registered |
|
329 * then that variant is returned. |
|
330 * @param source the source specifiers |
|
331 * @param target the target specifiers |
|
332 * @return the number of registered variant specifiers for a given |
|
333 * source-target pair. |
|
334 */ |
|
335 int32_t countAvailableVariants(const UnicodeString& source, |
|
336 const UnicodeString& target) const; |
|
337 |
|
338 /** |
|
339 * Return a registered variant specifier for a given source-target |
|
340 * pair. If NO_VARIANT is one of the variants, then it will be |
|
341 * at index 0. |
|
342 * @param index which specifier to return, from 0 to n-1, where |
|
343 * n = countAvailableVariants(source, target) |
|
344 * @param source the source specifier |
|
345 * @param target the target specifier |
|
346 * @param result fill-in paramter to receive the variant |
|
347 * specifier. If source is invalid or if target is invalid or if |
|
348 * index is out of range, result will be empty. |
|
349 * @return reference to result |
|
350 */ |
|
351 UnicodeString& getAvailableVariant(int32_t index, |
|
352 const UnicodeString& source, |
|
353 const UnicodeString& target, |
|
354 UnicodeString& result) const; |
|
355 |
|
356 private: |
|
357 |
|
358 //---------------------------------------------------------------- |
|
359 // Private implementation |
|
360 //---------------------------------------------------------------- |
|
361 |
|
362 TransliteratorEntry* find(const UnicodeString& ID); |
|
363 |
|
364 TransliteratorEntry* find(UnicodeString& source, |
|
365 UnicodeString& target, |
|
366 UnicodeString& variant); |
|
367 |
|
368 TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, |
|
369 const TransliteratorSpec& trg, |
|
370 const UnicodeString& variant) const; |
|
371 |
|
372 TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, |
|
373 const TransliteratorSpec& trg, |
|
374 const UnicodeString& variant); |
|
375 |
|
376 static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen, |
|
377 const TransliteratorSpec& specToFind, |
|
378 const UnicodeString& variant, |
|
379 UTransDirection direction); |
|
380 |
|
381 void registerEntry(const UnicodeString& source, |
|
382 const UnicodeString& target, |
|
383 const UnicodeString& variant, |
|
384 TransliteratorEntry* adopted, |
|
385 UBool visible); |
|
386 |
|
387 void registerEntry(const UnicodeString& ID, |
|
388 TransliteratorEntry* adopted, |
|
389 UBool visible); |
|
390 |
|
391 void registerEntry(const UnicodeString& ID, |
|
392 const UnicodeString& source, |
|
393 const UnicodeString& target, |
|
394 const UnicodeString& variant, |
|
395 TransliteratorEntry* adopted, |
|
396 UBool visible); |
|
397 |
|
398 void registerSTV(const UnicodeString& source, |
|
399 const UnicodeString& target, |
|
400 const UnicodeString& variant); |
|
401 |
|
402 void removeSTV(const UnicodeString& source, |
|
403 const UnicodeString& target, |
|
404 const UnicodeString& variant); |
|
405 |
|
406 Transliterator* instantiateEntry(const UnicodeString& ID, |
|
407 TransliteratorEntry *entry, |
|
408 TransliteratorAlias*& aliasReturn, |
|
409 UErrorCode& status); |
|
410 |
|
411 /** |
|
412 * A StringEnumeration over the registered IDs in this object. |
|
413 */ |
|
414 class Enumeration : public StringEnumeration { |
|
415 public: |
|
416 Enumeration(const TransliteratorRegistry& reg); |
|
417 virtual ~Enumeration(); |
|
418 virtual int32_t count(UErrorCode& status) const; |
|
419 virtual const UnicodeString* snext(UErrorCode& status); |
|
420 virtual void reset(UErrorCode& status); |
|
421 static UClassID U_EXPORT2 getStaticClassID(); |
|
422 virtual UClassID getDynamicClassID() const; |
|
423 private: |
|
424 int32_t index; |
|
425 const TransliteratorRegistry& reg; |
|
426 }; |
|
427 friend class Enumeration; |
|
428 |
|
429 private: |
|
430 |
|
431 /** |
|
432 * Dynamic registry mapping full IDs to Entry objects. This |
|
433 * contains both public and internal entities. The visibility is |
|
434 * controlled by whether an entry is listed in availableIDs and |
|
435 * specDAG or not. |
|
436 */ |
|
437 Hashtable registry; |
|
438 |
|
439 /** |
|
440 * DAG of visible IDs by spec. Hashtable: source => (Hashtable: |
|
441 * target => (UVector: variant)) The UVector of variants is never |
|
442 * empty. For a source-target with no variant, the special |
|
443 * variant NO_VARIANT (the empty string) is stored in slot zero of |
|
444 * the UVector. |
|
445 */ |
|
446 Hashtable specDAG; |
|
447 |
|
448 /** |
|
449 * Vector of public full IDs. |
|
450 */ |
|
451 UVector availableIDs; |
|
452 |
|
453 TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class |
|
454 TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class |
|
455 }; |
|
456 |
|
457 U_NAMESPACE_END |
|
458 |
|
459 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
|
460 |
|
461 #endif |
|
462 //eof |