1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/transreg.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,462 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (c) 2001-2008, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* Date Name Description 1.10 +* 08/10/2001 aliu Creation. 1.11 +********************************************************************** 1.12 +*/ 1.13 +#ifndef _TRANSREG_H 1.14 +#define _TRANSREG_H 1.15 + 1.16 +#include "unicode/utypes.h" 1.17 + 1.18 +#if !UCONFIG_NO_TRANSLITERATION 1.19 + 1.20 +#include "unicode/uobject.h" 1.21 +#include "unicode/translit.h" 1.22 +#include "hash.h" 1.23 +#include "uvector.h" 1.24 + 1.25 +U_NAMESPACE_BEGIN 1.26 + 1.27 +class TransliteratorEntry; 1.28 +class TransliteratorSpec; 1.29 +class UnicodeString; 1.30 + 1.31 +//------------------------------------------------------------------ 1.32 +// TransliteratorAlias 1.33 +//------------------------------------------------------------------ 1.34 + 1.35 +/** 1.36 + * A TransliteratorAlias object is returned by get() if the given ID 1.37 + * actually translates into something else. The caller then invokes 1.38 + * the create() method on the alias to create the actual 1.39 + * transliterator, and deletes the alias. 1.40 + * 1.41 + * Why all the shenanigans? To prevent circular calls between 1.42 + * the registry code and the transliterator code that deadlocks. 1.43 + */ 1.44 +class TransliteratorAlias : public UMemory { 1.45 + public: 1.46 + /** 1.47 + * Construct a simple alias (type == SIMPLE) 1.48 + * @param aliasID the given id. 1.49 + */ 1.50 + TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter); 1.51 + 1.52 + /** 1.53 + * Construct a compound RBT alias (type == COMPOUND) 1.54 + */ 1.55 + TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, 1.56 + UVector* adoptedTransliterators, 1.57 + const UnicodeSet* compoundFilter); 1.58 + 1.59 + /** 1.60 + * Construct a rules alias (type = RULES) 1.61 + */ 1.62 + TransliteratorAlias(const UnicodeString& theID, 1.63 + const UnicodeString& rules, 1.64 + UTransDirection dir); 1.65 + 1.66 + ~TransliteratorAlias(); 1.67 + 1.68 + /** 1.69 + * The whole point of create() is that the caller must invoke 1.70 + * it when the registry mutex is NOT held, to prevent deadlock. 1.71 + * It may only be called once. 1.72 + * 1.73 + * Note: Only call create() if isRuleBased() returns FALSE. 1.74 + * 1.75 + * This method must be called *outside* of the TransliteratorRegistry 1.76 + * mutex. 1.77 + */ 1.78 + Transliterator* create(UParseError&, UErrorCode&); 1.79 + 1.80 + /** 1.81 + * Return TRUE if this alias is rule-based. If so, the caller 1.82 + * must call parse() on it, then call TransliteratorRegistry::reget(). 1.83 + */ 1.84 + UBool isRuleBased() const; 1.85 + 1.86 + /** 1.87 + * If isRuleBased() returns TRUE, then the caller must call this 1.88 + * method, followed by TransliteratorRegistry::reget(). The latter 1.89 + * method must be called inside the TransliteratorRegistry mutex. 1.90 + * 1.91 + * Note: Only call parse() if isRuleBased() returns TRUE. 1.92 + * 1.93 + * This method must be called *outside* of the TransliteratorRegistry 1.94 + * mutex, because it can instantiate Transliterators embedded in 1.95 + * the rules via the "&Latin-Arabic()" syntax. 1.96 + */ 1.97 + void parse(TransliteratorParser& parser, 1.98 + UParseError& pe, UErrorCode& ec) const; 1.99 + 1.100 + private: 1.101 + // We actually come in three flavors: 1.102 + // 1. Simple alias 1.103 + // Here aliasID is the alias string. Everything else is 1.104 + // null, zero, empty. 1.105 + // 2. CompoundRBT 1.106 + // Here ID is the ID, aliasID is the idBlock, trans is the 1.107 + // contained RBT, and idSplitPoint is the offet in aliasID 1.108 + // where the contained RBT goes. compoundFilter is the 1.109 + // compound filter, and it is _not_ owned. 1.110 + // 3. Rules 1.111 + // Here ID is the ID, aliasID is the rules string. 1.112 + // idSplitPoint is the UTransDirection. 1.113 + UnicodeString ID; 1.114 + UnicodeString aliasesOrRules; 1.115 + UVector* transes; // owned 1.116 + const UnicodeSet* compoundFilter; // alias 1.117 + UTransDirection direction; 1.118 + enum { SIMPLE, COMPOUND, RULES } type; 1.119 + 1.120 + TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class 1.121 + TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class 1.122 +}; 1.123 + 1.124 + 1.125 +/** 1.126 + * A registry of system transliterators. This is the data structure 1.127 + * that implements the mapping between transliterator IDs and the data 1.128 + * or function pointers used to create the corresponding 1.129 + * transliterators. There is one instance of the registry that is 1.130 + * created statically. 1.131 + * 1.132 + * The registry consists of a dynamic component -- a hashtable -- and 1.133 + * a static component -- locale resource bundles. The dynamic store 1.134 + * is semantically overlaid on the static store, so the static mapping 1.135 + * can be dynamically overridden. 1.136 + * 1.137 + * This is an internal class that is only used by Transliterator. 1.138 + * Transliterator maintains one static instance of this class and 1.139 + * delegates all registry-related operations to it. 1.140 + * 1.141 + * @author Alan Liu 1.142 + */ 1.143 +class TransliteratorRegistry : public UMemory { 1.144 + 1.145 + public: 1.146 + 1.147 + /** 1.148 + * Contructor 1.149 + * @param status Output param set to success/failure code. 1.150 + */ 1.151 + TransliteratorRegistry(UErrorCode& status); 1.152 + 1.153 + /** 1.154 + * Nonvirtual destructor -- this class is not subclassable. 1.155 + */ 1.156 + ~TransliteratorRegistry(); 1.157 + 1.158 + //------------------------------------------------------------------ 1.159 + // Basic public API 1.160 + //------------------------------------------------------------------ 1.161 + 1.162 + /** 1.163 + * Given a simple ID (forward direction, no inline filter, not 1.164 + * compound) attempt to instantiate it from the registry. Return 1.165 + * 0 on failure. 1.166 + * 1.167 + * Return a non-NULL aliasReturn value if the ID points to an alias. 1.168 + * We cannot instantiate it ourselves because the alias may contain 1.169 + * filters or compounds, which we do not understand. Caller should 1.170 + * make aliasReturn NULL before calling. 1.171 + * @param ID the given ID 1.172 + * @param aliasReturn output param to receive TransliteratorAlias; 1.173 + * should be NULL on entry 1.174 + * @param parseError Struct to recieve information on position 1.175 + * of error if an error is encountered 1.176 + * @param status Output param set to success/failure code. 1.177 + */ 1.178 + Transliterator* get(const UnicodeString& ID, 1.179 + TransliteratorAlias*& aliasReturn, 1.180 + UErrorCode& status); 1.181 + 1.182 + /** 1.183 + * The caller must call this after calling get(), if [a] calling get() 1.184 + * returns an alias, and [b] the alias is rule based. In that 1.185 + * situation the caller must call alias->parse() to do the parsing 1.186 + * OUTSIDE THE REGISTRY MUTEX, then call this method to retry 1.187 + * instantiating the transliterator. 1.188 + * 1.189 + * Note: Another alias might be returned by this method. 1.190 + * 1.191 + * This method (like all public methods of this class) must be called 1.192 + * from within the TransliteratorRegistry mutex. 1.193 + * 1.194 + * @param aliasReturn output param to receive TransliteratorAlias; 1.195 + * should be NULL on entry 1.196 + */ 1.197 + Transliterator* reget(const UnicodeString& ID, 1.198 + TransliteratorParser& parser, 1.199 + TransliteratorAlias*& aliasReturn, 1.200 + UErrorCode& status); 1.201 + 1.202 + /** 1.203 + * Register a prototype (adopted). This adds an entry to the 1.204 + * dynamic store, or replaces an existing entry. Any entry in the 1.205 + * underlying static locale resource store is masked. 1.206 + */ 1.207 + void put(Transliterator* adoptedProto, 1.208 + UBool visible, 1.209 + UErrorCode& ec); 1.210 + 1.211 + /** 1.212 + * Register an ID and a factory function pointer. This adds an 1.213 + * entry to the dynamic store, or replaces an existing entry. Any 1.214 + * entry in the underlying static locale resource store is masked. 1.215 + */ 1.216 + void put(const UnicodeString& ID, 1.217 + Transliterator::Factory factory, 1.218 + Transliterator::Token context, 1.219 + UBool visible, 1.220 + UErrorCode& ec); 1.221 + 1.222 + /** 1.223 + * Register an ID and a resource name. This adds an entry to the 1.224 + * dynamic store, or replaces an existing entry. Any entry in the 1.225 + * underlying static locale resource store is masked. 1.226 + */ 1.227 + void put(const UnicodeString& ID, 1.228 + const UnicodeString& resourceName, 1.229 + UTransDirection dir, 1.230 + UBool readonlyResourceAlias, 1.231 + UBool visible, 1.232 + UErrorCode& ec); 1.233 + 1.234 + /** 1.235 + * Register an ID and an alias ID. This adds an entry to the 1.236 + * dynamic store, or replaces an existing entry. Any entry in the 1.237 + * underlying static locale resource store is masked. 1.238 + */ 1.239 + void put(const UnicodeString& ID, 1.240 + const UnicodeString& alias, 1.241 + UBool readonlyAliasAlias, 1.242 + UBool visible, 1.243 + UErrorCode& ec); 1.244 + 1.245 + /** 1.246 + * Unregister an ID. This removes an entry from the dynamic store 1.247 + * if there is one. The static locale resource store is 1.248 + * unaffected. 1.249 + * @param ID the given ID. 1.250 + */ 1.251 + void remove(const UnicodeString& ID); 1.252 + 1.253 + //------------------------------------------------------------------ 1.254 + // Public ID and spec management 1.255 + //------------------------------------------------------------------ 1.256 + 1.257 + /** 1.258 + * Return a StringEnumeration over the IDs currently registered 1.259 + * with the system. 1.260 + * @internal 1.261 + */ 1.262 + StringEnumeration* getAvailableIDs() const; 1.263 + 1.264 + /** 1.265 + * == OBSOLETE - remove in ICU 3.4 == 1.266 + * Return the number of IDs currently registered with the system. 1.267 + * To retrieve the actual IDs, call getAvailableID(i) with 1.268 + * i from 0 to countAvailableIDs() - 1. 1.269 + * @return the number of IDs currently registered with the system. 1.270 + * @internal 1.271 + */ 1.272 + int32_t countAvailableIDs(void) const; 1.273 + 1.274 + /** 1.275 + * == OBSOLETE - remove in ICU 3.4 == 1.276 + * Return the index-th available ID. index must be between 0 1.277 + * and countAvailableIDs() - 1, inclusive. If index is out of 1.278 + * range, the result of getAvailableID(0) is returned. 1.279 + * @param index the given index. 1.280 + * @return the index-th available ID. index must be between 0 1.281 + * and countAvailableIDs() - 1, inclusive. If index is out of 1.282 + * range, the result of getAvailableID(0) is returned. 1.283 + * @internal 1.284 + */ 1.285 + const UnicodeString& getAvailableID(int32_t index) const; 1.286 + 1.287 + /** 1.288 + * Return the number of registered source specifiers. 1.289 + * @return the number of registered source specifiers. 1.290 + */ 1.291 + int32_t countAvailableSources(void) const; 1.292 + 1.293 + /** 1.294 + * Return a registered source specifier. 1.295 + * @param index which specifier to return, from 0 to n-1, where 1.296 + * n = countAvailableSources() 1.297 + * @param result fill-in paramter to receive the source specifier. 1.298 + * If index is out of range, result will be empty. 1.299 + * @return reference to result 1.300 + */ 1.301 + UnicodeString& getAvailableSource(int32_t index, 1.302 + UnicodeString& result) const; 1.303 + 1.304 + /** 1.305 + * Return the number of registered target specifiers for a given 1.306 + * source specifier. 1.307 + * @param source the given source specifier. 1.308 + * @return the number of registered target specifiers for a given 1.309 + * source specifier. 1.310 + */ 1.311 + int32_t countAvailableTargets(const UnicodeString& source) const; 1.312 + 1.313 + /** 1.314 + * Return a registered target specifier for a given source. 1.315 + * @param index which specifier to return, from 0 to n-1, where 1.316 + * n = countAvailableTargets(source) 1.317 + * @param source the source specifier 1.318 + * @param result fill-in paramter to receive the target specifier. 1.319 + * If source is invalid or if index is out of range, result will 1.320 + * be empty. 1.321 + * @return reference to result 1.322 + */ 1.323 + UnicodeString& getAvailableTarget(int32_t index, 1.324 + const UnicodeString& source, 1.325 + UnicodeString& result) const; 1.326 + 1.327 + /** 1.328 + * Return the number of registered variant specifiers for a given 1.329 + * source-target pair. There is always at least one variant: If 1.330 + * just source-target is registered, then the single variant 1.331 + * NO_VARIANT is returned. If source-target/variant is registered 1.332 + * then that variant is returned. 1.333 + * @param source the source specifiers 1.334 + * @param target the target specifiers 1.335 + * @return the number of registered variant specifiers for a given 1.336 + * source-target pair. 1.337 + */ 1.338 + int32_t countAvailableVariants(const UnicodeString& source, 1.339 + const UnicodeString& target) const; 1.340 + 1.341 + /** 1.342 + * Return a registered variant specifier for a given source-target 1.343 + * pair. If NO_VARIANT is one of the variants, then it will be 1.344 + * at index 0. 1.345 + * @param index which specifier to return, from 0 to n-1, where 1.346 + * n = countAvailableVariants(source, target) 1.347 + * @param source the source specifier 1.348 + * @param target the target specifier 1.349 + * @param result fill-in paramter to receive the variant 1.350 + * specifier. If source is invalid or if target is invalid or if 1.351 + * index is out of range, result will be empty. 1.352 + * @return reference to result 1.353 + */ 1.354 + UnicodeString& getAvailableVariant(int32_t index, 1.355 + const UnicodeString& source, 1.356 + const UnicodeString& target, 1.357 + UnicodeString& result) const; 1.358 + 1.359 + private: 1.360 + 1.361 + //---------------------------------------------------------------- 1.362 + // Private implementation 1.363 + //---------------------------------------------------------------- 1.364 + 1.365 + TransliteratorEntry* find(const UnicodeString& ID); 1.366 + 1.367 + TransliteratorEntry* find(UnicodeString& source, 1.368 + UnicodeString& target, 1.369 + UnicodeString& variant); 1.370 + 1.371 + TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, 1.372 + const TransliteratorSpec& trg, 1.373 + const UnicodeString& variant) const; 1.374 + 1.375 + TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, 1.376 + const TransliteratorSpec& trg, 1.377 + const UnicodeString& variant); 1.378 + 1.379 + static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen, 1.380 + const TransliteratorSpec& specToFind, 1.381 + const UnicodeString& variant, 1.382 + UTransDirection direction); 1.383 + 1.384 + void registerEntry(const UnicodeString& source, 1.385 + const UnicodeString& target, 1.386 + const UnicodeString& variant, 1.387 + TransliteratorEntry* adopted, 1.388 + UBool visible); 1.389 + 1.390 + void registerEntry(const UnicodeString& ID, 1.391 + TransliteratorEntry* adopted, 1.392 + UBool visible); 1.393 + 1.394 + void registerEntry(const UnicodeString& ID, 1.395 + const UnicodeString& source, 1.396 + const UnicodeString& target, 1.397 + const UnicodeString& variant, 1.398 + TransliteratorEntry* adopted, 1.399 + UBool visible); 1.400 + 1.401 + void registerSTV(const UnicodeString& source, 1.402 + const UnicodeString& target, 1.403 + const UnicodeString& variant); 1.404 + 1.405 + void removeSTV(const UnicodeString& source, 1.406 + const UnicodeString& target, 1.407 + const UnicodeString& variant); 1.408 + 1.409 + Transliterator* instantiateEntry(const UnicodeString& ID, 1.410 + TransliteratorEntry *entry, 1.411 + TransliteratorAlias*& aliasReturn, 1.412 + UErrorCode& status); 1.413 + 1.414 + /** 1.415 + * A StringEnumeration over the registered IDs in this object. 1.416 + */ 1.417 + class Enumeration : public StringEnumeration { 1.418 + public: 1.419 + Enumeration(const TransliteratorRegistry& reg); 1.420 + virtual ~Enumeration(); 1.421 + virtual int32_t count(UErrorCode& status) const; 1.422 + virtual const UnicodeString* snext(UErrorCode& status); 1.423 + virtual void reset(UErrorCode& status); 1.424 + static UClassID U_EXPORT2 getStaticClassID(); 1.425 + virtual UClassID getDynamicClassID() const; 1.426 + private: 1.427 + int32_t index; 1.428 + const TransliteratorRegistry& reg; 1.429 + }; 1.430 + friend class Enumeration; 1.431 + 1.432 + private: 1.433 + 1.434 + /** 1.435 + * Dynamic registry mapping full IDs to Entry objects. This 1.436 + * contains both public and internal entities. The visibility is 1.437 + * controlled by whether an entry is listed in availableIDs and 1.438 + * specDAG or not. 1.439 + */ 1.440 + Hashtable registry; 1.441 + 1.442 + /** 1.443 + * DAG of visible IDs by spec. Hashtable: source => (Hashtable: 1.444 + * target => (UVector: variant)) The UVector of variants is never 1.445 + * empty. For a source-target with no variant, the special 1.446 + * variant NO_VARIANT (the empty string) is stored in slot zero of 1.447 + * the UVector. 1.448 + */ 1.449 + Hashtable specDAG; 1.450 + 1.451 + /** 1.452 + * Vector of public full IDs. 1.453 + */ 1.454 + UVector availableIDs; 1.455 + 1.456 + TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class 1.457 + TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class 1.458 +}; 1.459 + 1.460 +U_NAMESPACE_END 1.461 + 1.462 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1.463 + 1.464 +#endif 1.465 +//eof