michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (c) 2001-2008, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * Date Name Description michael@0: * 08/10/2001 aliu Creation. michael@0: ********************************************************************** michael@0: */ michael@0: #ifndef _TRANSREG_H michael@0: #define _TRANSREG_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_TRANSLITERATION michael@0: michael@0: #include "unicode/uobject.h" michael@0: #include "unicode/translit.h" michael@0: #include "hash.h" michael@0: #include "uvector.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class TransliteratorEntry; michael@0: class TransliteratorSpec; michael@0: class UnicodeString; michael@0: michael@0: //------------------------------------------------------------------ michael@0: // TransliteratorAlias michael@0: //------------------------------------------------------------------ michael@0: michael@0: /** michael@0: * A TransliteratorAlias object is returned by get() if the given ID michael@0: * actually translates into something else. The caller then invokes michael@0: * the create() method on the alias to create the actual michael@0: * transliterator, and deletes the alias. michael@0: * michael@0: * Why all the shenanigans? To prevent circular calls between michael@0: * the registry code and the transliterator code that deadlocks. michael@0: */ michael@0: class TransliteratorAlias : public UMemory { michael@0: public: michael@0: /** michael@0: * Construct a simple alias (type == SIMPLE) michael@0: * @param aliasID the given id. michael@0: */ michael@0: TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter); michael@0: michael@0: /** michael@0: * Construct a compound RBT alias (type == COMPOUND) michael@0: */ michael@0: TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, michael@0: UVector* adoptedTransliterators, michael@0: const UnicodeSet* compoundFilter); michael@0: michael@0: /** michael@0: * Construct a rules alias (type = RULES) michael@0: */ michael@0: TransliteratorAlias(const UnicodeString& theID, michael@0: const UnicodeString& rules, michael@0: UTransDirection dir); michael@0: michael@0: ~TransliteratorAlias(); michael@0: michael@0: /** michael@0: * The whole point of create() is that the caller must invoke michael@0: * it when the registry mutex is NOT held, to prevent deadlock. michael@0: * It may only be called once. michael@0: * michael@0: * Note: Only call create() if isRuleBased() returns FALSE. michael@0: * michael@0: * This method must be called *outside* of the TransliteratorRegistry michael@0: * mutex. michael@0: */ michael@0: Transliterator* create(UParseError&, UErrorCode&); michael@0: michael@0: /** michael@0: * Return TRUE if this alias is rule-based. If so, the caller michael@0: * must call parse() on it, then call TransliteratorRegistry::reget(). michael@0: */ michael@0: UBool isRuleBased() const; michael@0: michael@0: /** michael@0: * If isRuleBased() returns TRUE, then the caller must call this michael@0: * method, followed by TransliteratorRegistry::reget(). The latter michael@0: * method must be called inside the TransliteratorRegistry mutex. michael@0: * michael@0: * Note: Only call parse() if isRuleBased() returns TRUE. michael@0: * michael@0: * This method must be called *outside* of the TransliteratorRegistry michael@0: * mutex, because it can instantiate Transliterators embedded in michael@0: * the rules via the "&Latin-Arabic()" syntax. michael@0: */ michael@0: void parse(TransliteratorParser& parser, michael@0: UParseError& pe, UErrorCode& ec) const; michael@0: michael@0: private: michael@0: // We actually come in three flavors: michael@0: // 1. Simple alias michael@0: // Here aliasID is the alias string. Everything else is michael@0: // null, zero, empty. michael@0: // 2. CompoundRBT michael@0: // Here ID is the ID, aliasID is the idBlock, trans is the michael@0: // contained RBT, and idSplitPoint is the offet in aliasID michael@0: // where the contained RBT goes. compoundFilter is the michael@0: // compound filter, and it is _not_ owned. michael@0: // 3. Rules michael@0: // Here ID is the ID, aliasID is the rules string. michael@0: // idSplitPoint is the UTransDirection. michael@0: UnicodeString ID; michael@0: UnicodeString aliasesOrRules; michael@0: UVector* transes; // owned michael@0: const UnicodeSet* compoundFilter; // alias michael@0: UTransDirection direction; michael@0: enum { SIMPLE, COMPOUND, RULES } type; michael@0: michael@0: TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class michael@0: TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class michael@0: }; michael@0: michael@0: michael@0: /** michael@0: * A registry of system transliterators. This is the data structure michael@0: * that implements the mapping between transliterator IDs and the data michael@0: * or function pointers used to create the corresponding michael@0: * transliterators. There is one instance of the registry that is michael@0: * created statically. michael@0: * michael@0: * The registry consists of a dynamic component -- a hashtable -- and michael@0: * a static component -- locale resource bundles. The dynamic store michael@0: * is semantically overlaid on the static store, so the static mapping michael@0: * can be dynamically overridden. michael@0: * michael@0: * This is an internal class that is only used by Transliterator. michael@0: * Transliterator maintains one static instance of this class and michael@0: * delegates all registry-related operations to it. michael@0: * michael@0: * @author Alan Liu michael@0: */ michael@0: class TransliteratorRegistry : public UMemory { michael@0: michael@0: public: michael@0: michael@0: /** michael@0: * Contructor michael@0: * @param status Output param set to success/failure code. michael@0: */ michael@0: TransliteratorRegistry(UErrorCode& status); michael@0: michael@0: /** michael@0: * Nonvirtual destructor -- this class is not subclassable. michael@0: */ michael@0: ~TransliteratorRegistry(); michael@0: michael@0: //------------------------------------------------------------------ michael@0: // Basic public API michael@0: //------------------------------------------------------------------ michael@0: michael@0: /** michael@0: * Given a simple ID (forward direction, no inline filter, not michael@0: * compound) attempt to instantiate it from the registry. Return michael@0: * 0 on failure. michael@0: * michael@0: * Return a non-NULL aliasReturn value if the ID points to an alias. michael@0: * We cannot instantiate it ourselves because the alias may contain michael@0: * filters or compounds, which we do not understand. Caller should michael@0: * make aliasReturn NULL before calling. michael@0: * @param ID the given ID michael@0: * @param aliasReturn output param to receive TransliteratorAlias; michael@0: * should be NULL on entry michael@0: * @param parseError Struct to recieve information on position michael@0: * of error if an error is encountered michael@0: * @param status Output param set to success/failure code. michael@0: */ michael@0: Transliterator* get(const UnicodeString& ID, michael@0: TransliteratorAlias*& aliasReturn, michael@0: UErrorCode& status); michael@0: michael@0: /** michael@0: * The caller must call this after calling get(), if [a] calling get() michael@0: * returns an alias, and [b] the alias is rule based. In that michael@0: * situation the caller must call alias->parse() to do the parsing michael@0: * OUTSIDE THE REGISTRY MUTEX, then call this method to retry michael@0: * instantiating the transliterator. michael@0: * michael@0: * Note: Another alias might be returned by this method. michael@0: * michael@0: * This method (like all public methods of this class) must be called michael@0: * from within the TransliteratorRegistry mutex. michael@0: * michael@0: * @param aliasReturn output param to receive TransliteratorAlias; michael@0: * should be NULL on entry michael@0: */ michael@0: Transliterator* reget(const UnicodeString& ID, michael@0: TransliteratorParser& parser, michael@0: TransliteratorAlias*& aliasReturn, michael@0: UErrorCode& status); michael@0: michael@0: /** michael@0: * Register a prototype (adopted). This adds an entry to the michael@0: * dynamic store, or replaces an existing entry. Any entry in the michael@0: * underlying static locale resource store is masked. michael@0: */ michael@0: void put(Transliterator* adoptedProto, michael@0: UBool visible, michael@0: UErrorCode& ec); michael@0: michael@0: /** michael@0: * Register an ID and a factory function pointer. This adds an michael@0: * entry to the dynamic store, or replaces an existing entry. Any michael@0: * entry in the underlying static locale resource store is masked. michael@0: */ michael@0: void put(const UnicodeString& ID, michael@0: Transliterator::Factory factory, michael@0: Transliterator::Token context, michael@0: UBool visible, michael@0: UErrorCode& ec); michael@0: michael@0: /** michael@0: * Register an ID and a resource name. This adds an entry to the michael@0: * dynamic store, or replaces an existing entry. Any entry in the michael@0: * underlying static locale resource store is masked. michael@0: */ michael@0: void put(const UnicodeString& ID, michael@0: const UnicodeString& resourceName, michael@0: UTransDirection dir, michael@0: UBool readonlyResourceAlias, michael@0: UBool visible, michael@0: UErrorCode& ec); michael@0: michael@0: /** michael@0: * Register an ID and an alias ID. This adds an entry to the michael@0: * dynamic store, or replaces an existing entry. Any entry in the michael@0: * underlying static locale resource store is masked. michael@0: */ michael@0: void put(const UnicodeString& ID, michael@0: const UnicodeString& alias, michael@0: UBool readonlyAliasAlias, michael@0: UBool visible, michael@0: UErrorCode& ec); michael@0: michael@0: /** michael@0: * Unregister an ID. This removes an entry from the dynamic store michael@0: * if there is one. The static locale resource store is michael@0: * unaffected. michael@0: * @param ID the given ID. michael@0: */ michael@0: void remove(const UnicodeString& ID); michael@0: michael@0: //------------------------------------------------------------------ michael@0: // Public ID and spec management michael@0: //------------------------------------------------------------------ michael@0: michael@0: /** michael@0: * Return a StringEnumeration over the IDs currently registered michael@0: * with the system. michael@0: * @internal michael@0: */ michael@0: StringEnumeration* getAvailableIDs() const; michael@0: michael@0: /** michael@0: * == OBSOLETE - remove in ICU 3.4 == michael@0: * Return the number of IDs currently registered with the system. michael@0: * To retrieve the actual IDs, call getAvailableID(i) with michael@0: * i from 0 to countAvailableIDs() - 1. michael@0: * @return the number of IDs currently registered with the system. michael@0: * @internal michael@0: */ michael@0: int32_t countAvailableIDs(void) const; michael@0: michael@0: /** michael@0: * == OBSOLETE - remove in ICU 3.4 == michael@0: * Return the index-th available ID. index must be between 0 michael@0: * and countAvailableIDs() - 1, inclusive. If index is out of michael@0: * range, the result of getAvailableID(0) is returned. michael@0: * @param index the given index. michael@0: * @return the index-th available ID. index must be between 0 michael@0: * and countAvailableIDs() - 1, inclusive. If index is out of michael@0: * range, the result of getAvailableID(0) is returned. michael@0: * @internal michael@0: */ michael@0: const UnicodeString& getAvailableID(int32_t index) const; michael@0: michael@0: /** michael@0: * Return the number of registered source specifiers. michael@0: * @return the number of registered source specifiers. michael@0: */ michael@0: int32_t countAvailableSources(void) const; michael@0: michael@0: /** michael@0: * Return a registered source specifier. michael@0: * @param index which specifier to return, from 0 to n-1, where michael@0: * n = countAvailableSources() michael@0: * @param result fill-in paramter to receive the source specifier. michael@0: * If index is out of range, result will be empty. michael@0: * @return reference to result michael@0: */ michael@0: UnicodeString& getAvailableSource(int32_t index, michael@0: UnicodeString& result) const; michael@0: michael@0: /** michael@0: * Return the number of registered target specifiers for a given michael@0: * source specifier. michael@0: * @param source the given source specifier. michael@0: * @return the number of registered target specifiers for a given michael@0: * source specifier. michael@0: */ michael@0: int32_t countAvailableTargets(const UnicodeString& source) const; michael@0: michael@0: /** michael@0: * Return a registered target specifier for a given source. michael@0: * @param index which specifier to return, from 0 to n-1, where michael@0: * n = countAvailableTargets(source) michael@0: * @param source the source specifier michael@0: * @param result fill-in paramter to receive the target specifier. michael@0: * If source is invalid or if index is out of range, result will michael@0: * be empty. michael@0: * @return reference to result michael@0: */ michael@0: UnicodeString& getAvailableTarget(int32_t index, michael@0: const UnicodeString& source, michael@0: UnicodeString& result) const; michael@0: michael@0: /** michael@0: * Return the number of registered variant specifiers for a given michael@0: * source-target pair. There is always at least one variant: If michael@0: * just source-target is registered, then the single variant michael@0: * NO_VARIANT is returned. If source-target/variant is registered michael@0: * then that variant is returned. michael@0: * @param source the source specifiers michael@0: * @param target the target specifiers michael@0: * @return the number of registered variant specifiers for a given michael@0: * source-target pair. michael@0: */ michael@0: int32_t countAvailableVariants(const UnicodeString& source, michael@0: const UnicodeString& target) const; michael@0: michael@0: /** michael@0: * Return a registered variant specifier for a given source-target michael@0: * pair. If NO_VARIANT is one of the variants, then it will be michael@0: * at index 0. michael@0: * @param index which specifier to return, from 0 to n-1, where michael@0: * n = countAvailableVariants(source, target) michael@0: * @param source the source specifier michael@0: * @param target the target specifier michael@0: * @param result fill-in paramter to receive the variant michael@0: * specifier. If source is invalid or if target is invalid or if michael@0: * index is out of range, result will be empty. michael@0: * @return reference to result michael@0: */ michael@0: UnicodeString& getAvailableVariant(int32_t index, michael@0: const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: UnicodeString& result) const; michael@0: michael@0: private: michael@0: michael@0: //---------------------------------------------------------------- michael@0: // Private implementation michael@0: //---------------------------------------------------------------- michael@0: michael@0: TransliteratorEntry* find(const UnicodeString& ID); michael@0: michael@0: TransliteratorEntry* find(UnicodeString& source, michael@0: UnicodeString& target, michael@0: UnicodeString& variant); michael@0: michael@0: TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, michael@0: const TransliteratorSpec& trg, michael@0: const UnicodeString& variant) const; michael@0: michael@0: TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, michael@0: const TransliteratorSpec& trg, michael@0: const UnicodeString& variant); michael@0: michael@0: static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen, michael@0: const TransliteratorSpec& specToFind, michael@0: const UnicodeString& variant, michael@0: UTransDirection direction); michael@0: michael@0: void registerEntry(const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: const UnicodeString& variant, michael@0: TransliteratorEntry* adopted, michael@0: UBool visible); michael@0: michael@0: void registerEntry(const UnicodeString& ID, michael@0: TransliteratorEntry* adopted, michael@0: UBool visible); michael@0: michael@0: void registerEntry(const UnicodeString& ID, michael@0: const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: const UnicodeString& variant, michael@0: TransliteratorEntry* adopted, michael@0: UBool visible); michael@0: michael@0: void registerSTV(const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: const UnicodeString& variant); michael@0: michael@0: void removeSTV(const UnicodeString& source, michael@0: const UnicodeString& target, michael@0: const UnicodeString& variant); michael@0: michael@0: Transliterator* instantiateEntry(const UnicodeString& ID, michael@0: TransliteratorEntry *entry, michael@0: TransliteratorAlias*& aliasReturn, michael@0: UErrorCode& status); michael@0: michael@0: /** michael@0: * A StringEnumeration over the registered IDs in this object. michael@0: */ michael@0: class Enumeration : public StringEnumeration { michael@0: public: michael@0: Enumeration(const TransliteratorRegistry& reg); michael@0: virtual ~Enumeration(); michael@0: virtual int32_t count(UErrorCode& status) const; michael@0: virtual const UnicodeString* snext(UErrorCode& status); michael@0: virtual void reset(UErrorCode& status); michael@0: static UClassID U_EXPORT2 getStaticClassID(); michael@0: virtual UClassID getDynamicClassID() const; michael@0: private: michael@0: int32_t index; michael@0: const TransliteratorRegistry& reg; michael@0: }; michael@0: friend class Enumeration; michael@0: michael@0: private: michael@0: michael@0: /** michael@0: * Dynamic registry mapping full IDs to Entry objects. This michael@0: * contains both public and internal entities. The visibility is michael@0: * controlled by whether an entry is listed in availableIDs and michael@0: * specDAG or not. michael@0: */ michael@0: Hashtable registry; michael@0: michael@0: /** michael@0: * DAG of visible IDs by spec. Hashtable: source => (Hashtable: michael@0: * target => (UVector: variant)) The UVector of variants is never michael@0: * empty. For a source-target with no variant, the special michael@0: * variant NO_VARIANT (the empty string) is stored in slot zero of michael@0: * the UVector. michael@0: */ michael@0: Hashtable specDAG; michael@0: michael@0: /** michael@0: * Vector of public full IDs. michael@0: */ michael@0: UVector availableIDs; michael@0: michael@0: TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class michael@0: TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class michael@0: }; michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_TRANSLITERATION */ michael@0: michael@0: #endif michael@0: //eof