intl/icu/source/i18n/transreg.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2 **********************************************************************
     3 *   Copyright (c) 2001-2008, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 *   Date        Name        Description
     7 *   08/10/2001  aliu        Creation.
     8 **********************************************************************
     9 */
    10 #ifndef _TRANSREG_H
    11 #define _TRANSREG_H
    13 #include "unicode/utypes.h"
    15 #if !UCONFIG_NO_TRANSLITERATION
    17 #include "unicode/uobject.h"
    18 #include "unicode/translit.h"
    19 #include "hash.h"
    20 #include "uvector.h"
    22 U_NAMESPACE_BEGIN
    24 class TransliteratorEntry;
    25 class TransliteratorSpec;
    26 class UnicodeString;
    28 //------------------------------------------------------------------
    29 // TransliteratorAlias
    30 //------------------------------------------------------------------
    32 /**
    33  * A TransliteratorAlias object is returned by get() if the given ID
    34  * actually translates into something else.  The caller then invokes
    35  * the create() method on the alias to create the actual
    36  * transliterator, and deletes the alias.
    37  *
    38  * Why all the shenanigans?  To prevent circular calls between the
    39  * registry code and the transliterator code, which would deadlock.
    40  */
    41 class TransliteratorAlias : public UMemory {
    42  public:
    43     /**
    44      * Construct a simple alias (type == SIMPLE)
    45      * @param aliasID the given id.
    46      */
    47     TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
    49     /**
    50      * Construct a compound RBT alias (type == COMPOUND)
    51      */
    52     TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
    53                         UVector* adoptedTransliterators,
    54                         const UnicodeSet* compoundFilter);
    56     /**
    57      * Construct a rules alias (type == RULES)
    58      */
    59     TransliteratorAlias(const UnicodeString& theID,
    60                         const UnicodeString& rules,
    61                         UTransDirection dir);
    63     ~TransliteratorAlias();
    65     /**
    66      * The whole point of create() is that the caller must invoke
    67      * it when the registry mutex is NOT held, to prevent deadlock.
    68      * It may only be called once.
    69      *
    70      * Note: Only call create() if isRuleBased() returns FALSE.
    71      *
    72      * This method must be called *outside* of the TransliteratorRegistry
    73      * mutex.
    74      */
    75     Transliterator* create(UParseError&, UErrorCode&);
    77     /**
    78      * Return TRUE if this alias is rule-based.  If so, the caller
    79      * must call parse() on it, then call TransliteratorRegistry::reget().
    80      */
    81     UBool isRuleBased() const;
    83     /**
    84      * If isRuleBased() returns TRUE, then the caller must call this
    85      * method, followed by TransliteratorRegistry::reget().  The latter
    86      * method must be called inside the TransliteratorRegistry mutex.
    87      *
    88      * Note: Only call parse() if isRuleBased() returns TRUE.
    89      *
    90      * This method must be called *outside* of the TransliteratorRegistry
    91      * mutex, because it can instantiate Transliterators embedded in
    92      * the rules via the "&Latin-Arabic()" syntax.
    93      */
    94     void parse(TransliteratorParser& parser,
    95                UParseError& pe, UErrorCode& ec) const;
    97  private:
    98     // We come in three flavors (TODO confirm mapping in transreg.cpp):
    99     // 1. Simple alias
   100     //    Here aliasesOrRules is the alias string.  compoundFilter may
   101     //    be passed in too; everything else is null, zero, empty.
   102     // 2. CompoundRBT
   103     //    Here ID is the ID, aliasesOrRules holds the idBlocks, and
   104     //    transes is the adopted vector of contained transliterators
   105     //    (owned by this object).  compoundFilter is the
   106     //    compound filter, and it is _not_ owned.
   107     // 3. Rules
   108     //    Here ID is the ID, aliasesOrRules is the rules string, and
   109     //    direction is the UTransDirection.
   110     UnicodeString ID;
   111     UnicodeString aliasesOrRules;
   112     UVector* transes; // owned
   113     const UnicodeSet* compoundFilter; // alias
   114     UTransDirection direction;
   115     enum { SIMPLE, COMPOUND, RULES } type;
   117     TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
   118     TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
   119 };
   122 /**
   123  * A registry of system transliterators.  This is the data structure
   124  * that implements the mapping between transliterator IDs and the data
   125  * or function pointers used to create the corresponding
   126  * transliterators.  There is one instance of the registry that is
   127  * created statically.
   128  *
   129  * The registry consists of a dynamic component -- a hashtable -- and
   130  * a static component -- locale resource bundles.  The dynamic store
   131  * is semantically overlaid on the static store, so the static mapping
   132  * can be dynamically overridden.
   133  *
   134  * This is an internal class that is only used by Transliterator.
   135  * Transliterator maintains one static instance of this class and
   136  * delegates all registry-related operations to it.
   137  *
   138  * @author Alan Liu
   139  */
   140 class TransliteratorRegistry : public UMemory {
   142  public:
   144     /**
   145      * Constructor
   146      * @param status Output param set to success/failure code.
   147      */
   148     TransliteratorRegistry(UErrorCode& status);
   150     /**
   151      * Nonvirtual destructor -- this class is not subclassable.
   152      */
   153     ~TransliteratorRegistry();
   155     //------------------------------------------------------------------
   156     // Basic public API
   157     //------------------------------------------------------------------
   159     /**
   160      * Given a simple ID (forward direction, no inline filter, not
   161      * compound) attempt to instantiate it from the registry.  Return
   162      * 0 on failure.
   163      *
   164      * Return a non-NULL aliasReturn value if the ID points to an alias.
   165      * We cannot instantiate it ourselves because the alias may contain
   166      * filters or compounds, which we do not understand.  Caller should
   167      * make aliasReturn NULL before calling.
   168      * @param ID          the given ID
   169      * @param aliasReturn output param to receive TransliteratorAlias;
   170      *                    should be NULL on entry
   171      * @param status      Output param set to success/failure code.
   172      * @return            the instantiated transliterator, or 0 on
   173      *                    failure.
   174      */
   175     Transliterator* get(const UnicodeString& ID,
   176                         TransliteratorAlias*& aliasReturn,
   177                         UErrorCode& status);
   179     /**
   180      * The caller must call this after calling get(), if [a] calling get()
   181      * returns an alias, and [b] the alias is rule based.  In that
   182      * situation the caller must call alias->parse() to do the parsing
   183      * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
   184      * instantiating the transliterator.
   185      *
   186      * Note: Another alias might be returned by this method.
   187      *
   188      * This method (like all public methods of this class) must be called
   189      * from within the TransliteratorRegistry mutex.
   190      *
   191      * @param aliasReturn output param to receive TransliteratorAlias;
   192      *                    should be NULL on entry
   193      */
   194     Transliterator* reget(const UnicodeString& ID,
   195                           TransliteratorParser& parser,
   196                           TransliteratorAlias*& aliasReturn,
   197                           UErrorCode& status);
   199     /**
   200      * Register a prototype (adopted).  This adds an entry to the
   201      * dynamic store, or replaces an existing entry.  Any entry in the
   202      * underlying static locale resource store is masked.
   203      */
   204     void put(Transliterator* adoptedProto,
   205              UBool visible,
   206              UErrorCode& ec);
   208     /**
   209      * Register an ID and a factory function pointer.  This adds an
   210      * entry to the dynamic store, or replaces an existing entry.  Any
   211      * entry in the underlying static locale resource store is masked.
   212      */
   213     void put(const UnicodeString& ID,
   214              Transliterator::Factory factory,
   215              Transliterator::Token context,
   216              UBool visible,
   217              UErrorCode& ec);
   219     /**
   220      * Register an ID and a resource name.  This adds an entry to the
   221      * dynamic store, or replaces an existing entry.  Any entry in the
   222      * underlying static locale resource store is masked.
   223      */
   224     void put(const UnicodeString& ID,
   225              const UnicodeString& resourceName,
   226              UTransDirection dir,
   227              UBool readonlyResourceAlias,
   228              UBool visible,
   229              UErrorCode& ec);
   231     /**
   232      * Register an ID and an alias ID.  This adds an entry to the
   233      * dynamic store, or replaces an existing entry.  Any entry in the
   234      * underlying static locale resource store is masked.
   235      */
   236     void put(const UnicodeString& ID,
   237              const UnicodeString& alias,
   238              UBool readonlyAliasAlias,
   239              UBool visible,
   240              UErrorCode& ec);
   242     /**
   243      * Unregister an ID.  This removes an entry from the dynamic store
   244      * if there is one.  The static locale resource store is
   245      * unaffected.
   246      * @param ID    the given ID.
   247      */
   248     void remove(const UnicodeString& ID);
   250     //------------------------------------------------------------------
   251     // Public ID and spec management
   252     //------------------------------------------------------------------
   254     /**
   255      * Return a StringEnumeration over the IDs currently registered
   256      * with the system.
   257      * @internal
   258      */
   259     StringEnumeration* getAvailableIDs() const;
   261     /**
   262      * == OBSOLETE - remove in ICU 3.4 ==
   263      * Return the number of IDs currently registered with the system.
   264      * To retrieve the actual IDs, call getAvailableID(i) with
   265      * i from 0 to countAvailableIDs() - 1.
   266      * @return the number of IDs currently registered with the system.
   267      * @internal
   268      */
   269     int32_t countAvailableIDs(void) const;
   271     /**
   272      * == OBSOLETE - remove in ICU 3.4 ==
   273      * Return the index-th available ID.  index must be between 0
   274      * and countAvailableIDs() - 1, inclusive.  If index is out of
   275      * range, the result of getAvailableID(0) is returned.
   276      * @param index the given index.
   277      * @return the index-th available ID.  index must be between 0
   278      *         and countAvailableIDs() - 1, inclusive.  If index is out of
   279      *         range, the result of getAvailableID(0) is returned.
   280      * @internal
   281      */
   282     const UnicodeString& getAvailableID(int32_t index) const;
   284     /**
   285      * Return the number of registered source specifiers.
   286      * @return the number of registered source specifiers.
   287      */
   288     int32_t countAvailableSources(void) const;
   290     /**
   291      * Return a registered source specifier.
   292      * @param index which specifier to return, from 0 to n-1, where
   293      * n = countAvailableSources()
   294      * @param result fill-in parameter to receive the source specifier.
   295      * If index is out of range, result will be empty.
   296      * @return reference to result
   297      */
   298     UnicodeString& getAvailableSource(int32_t index,
   299                                       UnicodeString& result) const;
   301     /**
   302      * Return the number of registered target specifiers for a given
   303      * source specifier.
   304      * @param source the given source specifier.
   305      * @return the number of registered target specifiers for a given
   306      *         source specifier.
   307      */
   308     int32_t countAvailableTargets(const UnicodeString& source) const;
   310     /**
   311      * Return a registered target specifier for a given source.
   312      * @param index which specifier to return, from 0 to n-1, where
   313      * n = countAvailableTargets(source)
   314      * @param source the source specifier
   315      * @param result fill-in parameter to receive the target specifier.
   316      * If source is invalid or if index is out of range, result will
   317      * be empty.
   318      * @return reference to result
   319      */
   320     UnicodeString& getAvailableTarget(int32_t index,
   321                                       const UnicodeString& source,
   322                                       UnicodeString& result) const;
   324     /**
   325      * Return the number of registered variant specifiers for a given
   326      * source-target pair.  There is always at least one variant: If
   327      * just source-target is registered, then the single variant
   328      * NO_VARIANT is returned.  If source-target/variant is registered
   329      * then that variant is returned.
   330      * @param source the source specifier
   331      * @param target the target specifier
   332      * @return the number of registered variant specifiers for a given
   333      *         source-target pair.
   334      */
   335     int32_t countAvailableVariants(const UnicodeString& source,
   336                                    const UnicodeString& target) const;
   338     /**
   339      * Return a registered variant specifier for a given source-target
   340      * pair.  If NO_VARIANT is one of the variants, then it will be
   341      * at index 0.
   342      * @param index which specifier to return, from 0 to n-1, where
   343      * n = countAvailableVariants(source, target)
   344      * @param source the source specifier
   345      * @param target the target specifier
   346      * @param result fill-in parameter to receive the variant
   347      * specifier.  If source is invalid or if target is invalid or if
   348      * index is out of range, result will be empty.
   349      * @return reference to result
   350      */
   351     UnicodeString& getAvailableVariant(int32_t index,
   352                                        const UnicodeString& source,
   353                                        const UnicodeString& target,
   354                                        UnicodeString& result) const;
   356  private:
   358     //----------------------------------------------------------------
   359     // Private implementation
   360     //----------------------------------------------------------------
   362     TransliteratorEntry* find(const UnicodeString& ID);
   364     TransliteratorEntry* find(UnicodeString& source,
   365                 UnicodeString& target,
   366                 UnicodeString& variant);
   368     TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
   369                               const TransliteratorSpec& trg,
   370                               const UnicodeString& variant) const;
   372     TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
   373                              const TransliteratorSpec& trg,
   374                              const UnicodeString& variant);
   376     static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
   377                                const TransliteratorSpec& specToFind,
   378                                const UnicodeString& variant,
   379                                UTransDirection direction);
   381     void registerEntry(const UnicodeString& source,
   382                        const UnicodeString& target,
   383                        const UnicodeString& variant,
   384                        TransliteratorEntry* adopted,
   385                        UBool visible);
   387     void registerEntry(const UnicodeString& ID,
   388                        TransliteratorEntry* adopted,
   389                        UBool visible);
   391     void registerEntry(const UnicodeString& ID,
   392                        const UnicodeString& source,
   393                        const UnicodeString& target,
   394                        const UnicodeString& variant,
   395                        TransliteratorEntry* adopted,
   396                        UBool visible);
   398     void registerSTV(const UnicodeString& source,
   399                      const UnicodeString& target,
   400                      const UnicodeString& variant);
   402     void removeSTV(const UnicodeString& source,
   403                    const UnicodeString& target,
   404                    const UnicodeString& variant);
   406     Transliterator* instantiateEntry(const UnicodeString& ID,
   407                                      TransliteratorEntry *entry,
   408                                      TransliteratorAlias*& aliasReturn,
   409                                      UErrorCode& status);
   411     /**
   412      * A StringEnumeration over the registered IDs in this object.
   413      */
   414     class Enumeration : public StringEnumeration {
   415     public:
   416         Enumeration(const TransliteratorRegistry& reg);
   417         virtual ~Enumeration();
   418         virtual int32_t count(UErrorCode& status) const;
   419         virtual const UnicodeString* snext(UErrorCode& status);
   420         virtual void reset(UErrorCode& status);
   421         static UClassID U_EXPORT2 getStaticClassID();
   422         virtual UClassID getDynamicClassID() const;
   423     private:
   424         int32_t index;
   425         const TransliteratorRegistry& reg;
   426     };
   427     friend class Enumeration;
   429  private:
   431     /**
   432      * Dynamic registry mapping full IDs to Entry objects.  This
   433      * contains both public and internal entities.  The visibility is
   434      * controlled by whether an entry is listed in availableIDs and
   435      * specDAG or not.
   436      */
   437     Hashtable registry;
   439     /**
   440      * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
   441      * target => (UVector: variant)) The UVector of variants is never
   442      * empty.  For a source-target with no variant, the special
   443      * variant NO_VARIANT (the empty string) is stored in slot zero of
   444      * the UVector.
   445      */
   446     Hashtable specDAG;
   448     /**
   449      * Vector of public full IDs.
   450      */
   451     UVector availableIDs;
   453     TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
   454     TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
   455 };
   457 U_NAMESPACE_END
   459 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
   461 #endif
   462 //eof

mercurial