intl/icu/source/i18n/transreg.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/transreg.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,462 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (c) 2001-2008, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*   Date        Name        Description
    1.10 +*   08/10/2001  aliu        Creation.
    1.11 +**********************************************************************
    1.12 +*/
    1.13 +#ifndef _TRANSREG_H
    1.14 +#define _TRANSREG_H
    1.15 +
    1.16 +#include "unicode/utypes.h"
    1.17 +
    1.18 +#if !UCONFIG_NO_TRANSLITERATION
    1.19 +
    1.20 +#include "unicode/uobject.h"
    1.21 +#include "unicode/translit.h"
    1.22 +#include "hash.h"
    1.23 +#include "uvector.h"
    1.24 +
    1.25 +U_NAMESPACE_BEGIN
    1.26 +
    1.27 +class TransliteratorEntry;
    1.28 +class TransliteratorSpec;
    1.29 +class UnicodeString;
    1.30 +
    1.31 +//------------------------------------------------------------------
    1.32 +// TransliteratorAlias
    1.33 +//------------------------------------------------------------------
    1.34 +
    1.35 +/**
    1.36 + * A TransliteratorAlias object is returned by get() if the given ID
    1.37 + * actually translates into something else.  The caller then invokes
    1.38 + * the create() method on the alias to create the actual
    1.39 + * transliterator, and deletes the alias.
    1.40 + *
    1.41 + * Why all the shenanigans?  To prevent circular calls between
    1.42 + * the registry code and the transliterator code that deadlock.
    1.43 + */
    1.44 +class TransliteratorAlias : public UMemory {
    1.45 + public:
    1.46 +    /**
    1.47 +     * Construct a simple alias (type == SIMPLE)
    1.48 +     * @param aliasID the given id.  compoundFilter is aliased, not adopted.
    1.49 +     */
    1.50 +    TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
    1.51 +
    1.52 +    /**
    1.53 +     * Construct a compound RBT alias (type == COMPOUND)
    1.54 +     */
    1.55 +    TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
    1.56 +                        UVector* adoptedTransliterators,
    1.57 +                        const UnicodeSet* compoundFilter);
    1.58 +
    1.59 +    /**
    1.60 +     * Construct a rules alias (type == RULES)
    1.61 +     */
    1.62 +    TransliteratorAlias(const UnicodeString& theID,
    1.63 +                        const UnicodeString& rules,
    1.64 +                        UTransDirection dir);
    1.65 +
    1.66 +    ~TransliteratorAlias();
    1.67 +
    1.68 +    /**
    1.69 +     * The whole point of create() is that the caller must invoke
    1.70 +     * it when the registry mutex is NOT held, to prevent deadlock.
    1.71 +     * It may only be called once.
    1.72 +     *
    1.73 +     * Note: Only call create() if isRuleBased() returns FALSE.
    1.74 +     *
    1.75 +     * This method must be called *outside* of the TransliteratorRegistry
    1.76 +     * mutex.
    1.77 +     */
    1.78 +    Transliterator* create(UParseError&, UErrorCode&);
    1.79 +
    1.80 +    /**
    1.81 +     * Return TRUE if this alias is rule-based.  If so, the caller
    1.82 +     * must call parse() on it, then call TransliteratorRegistry::reget().
    1.83 +     */
    1.84 +    UBool isRuleBased() const;
    1.85 +
    1.86 +    /**
    1.87 +     * If isRuleBased() returns TRUE, then the caller must call this
    1.88 +     * method, followed by TransliteratorRegistry::reget().  The latter
    1.89 +     * method must be called inside the TransliteratorRegistry mutex.
    1.90 +     *
    1.91 +     * Note: Only call parse() if isRuleBased() returns TRUE.
    1.92 +     *
    1.93 +     * This method must be called *outside* of the TransliteratorRegistry
    1.94 +     * mutex, because it can instantiate Transliterators embedded in
    1.95 +     * the rules via the "&Latin-Arabic()" syntax.
    1.96 +     */
    1.97 +    void parse(TransliteratorParser& parser,
    1.98 +               UParseError& pe, UErrorCode& ec) const;
    1.99 +
   1.100 + private:
   1.101 +    // We actually come in three flavors (see the `type` member):
   1.102 +    // 1. Simple alias (SIMPLE)
   1.103 +    //    Here aliasesOrRules presumably holds the alias ID string; a
   1.104 +    //    compoundFilter may also be supplied (it is _not_ owned).
   1.105 +    // 2. CompoundRBT (COMPOUND)
   1.106 +    //    Here ID is the ID, aliasesOrRules presumably holds the ID
   1.107 +    //    blocks, and transes is the owned vector of adopted
   1.108 +    //    transliterators.  compoundFilter is the
   1.109 +    //    compound filter, and it is _not_ owned.
   1.110 +    // 3. Rules (RULES)
   1.111 +    //    Here ID is the ID, aliasesOrRules presumably holds the rules
   1.112 +    //    string, and direction is the UTransDirection.
   1.113 +    UnicodeString ID;
   1.114 +    UnicodeString aliasesOrRules;
   1.115 +    UVector* transes; // owned
   1.116 +    const UnicodeSet* compoundFilter; // alias
   1.117 +    UTransDirection direction;
   1.118 +    enum { SIMPLE, COMPOUND, RULES } type; // which of the three flavors this alias is
   1.119 +
   1.120 +    TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
   1.121 +    TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
   1.122 +};
   1.123 +
   1.124 +
   1.125 +/**
   1.126 + * A registry of system transliterators.  This is the data structure
   1.127 + * that implements the mapping between transliterator IDs and the data
   1.128 + * or function pointers used to create the corresponding
   1.129 + * transliterators.  There is one instance of the registry that is
   1.130 + * created statically.
   1.131 + *
   1.132 + * The registry consists of a dynamic component -- a hashtable -- and
   1.133 + * a static component -- locale resource bundles.  The dynamic store
   1.134 + * is semantically overlaid on the static store, so the static mapping
   1.135 + * can be dynamically overridden.
   1.136 + *
   1.137 + * This is an internal class that is only used by Transliterator.
   1.138 + * Transliterator maintains one static instance of this class and
   1.139 + * delegates all registry-related operations to it.
   1.140 + *
   1.141 + * @author Alan Liu
   1.142 + */
   1.143 +class TransliteratorRegistry : public UMemory {
   1.144 +
   1.145 + public:
   1.146 +
   1.147 +    /**
   1.148 +     * Constructor
   1.149 +     * @param status Output param set to success/failure code.
   1.150 +     */
   1.151 +    TransliteratorRegistry(UErrorCode& status);
   1.152 +
   1.153 +    /**
   1.154 +     * Nonvirtual destructor -- this class is not subclassable.
   1.155 +     */
   1.156 +    ~TransliteratorRegistry();
   1.157 +
   1.158 +    //------------------------------------------------------------------
   1.159 +    // Basic public API
   1.160 +    //------------------------------------------------------------------
   1.161 +
   1.162 +    /**
   1.163 +     * Given a simple ID (forward direction, no inline filter, not
   1.164 +     * compound) attempt to instantiate it from the registry.  Return
   1.165 +     * 0 on failure.
   1.166 +     *
   1.167 +     * Return a non-NULL aliasReturn value if the ID points to an alias.
   1.168 +     * We cannot instantiate it ourselves because the alias may contain
   1.169 +     * filters or compounds, which we do not understand.  Caller should
   1.170 +     * make aliasReturn NULL before calling.
   1.171 +     * @param ID          the given ID
   1.172 +     * @param aliasReturn output param to receive TransliteratorAlias;
   1.173 +     *                    should be NULL on entry
   1.174 +     * @param status      Output param set to success/failure code.
   1.175 +     * @return            the instantiated transliterator, or 0 on
   1.176 +     *                    failure
   1.177 +     */
   1.178 +    Transliterator* get(const UnicodeString& ID,
   1.179 +                        TransliteratorAlias*& aliasReturn,
   1.180 +                        UErrorCode& status);
   1.181 +
   1.182 +    /**
   1.183 +     * The caller must call this after calling get(), if [a] calling get()
   1.184 +     * returns an alias, and [b] the alias is rule based.  In that
   1.185 +     * situation the caller must call alias->parse() to do the parsing
   1.186 +     * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
   1.187 +     * instantiating the transliterator.
   1.188 +     *
   1.189 +     * Note: Another alias might be returned by this method.
   1.190 +     *
   1.191 +     * This method (like all public methods of this class) must be called
   1.192 +     * from within the TransliteratorRegistry mutex.
   1.193 +     *
   1.194 +     * @param aliasReturn output param to receive TransliteratorAlias;
   1.195 +     *                    should be NULL on entry
   1.196 +     */
   1.197 +    Transliterator* reget(const UnicodeString& ID,
   1.198 +                          TransliteratorParser& parser,
   1.199 +                          TransliteratorAlias*& aliasReturn,
   1.200 +                          UErrorCode& status);
   1.201 +
   1.202 +    /**
   1.203 +     * Register a prototype (adopted).  This adds an entry to the
   1.204 +     * dynamic store, or replaces an existing entry.  Any entry in the
   1.205 +     * underlying static locale resource store is masked.
   1.206 +     */
   1.207 +    void put(Transliterator* adoptedProto,
   1.208 +             UBool visible,
   1.209 +             UErrorCode& ec);
   1.210 +
   1.211 +    /**
   1.212 +     * Register an ID and a factory function pointer.  This adds an
   1.213 +     * entry to the dynamic store, or replaces an existing entry.  Any
   1.214 +     * entry in the underlying static locale resource store is masked.
   1.215 +     */
   1.216 +    void put(const UnicodeString& ID,
   1.217 +             Transliterator::Factory factory,
   1.218 +             Transliterator::Token context,
   1.219 +             UBool visible,
   1.220 +             UErrorCode& ec);
   1.221 +
   1.222 +    /**
   1.223 +     * Register an ID and a resource name.  This adds an entry to the
   1.224 +     * dynamic store, or replaces an existing entry.  Any entry in the
   1.225 +     * underlying static locale resource store is masked.
   1.226 +     */
   1.227 +    void put(const UnicodeString& ID,
   1.228 +             const UnicodeString& resourceName,
   1.229 +             UTransDirection dir,
   1.230 +             UBool readonlyResourceAlias,
   1.231 +             UBool visible,
   1.232 +             UErrorCode& ec);
   1.233 +
   1.234 +    /**
   1.235 +     * Register an ID and an alias ID.  This adds an entry to the
   1.236 +     * dynamic store, or replaces an existing entry.  Any entry in the
   1.237 +     * underlying static locale resource store is masked.
   1.238 +     */
   1.239 +    void put(const UnicodeString& ID,
   1.240 +             const UnicodeString& alias,
   1.241 +             UBool readonlyAliasAlias,
   1.242 +             UBool visible,
   1.243 +             UErrorCode& ec);
   1.244 +
   1.245 +    /**
   1.246 +     * Unregister an ID.  This removes an entry from the dynamic store
   1.247 +     * if there is one.  The static locale resource store is
   1.248 +     * unaffected.
   1.249 +     * @param ID    the given ID.
   1.250 +     */
   1.251 +    void remove(const UnicodeString& ID);
   1.252 +
   1.253 +    //------------------------------------------------------------------
   1.254 +    // Public ID and spec management
   1.255 +    //------------------------------------------------------------------
   1.256 +
   1.257 +    /**
   1.258 +     * Return a StringEnumeration over the IDs currently registered
   1.259 +     * with the system.
   1.260 +     * @internal
   1.261 +     */
   1.262 +    StringEnumeration* getAvailableIDs() const;
   1.263 +
   1.264 +    /**
   1.265 +     * == OBSOLETE - remove in ICU 3.4 ==
   1.266 +     * Return the number of IDs currently registered with the system.
   1.267 +     * To retrieve the actual IDs, call getAvailableID(i) with
   1.268 +     * i from 0 to countAvailableIDs() - 1.
   1.269 +     * @return the number of IDs currently registered with the system.
   1.270 +     * @internal
   1.271 +     */
   1.272 +    int32_t countAvailableIDs(void) const;
   1.273 +
   1.274 +    /**
   1.275 +     * == OBSOLETE - remove in ICU 3.4 ==
   1.276 +     * Return the index-th available ID.  index must be between 0
   1.277 +     * and countAvailableIDs() - 1, inclusive.  If index is out of
   1.278 +     * range, the result of getAvailableID(0) is returned.
   1.279 +     * @param index the given index.
   1.280 +     * @return the index-th available ID.  index must be between 0
   1.281 +     *         and countAvailableIDs() - 1, inclusive.  If index is out of
   1.282 +     *         range, the result of getAvailableID(0) is returned.
   1.283 +     * @internal
   1.284 +     */
   1.285 +    const UnicodeString& getAvailableID(int32_t index) const;
   1.286 +
   1.287 +    /**
   1.288 +     * Return the number of registered source specifiers.
   1.289 +     * @return the number of registered source specifiers.
   1.290 +     */
   1.291 +    int32_t countAvailableSources(void) const;
   1.292 +
   1.293 +    /**
   1.294 +     * Return a registered source specifier.
   1.295 +     * @param index which specifier to return, from 0 to n-1, where
   1.296 +     * n = countAvailableSources()
   1.297 +     * @param result fill-in parameter to receive the source specifier.
   1.298 +     * If index is out of range, result will be empty.
   1.299 +     * @return reference to result
   1.300 +     */
   1.301 +    UnicodeString& getAvailableSource(int32_t index,
   1.302 +                                      UnicodeString& result) const;
   1.303 +
   1.304 +    /**
   1.305 +     * Return the number of registered target specifiers for a given
   1.306 +     * source specifier.
   1.307 +     * @param source the given source specifier.
   1.308 +     * @return the number of registered target specifiers for a given
   1.309 +     *         source specifier.
   1.310 +     */
   1.311 +    int32_t countAvailableTargets(const UnicodeString& source) const;
   1.312 +
   1.313 +    /**
   1.314 +     * Return a registered target specifier for a given source.
   1.315 +     * @param index which specifier to return, from 0 to n-1, where
   1.316 +     * n = countAvailableTargets(source)
   1.317 +     * @param source the source specifier
   1.318 +     * @param result fill-in parameter to receive the target specifier.
   1.319 +     * If source is invalid or if index is out of range, result will
   1.320 +     * be empty.
   1.321 +     * @return reference to result
   1.322 +     */
   1.323 +    UnicodeString& getAvailableTarget(int32_t index,
   1.324 +                                      const UnicodeString& source,
   1.325 +                                      UnicodeString& result) const;
   1.326 +
   1.327 +    /**
   1.328 +     * Return the number of registered variant specifiers for a given
   1.329 +     * source-target pair.  There is always at least one variant: If
   1.330 +     * just source-target is registered, then the single variant
   1.331 +     * NO_VARIANT is returned.  If source-target/variant is registered
   1.332 +     * then that variant is returned.
   1.333 +     * @param source the source specifier
   1.334 +     * @param target the target specifier
   1.335 +     * @return the number of registered variant specifiers for a given
   1.336 +     *         source-target pair.
   1.337 +     */
   1.338 +    int32_t countAvailableVariants(const UnicodeString& source,
   1.339 +                                   const UnicodeString& target) const;
   1.340 +
   1.341 +    /**
   1.342 +     * Return a registered variant specifier for a given source-target
   1.343 +     * pair.  If NO_VARIANT is one of the variants, then it will be
   1.344 +     * at index 0.
   1.345 +     * @param index which specifier to return, from 0 to n-1, where
   1.346 +     * n = countAvailableVariants(source, target)
   1.347 +     * @param source the source specifier
   1.348 +     * @param target the target specifier
   1.349 +     * @param result fill-in parameter to receive the variant
   1.350 +     * specifier.  If source is invalid or if target is invalid or if
   1.351 +     * index is out of range, result will be empty.
   1.352 +     * @return reference to result
   1.353 +     */
   1.354 +    UnicodeString& getAvailableVariant(int32_t index,
   1.355 +                                       const UnicodeString& source,
   1.356 +                                       const UnicodeString& target,
   1.357 +                                       UnicodeString& result) const;
   1.358 +
   1.359 + private:
   1.360 +
   1.361 +    //----------------------------------------------------------------
   1.362 +    // Private implementation
   1.363 +    //----------------------------------------------------------------
   1.364 +
   1.365 +    TransliteratorEntry* find(const UnicodeString& ID);
   1.366 +
   1.367 +    TransliteratorEntry* find(UnicodeString& source,
   1.368 +                UnicodeString& target,
   1.369 +                UnicodeString& variant);
   1.370 +
   1.371 +    TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
   1.372 +                              const TransliteratorSpec& trg,
   1.373 +                              const UnicodeString& variant) const;
   1.374 +
   1.375 +    TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
   1.376 +                             const TransliteratorSpec& trg,
   1.377 +                             const UnicodeString& variant);
   1.378 +
   1.379 +    static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
   1.380 +                               const TransliteratorSpec& specToFind,
   1.381 +                               const UnicodeString& variant,
   1.382 +                               UTransDirection direction);
   1.383 +
   1.384 +    void registerEntry(const UnicodeString& source,
   1.385 +                       const UnicodeString& target,
   1.386 +                       const UnicodeString& variant,
   1.387 +                       TransliteratorEntry* adopted,
   1.388 +                       UBool visible);
   1.389 +
   1.390 +    void registerEntry(const UnicodeString& ID,
   1.391 +                       TransliteratorEntry* adopted,
   1.392 +                       UBool visible);
   1.393 +
   1.394 +    void registerEntry(const UnicodeString& ID,
   1.395 +                       const UnicodeString& source,
   1.396 +                       const UnicodeString& target,
   1.397 +                       const UnicodeString& variant,
   1.398 +                       TransliteratorEntry* adopted,
   1.399 +                       UBool visible);
   1.400 +
   1.401 +    void registerSTV(const UnicodeString& source,
   1.402 +                     const UnicodeString& target,
   1.403 +                     const UnicodeString& variant);
   1.404 +
   1.405 +    void removeSTV(const UnicodeString& source,
   1.406 +                   const UnicodeString& target,
   1.407 +                   const UnicodeString& variant);
   1.408 +
   1.409 +    Transliterator* instantiateEntry(const UnicodeString& ID,
   1.410 +                                     TransliteratorEntry *entry,
   1.411 +                                     TransliteratorAlias*& aliasReturn,
   1.412 +                                     UErrorCode& status);
   1.413 +
   1.414 +    /**
   1.415 +     * A StringEnumeration over the registered IDs in this object.
   1.416 +     */
   1.417 +    class Enumeration : public StringEnumeration {
   1.418 +    public:
   1.419 +        Enumeration(const TransliteratorRegistry& reg);
   1.420 +        virtual ~Enumeration();
   1.421 +        virtual int32_t count(UErrorCode& status) const;
   1.422 +        virtual const UnicodeString* snext(UErrorCode& status);
   1.423 +        virtual void reset(UErrorCode& status);
   1.424 +        static UClassID U_EXPORT2 getStaticClassID();
   1.425 +        virtual UClassID getDynamicClassID() const;
   1.426 +    private:
   1.427 +        int32_t index; // NOTE(review): presumably the current enumeration position -- confirm in implementation
   1.428 +        const TransliteratorRegistry& reg; // registry being enumerated; held by reference, not owned
   1.429 +    };
   1.430 +    friend class Enumeration;
   1.431 +
   1.432 + private:
   1.433 +
   1.434 +    /**
   1.435 +     * Dynamic registry mapping full IDs to Entry objects.  This
   1.436 +     * contains both public and internal entities.  The visibility is
   1.437 +     * controlled by whether an entry is listed in availableIDs and
   1.438 +     * specDAG or not.
   1.439 +     */
   1.440 +    Hashtable registry;
   1.441 +
   1.442 +    /**
   1.443 +     * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
   1.444 +     * target => (UVector: variant)) The UVector of variants is never
   1.445 +     * empty.  For a source-target with no variant, the special
   1.446 +     * variant NO_VARIANT (the empty string) is stored in slot zero of
   1.447 +     * the UVector.
   1.448 +     */
   1.449 +    Hashtable specDAG;
   1.450 +
   1.451 +    /**
   1.452 +     * Vector of public full IDs.
   1.453 +     */
   1.454 +    UVector availableIDs;
   1.455 +
   1.456 +    TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
   1.457 +    TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
   1.458 +};
   1.459 +
   1.460 +U_NAMESPACE_END
   1.461 +
   1.462 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */
   1.463 +
   1.464 +#endif
   1.465 +//eof

mercurial