intl/icu/source/i18n/transreg.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (c) 2001-2008, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * Date Name Description
michael@0 7 * 08/10/2001 aliu Creation.
michael@0 8 **********************************************************************
michael@0 9 */
michael@0 10 #ifndef _TRANSREG_H
michael@0 11 #define _TRANSREG_H
michael@0 12
michael@0 13 #include "unicode/utypes.h"
michael@0 14
michael@0 15 #if !UCONFIG_NO_TRANSLITERATION
michael@0 16
michael@0 17 #include "unicode/uobject.h"
michael@0 18 #include "unicode/translit.h"
michael@0 19 #include "hash.h"
michael@0 20 #include "uvector.h"
michael@0 21
michael@0 22 U_NAMESPACE_BEGIN
michael@0 23
michael@0 24 class TransliteratorEntry;
michael@0 25 class TransliteratorSpec;
michael@0 26 class UnicodeString;
michael@0 27
michael@0 28 //------------------------------------------------------------------
michael@0 29 // TransliteratorAlias
michael@0 30 //------------------------------------------------------------------
michael@0 31
michael@0 32 /**
michael@0 33 * A TransliteratorAlias object is returned by get() if the given ID
michael@0 34 * actually translates into something else. The caller then invokes
michael@0 35 * the create() method on the alias to create the actual
michael@0 36 * transliterator, and deletes the alias.
michael@0 37 *
michael@0 38 * Why all the shenanigans? To prevent circular calls between
michael@0 39 * the registry code and the transliterator code that deadlocks.
michael@0 40 */
michael@0 41 class TransliteratorAlias : public UMemory {
michael@0 42 public:
michael@0 43 /**
michael@0 44 * Construct a simple alias (type == SIMPLE)
michael@0 45 * @param aliasID the given id.
michael@0 46 */
michael@0 47 TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
michael@0 48
michael@0 49 /**
michael@0 50 * Construct a compound RBT alias (type == COMPOUND)
michael@0 51 */
michael@0 52 TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
michael@0 53 UVector* adoptedTransliterators,
michael@0 54 const UnicodeSet* compoundFilter);
michael@0 55
michael@0 56 /**
michael@0 57 * Construct a rules alias (type == RULES)
michael@0 58 */
michael@0 59 TransliteratorAlias(const UnicodeString& theID,
michael@0 60 const UnicodeString& rules,
michael@0 61 UTransDirection dir);
michael@0 62
michael@0 63 ~TransliteratorAlias();
michael@0 64
michael@0 65 /**
michael@0 66 * The whole point of create() is that the caller must invoke
michael@0 67 * it when the registry mutex is NOT held, to prevent deadlock.
michael@0 68 * It may only be called once.
michael@0 69 *
michael@0 70 * Note: Only call create() if isRuleBased() returns FALSE.
michael@0 71 *
michael@0 72 * This method must be called *outside* of the TransliteratorRegistry
michael@0 73 * mutex.
michael@0 74 */
michael@0 75 Transliterator* create(UParseError&, UErrorCode&);
michael@0 76
michael@0 77 /**
michael@0 78 * Return TRUE if this alias is rule-based. If so, the caller
michael@0 79 * must call parse() on it, then call TransliteratorRegistry::reget().
michael@0 80 */
michael@0 81 UBool isRuleBased() const;
michael@0 82
michael@0 83 /**
michael@0 84 * If isRuleBased() returns TRUE, then the caller must call this
michael@0 85 * method, followed by TransliteratorRegistry::reget(). The latter
michael@0 86 * method must be called inside the TransliteratorRegistry mutex.
michael@0 87 *
michael@0 88 * Note: Only call parse() if isRuleBased() returns TRUE.
michael@0 89 *
michael@0 90 * This method must be called *outside* of the TransliteratorRegistry
michael@0 91 * mutex, because it can instantiate Transliterators embedded in
michael@0 92 * the rules via the "&Latin-Arabic()" syntax.
michael@0 93 */
michael@0 94 void parse(TransliteratorParser& parser,
michael@0 95 UParseError& pe, UErrorCode& ec) const;
michael@0 96
michael@0 97 private:
michael@0 98 // We actually come in three flavors (see the 'type' field below):
michael@0 99 // 1. Simple alias (type == SIMPLE)
michael@0 100 // Here aliasesOrRules is presumably the alias string; whether the
michael@0 101 // other fields are all null/zero/empty needs confirming in the .cpp.
michael@0 102 // 2. CompoundRBT (type == COMPOUND)
michael@0 103 // Here ID is the ID, aliasesOrRules is presumably the idBlocks
michael@0 104 // string, and transes presumably holds the adopted (owned)
michael@0 105 // transliterators. compoundFilter is the
michael@0 106 // compound filter, and it is _not_ owned.
michael@0 107 // 3. Rules (type == RULES)
michael@0 108 // Here ID is the ID, aliasesOrRules is presumably the rules string.
michael@0 109 // direction is the UTransDirection.
michael@0 110 UnicodeString ID;
michael@0 111 UnicodeString aliasesOrRules;
michael@0 112 UVector* transes; // owned
michael@0 113 const UnicodeSet* compoundFilter; // alias
michael@0 114 UTransDirection direction;
michael@0 115 enum { SIMPLE, COMPOUND, RULES } type;
michael@0 116
michael@0 117 TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
michael@0 118 TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
michael@0 119 };
michael@0 120
michael@0 121
michael@0 122 /**
michael@0 123 * A registry of system transliterators. This is the data structure
michael@0 124 * that implements the mapping between transliterator IDs and the data
michael@0 125 * or function pointers used to create the corresponding
michael@0 126 * transliterators. There is one instance of the registry that is
michael@0 127 * created statically.
michael@0 128 *
michael@0 129 * The registry consists of a dynamic component -- a hashtable -- and
michael@0 130 * a static component -- locale resource bundles. The dynamic store
michael@0 131 * is semantically overlaid on the static store, so the static mapping
michael@0 132 * can be dynamically overridden.
michael@0 133 *
michael@0 134 * This is an internal class that is only used by Transliterator.
michael@0 135 * Transliterator maintains one static instance of this class and
michael@0 136 * delegates all registry-related operations to it.
michael@0 137 *
michael@0 138 * @author Alan Liu
michael@0 139 */
michael@0 140 class TransliteratorRegistry : public UMemory {
michael@0 141
michael@0 142 public:
michael@0 143
michael@0 144 /**
michael@0 145 * Constructor
michael@0 146 * @param status Output param set to success/failure code.
michael@0 147 */
michael@0 148 TransliteratorRegistry(UErrorCode& status);
michael@0 149
michael@0 150 /**
michael@0 151 * Nonvirtual destructor -- this class is not subclassable.
michael@0 152 */
michael@0 153 ~TransliteratorRegistry();
michael@0 154
michael@0 155 //------------------------------------------------------------------
michael@0 156 // Basic public API
michael@0 157 //------------------------------------------------------------------
michael@0 158
michael@0 159 /**
michael@0 160 * Given a simple ID (forward direction, no inline filter, not
michael@0 161 * compound) attempt to instantiate it from the registry. Return
michael@0 162 * 0 on failure.
michael@0 163 *
michael@0 164 * Return a non-NULL aliasReturn value if the ID points to an alias.
michael@0 165 * We cannot instantiate it ourselves because the alias may contain
michael@0 166 * filters or compounds, which we do not understand. Caller should
michael@0 167 * make aliasReturn NULL before calling.
michael@0 168 * @param ID the given ID
michael@0 169 * @param aliasReturn output param to receive TransliteratorAlias;
michael@0 170 * should be NULL on entry
michael@0 171 * @param status Output param set to success/failure code.
michael@0 172 * @return the instantiated transliterator, or 0 on failure; when
michael@0 173 * the ID points to an alias, see aliasReturn instead.
michael@0 174 */
michael@0 175 Transliterator* get(const UnicodeString& ID,
michael@0 176 TransliteratorAlias*& aliasReturn,
michael@0 177 UErrorCode& status);
michael@0 178
michael@0 179 /**
michael@0 180 * The caller must call this after calling get(), if [a] calling get()
michael@0 181 * returns an alias, and [b] the alias is rule based. In that
michael@0 182 * situation the caller must call alias->parse() to do the parsing
michael@0 183 * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
michael@0 184 * instantiating the transliterator.
michael@0 185 *
michael@0 186 * Note: Another alias might be returned by this method.
michael@0 187 *
michael@0 188 * This method (like all public methods of this class) must be called
michael@0 189 * from within the TransliteratorRegistry mutex.
michael@0 190 *
michael@0 191 * @param aliasReturn output param to receive TransliteratorAlias;
michael@0 192 * should be NULL on entry
michael@0 193 */
michael@0 194 Transliterator* reget(const UnicodeString& ID,
michael@0 195 TransliteratorParser& parser,
michael@0 196 TransliteratorAlias*& aliasReturn,
michael@0 197 UErrorCode& status);
michael@0 198
michael@0 199 /**
michael@0 200 * Register a prototype (adopted). This adds an entry to the
michael@0 201 * dynamic store, or replaces an existing entry. Any entry in the
michael@0 202 * underlying static locale resource store is masked.
michael@0 203 */
michael@0 204 void put(Transliterator* adoptedProto,
michael@0 205 UBool visible,
michael@0 206 UErrorCode& ec);
michael@0 207
michael@0 208 /**
michael@0 209 * Register an ID and a factory function pointer. This adds an
michael@0 210 * entry to the dynamic store, or replaces an existing entry. Any
michael@0 211 * entry in the underlying static locale resource store is masked.
michael@0 212 */
michael@0 213 void put(const UnicodeString& ID,
michael@0 214 Transliterator::Factory factory,
michael@0 215 Transliterator::Token context,
michael@0 216 UBool visible,
michael@0 217 UErrorCode& ec);
michael@0 218
michael@0 219 /**
michael@0 220 * Register an ID and a resource name. This adds an entry to the
michael@0 221 * dynamic store, or replaces an existing entry. Any entry in the
michael@0 222 * underlying static locale resource store is masked.
michael@0 223 */
michael@0 224 void put(const UnicodeString& ID,
michael@0 225 const UnicodeString& resourceName,
michael@0 226 UTransDirection dir,
michael@0 227 UBool readonlyResourceAlias,
michael@0 228 UBool visible,
michael@0 229 UErrorCode& ec);
michael@0 230
michael@0 231 /**
michael@0 232 * Register an ID and an alias ID. This adds an entry to the
michael@0 233 * dynamic store, or replaces an existing entry. Any entry in the
michael@0 234 * underlying static locale resource store is masked.
michael@0 235 */
michael@0 236 void put(const UnicodeString& ID,
michael@0 237 const UnicodeString& alias,
michael@0 238 UBool readonlyAliasAlias,
michael@0 239 UBool visible,
michael@0 240 UErrorCode& ec);
michael@0 241
michael@0 242 /**
michael@0 243 * Unregister an ID. This removes an entry from the dynamic store
michael@0 244 * if there is one. The static locale resource store is
michael@0 245 * unaffected.
michael@0 246 * @param ID the given ID.
michael@0 247 */
michael@0 248 void remove(const UnicodeString& ID);
michael@0 249
michael@0 250 //------------------------------------------------------------------
michael@0 251 // Public ID and spec management
michael@0 252 //------------------------------------------------------------------
michael@0 253
michael@0 254 /**
michael@0 255 * Return a StringEnumeration over the IDs currently registered
michael@0 256 * with the system.
michael@0 257 * @internal
michael@0 258 */
michael@0 259 StringEnumeration* getAvailableIDs() const;
michael@0 260
michael@0 261 /**
michael@0 262 * == OBSOLETE - remove in ICU 3.4 ==
michael@0 263 * Return the number of IDs currently registered with the system.
michael@0 264 * To retrieve the actual IDs, call getAvailableID(i) with
michael@0 265 * i from 0 to countAvailableIDs() - 1.
michael@0 266 * @return the number of IDs currently registered with the system.
michael@0 267 * @internal
michael@0 268 */
michael@0 269 int32_t countAvailableIDs(void) const;
michael@0 270
michael@0 271 /**
michael@0 272 * == OBSOLETE - remove in ICU 3.4 ==
michael@0 273 * Return the index-th available ID. index must be between 0
michael@0 274 * and countAvailableIDs() - 1, inclusive. If index is out of
michael@0 275 * range, the result of getAvailableID(0) is returned.
michael@0 276 * @param index the given index.
michael@0 277 * @return the index-th available ID. index must be between 0
michael@0 278 * and countAvailableIDs() - 1, inclusive. If index is out of
michael@0 279 * range, the result of getAvailableID(0) is returned.
michael@0 280 * @internal
michael@0 281 */
michael@0 282 const UnicodeString& getAvailableID(int32_t index) const;
michael@0 283
michael@0 284 /**
michael@0 285 * Return the number of registered source specifiers.
michael@0 286 * @return the number of registered source specifiers.
michael@0 287 */
michael@0 288 int32_t countAvailableSources(void) const;
michael@0 289
michael@0 290 /**
michael@0 291 * Return a registered source specifier.
michael@0 292 * @param index which specifier to return, from 0 to n-1, where
michael@0 293 * n = countAvailableSources()
michael@0 294 * @param result fill-in parameter to receive the source specifier.
michael@0 295 * If index is out of range, result will be empty.
michael@0 296 * @return reference to result
michael@0 297 */
michael@0 298 UnicodeString& getAvailableSource(int32_t index,
michael@0 299 UnicodeString& result) const;
michael@0 300
michael@0 301 /**
michael@0 302 * Return the number of registered target specifiers for a given
michael@0 303 * source specifier.
michael@0 304 * @param source the given source specifier.
michael@0 305 * @return the number of registered target specifiers for a given
michael@0 306 * source specifier.
michael@0 307 */
michael@0 308 int32_t countAvailableTargets(const UnicodeString& source) const;
michael@0 309
michael@0 310 /**
michael@0 311 * Return a registered target specifier for a given source.
michael@0 312 * @param index which specifier to return, from 0 to n-1, where
michael@0 313 * n = countAvailableTargets(source)
michael@0 314 * @param source the source specifier
michael@0 315 * @param result fill-in parameter to receive the target specifier.
michael@0 316 * If source is invalid or if index is out of range, result will
michael@0 317 * be empty.
michael@0 318 * @return reference to result
michael@0 319 */
michael@0 320 UnicodeString& getAvailableTarget(int32_t index,
michael@0 321 const UnicodeString& source,
michael@0 322 UnicodeString& result) const;
michael@0 323
michael@0 324 /**
michael@0 325 * Return the number of registered variant specifiers for a given
michael@0 326 * source-target pair. There is always at least one variant: If
michael@0 327 * just source-target is registered, then the single variant
michael@0 328 * NO_VARIANT is returned. If source-target/variant is registered
michael@0 329 * then that variant is returned.
michael@0 330 * @param source the source specifier
michael@0 331 * @param target the target specifier
michael@0 332 * @return the number of registered variant specifiers for a given
michael@0 333 * source-target pair.
michael@0 334 */
michael@0 335 int32_t countAvailableVariants(const UnicodeString& source,
michael@0 336 const UnicodeString& target) const;
michael@0 337
michael@0 338 /**
michael@0 339 * Return a registered variant specifier for a given source-target
michael@0 340 * pair. If NO_VARIANT is one of the variants, then it will be
michael@0 341 * at index 0.
michael@0 342 * @param index which specifier to return, from 0 to n-1, where
michael@0 343 * n = countAvailableVariants(source, target)
michael@0 344 * @param source the source specifier
michael@0 345 * @param target the target specifier
michael@0 346 * @param result fill-in parameter to receive the variant
michael@0 347 * specifier. If source is invalid or if target is invalid or if
michael@0 348 * index is out of range, result will be empty.
michael@0 349 * @return reference to result
michael@0 350 */
michael@0 351 UnicodeString& getAvailableVariant(int32_t index,
michael@0 352 const UnicodeString& source,
michael@0 353 const UnicodeString& target,
michael@0 354 UnicodeString& result) const;
michael@0 355
michael@0 356 private:
michael@0 357
michael@0 358 //----------------------------------------------------------------
michael@0 359 // Private implementation
michael@0 360 //----------------------------------------------------------------
michael@0 361
michael@0 362 TransliteratorEntry* find(const UnicodeString& ID);
michael@0 363
michael@0 364 TransliteratorEntry* find(UnicodeString& source,
michael@0 365 UnicodeString& target,
michael@0 366 UnicodeString& variant);
michael@0 367
michael@0 368 TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
michael@0 369 const TransliteratorSpec& trg,
michael@0 370 const UnicodeString& variant) const;
michael@0 371
michael@0 372 TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
michael@0 373 const TransliteratorSpec& trg,
michael@0 374 const UnicodeString& variant);
michael@0 375
michael@0 376 static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
michael@0 377 const TransliteratorSpec& specToFind,
michael@0 378 const UnicodeString& variant,
michael@0 379 UTransDirection direction);
michael@0 380
michael@0 381 void registerEntry(const UnicodeString& source,
michael@0 382 const UnicodeString& target,
michael@0 383 const UnicodeString& variant,
michael@0 384 TransliteratorEntry* adopted,
michael@0 385 UBool visible);
michael@0 386
michael@0 387 void registerEntry(const UnicodeString& ID,
michael@0 388 TransliteratorEntry* adopted,
michael@0 389 UBool visible);
michael@0 390
michael@0 391 void registerEntry(const UnicodeString& ID,
michael@0 392 const UnicodeString& source,
michael@0 393 const UnicodeString& target,
michael@0 394 const UnicodeString& variant,
michael@0 395 TransliteratorEntry* adopted,
michael@0 396 UBool visible);
michael@0 397
michael@0 398 void registerSTV(const UnicodeString& source,
michael@0 399 const UnicodeString& target,
michael@0 400 const UnicodeString& variant);
michael@0 401
michael@0 402 void removeSTV(const UnicodeString& source,
michael@0 403 const UnicodeString& target,
michael@0 404 const UnicodeString& variant);
michael@0 405
michael@0 406 Transliterator* instantiateEntry(const UnicodeString& ID,
michael@0 407 TransliteratorEntry *entry,
michael@0 408 TransliteratorAlias*& aliasReturn,
michael@0 409 UErrorCode& status);
michael@0 410
michael@0 411 /**
michael@0 412 * A StringEnumeration over the registered IDs in this object.
michael@0 413 */
michael@0 414 class Enumeration : public StringEnumeration {
michael@0 415 public:
michael@0 416 Enumeration(const TransliteratorRegistry& reg);
michael@0 417 virtual ~Enumeration();
michael@0 418 virtual int32_t count(UErrorCode& status) const;
michael@0 419 virtual const UnicodeString* snext(UErrorCode& status);
michael@0 420 virtual void reset(UErrorCode& status);
michael@0 421 static UClassID U_EXPORT2 getStaticClassID();
michael@0 422 virtual UClassID getDynamicClassID() const;
michael@0 423 private:
michael@0 424 int32_t index;
michael@0 425 const TransliteratorRegistry& reg;
michael@0 426 };
michael@0 427 friend class Enumeration;
michael@0 428
michael@0 429 private:
michael@0 430
michael@0 431 /**
michael@0 432 * Dynamic registry mapping full IDs to Entry objects. This
michael@0 433 * contains both public and internal entities. The visibility is
michael@0 434 * controlled by whether an entry is listed in availableIDs and
michael@0 435 * specDAG or not.
michael@0 436 */
michael@0 437 Hashtable registry;
michael@0 438
michael@0 439 /**
michael@0 440 * DAG of visible IDs by spec. Hashtable: source => (Hashtable:
michael@0 441 * target => (UVector: variant)) The UVector of variants is never
michael@0 442 * empty. For a source-target with no variant, the special
michael@0 443 * variant NO_VARIANT (the empty string) is stored in slot zero of
michael@0 444 * the UVector.
michael@0 445 */
michael@0 446 Hashtable specDAG;
michael@0 447
michael@0 448 /**
michael@0 449 * Vector of public full IDs.
michael@0 450 */
michael@0 451 UVector availableIDs;
michael@0 452
michael@0 453 TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
michael@0 454 TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
michael@0 455 };
michael@0 456
michael@0 457 U_NAMESPACE_END
michael@0 458
michael@0 459 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
michael@0 460
michael@0 461 #endif
michael@0 462 //eof

mercurial