intl/icu/source/i18n/rbt_data.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 1999-2007, International Business Machines Corporation
michael@0 4 * and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * Date Name Description
michael@0 7 * 11/17/99 aliu Creation.
michael@0 8 **********************************************************************
michael@0 9 */
michael@0 10 #ifndef RBT_DATA_H
michael@0 11 #define RBT_DATA_H
michael@0 12
michael@0 13 #include "unicode/utypes.h"
michael@0 14 #include "unicode/uclean.h"
michael@0 15
michael@0 16 #if !UCONFIG_NO_TRANSLITERATION
michael@0 17
michael@0 18 #include "unicode/uobject.h"
michael@0 19 #include "rbt_set.h"
michael@0 20 #include "hash.h"
michael@0 21
michael@0 22 U_NAMESPACE_BEGIN
michael@0 23
michael@0 24 class UnicodeFunctor;
michael@0 25 class UnicodeMatcher;
michael@0 26 class UnicodeReplacer;
michael@0 27
michael@0 28 /**
michael@0 29 * The rule data for a RuleBasedTransliterator. RBT objects hold
michael@0 30 * a const pointer to a TRD object that they do not own. TRD objects
michael@0 31 * are essentially the parsed rules in compact, usable form. The
michael@0 32 * TRD objects themselves are held for the life of the process in
michael@0 33 * a static cache owned by Transliterator.
michael@0 34 *
michael@0 35 * This class' API is a little asymmetric. There is a method to
michael@0 36 * define a variable, but no way to define a set. This is because the
michael@0 37 * sets are defined by the parser in a UVector, and the vector is
michael@0 38 * copied into a fixed-size array here. Once this is done, no new
michael@0 39 * sets may be defined. In practice, there is no need to do so, since
michael@0 40 * generating the data and using it are discrete phases. When there
michael@0 41 * is a need to access the set data during the parse phase, another
michael@0 42 * data structure handles this. See the parsing code for more
michael@0 43 * details.
michael@0 44 */
michael@0 45 class TransliterationRuleData : public UMemory {
michael@0 46
michael@0 47 public:
michael@0 48
michael@0 49 // PUBLIC DATA MEMBERS
michael@0 50
michael@0 51 /**
michael@0 52 * Rule table. May be empty.
michael@0 53 */
michael@0 54 TransliterationRuleSet ruleSet;
michael@0 55
michael@0 56 /**
michael@0 57 * Map variable name (String) to variable (UnicodeString). A variable name
michael@0 58 * corresponds to zero or more characters, stored in a UnicodeString in
michael@0 59 * this hash. One or more of these chars may also correspond to a
michael@0 60 * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
michael@0 61 * a stand-in: it is an index for a secondary lookup in
michael@0 62 * data.variables. The stand-in also represents the UnicodeMatcher in
michael@0 63 * the stored rules.
michael@0 64 */
michael@0 65 Hashtable variableNames;
michael@0 66
michael@0 67 /**
michael@0 68 * Map category variable (UChar) to set (UnicodeFunctor).
michael@0 69 * Variables that correspond to a set of characters are mapped
michael@0 70 * from variable name to a stand-in character in data.variableNames.
michael@0 71 * The stand-in then serves as a key in this array to look up the
michael@0 72 * actual UnicodeFunctor object. In addition, the stand-in is
michael@0 73 * stored in the rule text to represent the set of characters.
michael@0 74 * variables[i] represents character (variablesBase + i).
michael@0 75 */
michael@0 76 UnicodeFunctor** variables;
michael@0 77
michael@0 78 /**
michael@0 79 * Flag that indicates whether the variables are owned. If a single
michael@0 80 * call to Transliterator::createFromRules() produces a CompoundTransliterator
michael@0 81 * with more than one RuleBasedTransliterator as children, they all share
michael@0 82 * the same variables list, so only the first one is considered to own
michael@0 83 * the variables.
michael@0 84 */
michael@0 85 UBool variablesAreOwned;
michael@0 86
michael@0 87 /**
michael@0 88 * The character that represents variables[0]. Characters
michael@0 89 * variablesBase through variablesBase +
michael@0 90 * variablesLength - 1 represent UnicodeFunctor objects.
michael@0 91 */
michael@0 92 UChar variablesBase;
michael@0 93
michael@0 94 /**
michael@0 95 * The number of elements in the variables array.
michael@0 96 */
michael@0 97 int32_t variablesLength;
michael@0 98
michael@0 99 public:
michael@0 100
michael@0 101 /**
michael@0 102 * Constructor.
michael@0 103 * @param status Output param set to success/failure code on exit.
michael@0 104 */
michael@0 105 TransliterationRuleData(UErrorCode& status);
michael@0 106
michael@0 107 /**
michael@0 108 * Copy constructor.
michael@0 109 */
michael@0 110 TransliterationRuleData(const TransliterationRuleData&);
michael@0 111
michael@0 112 /**
michael@0 113 * Destructor.
michael@0 114 */
michael@0 115 ~TransliterationRuleData();
michael@0 116
michael@0 117 /**
michael@0 118 * Given a stand-in character, return the UnicodeFunctor that it
michael@0 119 * represents, or NULL if it doesn't represent anything.
michael@0 120 * @param standIn the given stand-in character.
michael@0 121 * @return the UnicodeFunctor that 'standIn' represents, or NULL
michael@0 122 */
michael@0 123 UnicodeFunctor* lookup(UChar32 standIn) const;
michael@0 124
michael@0 125 /**
michael@0 126 * Given a stand-in character, return the UnicodeMatcher that it
michael@0 127 * represents, or NULL if it doesn't represent anything or if it
michael@0 128 * represents something that is not a matcher.
michael@0 129 * @param standIn the given stand-in character.
michael@0 130 * @return the UnicodeMatcher that 'standIn' represents, or NULL
michael@0 131 */
michael@0 132 UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
michael@0 133
michael@0 134 /**
michael@0 135 * Given a stand-in character, return the UnicodeReplacer that it
michael@0 136 * represents, or NULL if it doesn't represent anything or if it
michael@0 137 * represents something that is not a replacer.
michael@0 138 * @param standIn the given stand-in character.
michael@0 139 * @return the UnicodeReplacer that 'standIn' represents, or NULL
michael@0 140 */
michael@0 141 UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
michael@0 142
michael@0 143
michael@0 144 private:
michael@0 145 TransliterationRuleData &operator=(const TransliterationRuleData &other); // declared private to forbid assignment of this class
michael@0 146 };
michael@0 147
michael@0 148 U_NAMESPACE_END
michael@0 149
michael@0 150 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
michael@0 151
michael@0 152 #endif

mercurial