|
/*
**********************************************************************
* Copyright (C) 1999-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/17/99 aliu Creation.
**********************************************************************
*/
|
10 #ifndef RBT_SET_H |
|
11 #define RBT_SET_H |
|
12 |
|
13 #include "unicode/utypes.h" |
|
14 |
|
15 #if !UCONFIG_NO_TRANSLITERATION |
|
16 |
|
17 #include "unicode/uobject.h" |
|
18 #include "unicode/utrans.h" |
|
19 #include "uvector.h" |
|
20 |
|
21 U_NAMESPACE_BEGIN |
|
22 |
|
// Forward declarations. The class declared in this header names these
// types only in pointer and reference positions (UnicodeFilter is not
// named by any declaration visible here), so incomplete types suffice.
class Replaceable;
class TransliterationRule;
class TransliterationRuleData;
class UnicodeFilter;
class UnicodeString;
class UnicodeSet;
|
29 |
|
30 /** |
|
31 * A set of rules for a <code>RuleBasedTransliterator</code>. |
|
32 * @author Alan Liu |
|
33 */ |
|
34 class TransliterationRuleSet : public UMemory { |
|
35 /** |
|
36 * Vector of rules, in the order added. This is used while the |
|
37 * rule set is getting built. After that, freeze() reorders and |
|
38 * indexes the rules into rules[]. Any given rule is stored once |
|
39 * in ruleVector, and one or more times in rules[]. ruleVector |
|
40 * owns and deletes the rules. |
|
41 */ |
|
42 UVector* ruleVector; |
|
43 |
|
44 /** |
|
45 * Sorted and indexed table of rules. This is created by freeze() |
|
46 * from the rules in ruleVector. It contains alias pointers to |
|
47 * the rules in ruleVector. It is zero before freeze() is called |
|
48 * and non-zero thereafter. |
|
49 */ |
|
50 TransliterationRule** rules; |
|
51 |
|
52 /** |
|
53 * Index table. For text having a first character c, compute x = c&0xFF. |
|
54 * Now use rules[index[x]..index[x+1]-1]. This index table is created by |
|
55 * freeze(). Before freeze() is called it contains garbage. |
|
56 */ |
|
57 int32_t index[257]; |
|
58 |
|
59 /** |
|
60 * Length of the longest preceding context |
|
61 */ |
|
62 int32_t maxContextLength; |
|
63 |
|
64 public: |
|
65 |
|
66 /** |
|
67 * Construct a new empty rule set. |
|
68 * @param status Output parameter filled in with success or failure status. |
|
69 */ |
|
70 TransliterationRuleSet(UErrorCode& status); |
|
71 |
|
72 /** |
|
73 * Copy constructor. |
|
74 */ |
|
75 TransliterationRuleSet(const TransliterationRuleSet&); |
|
76 |
|
77 /** |
|
78 * Destructor. |
|
79 */ |
|
80 virtual ~TransliterationRuleSet(); |
|
81 |
|
82 /** |
|
83 * Change the data object that this rule belongs to. Used |
|
84 * internally by the TransliterationRuleData copy constructor. |
|
85 * @param data the new data value to be set. |
|
86 */ |
|
87 void setData(const TransliterationRuleData* data); |
|
88 |
|
89 /** |
|
90 * Return the maximum context length. |
|
91 * @return the length of the longest preceding context. |
|
92 */ |
|
93 virtual int32_t getMaximumContextLength(void) const; |
|
94 |
|
95 /** |
|
96 * Add a rule to this set. Rules are added in order, and order is |
|
97 * significant. The last call to this method must be followed by |
|
98 * a call to <code>freeze()</code> before the rule set is used. |
|
99 * This method must <em>not</em> be called after freeze() has been |
|
100 * called. |
|
101 * |
|
102 * @param adoptedRule the rule to add |
|
103 */ |
|
104 virtual void addRule(TransliterationRule* adoptedRule, |
|
105 UErrorCode& status); |
|
106 |
|
107 /** |
|
108 * Check this for masked rules and index it to optimize performance. |
|
109 * The sequence of operations is: (1) add rules to a set using |
|
110 * <code>addRule()</code>; (2) freeze the set using |
|
111 * <code>freeze()</code>; (3) use the rule set. If |
|
112 * <code>addRule()</code> is called after calling this method, it |
|
113 * invalidates this object, and this method must be called again. |
|
114 * That is, <code>freeze()</code> may be called multiple times, |
|
115 * although for optimal performance it shouldn't be. |
|
116 * @param parseError A pointer to UParseError to receive information about errors |
|
117 * occurred. |
|
118 * @param status Output parameter filled in with success or failure status. |
|
119 */ |
|
120 virtual void freeze(UParseError& parseError, UErrorCode& status); |
|
121 |
|
122 /** |
|
123 * Transliterate the given text with the given UTransPosition |
|
124 * indices. Return TRUE if the transliteration should continue |
|
125 * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). |
|
126 * Note that FALSE is only ever returned if isIncremental is TRUE. |
|
127 * @param text the text to be transliterated |
|
128 * @param index the position indices, which will be updated |
|
129 * @param isIncremental if TRUE, assume new text may be inserted |
|
130 * at index.limit, and return FALSE if thre is a partial match. |
|
131 * @return TRUE unless a U_PARTIAL_MATCH has been obtained, |
|
132 * indicating that transliteration should stop until more text |
|
133 * arrives. |
|
134 */ |
|
135 UBool transliterate(Replaceable& text, |
|
136 UTransPosition& index, |
|
137 UBool isIncremental); |
|
138 |
|
139 /** |
|
140 * Create rule strings that represents this rule set. |
|
141 * @param result string to receive the rule strings. Current |
|
142 * contents will be deleted. |
|
143 * @param escapeUnprintable True, will escape the unprintable characters |
|
144 * @return A reference to 'result'. |
|
145 */ |
|
146 virtual UnicodeString& toRules(UnicodeString& result, |
|
147 UBool escapeUnprintable) const; |
|
148 |
|
149 /** |
|
150 * Return the set of all characters that may be modified |
|
151 * (getTarget=false) or emitted (getTarget=true) by this set. |
|
152 */ |
|
153 UnicodeSet& getSourceTargetSet(UnicodeSet& result, |
|
154 UBool getTarget) const; |
|
155 |
|
156 private: |
|
157 |
|
158 TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid copying of this class |
|
159 }; |
|
160 |
|
161 U_NAMESPACE_END |
|
162 |
|
163 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
|
164 |
|
165 #endif |