1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/rbt_data.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,152 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 1999-2007, International Business Machines Corporation 1.7 +* and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* Date Name Description 1.10 +* 11/17/99 aliu Creation. 1.11 +********************************************************************** 1.12 +*/ 1.13 +#ifndef RBT_DATA_H 1.14 +#define RBT_DATA_H 1.15 + 1.16 +#include "unicode/utypes.h" 1.17 +#include "unicode/uclean.h" 1.18 + 1.19 +#if !UCONFIG_NO_TRANSLITERATION 1.20 + 1.21 +#include "unicode/uobject.h" 1.22 +#include "rbt_set.h" 1.23 +#include "hash.h" 1.24 + 1.25 +U_NAMESPACE_BEGIN 1.26 + 1.27 +class UnicodeFunctor; 1.28 +class UnicodeMatcher; 1.29 +class UnicodeReplacer; 1.30 + 1.31 +/** 1.32 + * The rule data for a RuleBasedTransliterators. RBT objects hold 1.33 + * a const pointer to a TRD object that they do not own. TRD objects 1.34 + * are essentially the parsed rules in compact, usable form. The 1.35 + * TRD objects themselves are held for the life of the process in 1.36 + * a static cache owned by Transliterator. 1.37 + * 1.38 + * This class' API is a little asymmetric. There is a method to 1.39 + * define a variable, but no way to define a set. This is because the 1.40 + * sets are defined by the parser in a UVector, and the vector is 1.41 + * copied into a fixed-size array here. Once this is done, no new 1.42 + * sets may be defined. In practice, there is no need to do so, since 1.43 + * generating the data and using it are discrete phases. When there 1.44 + * is a need to access the set data during the parse phase, another 1.45 + * data structure handles this. See the parsing code for more 1.46 + * details. 1.47 + */ 1.48 +class TransliterationRuleData : public UMemory { 1.49 + 1.50 +public: 1.51 + 1.52 + // PUBLIC DATA MEMBERS 1.53 + 1.54 + /** 1.55 + * Rule table. May be empty. 1.56 + */ 1.57 + TransliterationRuleSet ruleSet; 1.58 + 1.59 + /** 1.60 + * Map variable name (String) to variable (UnicodeString). A variable name 1.61 + * corresponds to zero or more characters, stored in a UnicodeString in 1.62 + * this hash. One or more of these chars may also correspond to a 1.63 + * UnicodeMatcher, in which case the character in the UnicodeString in this hash is 1.64 + * a stand-in: it is an index for a secondary lookup in 1.65 + * data.variables. The stand-in also represents the UnicodeMatcher in 1.66 + * the stored rules. 1.67 + */ 1.68 + Hashtable variableNames; 1.69 + 1.70 + /** 1.71 + * Map category variable (UChar) to set (UnicodeFunctor). 1.72 + * Variables that correspond to a set of characters are mapped 1.73 + * from variable name to a stand-in character in data.variableNames. 1.74 + * The stand-in then serves as a key in this hash to lookup the 1.75 + * actual UnicodeFunctor object. In addition, the stand-in is 1.76 + * stored in the rule text to represent the set of characters. 1.77 + * variables[i] represents character (variablesBase + i). 1.78 + */ 1.79 + UnicodeFunctor** variables; 1.80 + 1.81 + /** 1.82 + * Flag that indicates whether the variables are owned (if a single 1.83 + * call to Transliterator::createFromRules() produces a CompoundTransliterator 1.84 + * with more than one RuleBasedTransliterator as children, they all share 1.85 + * the same variables list, so only the first one is considered to own 1.86 + * the variables) 1.87 + */ 1.88 + UBool variablesAreOwned; 1.89 + 1.90 + /** 1.91 + * The character that represents variables[0]. Characters 1.92 + * variablesBase through variablesBase + 1.93 + * variablesLength - 1 represent UnicodeFunctor objects. 1.94 + */ 1.95 + UChar variablesBase; 1.96 + 1.97 + /** 1.98 + * The length of variables. 1.99 + */ 1.100 + int32_t variablesLength; 1.101 + 1.102 +public: 1.103 + 1.104 + /** 1.105 + * Constructor 1.106 + * @param status Output param set to success/failure code on exit. 1.107 + */ 1.108 + TransliterationRuleData(UErrorCode& status); 1.109 + 1.110 + /** 1.111 + * Copy Constructor 1.112 + */ 1.113 + TransliterationRuleData(const TransliterationRuleData&); 1.114 + 1.115 + /** 1.116 + * destructor 1.117 + */ 1.118 + ~TransliterationRuleData(); 1.119 + 1.120 + /** 1.121 + * Given a stand-in character, return the UnicodeFunctor that it 1.122 + * represents, or NULL if it doesn't represent anything. 1.123 + * @param standIn the given stand-in character. 1.124 + * @return the UnicodeFunctor that 'standIn' represents 1.125 + */ 1.126 + UnicodeFunctor* lookup(UChar32 standIn) const; 1.127 + 1.128 + /** 1.129 + * Given a stand-in character, return the UnicodeMatcher that it 1.130 + * represents, or NULL if it doesn't represent anything or if it 1.131 + * represents something that is not a matcher. 1.132 + * @param standIn the given stand-in character. 1.133 + * @return return the UnicodeMatcher that 'standIn' represents 1.134 + */ 1.135 + UnicodeMatcher* lookupMatcher(UChar32 standIn) const; 1.136 + 1.137 + /** 1.138 + * Given a stand-in character, return the UnicodeReplacer that it 1.139 + * represents, or NULL if it doesn't represent anything or if it 1.140 + * represents something that is not a replacer. 1.141 + * @param standIn the given stand-in character. 1.142 + * @return return the UnicodeReplacer that 'standIn' represents 1.143 + */ 1.144 + UnicodeReplacer* lookupReplacer(UChar32 standIn) const; 1.145 + 1.146 + 1.147 +private: 1.148 + TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid copying of this class 1.149 +}; 1.150 + 1.151 +U_NAMESPACE_END 1.152 + 1.153 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1.154 + 1.155 +#endif