Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | // |
michael@0 | 2 | // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class |
michael@0 | 3 | // |
michael@0 | 4 | /* |
michael@0 | 5 | *************************************************************************** |
michael@0 | 6 | * Copyright (C) 2002-2011 International Business Machines Corporation |
michael@0 | 7 | * and others. All rights reserved. |
michael@0 | 8 | *************************************************************************** |
michael@0 | 9 | */ |
michael@0 | 10 | |
michael@0 | 11 | #include "unicode/utypes.h" |
michael@0 | 12 | |
michael@0 | 13 | #if !UCONFIG_NO_BREAK_ITERATION |
michael@0 | 14 | |
michael@0 | 15 | #include "unicode/unistr.h" |
michael@0 | 16 | #include "unicode/uniset.h" |
michael@0 | 17 | #include "unicode/uchar.h" |
michael@0 | 18 | #include "unicode/parsepos.h" |
michael@0 | 19 | |
michael@0 | 20 | #include "umutex.h" |
michael@0 | 21 | |
michael@0 | 22 | #include "rbbirb.h" |
michael@0 | 23 | #include "rbbinode.h" |
michael@0 | 24 | |
michael@0 | 25 | |
michael@0 | 26 | // |
michael@0 | 27 | // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents |
michael@0 | 28 | // when the hash table is deleted. |
michael@0 | 29 | // |
michael@0 | 30 | U_CDECL_BEGIN |
michael@0 | 31 | static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { |
michael@0 | 32 | icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p; |
michael@0 | 33 | delete px; |
michael@0 | 34 | } |
michael@0 | 35 | U_CDECL_END |
michael@0 | 36 | |
michael@0 | 37 | |
michael@0 | 38 | |
michael@0 | 39 | U_NAMESPACE_BEGIN |
michael@0 | 40 | |
michael@0 | 41 | RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status) |
michael@0 | 42 | :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) |
michael@0 | 43 | { |
michael@0 | 44 | fHashTable = NULL; |
michael@0 | 45 | fCachedSetLookup = NULL; |
michael@0 | 46 | |
michael@0 | 47 | fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); |
michael@0 | 48 | // uhash_open checks status |
michael@0 | 49 | if (U_FAILURE(status)) { |
michael@0 | 50 | return; |
michael@0 | 51 | } |
michael@0 | 52 | uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); |
michael@0 | 53 | } |
michael@0 | 54 | |
michael@0 | 55 | |
michael@0 | 56 | |
michael@0 | 57 | RBBISymbolTable::~RBBISymbolTable() |
michael@0 | 58 | { |
michael@0 | 59 | uhash_close(fHashTable); |
michael@0 | 60 | } |
michael@0 | 61 | |
michael@0 | 62 | |
michael@0 | 63 | // |
michael@0 | 64 | // RBBISymbolTable::lookup This function from the abstract symbol table inteface |
michael@0 | 65 | // looks up a variable name and returns a UnicodeString |
michael@0 | 66 | // containing the substitution text. |
michael@0 | 67 | // |
michael@0 | 68 | // The variable name does NOT include the leading $. |
michael@0 | 69 | // |
michael@0 | 70 | const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const |
michael@0 | 71 | { |
michael@0 | 72 | RBBISymbolTableEntry *el; |
michael@0 | 73 | RBBINode *varRefNode; |
michael@0 | 74 | RBBINode *exprNode; |
michael@0 | 75 | RBBINode *usetNode; |
michael@0 | 76 | const UnicodeString *retString; |
michael@0 | 77 | RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const |
michael@0 | 78 | |
michael@0 | 79 | el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); |
michael@0 | 80 | if (el == NULL) { |
michael@0 | 81 | return NULL; |
michael@0 | 82 | } |
michael@0 | 83 | |
michael@0 | 84 | varRefNode = el->val; |
michael@0 | 85 | exprNode = varRefNode->fLeftChild; // Root node of expression for variable |
michael@0 | 86 | if (exprNode->fType == RBBINode::setRef) { |
michael@0 | 87 | // The $variable refers to a single UnicodeSet |
michael@0 | 88 | // return the ffffString, which will subsequently be interpreted as a |
michael@0 | 89 | // stand-in character for the set by RBBISymbolTable::lookupMatcher() |
michael@0 | 90 | usetNode = exprNode->fLeftChild; |
michael@0 | 91 | This->fCachedSetLookup = usetNode->fInputSet; |
michael@0 | 92 | retString = &ffffString; |
michael@0 | 93 | } |
michael@0 | 94 | else |
michael@0 | 95 | { |
michael@0 | 96 | // The variable refers to something other than just a set. |
michael@0 | 97 | // return the original source string for the expression |
michael@0 | 98 | retString = &exprNode->fText; |
michael@0 | 99 | This->fCachedSetLookup = NULL; |
michael@0 | 100 | } |
michael@0 | 101 | return retString; |
michael@0 | 102 | } |
michael@0 | 103 | |
michael@0 | 104 | |
michael@0 | 105 | |
michael@0 | 106 | // |
michael@0 | 107 | // RBBISymbolTable::lookupMatcher This function from the abstract symbol table |
michael@0 | 108 | // interface maps a single stand-in character to a |
michael@0 | 109 | // pointer to a Unicode Set. The Unicode Set code uses this |
michael@0 | 110 | // mechanism to get all references to the same $variable |
michael@0 | 111 | // name to refer to a single common Unicode Set instance. |
michael@0 | 112 | // |
michael@0 | 113 | // This implementation cheats a little, and does not maintain a map of stand-in chars |
michael@0 | 114 | // to sets. Instead, it takes advantage of the fact that the UnicodeSet |
michael@0 | 115 | // constructor will always call this function right after calling lookup(), |
michael@0 | 116 | // and we just need to remember what set to return between these two calls. |
michael@0 | 117 | const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const |
michael@0 | 118 | { |
michael@0 | 119 | UnicodeSet *retVal = NULL; |
michael@0 | 120 | RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const |
michael@0 | 121 | if (ch == 0xffff) { |
michael@0 | 122 | retVal = fCachedSetLookup; |
michael@0 | 123 | This->fCachedSetLookup = 0; |
michael@0 | 124 | } |
michael@0 | 125 | return retVal; |
michael@0 | 126 | } |
michael@0 | 127 | |
michael@0 | 128 | // |
michael@0 | 129 | // RBBISymbolTable::parseReference This function from the abstract symbol table interface |
michael@0 | 130 | // looks for a $variable name in the source text. |
michael@0 | 131 | // It does not look it up, only scans for it. |
michael@0 | 132 | // It is used by the UnicodeSet parser. |
michael@0 | 133 | // |
michael@0 | 134 | // This implementation is lifted pretty much verbatim |
michael@0 | 135 | // from the rules based transliterator implementation. |
michael@0 | 136 | // I didn't see an obvious way of sharing it. |
michael@0 | 137 | // |
michael@0 | 138 | UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, |
michael@0 | 139 | ParsePosition& pos, int32_t limit) const |
michael@0 | 140 | { |
michael@0 | 141 | int32_t start = pos.getIndex(); |
michael@0 | 142 | int32_t i = start; |
michael@0 | 143 | UnicodeString result; |
michael@0 | 144 | while (i < limit) { |
michael@0 | 145 | UChar c = text.charAt(i); |
michael@0 | 146 | if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { |
michael@0 | 147 | break; |
michael@0 | 148 | } |
michael@0 | 149 | ++i; |
michael@0 | 150 | } |
michael@0 | 151 | if (i == start) { // No valid name chars |
michael@0 | 152 | return result; // Indicate failure with empty string |
michael@0 | 153 | } |
michael@0 | 154 | pos.setIndex(i); |
michael@0 | 155 | text.extractBetween(start, i, result); |
michael@0 | 156 | return result; |
michael@0 | 157 | } |
michael@0 | 158 | |
michael@0 | 159 | |
michael@0 | 160 | |
michael@0 | 161 | // |
michael@0 | 162 | // RBBISymbolTable::lookupNode Given a key (a variable name), return the |
michael@0 | 163 | // corresponding RBBI Node. If there is no entry |
michael@0 | 164 | // in the table for this name, return NULL. |
michael@0 | 165 | // |
michael@0 | 166 | RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ |
michael@0 | 167 | |
michael@0 | 168 | RBBINode *retNode = NULL; |
michael@0 | 169 | RBBISymbolTableEntry *el; |
michael@0 | 170 | |
michael@0 | 171 | el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); |
michael@0 | 172 | if (el != NULL) { |
michael@0 | 173 | retNode = el->val; |
michael@0 | 174 | } |
michael@0 | 175 | return retNode; |
michael@0 | 176 | } |
michael@0 | 177 | |
michael@0 | 178 | |
michael@0 | 179 | // |
michael@0 | 180 | // RBBISymbolTable::addEntry Add a new entry to the symbol table. |
michael@0 | 181 | // Indicate an error if the name already exists - |
michael@0 | 182 | // this will only occur in the case of duplicate |
michael@0 | 183 | // variable assignments. |
michael@0 | 184 | // |
michael@0 | 185 | void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) { |
michael@0 | 186 | RBBISymbolTableEntry *e; |
michael@0 | 187 | /* test for buffer overflows */ |
michael@0 | 188 | if (U_FAILURE(err)) { |
michael@0 | 189 | return; |
michael@0 | 190 | } |
michael@0 | 191 | e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); |
michael@0 | 192 | if (e != NULL) { |
michael@0 | 193 | err = U_BRK_VARIABLE_REDFINITION; |
michael@0 | 194 | return; |
michael@0 | 195 | } |
michael@0 | 196 | |
michael@0 | 197 | e = new RBBISymbolTableEntry; |
michael@0 | 198 | if (e == NULL) { |
michael@0 | 199 | err = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 200 | return; |
michael@0 | 201 | } |
michael@0 | 202 | e->key = key; |
michael@0 | 203 | e->val = val; |
michael@0 | 204 | uhash_put( fHashTable, &e->key, e, &err); |
michael@0 | 205 | } |
michael@0 | 206 | |
michael@0 | 207 | |
michael@0 | 208 | RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} |
michael@0 | 209 | |
michael@0 | 210 | RBBISymbolTableEntry::~RBBISymbolTableEntry() { |
michael@0 | 211 | // The "val" of a symbol table entry is a variable reference node. |
michael@0 | 212 | // The l. child of the val is the rhs expression from the assignment. |
michael@0 | 213 | // Unlike other node types, children of variable reference nodes are not |
michael@0 | 214 | // automatically recursively deleted. We do it manually here. |
michael@0 | 215 | delete val->fLeftChild; |
michael@0 | 216 | val->fLeftChild = NULL; |
michael@0 | 217 | |
michael@0 | 218 | delete val; |
michael@0 | 219 | |
michael@0 | 220 | // Note: the key UnicodeString is destructed by virtue of being in the object by value. |
michael@0 | 221 | } |
michael@0 | 222 | |
michael@0 | 223 | |
//
//  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
//
//                                        Only compiled when RBBI_DEBUG is defined.  The table is
//                                        walked twice: first a one-line summary per variable,
//                                        then the parsed expression tree of each variable.
//
#ifdef RBBI_DEBUG
void RBBISymbolTable::rbbiSymtablePrint() const {
    RBBIDebugPrintf("Variable Definitions\n"
           "Name Node Val String Val\n"
           "----------------------------------------------------------------------\n");

    const UHashElement *elem = NULL;

    // Pass 1: name, address of the variable reference node, source text.
    int32_t pos = -1;
    while ((elem = uhash_nextElement(fHashTable, &pos)) != NULL) {
        RBBISymbolTableEntry *entry = (RBBISymbolTableEntry *)elem->value.pointer;

        RBBI_DEBUG_printUnicodeString(entry->key, 15);
        RBBIDebugPrintf(" %8p ", (void *)entry->val);
        RBBI_DEBUG_printUnicodeString(entry->val->fLeftChild->fText);
        RBBIDebugPrintf("\n");
    }

    // Pass 2: name followed by the parse tree of the assigned expression.
    RBBIDebugPrintf("\nParsed Variable Definitions\n");
    pos = -1;
    while ((elem = uhash_nextElement(fHashTable, &pos)) != NULL) {
        RBBISymbolTableEntry *entry = (RBBISymbolTableEntry *)elem->value.pointer;

        RBBI_DEBUG_printUnicodeString(entry->key);
        entry->val->fLeftChild->printTree(TRUE);
        RBBIDebugPrintf("\n");
    }
}
#endif
michael@0 | 262 | |
michael@0 | 263 | |
michael@0 | 264 | |
michael@0 | 265 | |
michael@0 | 266 | |
michael@0 | 267 | U_NAMESPACE_END |
michael@0 | 268 | |
michael@0 | 269 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |