intl/icu/source/common/rbbistbl.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 //
michael@0 2 // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class
michael@0 3 //
michael@0 4 /*
michael@0 5 ***************************************************************************
michael@0 6 * Copyright (C) 2002-2011 International Business Machines Corporation
michael@0 7 * and others. All rights reserved.
michael@0 8 ***************************************************************************
michael@0 9 */
michael@0 10
michael@0 11 #include "unicode/utypes.h"
michael@0 12
michael@0 13 #if !UCONFIG_NO_BREAK_ITERATION
michael@0 14
michael@0 15 #include "unicode/unistr.h"
michael@0 16 #include "unicode/uniset.h"
michael@0 17 #include "unicode/uchar.h"
michael@0 18 #include "unicode/parsepos.h"
michael@0 19
michael@0 20 #include "umutex.h"
michael@0 21
michael@0 22 #include "rbbirb.h"
michael@0 23 #include "rbbinode.h"
michael@0 24
michael@0 25
michael@0 26 //
michael@0 27 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents
michael@0 28 // when the hash table is deleted.
michael@0 29 //
michael@0 30 U_CDECL_BEGIN
michael@0 31 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
michael@0 32 icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
michael@0 33 delete px;
michael@0 34 }
michael@0 35 U_CDECL_END
michael@0 36
michael@0 37
michael@0 38
michael@0 39 U_NAMESPACE_BEGIN
michael@0 40
michael@0 41 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
michael@0 42 :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
michael@0 43 {
michael@0 44 fHashTable = NULL;
michael@0 45 fCachedSetLookup = NULL;
michael@0 46
michael@0 47 fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
michael@0 48 // uhash_open checks status
michael@0 49 if (U_FAILURE(status)) {
michael@0 50 return;
michael@0 51 }
michael@0 52 uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
michael@0 53 }
michael@0 54
michael@0 55
michael@0 56
michael@0 57 RBBISymbolTable::~RBBISymbolTable()
michael@0 58 {
michael@0 59 uhash_close(fHashTable);
michael@0 60 }
michael@0 61
michael@0 62
michael@0 63 //
michael@0 64 // RBBISymbolTable::lookup This function from the abstract symbol table inteface
michael@0 65 // looks up a variable name and returns a UnicodeString
michael@0 66 // containing the substitution text.
michael@0 67 //
michael@0 68 // The variable name does NOT include the leading $.
michael@0 69 //
michael@0 70 const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const
michael@0 71 {
michael@0 72 RBBISymbolTableEntry *el;
michael@0 73 RBBINode *varRefNode;
michael@0 74 RBBINode *exprNode;
michael@0 75 RBBINode *usetNode;
michael@0 76 const UnicodeString *retString;
michael@0 77 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
michael@0 78
michael@0 79 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
michael@0 80 if (el == NULL) {
michael@0 81 return NULL;
michael@0 82 }
michael@0 83
michael@0 84 varRefNode = el->val;
michael@0 85 exprNode = varRefNode->fLeftChild; // Root node of expression for variable
michael@0 86 if (exprNode->fType == RBBINode::setRef) {
michael@0 87 // The $variable refers to a single UnicodeSet
michael@0 88 // return the ffffString, which will subsequently be interpreted as a
michael@0 89 // stand-in character for the set by RBBISymbolTable::lookupMatcher()
michael@0 90 usetNode = exprNode->fLeftChild;
michael@0 91 This->fCachedSetLookup = usetNode->fInputSet;
michael@0 92 retString = &ffffString;
michael@0 93 }
michael@0 94 else
michael@0 95 {
michael@0 96 // The variable refers to something other than just a set.
michael@0 97 // return the original source string for the expression
michael@0 98 retString = &exprNode->fText;
michael@0 99 This->fCachedSetLookup = NULL;
michael@0 100 }
michael@0 101 return retString;
michael@0 102 }
michael@0 103
michael@0 104
michael@0 105
michael@0 106 //
michael@0 107 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table
michael@0 108 // interface maps a single stand-in character to a
michael@0 109 // pointer to a Unicode Set. The Unicode Set code uses this
michael@0 110 // mechanism to get all references to the same $variable
michael@0 111 // name to refer to a single common Unicode Set instance.
michael@0 112 //
michael@0 113 // This implementation cheats a little, and does not maintain a map of stand-in chars
michael@0 114 // to sets. Instead, it takes advantage of the fact that the UnicodeSet
michael@0 115 // constructor will always call this function right after calling lookup(),
michael@0 116 // and we just need to remember what set to return between these two calls.
michael@0 117 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
michael@0 118 {
michael@0 119 UnicodeSet *retVal = NULL;
michael@0 120 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const
michael@0 121 if (ch == 0xffff) {
michael@0 122 retVal = fCachedSetLookup;
michael@0 123 This->fCachedSetLookup = 0;
michael@0 124 }
michael@0 125 return retVal;
michael@0 126 }
michael@0 127
michael@0 128 //
michael@0 129 // RBBISymbolTable::parseReference This function from the abstract symbol table interface
michael@0 130 // looks for a $variable name in the source text.
michael@0 131 // It does not look it up, only scans for it.
michael@0 132 // It is used by the UnicodeSet parser.
michael@0 133 //
michael@0 134 // This implementation is lifted pretty much verbatim
michael@0 135 // from the rules based transliterator implementation.
michael@0 136 // I didn't see an obvious way of sharing it.
michael@0 137 //
michael@0 138 UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text,
michael@0 139 ParsePosition& pos, int32_t limit) const
michael@0 140 {
michael@0 141 int32_t start = pos.getIndex();
michael@0 142 int32_t i = start;
michael@0 143 UnicodeString result;
michael@0 144 while (i < limit) {
michael@0 145 UChar c = text.charAt(i);
michael@0 146 if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
michael@0 147 break;
michael@0 148 }
michael@0 149 ++i;
michael@0 150 }
michael@0 151 if (i == start) { // No valid name chars
michael@0 152 return result; // Indicate failure with empty string
michael@0 153 }
michael@0 154 pos.setIndex(i);
michael@0 155 text.extractBetween(start, i, result);
michael@0 156 return result;
michael@0 157 }
michael@0 158
michael@0 159
michael@0 160
michael@0 161 //
michael@0 162 // RBBISymbolTable::lookupNode Given a key (a variable name), return the
michael@0 163 // corresponding RBBI Node. If there is no entry
michael@0 164 // in the table for this name, return NULL.
michael@0 165 //
michael@0 166 RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
michael@0 167
michael@0 168 RBBINode *retNode = NULL;
michael@0 169 RBBISymbolTableEntry *el;
michael@0 170
michael@0 171 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
michael@0 172 if (el != NULL) {
michael@0 173 retNode = el->val;
michael@0 174 }
michael@0 175 return retNode;
michael@0 176 }
michael@0 177
michael@0 178
michael@0 179 //
michael@0 180 // RBBISymbolTable::addEntry Add a new entry to the symbol table.
michael@0 181 // Indicate an error if the name already exists -
michael@0 182 // this will only occur in the case of duplicate
michael@0 183 // variable assignments.
michael@0 184 //
michael@0 185 void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
michael@0 186 RBBISymbolTableEntry *e;
michael@0 187 /* test for buffer overflows */
michael@0 188 if (U_FAILURE(err)) {
michael@0 189 return;
michael@0 190 }
michael@0 191 e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
michael@0 192 if (e != NULL) {
michael@0 193 err = U_BRK_VARIABLE_REDFINITION;
michael@0 194 return;
michael@0 195 }
michael@0 196
michael@0 197 e = new RBBISymbolTableEntry;
michael@0 198 if (e == NULL) {
michael@0 199 err = U_MEMORY_ALLOCATION_ERROR;
michael@0 200 return;
michael@0 201 }
michael@0 202 e->key = key;
michael@0 203 e->val = val;
michael@0 204 uhash_put( fHashTable, &e->key, e, &err);
michael@0 205 }
michael@0 206
michael@0 207
michael@0 208 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {}
michael@0 209
michael@0 210 RBBISymbolTableEntry::~RBBISymbolTableEntry() {
michael@0 211 // The "val" of a symbol table entry is a variable reference node.
michael@0 212 // The l. child of the val is the rhs expression from the assignment.
michael@0 213 // Unlike other node types, children of variable reference nodes are not
michael@0 214 // automatically recursively deleted. We do it manually here.
michael@0 215 delete val->fLeftChild;
michael@0 216 val->fLeftChild = NULL;
michael@0 217
michael@0 218 delete val;
michael@0 219
michael@0 220 // Note: the key UnicodeString is destructed by virtue of being in the object by value.
michael@0 221 }
michael@0 222
michael@0 223
//
//  RBBISymbolTable::print    Debugging function, dump out the symbol table contents.
//                            Compiled only when RBBI_DEBUG is defined.
//
//                            Two passes are made over the hash table: the first lists
//                            each variable's name, node pointer and source text, the
//                            second prints the parse tree of each variable's expression.
//
#ifdef RBBI_DEBUG
void RBBISymbolTable::rbbiSymtablePrint() const {
    RBBIDebugPrintf("Variable Definitions\n"
           "Name Node Val String Val\n"
           "----------------------------------------------------------------------\n");

    // Pass 1: one summary line per variable.
    int32_t pos = -1;
    for (const UHashElement *elem = uhash_nextElement(fHashTable, &pos);
         elem != NULL;
         elem = uhash_nextElement(fHashTable, &pos)) {
        RBBISymbolTableEntry *sym = static_cast<RBBISymbolTableEntry *>(elem->value.pointer);

        RBBI_DEBUG_printUnicodeString(sym->key, 15);
        RBBIDebugPrintf(" %8p ", (void *)sym->val);
        RBBI_DEBUG_printUnicodeString(sym->val->fLeftChild->fText);
        RBBIDebugPrintf("\n");
    }

    // Pass 2: restart the iteration and dump each variable's expression tree.
    RBBIDebugPrintf("\nParsed Variable Definitions\n");
    pos = -1;
    for (const UHashElement *elem = uhash_nextElement(fHashTable, &pos);
         elem != NULL;
         elem = uhash_nextElement(fHashTable, &pos)) {
        RBBISymbolTableEntry *sym = static_cast<RBBISymbolTableEntry *>(elem->value.pointer);

        RBBI_DEBUG_printUnicodeString(sym->key);
        sym->val->fLeftChild->printTree(TRUE);
        RBBIDebugPrintf("\n");
    }
}
#endif
michael@0 262
michael@0 263
michael@0 264
michael@0 265
michael@0 266
michael@0 267 U_NAMESPACE_END
michael@0 268
michael@0 269 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */

mercurial