1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/rbbistbl.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,269 @@ 1.4 +// 1.5 +// file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class 1.6 +// 1.7 +/* 1.8 +*************************************************************************** 1.9 +* Copyright (C) 2002-2011 International Business Machines Corporation 1.10 +* and others. All rights reserved. 1.11 +*************************************************************************** 1.12 +*/ 1.13 + 1.14 +#include "unicode/utypes.h" 1.15 + 1.16 +#if !UCONFIG_NO_BREAK_ITERATION 1.17 + 1.18 +#include "unicode/unistr.h" 1.19 +#include "unicode/uniset.h" 1.20 +#include "unicode/uchar.h" 1.21 +#include "unicode/parsepos.h" 1.22 + 1.23 +#include "umutex.h" 1.24 + 1.25 +#include "rbbirb.h" 1.26 +#include "rbbinode.h" 1.27 + 1.28 + 1.29 +// 1.30 +// RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents 1.31 +// when the hash table is deleted. 1.32 +// 1.33 +U_CDECL_BEGIN 1.34 +static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { 1.35 + icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p; 1.36 + delete px; 1.37 +} 1.38 +U_CDECL_END 1.39 + 1.40 + 1.41 + 1.42 +U_NAMESPACE_BEGIN 1.43 + 1.44 +RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status) 1.45 + :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) 1.46 +{ 1.47 + fHashTable = NULL; 1.48 + fCachedSetLookup = NULL; 1.49 + 1.50 + fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); 1.51 + // uhash_open checks status 1.52 + if (U_FAILURE(status)) { 1.53 + return; 1.54 + } 1.55 + uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); 1.56 +} 1.57 + 1.58 + 1.59 + 1.60 +RBBISymbolTable::~RBBISymbolTable() 1.61 +{ 1.62 + uhash_close(fHashTable); 1.63 +} 1.64 + 1.65 + 1.66 +// 1.67 +// RBBISymbolTable::lookup This function from the abstract symbol table inteface 1.68 +// looks up a variable name and returns a UnicodeString 1.69 +// containing the substitution text. 1.70 +// 1.71 +// The variable name does NOT include the leading $. 1.72 +// 1.73 +const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const 1.74 +{ 1.75 + RBBISymbolTableEntry *el; 1.76 + RBBINode *varRefNode; 1.77 + RBBINode *exprNode; 1.78 + RBBINode *usetNode; 1.79 + const UnicodeString *retString; 1.80 + RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const 1.81 + 1.82 + el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); 1.83 + if (el == NULL) { 1.84 + return NULL; 1.85 + } 1.86 + 1.87 + varRefNode = el->val; 1.88 + exprNode = varRefNode->fLeftChild; // Root node of expression for variable 1.89 + if (exprNode->fType == RBBINode::setRef) { 1.90 + // The $variable refers to a single UnicodeSet 1.91 + // return the ffffString, which will subsequently be interpreted as a 1.92 + // stand-in character for the set by RBBISymbolTable::lookupMatcher() 1.93 + usetNode = exprNode->fLeftChild; 1.94 + This->fCachedSetLookup = usetNode->fInputSet; 1.95 + retString = &ffffString; 1.96 + } 1.97 + else 1.98 + { 1.99 + // The variable refers to something other than just a set. 1.100 + // return the original source string for the expression 1.101 + retString = &exprNode->fText; 1.102 + This->fCachedSetLookup = NULL; 1.103 + } 1.104 + return retString; 1.105 +} 1.106 + 1.107 + 1.108 + 1.109 +// 1.110 +// RBBISymbolTable::lookupMatcher This function from the abstract symbol table 1.111 +// interface maps a single stand-in character to a 1.112 +// pointer to a Unicode Set. The Unicode Set code uses this 1.113 +// mechanism to get all references to the same $variable 1.114 +// name to refer to a single common Unicode Set instance. 1.115 +// 1.116 +// This implementation cheats a little, and does not maintain a map of stand-in chars 1.117 +// to sets. Instead, it takes advantage of the fact that the UnicodeSet 1.118 +// constructor will always call this function right after calling lookup(), 1.119 +// and we just need to remember what set to return between these two calls. 1.120 +const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const 1.121 +{ 1.122 + UnicodeSet *retVal = NULL; 1.123 + RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const 1.124 + if (ch == 0xffff) { 1.125 + retVal = fCachedSetLookup; 1.126 + This->fCachedSetLookup = 0; 1.127 + } 1.128 + return retVal; 1.129 +} 1.130 + 1.131 +// 1.132 +// RBBISymbolTable::parseReference This function from the abstract symbol table interface 1.133 +// looks for a $variable name in the source text. 1.134 +// It does not look it up, only scans for it. 1.135 +// It is used by the UnicodeSet parser. 1.136 +// 1.137 +// This implementation is lifted pretty much verbatim 1.138 +// from the rules based transliterator implementation. 1.139 +// I didn't see an obvious way of sharing it. 1.140 +// 1.141 +UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, 1.142 + ParsePosition& pos, int32_t limit) const 1.143 +{ 1.144 + int32_t start = pos.getIndex(); 1.145 + int32_t i = start; 1.146 + UnicodeString result; 1.147 + while (i < limit) { 1.148 + UChar c = text.charAt(i); 1.149 + if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { 1.150 + break; 1.151 + } 1.152 + ++i; 1.153 + } 1.154 + if (i == start) { // No valid name chars 1.155 + return result; // Indicate failure with empty string 1.156 + } 1.157 + pos.setIndex(i); 1.158 + text.extractBetween(start, i, result); 1.159 + return result; 1.160 +} 1.161 + 1.162 + 1.163 + 1.164 +// 1.165 +// RBBISymbolTable::lookupNode Given a key (a variable name), return the 1.166 +// corresponding RBBI Node. If there is no entry 1.167 +// in the table for this name, return NULL. 1.168 +// 1.169 +RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ 1.170 + 1.171 + RBBINode *retNode = NULL; 1.172 + RBBISymbolTableEntry *el; 1.173 + 1.174 + el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); 1.175 + if (el != NULL) { 1.176 + retNode = el->val; 1.177 + } 1.178 + return retNode; 1.179 +} 1.180 + 1.181 + 1.182 +// 1.183 +// RBBISymbolTable::addEntry Add a new entry to the symbol table. 1.184 +// Indicate an error if the name already exists - 1.185 +// this will only occur in the case of duplicate 1.186 +// variable assignments. 1.187 +// 1.188 +void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) { 1.189 + RBBISymbolTableEntry *e; 1.190 + /* test for buffer overflows */ 1.191 + if (U_FAILURE(err)) { 1.192 + return; 1.193 + } 1.194 + e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); 1.195 + if (e != NULL) { 1.196 + err = U_BRK_VARIABLE_REDFINITION; 1.197 + return; 1.198 + } 1.199 + 1.200 + e = new RBBISymbolTableEntry; 1.201 + if (e == NULL) { 1.202 + err = U_MEMORY_ALLOCATION_ERROR; 1.203 + return; 1.204 + } 1.205 + e->key = key; 1.206 + e->val = val; 1.207 + uhash_put( fHashTable, &e->key, e, &err); 1.208 +} 1.209 + 1.210 + 1.211 +RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} 1.212 + 1.213 +RBBISymbolTableEntry::~RBBISymbolTableEntry() { 1.214 + // The "val" of a symbol table entry is a variable reference node. 1.215 + // The l. child of the val is the rhs expression from the assignment. 1.216 + // Unlike other node types, children of variable reference nodes are not 1.217 + // automatically recursively deleted. We do it manually here. 1.218 + delete val->fLeftChild; 1.219 + val->fLeftChild = NULL; 1.220 + 1.221 + delete val; 1.222 + 1.223 + // Note: the key UnicodeString is destructed by virtue of being in the object by value. 1.224 +} 1.225 + 1.226 + 1.227 +// 1.228 +// RBBISymbolTable::print Debugging function, dump out the symbol table contents. 1.229 +// 1.230 +#ifdef RBBI_DEBUG 1.231 +void RBBISymbolTable::rbbiSymtablePrint() const { 1.232 + RBBIDebugPrintf("Variable Definitions\n" 1.233 + "Name Node Val String Val\n" 1.234 + "----------------------------------------------------------------------\n"); 1.235 + 1.236 + int32_t pos = -1; 1.237 + const UHashElement *e = NULL; 1.238 + for (;;) { 1.239 + e = uhash_nextElement(fHashTable, &pos); 1.240 + if (e == NULL ) { 1.241 + break; 1.242 + } 1.243 + RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; 1.244 + 1.245 + RBBI_DEBUG_printUnicodeString(s->key, 15); 1.246 + RBBIDebugPrintf(" %8p ", (void *)s->val); 1.247 + RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText); 1.248 + RBBIDebugPrintf("\n"); 1.249 + } 1.250 + 1.251 + RBBIDebugPrintf("\nParsed Variable Definitions\n"); 1.252 + pos = -1; 1.253 + for (;;) { 1.254 + e = uhash_nextElement(fHashTable, &pos); 1.255 + if (e == NULL ) { 1.256 + break; 1.257 + } 1.258 + RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; 1.259 + RBBI_DEBUG_printUnicodeString(s->key); 1.260 + s->val->fLeftChild->printTree(TRUE); 1.261 + RBBIDebugPrintf("\n"); 1.262 + } 1.263 +} 1.264 +#endif 1.265 + 1.266 + 1.267 + 1.268 + 1.269 + 1.270 +U_NAMESPACE_END 1.271 + 1.272 +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */