intl/icu/source/common/rbbistbl.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/rbbistbl.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,269 @@
     1.4 +//
     1.5 +//  file:  rbbistbl.cpp    Implementation of the ICU RBBISymbolTable class
     1.6 +//
     1.7 +/*
     1.8 +***************************************************************************
     1.9 +*   Copyright (C) 2002-2011 International Business Machines Corporation
    1.10 +*   and others. All rights reserved.
    1.11 +***************************************************************************
    1.12 +*/
    1.13 +
    1.14 +#include "unicode/utypes.h"
    1.15 +
    1.16 +#if !UCONFIG_NO_BREAK_ITERATION
    1.17 +
    1.18 +#include "unicode/unistr.h"
    1.19 +#include "unicode/uniset.h"
    1.20 +#include "unicode/uchar.h"
    1.21 +#include "unicode/parsepos.h"
    1.22 +
    1.23 +#include "umutex.h"
    1.24 +
    1.25 +#include "rbbirb.h"
    1.26 +#include "rbbinode.h"
    1.27 +
    1.28 +
    1.29 +//
    1.30 +//  RBBISymbolTableEntry_deleter    Used by the UHashTable to delete the contents
    1.31 +//                                  when the hash table is deleted.
    1.32 +//
    1.33 +U_CDECL_BEGIN
    1.34 +static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) {
    1.35 +    icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;
    1.36 +    delete px;
    1.37 +}
    1.38 +U_CDECL_END
    1.39 +
    1.40 +
    1.41 +
    1.42 +U_NAMESPACE_BEGIN
    1.43 +
    1.44 +RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)
    1.45 +    :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))
    1.46 +{
    1.47 +    fHashTable       = NULL;
    1.48 +    fCachedSetLookup = NULL;
    1.49 +    
    1.50 +    fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
    1.51 +    // uhash_open checks status
    1.52 +    if (U_FAILURE(status)) {
    1.53 +        return;
    1.54 +    }
    1.55 +    uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);
    1.56 +}
    1.57 +
    1.58 +
    1.59 +
    1.60 +RBBISymbolTable::~RBBISymbolTable()
    1.61 +{
    1.62 +    uhash_close(fHashTable);
    1.63 +}
    1.64 +
    1.65 +
    1.66 +//
    1.67 +//  RBBISymbolTable::lookup       This function from the abstract symbol table inteface
    1.68 +//                                looks up a variable name and returns a UnicodeString
    1.69 +//                                containing the substitution text.
    1.70 +//
    1.71 +//                                The variable name does NOT include the leading $.
    1.72 +//
    1.73 +const UnicodeString  *RBBISymbolTable::lookup(const UnicodeString& s) const
    1.74 +{
    1.75 +    RBBISymbolTableEntry  *el;
    1.76 +    RBBINode              *varRefNode;
    1.77 +    RBBINode              *exprNode;
    1.78 +    RBBINode              *usetNode;
    1.79 +    const UnicodeString   *retString;
    1.80 +    RBBISymbolTable       *This = (RBBISymbolTable *)this;   // cast off const
    1.81 +
    1.82 +    el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);
    1.83 +    if (el == NULL) {
    1.84 +        return NULL;
    1.85 +    }
    1.86 +
    1.87 +    varRefNode = el->val;
    1.88 +    exprNode   = varRefNode->fLeftChild;     // Root node of expression for variable
    1.89 +    if (exprNode->fType == RBBINode::setRef) {
    1.90 +        // The $variable refers to a single UnicodeSet
    1.91 +        //   return the ffffString, which will subsequently be interpreted as a
    1.92 +        //   stand-in character for the set by RBBISymbolTable::lookupMatcher()
    1.93 +        usetNode = exprNode->fLeftChild;
    1.94 +        This->fCachedSetLookup = usetNode->fInputSet;
    1.95 +        retString = &ffffString;
    1.96 +    }
    1.97 +    else
    1.98 +    {
    1.99 +        // The variable refers to something other than just a set.
   1.100 +        // return the original source string for the expression
   1.101 +        retString = &exprNode->fText;
   1.102 +        This->fCachedSetLookup = NULL;
   1.103 +    }
   1.104 +    return retString;
   1.105 +}
   1.106 +
   1.107 +
   1.108 +
   1.109 +//
   1.110 +//  RBBISymbolTable::lookupMatcher   This function from the abstract symbol table
   1.111 +//                                   interface maps a single stand-in character to a
   1.112 +//                                   pointer to a Unicode Set.   The Unicode Set code uses this
   1.113 +//                                   mechanism to get all references to the same $variable
   1.114 +//                                   name to refer to a single common Unicode Set instance.
   1.115 +//
   1.116 +//    This implementation cheats a little, and does not maintain a map of stand-in chars
   1.117 +//    to sets.  Instead, it takes advantage of the fact that  the UnicodeSet
   1.118 +//    constructor will always call this function right after calling lookup(),
   1.119 +//    and we just need to remember what set to return between these two calls.
   1.120 +const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
   1.121 +{
   1.122 +    UnicodeSet *retVal = NULL;
   1.123 +    RBBISymbolTable *This = (RBBISymbolTable *)this;   // cast off const
   1.124 +    if (ch == 0xffff) {
   1.125 +        retVal = fCachedSetLookup;
   1.126 +        This->fCachedSetLookup = 0;
   1.127 +    }
   1.128 +    return retVal;
   1.129 +}
   1.130 +
   1.131 +//
   1.132 +// RBBISymbolTable::parseReference   This function from the abstract symbol table interface
   1.133 +//                                   looks for a $variable name in the source text.
   1.134 +//                                   It does not look it up, only scans for it.
   1.135 +//                                   It is used by the UnicodeSet parser.
   1.136 +//
   1.137 +//                                   This implementation is lifted pretty much verbatim
   1.138 +//                                   from the rules based transliterator implementation.
   1.139 +//                                   I didn't see an obvious way of sharing it.
   1.140 +//
   1.141 +UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,
   1.142 +                                                ParsePosition& pos, int32_t limit) const
   1.143 +{
   1.144 +    int32_t start = pos.getIndex();
   1.145 +    int32_t i = start;
   1.146 +    UnicodeString result;
   1.147 +    while (i < limit) {
   1.148 +        UChar c = text.charAt(i);
   1.149 +        if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
   1.150 +            break;
   1.151 +        }
   1.152 +        ++i;
   1.153 +    }
   1.154 +    if (i == start) { // No valid name chars
   1.155 +        return result; // Indicate failure with empty string
   1.156 +    }
   1.157 +    pos.setIndex(i);
   1.158 +    text.extractBetween(start, i, result);
   1.159 +    return result;
   1.160 +}
   1.161 +
   1.162 +
   1.163 +
   1.164 +//
   1.165 +// RBBISymbolTable::lookupNode      Given a key (a variable name), return the
   1.166 +//                                  corresponding RBBI Node.  If there is no entry
   1.167 +//                                  in the table for this name, return NULL.
   1.168 +//
   1.169 +RBBINode       *RBBISymbolTable::lookupNode(const UnicodeString &key) const{
   1.170 +
   1.171 +    RBBINode             *retNode = NULL;
   1.172 +    RBBISymbolTableEntry *el;
   1.173 +
   1.174 +    el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
   1.175 +    if (el != NULL) {
   1.176 +        retNode = el->val;
   1.177 +    }
   1.178 +    return retNode;
   1.179 +}
   1.180 +
   1.181 +
   1.182 +//
   1.183 +//    RBBISymbolTable::addEntry     Add a new entry to the symbol table.
   1.184 +//                                  Indicate an error if the name already exists -
   1.185 +//                                    this will only occur in the case of duplicate
   1.186 +//                                    variable assignments.
   1.187 +//
   1.188 +void            RBBISymbolTable::addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err) {
   1.189 +    RBBISymbolTableEntry *e;
   1.190 +    /* test for buffer overflows */
   1.191 +    if (U_FAILURE(err)) {
   1.192 +        return;
   1.193 +    }
   1.194 +    e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);
   1.195 +    if (e != NULL) {
   1.196 +        err = U_BRK_VARIABLE_REDFINITION;
   1.197 +        return;
   1.198 +    }
   1.199 +
   1.200 +    e = new RBBISymbolTableEntry;
   1.201 +    if (e == NULL) {
   1.202 +        err = U_MEMORY_ALLOCATION_ERROR;
   1.203 +        return;
   1.204 +    }
   1.205 +    e->key = key;
   1.206 +    e->val = val;
   1.207 +    uhash_put( fHashTable, &e->key, e, &err);
   1.208 +}
   1.209 +
   1.210 +
   1.211 +RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {}
   1.212 +
   1.213 +RBBISymbolTableEntry::~RBBISymbolTableEntry() {
   1.214 +    // The "val" of a symbol table entry is a variable reference node.
   1.215 +    // The l. child of the val is the rhs expression from the assignment.
   1.216 +    // Unlike other node types, children of variable reference nodes are not
   1.217 +    //    automatically recursively deleted.  We do it manually here.
   1.218 +    delete val->fLeftChild;
   1.219 +    val->fLeftChild = NULL;
   1.220 +
   1.221 +    delete  val;
   1.222 +
   1.223 +    // Note: the key UnicodeString is destructed by virtue of being in the object by value.
   1.224 +}
   1.225 +
   1.226 +
   1.227 +//
   1.228 +//  RBBISymbolTable::print    Debugging function, dump out the symbol table contents.
   1.229 +//
   1.230 +#ifdef RBBI_DEBUG
   1.231 +void RBBISymbolTable::rbbiSymtablePrint() const {
   1.232 +    RBBIDebugPrintf("Variable Definitions\n"
   1.233 +           "Name               Node Val     String Val\n"
   1.234 +           "----------------------------------------------------------------------\n");
   1.235 +
   1.236 +    int32_t pos = -1;
   1.237 +    const UHashElement  *e   = NULL;
   1.238 +    for (;;) {
   1.239 +        e = uhash_nextElement(fHashTable,  &pos);
   1.240 +        if (e == NULL ) {
   1.241 +            break;
   1.242 +        }
   1.243 +        RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
   1.244 +
   1.245 +        RBBI_DEBUG_printUnicodeString(s->key, 15);
   1.246 +        RBBIDebugPrintf("   %8p   ", (void *)s->val);
   1.247 +        RBBI_DEBUG_printUnicodeString(s->val->fLeftChild->fText);
   1.248 +        RBBIDebugPrintf("\n");
   1.249 +    }
   1.250 +
   1.251 +    RBBIDebugPrintf("\nParsed Variable Definitions\n");
   1.252 +    pos = -1;
   1.253 +    for (;;) {
   1.254 +        e = uhash_nextElement(fHashTable,  &pos);
   1.255 +        if (e == NULL ) {
   1.256 +            break;
   1.257 +        }
   1.258 +        RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;
   1.259 +        RBBI_DEBUG_printUnicodeString(s->key);
   1.260 +        s->val->fLeftChild->printTree(TRUE);
   1.261 +        RBBIDebugPrintf("\n");
   1.262 +    }
   1.263 +}
   1.264 +#endif
   1.265 +
   1.266 +
   1.267 +
   1.268 +
   1.269 +
   1.270 +U_NAMESPACE_END
   1.271 +
   1.272 +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

mercurial