intl/icu/source/common/rbbirb.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 //
michael@0 2 // rbbirb.h
michael@0 3 //
michael@0 4 // Copyright (C) 2002-2008, International Business Machines Corporation and others.
michael@0 5 // All Rights Reserved.
michael@0 6 //
michael@0 7 // This file contains declarations for several classes from the
michael@0 8 // Rule Based Break Iterator rule builder.
michael@0 9 //
michael@0 10
michael@0 11
michael@0 12 #ifndef RBBIRB_H
michael@0 13 #define RBBIRB_H
michael@0 14
michael@0 15 #include "unicode/utypes.h"
michael@0 16 #include "unicode/uobject.h"
michael@0 17 #include "unicode/rbbi.h"
michael@0 18 #include "unicode/uniset.h"
michael@0 19 #include "unicode/parseerr.h"
michael@0 20 #include "uhash.h"
michael@0 21 #include "uvector.h"
michael@0 22 #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
michael@0 23 // looks up references to $variables within a set.
michael@0 24
michael@0 25
michael@0 26
michael@0 27 U_NAMESPACE_BEGIN
michael@0 28
michael@0 29 class RBBIRuleScanner;
michael@0 30 struct RBBIRuleTableEl;
michael@0 31 class RBBISetBuilder;
michael@0 32 class RBBINode;
michael@0 33 class RBBITableBuilder;
michael@0 34
michael@0 35
michael@0 36
michael@0 37 //--------------------------------------------------------------------------------
michael@0 38 //
michael@0 39 // RBBISymbolTable. Implements SymbolTable interface that is used by the
michael@0 40 // UnicodeSet parser to resolve references to $variables.
michael@0 41 //
michael@0 42 //--------------------------------------------------------------------------------
michael@0 43 class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
michael@0 44 public: // of these objects for each entry.
michael@0 45 RBBISymbolTableEntry();
michael@0 46 UnicodeString key; // Name under which this entry is stored; held by value, so the entry has its own copy.
michael@0 47 RBBINode *val; // Node associated with key. NOTE(review): ownership is not visible in this header - confirm in the .cpp.
michael@0 48 ~RBBISymbolTableEntry();
michael@0 49
michael@0 50 private:
michael@0 51 RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // private copy constructor: forbids copying of this class
michael@0 52 RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // private assignment operator: forbids copying of this class
michael@0 53 };
michael@0 54
michael@0 55
michael@0 56 class RBBISymbolTable : public UMemory, public SymbolTable {
michael@0 57 private:
michael@0 58 const UnicodeString &fRules; // Reference member (not a copy): the referenced rule string must outlive this object.
michael@0 59 UHashtable *fHashTable; // Hash table of symbols; holds one RBBISymbolTableEntry per entry.
michael@0 60 RBBIRuleScanner *fRuleScanner; // Presumably the scanner handed to the constructor - confirm in the .cpp.
michael@0 61
michael@0 62 // These next two fields are part of the mechanism for passing references to
michael@0 63 // already-constructed UnicodeSets back to the UnicodeSet constructor
michael@0 64 // when the pattern includes $variable references.
michael@0 65 const UnicodeString ffffString; // Presumably the one-character string U+FFFF (this note used to read "/uffff") - confirm in the .cpp.
michael@0 66 UnicodeSet *fCachedSetLookup;
michael@0 67
michael@0 68 public:
michael@0 69 // API inherited from class SymbolTable
michael@0 70 virtual const UnicodeString* lookup(const UnicodeString& s) const;
michael@0 71 virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
michael@0 72 virtual UnicodeString parseReference(const UnicodeString& text,
michael@0 73 ParsePosition& pos, int32_t limit) const;
michael@0 74
michael@0 75 // Additional Functions
michael@0 76 RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
michael@0 77 virtual ~RBBISymbolTable();
michael@0 78
michael@0 79 virtual RBBINode *lookupNode(const UnicodeString &key) const; // Node lookup by key; the result for an unknown key is not visible here.
michael@0 80 virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err); // Registers val under key; err is the error in/out parameter.
michael@0 81
michael@0 82 #ifdef RBBI_DEBUG
michael@0 83 virtual void rbbiSymtablePrint() const;
michael@0 84 #else
michael@0 85 // Non-debug builds have no rbbiSymtablePrint() member. Instead, the macro below
michael@0 86 // rewrites every call site into an assignment to the dummy member fFakeField.
michael@0 87 int32_t fFakeField; // Dummy target for the macro below; carries no information.
michael@0 88 #define rbbiSymtablePrint() fFakeField=0; // NOTE(review): expansion ends in ';' and the macro stays defined for the rest of the translation unit.
michael@0 89 #endif
michael@0 90
michael@0 91 private:
michael@0 92 RBBISymbolTable(const RBBISymbolTable &other); // private copy constructor: forbids copying of this class
michael@0 93 RBBISymbolTable &operator=(const RBBISymbolTable &other); // private assignment operator: forbids copying of this class
michael@0 94 };
michael@0 95
michael@0 96
michael@0 97 //--------------------------------------------------------------------------------
michael@0 98 //
michael@0 99 // class RBBIRuleBuilder The top-level class handling RBBI rule compiling.
michael@0 100 //
michael@0 101 //--------------------------------------------------------------------------------
michael@0 102 class RBBIRuleBuilder : public UMemory {
michael@0 103 public:
michael@0 104
michael@0 105 // Create a rule based break iterator from a set of rules.
michael@0 106 // This function is the main entry point into the rule builder. The
michael@0 107 // public ICU API for creating RBBIs uses this function to do the actual work.
michael@0 108 //
michael@0 109 static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
michael@0 110 UParseError *parseError,
michael@0 111 UErrorCode &status);
michael@0 112
michael@0 113 public:
michael@0 114 // The "public" functions and data members that appear below are accessed
michael@0 115 // (and shared) by the various parts that make up the rule builder. They
michael@0 116 // are NOT intended to be accessed by anything outside of the
michael@0 117 // rule builder implementation.
michael@0 118 RBBIRuleBuilder(const UnicodeString &rules,
michael@0 119 UParseError *parseErr,
michael@0 120 UErrorCode &status
michael@0 121 );
michael@0 122
michael@0 123 virtual ~RBBIRuleBuilder();
michael@0 124 char *fDebugEnv; // controls debug trace output
michael@0 125 UErrorCode *fStatus; // Error reporting. Keeping status
michael@0 126 UParseError *fParseError; // here avoids passing it everywhere.
michael@0 127 const UnicodeString &fRules; // The rule string that we are compiling (a reference, not a copy)
michael@0 128
michael@0 129 RBBIRuleScanner *fScanner; // The scanner.
michael@0 130 RBBINode *fForwardTree; // The parse trees, generated by the scanner,
michael@0 131 RBBINode *fReverseTree; // then manipulated by subsequent steps.
michael@0 132 RBBINode *fSafeFwdTree; // Presumably the "safe forward" tree of the same family - confirm.
michael@0 133 RBBINode *fSafeRevTree; // Presumably the "safe reverse" tree of the same family - confirm.
michael@0 134
michael@0 135 RBBINode **fDefaultTree; // For rules not qualified with a !
michael@0 136 // the tree to which they belong.
michael@0 137
michael@0 138 UBool fChainRules; // True for chained Unicode TR style rules.
michael@0 139 // False for traditional regexp rules.
michael@0 140
michael@0 141 UBool fLBCMNoChain; // True: suppress chaining of rules on
michael@0 142 // chars with LineBreak property == CM.
michael@0 143
michael@0 144 UBool fLookAheadHardBreak; // True: Look ahead matches cause an
michael@0 145 // immediate break, no continuing for the
michael@0 146 // longest match.
michael@0 147
michael@0 148 RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
michael@0 149 UVector *fUSetNodes; // Vector of all uset nodes.
michael@0 150
michael@0 151 RBBITableBuilder *fForwardTables; // State transition tables
michael@0 152 RBBITableBuilder *fReverseTables; // (one builder per rule tree, judging by the names - confirm).
michael@0 153 RBBITableBuilder *fSafeFwdTables;
michael@0 154 RBBITableBuilder *fSafeRevTables;
michael@0 155
michael@0 156 UVector *fRuleStatusVals; // The values that can be returned
michael@0 157 // from getRuleStatus().
michael@0 158
michael@0 159 RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
michael@0 160 // data tables.
michael@0 161 private:
michael@0 162 RBBIRuleBuilder(const RBBIRuleBuilder &other); // private copy constructor: forbids copying of this class
michael@0 163 RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // private assignment operator: forbids copying of this class
michael@0 164 };
michael@0 165
michael@0 166
michael@0 167
michael@0 168
michael@0 169 //----------------------------------------------------------------------------
michael@0 170 //
michael@0 171 // RBBISetTableEl is an entry in the hash table of UnicodeSets that have
michael@0 172 // been encountered. The val Node will be of nodetype uset
michael@0 173 // and contain pointers to the actual UnicodeSets.
michael@0 174 // The Key is the source string for initializing the set.
michael@0 175 //
michael@0 176 // The hash table is used to avoid creating duplicate
michael@0 177 // unnamed (not $var references) UnicodeSets.
michael@0 178 //
michael@0 179 // Memory Management:
michael@0 180 // The Hash Table owns these RBBISetTableEl structs and
michael@0 181 // the key strings. It does NOT own the val nodes.
michael@0 182 //
michael@0 183 //----------------------------------------------------------------------------
michael@0 184 struct RBBISetTableEl { // One entry in the hash table of already-encountered UnicodeSets.
michael@0 185 UnicodeString *key; // Source string that initializes the set; owned by the hash table.
michael@0 186 RBBINode *val; // Node of type uset pointing at the actual UnicodeSets; NOT owned by the hash table.
michael@0 187 };
michael@0 188
michael@0 189
michael@0 190 //----------------------------------------------------------------------------
michael@0 191 //
michael@0 192 // RBBIDebugPrintf Printf equivalent, for debugging output.
michael@0 193 // Conditional compilation of the implementation lets us
michael@0 194 // get rid of the stdio dependency in environments where it
michael@0 195 // is unavailable.
michael@0 196 //
michael@0 197 //----------------------------------------------------------------------------
michael@0 198 #ifdef RBBI_DEBUG // Debug builds: route the debug output helpers to stdio.
michael@0 199 #include <stdio.h> // NOTE(review): this include sits between U_NAMESPACE_BEGIN and U_NAMESPACE_END in this header.
michael@0 200 #define RBBIDebugPrintf printf
michael@0 201 #define RBBIDebugPuts puts
michael@0 202 #else // Non-debug builds: no stdio dependency.
michael@0 203 #undef RBBIDebugPrintf // Left undefined here, so uses of RBBIDebugPrintf presumably must themselves be inside #ifdef RBBI_DEBUG - confirm at call sites.
michael@0 204 #define RBBIDebugPuts(arg) // Expands to nothing; the argument is discarded and never evaluated.
michael@0 205 #endif
michael@0 206
michael@0 207 U_NAMESPACE_END
michael@0 208 #endif
michael@0 209
michael@0 210
michael@0 211

mercurial