michael@0: // michael@0: // rbbirb.h michael@0: // michael@0: // Copyright (C) 2002-2008, International Business Machines Corporation and others. michael@0: // All Rights Reserved. michael@0: // michael@0: // This file contains declarations for several classes from the michael@0: // Rule Based Break Iterator rule builder. michael@0: // michael@0: michael@0: michael@0: #ifndef RBBIRB_H michael@0: #define RBBIRB_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/uobject.h" michael@0: #include "unicode/rbbi.h" michael@0: #include "unicode/uniset.h" michael@0: #include "unicode/parseerr.h" michael@0: #include "uhash.h" michael@0: #include "uvector.h" michael@0: #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that michael@0: // looks up references to $variables within a set. michael@0: michael@0: michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class RBBIRuleScanner; michael@0: struct RBBIRuleTableEl; michael@0: class RBBISetBuilder; michael@0: class RBBINode; michael@0: class RBBITableBuilder; michael@0: michael@0: michael@0: michael@0: //-------------------------------------------------------------------------------- michael@0: // michael@0: // RBBISymbolTable. Implements SymbolTable interface that is used by the michael@0: // UnicodeSet parser to resolve references to $variables. michael@0: // michael@0: //-------------------------------------------------------------------------------- michael@0: class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one michael@0: public: // of these structs for each entry. michael@0: RBBISymbolTableEntry(); michael@0: UnicodeString key; michael@0: RBBINode *val; michael@0: ~RBBISymbolTableEntry(); michael@0: michael@0: private: michael@0: RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class michael@0: RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class michael@0: }; michael@0: michael@0: michael@0: class RBBISymbolTable : public UMemory, public SymbolTable { michael@0: private: michael@0: const UnicodeString &fRules; michael@0: UHashtable *fHashTable; michael@0: RBBIRuleScanner *fRuleScanner; michael@0: michael@0: // These next two fields are part of the mechanism for passing references to michael@0: // already-constructed UnicodeSets back to the UnicodeSet constructor michael@0: // when the pattern includes $variable references. michael@0: const UnicodeString ffffString; // = "/uffff" michael@0: UnicodeSet *fCachedSetLookup; michael@0: michael@0: public: michael@0: // API inherited from class SymbolTable michael@0: virtual const UnicodeString* lookup(const UnicodeString& s) const; michael@0: virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const; michael@0: virtual UnicodeString parseReference(const UnicodeString& text, michael@0: ParsePosition& pos, int32_t limit) const; michael@0: michael@0: // Additional Functions michael@0: RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status); michael@0: virtual ~RBBISymbolTable(); michael@0: michael@0: virtual RBBINode *lookupNode(const UnicodeString &key) const; michael@0: virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err); michael@0: michael@0: #ifdef RBBI_DEBUG michael@0: virtual void rbbiSymtablePrint() const; michael@0: #else michael@0: // A do-nothing inline function for non-debug builds. Member funcs can't be empty michael@0: // or the call sites won't compile. michael@0: int32_t fFakeField; michael@0: #define rbbiSymtablePrint() fFakeField=0; michael@0: #endif michael@0: michael@0: private: michael@0: RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class michael@0: RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class michael@0: }; michael@0: michael@0: michael@0: //-------------------------------------------------------------------------------- michael@0: // michael@0: // class RBBIRuleBuilder The top-level class handling RBBI rule compiling. michael@0: // michael@0: //-------------------------------------------------------------------------------- michael@0: class RBBIRuleBuilder : public UMemory { michael@0: public: michael@0: michael@0: // Create a rule based break iterator from a set of rules. michael@0: // This function is the main entry point into the rule builder. The michael@0: // public ICU API for creating RBBIs uses this function to do the actual work. michael@0: // michael@0: static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules, michael@0: UParseError *parseError, michael@0: UErrorCode &status); michael@0: michael@0: public: michael@0: // The "public" functions and data members that appear below are accessed michael@0: // (and shared) by the various parts that make up the rule builder. They michael@0: // are NOT intended to be accessed by anything outside of the michael@0: // rule builder implementation. michael@0: RBBIRuleBuilder(const UnicodeString &rules, michael@0: UParseError *parseErr, michael@0: UErrorCode &status michael@0: ); michael@0: michael@0: virtual ~RBBIRuleBuilder(); michael@0: char *fDebugEnv; // controls debug trace output michael@0: UErrorCode *fStatus; // Error reporting. Keeping status michael@0: UParseError *fParseError; // here avoids passing it everywhere. michael@0: const UnicodeString &fRules; // The rule string that we are compiling michael@0: michael@0: RBBIRuleScanner *fScanner; // The scanner. michael@0: RBBINode *fForwardTree; // The parse trees, generated by the scanner, michael@0: RBBINode *fReverseTree; // then manipulated by subsequent steps. michael@0: RBBINode *fSafeFwdTree; michael@0: RBBINode *fSafeRevTree; michael@0: michael@0: RBBINode **fDefaultTree; // For rules not qualified with a ! michael@0: // the tree to which they belong to. michael@0: michael@0: UBool fChainRules; // True for chained Unicode TR style rules. michael@0: // False for traditional regexp rules. michael@0: michael@0: UBool fLBCMNoChain; // True: suppress chaining of rules on michael@0: // chars with LineBreak property == CM. michael@0: michael@0: UBool fLookAheadHardBreak; // True: Look ahead matches cause an michael@0: // immediate break, no continuing for the michael@0: // longest match. michael@0: michael@0: RBBISetBuilder *fSetBuilder; // Set and Character Category builder. michael@0: UVector *fUSetNodes; // Vector of all uset nodes. michael@0: michael@0: RBBITableBuilder *fForwardTables; // State transition tables michael@0: RBBITableBuilder *fReverseTables; michael@0: RBBITableBuilder *fSafeFwdTables; michael@0: RBBITableBuilder *fSafeRevTables; michael@0: michael@0: UVector *fRuleStatusVals; // The values that can be returned michael@0: // from getRuleStatus(). michael@0: michael@0: RBBIDataHeader *flattenData(); // Create the flattened (runtime format) michael@0: // data tables.. michael@0: private: michael@0: RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class michael@0: RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class michael@0: }; michael@0: michael@0: michael@0: michael@0: michael@0: //---------------------------------------------------------------------------- michael@0: // michael@0: // RBBISetTableEl is an entry in the hash table of UnicodeSets that have michael@0: // been encountered. The val Node will be of nodetype uset michael@0: // and contain pointers to the actual UnicodeSets. michael@0: // The Key is the source string for initializing the set. michael@0: // michael@0: // The hash table is used to avoid creating duplicate michael@0: // unnamed (not $var references) UnicodeSets. michael@0: // michael@0: // Memory Management: michael@0: // The Hash Table owns these RBBISetTableEl structs and michael@0: // the key strings. It does NOT own the val nodes. michael@0: // michael@0: //---------------------------------------------------------------------------- michael@0: struct RBBISetTableEl { michael@0: UnicodeString *key; michael@0: RBBINode *val; michael@0: }; michael@0: michael@0: michael@0: //---------------------------------------------------------------------------- michael@0: // michael@0: // RBBIDebugPrintf Printf equivalent, for debugging output. michael@0: // Conditional compilation of the implementation lets us michael@0: // get rid of the stdio dependency in environments where it michael@0: // is unavailable. michael@0: // michael@0: //---------------------------------------------------------------------------- michael@0: #ifdef RBBI_DEBUG michael@0: #include michael@0: #define RBBIDebugPrintf printf michael@0: #define RBBIDebugPuts puts michael@0: #else michael@0: #undef RBBIDebugPrintf michael@0: #define RBBIDebugPuts(arg) michael@0: #endif michael@0: michael@0: U_NAMESPACE_END michael@0: #endif michael@0: michael@0: michael@0: