intl/icu/source/common/rbbiscan.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 //
michael@0 2 // rbbiscan.h
michael@0 3 //
michael@0 4 // Copyright (C) 2002-2008, International Business Machines Corporation and others.
michael@0 5 // All Rights Reserved.
michael@0 6 //
michael@0 7 // This file contains declarations for class RBBIRuleScanner
michael@0 8 //
michael@0 9
michael@0 10
michael@0 11 #ifndef RBBISCAN_H
michael@0 12 #define RBBISCAN_H
michael@0 13
michael@0 14 #include "unicode/utypes.h"
michael@0 15 #include "unicode/uobject.h"
michael@0 16 #include "unicode/rbbi.h"
michael@0 17 #include "unicode/uniset.h"
michael@0 18 #include "unicode/parseerr.h"
michael@0 19 #include "uhash.h"
michael@0 20 #include "uvector.h"
michael@0 21 #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
michael@0 22 // looks up references to $variables within a set.
michael@0 23 #include "rbbinode.h"
michael@0 24 //#include "rbbitblb.h"
michael@0 25
michael@0 26
michael@0 27
michael@0 28 U_NAMESPACE_BEGIN
michael@0 29
michael@0 30 class RBBIRuleBuilder; // Forward declaration; this header uses the type only through a pointer.
michael@0 31 class RBBISymbolTable; // Forward declaration; this header uses the type only through a pointer.
michael@0 32
michael@0 33
michael@0 34 //--------------------------------------------------------------------------------
michael@0 35 //
michael@0 36 // class RBBIRuleScanner does the lowest level, character-at-a-time
michael@0 37 // scanning of break iterator rules.
michael@0 38 //
michael@0 39 // The output of the scanner is parse trees for
michael@0 40 // the rule expressions and a list of all Unicode Sets
michael@0 41 // encountered.
michael@0 42 // Instances are non-copyable: the copy constructor and operator= are declared private.
michael@0 43 //--------------------------------------------------------------------------------
michael@0 44
michael@0 45 class RBBIRuleScanner : public UMemory {
michael@0 46 public:
michael@0 47
michael@0 48 enum {
michael@0 49 kStackSize = 100 // The size of the state stack for
michael@0 50 }; // rules parsing. Corresponds roughly
michael@0 51 // to the depth of parentheses nesting
michael@0 52 // that is allowed in the rules. Also sizes fNodeStack.
michael@0 53
michael@0 54 struct RBBIRuleChar { // One scanned character, as passed to nextChar() and push().
michael@0 55 UChar32 fChar; // The character value.
michael@0 56 UBool fEscaped; // NOTE(review): presumably TRUE when the char was escaped/quoted in the rules - confirm in rbbiscan.cpp.
michael@0 57 };
michael@0 58
michael@0 59 RBBIRuleScanner(RBBIRuleBuilder *rb); // rb: the rule builder this scanner is part of (presumably kept in fRB - confirm).
michael@0 60
michael@0 61
michael@0 62 virtual ~RBBIRuleScanner();
michael@0 63
michael@0 64 void nextChar(RBBIRuleChar &c); // Get the next char from the input stream.
michael@0 65 // The result is written to c; nothing is returned. NOTE(review): how end of input is signalled is not visible here.
michael@0 66
michael@0 67 UBool push(const RBBIRuleChar &c); // Push (unget) one character.
michael@0 68 // Only a single character may be pushed. NOTE(review): meaning of the UBool result is not stated - confirm.
michael@0 69
michael@0 70 void parse(); // Parse the rules, generating two parse
michael@0 71 // trees, one each for the forward and
michael@0 72 // reverse rules,
michael@0 73 // and a list of UnicodeSets encountered.
michael@0 74
michael@0 75 /**
michael@0 76 * Return a rules string without unnecessary
michael@0 77 * characters. The input is taken by const reference; the result is returned by value.
michael@0 78 */
michael@0 79 static UnicodeString stripRules(const UnicodeString &rules);
michael@0 80 private:
michael@0 81
michael@0 82 UBool doParseActions(int32_t a); // NOTE(review): a is presumably an action code from the state table; UBool meaning not visible here.
michael@0 83 void error(UErrorCode e); // error reporting convenience function.
michael@0 84 void fixOpStack(RBBINode::OpPrecedence p); // NOTE(review): presumably resolves pending operators on the node stack for precedence p - confirm.
michael@0 85 // NOTE(review): findSetFor presumably finds or creates the set for string s and attaches it to node; a non-NULL setToAdopt is presumably taken over by the callee - confirm.
michael@0 86 void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
michael@0 87
michael@0 88 UChar32 nextCharLL(); // NOTE(review): presumably the low-level fetch beneath nextChar() - confirm.
michael@0 89 #ifdef RBBI_DEBUG
michael@0 90 void printNodeStack(const char *title); // Declared only when RBBI_DEBUG is defined.
michael@0 91 #endif
michael@0 92 RBBINode *pushNewNode(RBBINode::NodeType t); // NOTE(review): presumably creates a node of type t and pushes it on fNodeStack - confirm.
michael@0 93 void scanSet(); // NOTE(review): presumably scans a UnicodeSet expression from the rule input - confirm.
michael@0 94
michael@0 95
michael@0 96 RBBIRuleBuilder *fRB; // The rule builder that we are part of.
michael@0 97
michael@0 98 int32_t fScanIndex; // Index of current character being processed
michael@0 99 // in the rule input string.
michael@0 100 int32_t fNextIndex; // Index of the next character, which
michael@0 101 // is the first character not yet scanned.
michael@0 102 UBool fQuoteMode; // Scan is in a 'quoted region'
michael@0 103 int32_t fLineNum; // Line number in input file.
michael@0 104 int32_t fCharNum; // Char position within the line.
michael@0 105 UChar32 fLastChar; // Previous char, needed to count CR-LF
michael@0 106 // as a single line, not two.
michael@0 107
michael@0 108 RBBIRuleChar fC; // Current char for parse state machine
michael@0 109 // processing.
michael@0 110 UnicodeString fVarName; // $variableName, valid when we've just
michael@0 111 // scanned one.
michael@0 112
michael@0 113 RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule
michael@0 114 // parsing. index by p[state][char-class]. NOTE(review): RBBIRuleTableEl is not declared in this header; presumably from rbbirpt.h - confirm include order.
michael@0 115
michael@0 116 uint16_t fStack[kStackSize]; // State stack, holds state pushes
michael@0 117 int32_t fStackPtr; // and pops as specified in the state
michael@0 118 // transition rules.
michael@0 119
michael@0 120 RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created
michael@0 121 // during the parse of a rule
michael@0 122 int32_t fNodeStackPtr; // Position in fNodeStack. NOTE(review): presumably the index of the top entry - confirm.
michael@0 123
michael@0 124
michael@0 125 UBool fReverseRule; // True if the rule currently being scanned
michael@0 126 // is a reverse direction rule (if it
michael@0 127 // starts with a '!')
michael@0 128
michael@0 129 UBool fLookAheadRule; // True if the rule includes a '/'
michael@0 130 // somewhere within it.
michael@0 131
michael@0 132 RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of
michael@0 133 // $variable symbols.
michael@0 134
michael@0 135 UHashtable *fSetTable; // UnicodeSet hash table, holds indexes to
michael@0 136 // the sets created while parsing rules.
michael@0 137 // The key is the string used for creating
michael@0 138 // the set.
michael@0 139
michael@0 140 UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during
michael@0 141 // the scanning of RBBI rules. The
michael@0 142 // indices for these are assigned by the
michael@0 143 // Perl script that builds the state tables.
michael@0 144 // See rbbirpt.h.
michael@0 145
michael@0 146 int32_t fRuleNum; // Counts each rule as it is scanned.
michael@0 147
michael@0 148 int32_t fOptionStart; // Input index of start of a !!option
michael@0 149 // keyword, while being scanned.
michael@0 150
michael@0 151 UnicodeSet *gRuleSet_rule_char; // Despite the 'g' prefix, these four pointers are non-static, per-instance members.
michael@0 152 UnicodeSet *gRuleSet_white_space; // NOTE(review): ownership and lifetime of the pointed-to sets are not visible here.
michael@0 153 UnicodeSet *gRuleSet_name_char; // NOTE(review): presumably the characters allowed inside a $variable name - confirm.
michael@0 154 UnicodeSet *gRuleSet_name_start_char; // NOTE(review): presumably the characters allowed to start a $variable name - confirm.
michael@0 155
michael@0 156 RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class (declared private)
michael@0 157 RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class (declared private)
michael@0 158 };
michael@0 159
michael@0 160 U_NAMESPACE_END
michael@0 161
michael@0 162 #endif

mercurial