intl/icu/source/common/rbbiscan.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/rbbiscan.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,162 @@
     1.4 +//
     1.5 +//  rbbiscan.h
     1.6 +//
     1.7 +//  Copyright (C) 2002-2008, International Business Machines Corporation and others.
     1.8 +//  All Rights Reserved.
     1.9 +//
    1.10 +//  This file contains declarations for class RBBIRuleScanner
    1.11 +//
    1.12 +
    1.13 +
    1.14 +#ifndef RBBISCAN_H
    1.15 +#define RBBISCAN_H
    1.16 +
    1.17 +#include "unicode/utypes.h"
    1.18 +#include "unicode/uobject.h"
    1.19 +#include "unicode/rbbi.h"
    1.20 +#include "unicode/uniset.h"
    1.21 +#include "unicode/parseerr.h"
    1.22 +#include "uhash.h"
    1.23 +#include "uvector.h"
    1.24 +#include "unicode/symtable.h"// For UnicodeSet parsing; declares the interface that
    1.25 +                          //    looks up references to $variables within a set.
    1.26 +#include "rbbinode.h"
    1.27 +//#include "rbbitblb.h"
    1.28 +
    1.29 +
    1.30 +
    1.31 +U_NAMESPACE_BEGIN
    1.32 +
    1.33 +class   RBBIRuleBuilder;
    1.34 +class   RBBISymbolTable;
    1.35 +
    1.36 +
    1.37 +//--------------------------------------------------------------------------------
    1.38 +//
    1.39 +//  class RBBIRuleScanner does the lowest level, character-at-a-time
    1.40 +//                        scanning of break iterator rules.  
    1.41 +//
    1.42 +//                        The output of the scanner is parse trees for
    1.43 +//                        the rule expressions and a list of all Unicode Sets
    1.44 +//                        encountered.
    1.45 +//
    1.46 +//--------------------------------------------------------------------------------
    1.47 +
    1.48 +class RBBIRuleScanner : public UMemory {
    1.49 +public:
    1.50 +
    1.51 +    enum {
    1.52 +        kStackSize = 100            // The size of the state stack for
    1.53 +    };                              //   rules parsing.  Corresponds roughly
    1.54 +                                    //   to the depth of parentheses nesting
    1.55 +                                    //   that is allowed in the rules.
    1.56 +    // One scanned rule character, paired with an "escaped" flag.
    1.57 +    struct RBBIRuleChar {
    1.58 +        UChar32             fChar;
    1.59 +        UBool               fEscaped;
    1.60 +    };
    1.61 +    // Construct a scanner for the given rule builder.
    1.62 +    RBBIRuleScanner(RBBIRuleBuilder  *rb);
    1.63 +
    1.64 +
    1.65 +    virtual    ~RBBIRuleScanner();
    1.66 +
    1.67 +    void        nextChar(RBBIRuleChar &c);          // Get the next char from the input stream.
    1.68 +                                                    //   The result is written to c; nothing is returned.
    1.69 +
    1.70 +    UBool       push(const RBBIRuleChar &c);        // Push (unget) one character.
    1.71 +                                                    //   Only a single character may be pushed.
    1.72 +
    1.73 +    void        parse();                            // Parse the rules, generating two parse
    1.74 +                                                    //   trees, one each for the forward and
    1.75 +                                                    //   reverse rules,
    1.76 +                                                    //   and a list of UnicodeSets encountered.
    1.77 +
    1.78 +    /**
    1.79 +     * Return a rules string without unnecessary
    1.80 +     * characters.
    1.81 +     */
    1.82 +    static UnicodeString stripRules(const UnicodeString &rules);
    1.83 +private:
    1.84 +
    1.85 +    UBool       doParseActions(int32_t a);
    1.86 +    void        error(UErrorCode e);                   // error reporting convenience function.
    1.87 +    void        fixOpStack(RBBINode::OpPrecedence p);
    1.88 +                                                       //   p is an RBBINode operator precedence.
    1.89 +    void        findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);
    1.90 +
    1.91 +    UChar32     nextCharLL();
    1.92 +#ifdef RBBI_DEBUG
    1.93 +    void        printNodeStack(const char *title);
    1.94 +#endif
    1.95 +    RBBINode    *pushNewNode(RBBINode::NodeType  t);
    1.96 +    void        scanSet();
    1.97 +
    1.98 +
    1.99 +    RBBIRuleBuilder               *fRB;              // The rule builder that we are part of.
   1.100 +
   1.101 +    int32_t                       fScanIndex;        // Index of current character being processed
   1.102 +                                                     //   in the rule input string.
   1.103 +    int32_t                       fNextIndex;        // Index of the next character, which
   1.104 +                                                     //   is the first character not yet scanned.
   1.105 +    UBool                         fQuoteMode;        // Scan is in a 'quoted region'
   1.106 +    int32_t                       fLineNum;          // Line number in input file.
   1.107 +    int32_t                       fCharNum;          // Char position within the line.
   1.108 +    UChar32                       fLastChar;         // Previous char, needed to count CR-LF
   1.109 +                                                     //   as a single line, not two.
   1.110 +
   1.111 +    RBBIRuleChar                  fC;                // Current char for parse state machine
   1.112 +                                                     //   processing.
   1.113 +    UnicodeString                 fVarName;          // $variableName, valid when we've just
   1.114 +                                                     //   scanned one.
   1.115 +
   1.116 +    RBBIRuleTableEl               **fStateTable;     // State Transition Table for RBBI Rule
   1.117 +                                                     //   parsing.  index by p[state][char-class]
   1.118 +
   1.119 +    uint16_t                      fStack[kStackSize];  // State stack, holds state pushes
   1.120 +    int32_t                       fStackPtr;           //  and pops as specified in the state
   1.121 +                                                       //  transition rules.
   1.122 +
   1.123 +    RBBINode                      *fNodeStack[kStackSize]; // Node stack, holds nodes created
   1.124 +                                                           //  during the parse of a rule
   1.125 +    int32_t                        fNodeStackPtr;
   1.126 +
   1.127 +
   1.128 +    UBool                          fReverseRule;     // True if the rule currently being scanned
   1.129 +                                                     //  is a reverse direction rule (if it
   1.130 +                                                     //  starts with a '!')
   1.131 +
   1.132 +    UBool                          fLookAheadRule;   // True if the rule includes a '/'
   1.133 +                                                     //   somewhere within it.
   1.134 +
   1.135 +    RBBISymbolTable               *fSymbolTable;     // symbol table, holds definitions of
   1.136 +                                                     //   $variable symbols.
   1.137 +
   1.138 +    UHashtable                    *fSetTable;        // UnicodeSet hash table, holds indexes to
   1.139 +                                                     //   the sets created while parsing rules.
   1.140 +                                                     //   The key is the string used for creating
   1.141 +                                                     //   the set.
   1.142 +
   1.143 +    UnicodeSet                     fRuleSets[10];    // Unicode Sets that are needed during
   1.144 +                                                     //  the scanning of RBBI rules.  The
   1.145 +                                                     //  indices for these are assigned by the
   1.146 +                                                     //  perl script that builds the state tables.
   1.147 +                                                     //  See rbbirpt.h.
   1.148 +
   1.149 +    int32_t                        fRuleNum;         // Counts each rule as it is scanned.
   1.150 +
   1.151 +    int32_t                        fOptionStart;     // Input index of start of a !!option
   1.152 +                                                     //   keyword, while being scanned.
   1.153 +    // UnicodeSet pointers for the character classes named below.  NOTE(review): where these are set and who owns them is not visible in this header.
   1.154 +    UnicodeSet *gRuleSet_rule_char;
   1.155 +    UnicodeSet *gRuleSet_white_space;
   1.156 +    UnicodeSet *gRuleSet_name_char;
   1.157 +    UnicodeSet *gRuleSet_name_start_char;
   1.158 +
   1.159 +    RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
   1.160 +    RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
   1.161 +};
   1.162 +
   1.163 +U_NAMESPACE_END
   1.164 +
   1.165 +#endif

mercurial