intl/icu/source/common/rbbisetb.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 //
     2 //  rbbisetb.h
     3 /*
     4 **********************************************************************
     5 *   Copyright (c) 2001-2005, International Business Machines
     6 *   Corporation and others.  All Rights Reserved.
     7 **********************************************************************
     8 */
    10 #ifndef RBBISETB_H
    11 #define RBBISETB_H
    13 #include "unicode/utypes.h"
    14 #include "unicode/uobject.h"
    15 #include "rbbirb.h"
    16 #include "uvector.h"
    18 struct  UNewTrie;
    20 U_NAMESPACE_BEGIN
    22 //
    23 //  RBBISetBuilder   Derives the character categories used by the runtime RBBI engine
    24 //                   from the Unicode Sets appearing in the source  RBBI rules, and
    25 //                   creates the TRIE table used to map from Unicode to the
    26 //                   character categories.
    27 //
    30 //
    31 //  RangeDescriptor
    32 //
    33 //     Each of the non-overlapping character ranges gets one of these descriptors.
    34 //     All of them are strung together in a linked list, which is kept in order
    35 //     (by character)
    36 //
    37 class RangeDescriptor : public UMemory {
    38 public:
    39     UChar32            fStartChar;      // Start of range, unicode 32 bit value.
    40     UChar32            fEndChar;        // End of range, unicode 32 bit value.
    41     int32_t            fNum;            // runtime-mapped input value for this range.
    42     UVector           *fIncludesSets;   // vector of the original
    43                                         //   Unicode sets that include this range.
    44                                         //    (Contains ptrs to uset nodes)
    45     RangeDescriptor   *fNext;           // Next RangeDescriptor in the linked list.
           // NOTE(review): presumably NULL for the last descriptor in the list;
           //   confirm against the implementation file.

           // Constructors take a UErrorCode reference; presumably failures
           //   (e.g. allocating fIncludesSets) are reported through it -- TODO confirm.
    47     RangeDescriptor(UErrorCode &status);
           // Copy-like constructor with error reporting.  Presumably duplicates the
           //   range and its fIncludesSets contents from "other" -- TODO confirm.
    48     RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);
    49     ~RangeDescriptor();
    50     void split(UChar32 where, UErrorCode &status);   // Split this range in two at "where", with
    51                                         //   where appearing in the second (higher) part.
    52     void setDictionaryFlag();           // Check whether this range appears as part of
    53                                         //   the Unicode set named "dictionary"
    55 private:
           // The ordinary copy constructor and assignment operator are declared
           //   private so that code outside the class cannot copy a descriptor.
    56     RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class
    57     RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class
    58 };
    61 //
    62 //  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules.
    63 //
    64 //      Starting with the rules parse tree from the scanner,
    65 //
    66 //                   -  Enumerate the set of UnicodeSets that are referenced
    67 //                      by the RBBI rules.
    68 //                   -  compute a derived set of non-overlapping UnicodeSets
    69 //                      that will correspond to columns in the state table for
    70 //                      the RBBI execution engine.
    71 //                   -  construct the trie table that maps input characters
    72 //                      to set numbers in the non-overlapping set of sets.
    73 //
    76 class RBBISetBuilder : public UMemory {
    77 public:
           // "rb" is the rule builder this set builder works for (stored in fRB below).
    78     RBBISetBuilder(RBBIRuleBuilder *rb);
    79     ~RBBISetBuilder();
           // NOTE(review): build() presumably performs the steps listed in the
           //   comment preceding this class (range list, groups, trie) -- confirm.
    81     void     build();
           // Overloads taking either a vector of sets or a single uset node.
           //   NOTE(review): "val" is presumably a character category number -- confirm.
    82     void     addValToSets(UVector *sets,      uint32_t val);
    83     void     addValToSet (RBBINode *usetNode, uint32_t val);
    84     int32_t  getNumCharCategories() const;   // CharCategories are the same as input symbol set to the
    85                                              //    runtime state machine, which are the same as
    86                                              //    columns in the DFA state table
    87     int32_t  getTrieSize() /*const*/;        // Size in bytes of the serialized Trie.
    88     void     serializeTrie(uint8_t *where);  // write out the serialized Trie.
                                                    //   NOTE(review): "where" presumably must have room for
                                                    //   getTrieSize() bytes -- confirm with callers.
    89     UChar32  getFirstChar(int32_t  val) const;
    90     UBool    sawBOF() const;                 // Indicate whether any references to the {bof} pseudo
    91                                              //   character were encountered.
           // Debug printing.  With RBBI_DEBUG defined these are real member functions;
           //   otherwise each name becomes an empty function-like macro, so calls to
           //   them expand to nothing.  Being preprocessor macros, the fallbacks are
           //   not scoped to this class and affect any later use of these names.
    92 #ifdef RBBI_DEBUG
    93     void     printSets();
    94     void     printRanges();
    95     void     printRangeGroups();
    96 #else
    97     #define printSets()
    98     #define printRanges()
    99     #define printRangeGroups()
   100 #endif
   102 private:
   103     void           numberSets();
   105     RBBIRuleBuilder       *fRB;             // The RBBI Rule Compiler that owns us.
   106     UErrorCode            *fStatus;         // NOTE(review): presumably the rule builder's
                                                   //   error code -- confirm in the constructor.
   108     RangeDescriptor       *fRangeList;      // Head of the linked list of RangeDescriptors
   110     UNewTrie              *fTrie;           // The mapping TRIE that is the end result of processing
   111     uint32_t              fTrieSize;        //  the Unicode Sets.
                                                   // NOTE(review): fTrieSize is presumably the serialized
                                                   //   size reported by getTrieSize() -- confirm.
   113     // Groups correspond to character categories -
   114     //       groups of ranges that are in the same original UnicodeSets.
   115     //       fGroupCount is the index of the last used group.
   116     //       fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
   117     //       State table column 0 is not used.  Column 1 is for end-of-input.
   118     //       column 2 is for group 0.  Funny counting.
   119     int32_t               fGroupCount;
   121     UBool                 fSawBOF;          // NOTE(review): presumably the value returned by
                                                   //   sawBOF() -- confirm in the implementation.
   123     RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
   124     RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
   125 };
   129 U_NAMESPACE_END
   130 #endif

mercurial