Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 1999-2013 International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: rbbidata.h |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * RBBI data formats Includes |
michael@0 | 14 | * |
michael@0 | 15 | * Structs that describe the format of the Binary RBBI data, |
michael@0 | 16 | * as it is stored in ICU's data file. |
michael@0 | 17 | * |
michael@0 | 18 | * RBBIDataWrapper - Instances of this class sit between the |
michael@0 | 19 | * raw data structs and the RulesBasedBreakIterator objects |
michael@0 | 20 | * that are created by applications. The wrapper class |
michael@0 | 21 | * provides reference counting for the underlying data, |
michael@0 | 22 | * and direct pointers to data that would not otherwise |
michael@0 | 23 | * be accessible without ugly pointer arithmetic. The |
michael@0 | 24 | * wrapper does not attempt to provide any higher level |
michael@0 | 25 | * abstractions for the data itself. |
michael@0 | 26 | * |
michael@0 | 27 | * There will be only one instance of RBBIDataWrapper for any |
michael@0 | 28 | * set of RBBI run time data being shared by instances |
michael@0 | 29 | * (clones) of RulesBasedBreakIterator. |
michael@0 | 30 | */ |
michael@0 | 31 | |
michael@0 | 32 | #ifndef __RBBIDATA_H__ |
michael@0 | 33 | #define __RBBIDATA_H__ |
michael@0 | 34 | |
michael@0 | 35 | #include "unicode/utypes.h" |
michael@0 | 36 | #include "unicode/udata.h" |
michael@0 | 37 | #include "udataswp.h" |
michael@0 | 38 | |
michael@0 | 39 | /** |
michael@0 | 40 | * Swap RBBI data. See udataswp.h. |
michael@0 | 41 | * @internal |
michael@0 | 42 | */ |
michael@0 | 43 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 44 | ubrk_swap(const UDataSwapper *ds, |
michael@0 | 45 | const void *inData, int32_t length, void *outData, |
michael@0 | 46 | UErrorCode *pErrorCode); |
michael@0 | 47 | |
michael@0 | 48 | #ifdef __cplusplus |
michael@0 | 49 | |
michael@0 | 50 | #include "unicode/uobject.h" |
michael@0 | 51 | #include "unicode/unistr.h" |
michael@0 | 52 | #include "umutex.h" |
michael@0 | 53 | #include "utrie.h" |
michael@0 | 54 | |
michael@0 | 55 | U_NAMESPACE_BEGIN |
michael@0 | 56 | |
michael@0 | 57 | /* |
michael@0 | 58 | * The following structs map exactly onto the raw data from ICU common data file. |
michael@0 | 59 | */ |
michael@0 | 60 | struct RBBIDataHeader { /* Header of the binary RBBI data; the section offsets below are relative to its start. */ |
michael@0 | 61 | uint32_t fMagic; /* == 0xb1a0 */ |
michael@0 | 62 | uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */ |
michael@0 | 63 | /* if there is one associated with this data. */ |
michael@0 | 64 | /* (version originates in rbbi, is copied to UDataInfo) */ |
michael@0 | 65 | /* For ICU 3.2 and earlier, this field was */ |
michael@0 | 66 | /* uint32_t fVersion */ |
michael@0 | 67 | /* with a value of 1. */ |
michael@0 | 68 | uint32_t fLength; /* Total length in bytes of this RBBI Data, */ |
michael@0 | 69 | /* including all sections, not just the header. */ |
michael@0 | 70 | uint32_t fCatCount; /* Number of character categories. */ |
michael@0 | 71 | |
michael@0 | 72 | /* */ |
michael@0 | 73 | /* Offsets and sizes of each of the subsections within the RBBI data. */ |
michael@0 | 74 | /* All offsets are bytes from the start of the RBBIDataHeader. */ |
michael@0 | 75 | /* All sizes are in bytes. */ |
michael@0 | 76 | /* */ |
michael@0 | 77 | uint32_t fFTable; /* Offset to the forward state transition table. */ |
michael@0 | 78 | uint32_t fFTableLen; |
michael@0 | 79 | uint32_t fRTable; /* Offset to the reverse state transition table. */ |
michael@0 | 80 | uint32_t fRTableLen; |
michael@0 | 81 | uint32_t fSFTable; /* Offset to the safe point forward transition table. */ |
michael@0 | 82 | uint32_t fSFTableLen; |
michael@0 | 83 | uint32_t fSRTable; /* Offset to the safe point reverse transition table. */ |
michael@0 | 84 | uint32_t fSRTableLen; |
michael@0 | 85 | uint32_t fTrie; /* Offset to Trie data for character categories */ |
michael@0 | 86 | uint32_t fTrieLen; |
michael@0 | 87 | uint32_t fRuleSource; /* Offset to the source for the break */ |
michael@0 | 88 | uint32_t fRuleSourceLen; /* rules. Stored UChar *. */ |
michael@0 | 89 | uint32_t fStatusTable; /* Offset to the table of rule status values */ |
michael@0 | 90 | uint32_t fStatusTableLen; |
michael@0 | 91 | |
michael@0 | 92 | uint32_t fReserved[6]; /* Reserved for expansion */ |
michael@0 | 93 | |
michael@0 | 94 | }; |
michael@0 | 95 | |
michael@0 | 96 | |
michael@0 | 97 | |
michael@0 | 98 | struct RBBIStateTableRow { /* One row of a state transition table; each row describes one state. */ |
michael@0 | 99 | int16_t fAccepting; /* Non-zero if this row is for an accepting state. */ |
michael@0 | 100 | /* Value 0: not an accepting state. */ |
michael@0 | 101 | /* -1: Unconditional Accepting state. */ |
michael@0 | 102 | /* positive: Look-ahead match has completed. */ |
michael@0 | 103 | /* Actual boundary position happened earlier */ |
michael@0 | 104 | /* Value here == fLookAhead in earlier */ |
michael@0 | 105 | /* state, at actual boundary pos. */ |
michael@0 | 106 | int16_t fLookAhead; /* Non-zero if this row is for a state that */ |
michael@0 | 107 | /* corresponds to a '/' in the rule source. */ |
michael@0 | 108 | /* Value is the same as the fAccepting */ |
michael@0 | 109 | /* value for the rule (which will appear */ |
michael@0 | 110 | /* in a different state). */ |
michael@0 | 111 | int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */ |
michael@0 | 112 | /* from a rule. Value is the index in the */ |
michael@0 | 113 | /* StatusTable of the set of matching */ |
michael@0 | 114 | /* tags (rule status values) */ |
michael@0 | 115 | int16_t fReserved; |
michael@0 | 116 | uint16_t fNextState[2]; /* Next State, indexed by char category. */ |
michael@0 | 117 | /* This array does not have two elements; the */ |
michael@0 | 118 | /* array size is actually fData->fHeader->fCatCount. */ |
michael@0 | 119 | /* CAUTION: see RBBITableBuilder::getTableSize() */ |
michael@0 | 120 | /* before changing anything here. */ |
michael@0 | 121 | }; |
michael@0 | 122 | |
michael@0 | 123 | |
michael@0 | 124 | struct RBBIStateTable { /* Header of one state transition table, followed in place by its row data. */ |
michael@0 | 125 | uint32_t fNumStates; /* Number of states. */ |
michael@0 | 126 | uint32_t fRowLen; /* Length of a state table row, in bytes. */ |
michael@0 | 127 | uint32_t fFlags; /* Option Flags for this state table */ |
michael@0 | 128 | uint32_t fReserved; /* reserved */ |
michael@0 | 129 | char fTableData[4]; /* First RBBIStateTableRow begins here. */ |
michael@0 | 130 | /* (Making it char[] simplifies ugly address */ |
michael@0 | 131 | /* arithmetic for indexing variable length rows. NOTE(review): the declared [4] looks like a placeholder size only -- confirm.) */ |
michael@0 | 132 | }; |
michael@0 | 133 | |
michael@0 | 134 | typedef enum { /* State table option flags. NOTE(review): presumably the values stored in RBBIStateTable::fFlags -- confirm. */ |
michael@0 | 135 | RBBI_LOOKAHEAD_HARD_BREAK = 1, |
michael@0 | 136 | RBBI_BOF_REQUIRED = 2 |
michael@0 | 137 | } RBBIStateTableFlags; |
michael@0 | 138 | |
michael@0 | 139 | |
michael@0 | 140 | /* */ |
michael@0 | 141 | /* The reference counting wrapper class */ |
michael@0 | 142 | /* */ |
michael@0 | 143 | class RBBIDataWrapper : public UMemory { /* Reference-counting wrapper that exposes direct pointers to the sections of the RBBI data. */ |
michael@0 | 144 | public: |
michael@0 | 145 | enum EDontAdopt { /* Tag type used only to select the constructor overload below that takes it. */ |
michael@0 | 146 | kDontAdopt |
michael@0 | 147 | }; |
michael@0 | 148 | RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); |
michael@0 | 149 | RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); /* NOTE(review): presumably the caller keeps ownership of data -- confirm in the implementation. */ |
michael@0 | 150 | RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); |
michael@0 | 151 | ~RBBIDataWrapper(); |
michael@0 | 152 | |
michael@0 | 153 | void init(const RBBIDataHeader *data, UErrorCode &status); |
michael@0 | 154 | RBBIDataWrapper *addReference(); |
michael@0 | 155 | void removeReference(); |
michael@0 | 156 | UBool operator ==(const RBBIDataWrapper &other) const; |
michael@0 | 157 | int32_t hashCode(); |
michael@0 | 158 | const UnicodeString &getRuleSourceString() const; |
michael@0 | 159 | #ifdef RBBI_DEBUG |
michael@0 | 160 | void printData(); |
michael@0 | 161 | void printTable(const char *heading, const RBBIStateTable *table); |
michael@0 | 162 | #else |
michael@0 | 163 | #define printData() /* Without RBBI_DEBUG the debug printers are macros that expand to nothing. */ |
michael@0 | 164 | #define printTable(heading, table) |
michael@0 | 165 | #endif |
michael@0 | 166 | |
michael@0 | 167 | /* */ |
michael@0 | 168 | /* Pointers to items within the data */ |
michael@0 | 169 | /* */ |
michael@0 | 170 | const RBBIDataHeader *fHeader; |
michael@0 | 171 | const RBBIStateTable *fForwardTable; |
michael@0 | 172 | const RBBIStateTable *fReverseTable; |
michael@0 | 173 | const RBBIStateTable *fSafeFwdTable; |
michael@0 | 174 | const RBBIStateTable *fSafeRevTable; |
michael@0 | 175 | const UChar *fRuleSource; |
michael@0 | 176 | const int32_t *fRuleStatusTable; |
michael@0 | 177 | |
michael@0 | 178 | /* Number of int32_t values in the rule status table. Used to sanity check indexing. */ |
michael@0 | 179 | int32_t fStatusMaxIdx; |
michael@0 | 180 | |
michael@0 | 181 | UTrie fTrie; |
michael@0 | 182 | |
michael@0 | 183 | private: |
michael@0 | 184 | u_atomic_int32_t fRefCount; /* NOTE(review): presumably maintained by addReference()/removeReference() -- confirm. */ |
michael@0 | 185 | UDataMemory *fUDataMem; |
michael@0 | 186 | UnicodeString fRuleString; |
michael@0 | 187 | UBool fDontFreeData; /* NOTE(review): presumably set by the kDontAdopt constructor -- confirm. */ |
michael@0 | 188 | |
michael@0 | 189 | RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */ |
michael@0 | 190 | RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */ |
michael@0 | 191 | }; |
michael@0 | 192 | |
michael@0 | 193 | |
michael@0 | 194 | |
michael@0 | 195 | U_NAMESPACE_END |
michael@0 | 196 | |
michael@0 | 197 | #endif /* C++ */ |
michael@0 | 198 | |
michael@0 | 199 | #endif |