intl/icu/source/common/rbbidata.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1999-2013 International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: rbbidata.h
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * RBBI data formats Includes
michael@0 14 *
michael@0 15 * Structs that describe the format of the Binary RBBI data,
michael@0 16 * as it is stored in ICU's data file.
michael@0 17 *
michael@0 18 * RBBIDataWrapper - Instances of this class sit between the
michael@0 19 * raw data structs and the RulesBasedBreakIterator objects
michael@0 20 * that are created by applications. The wrapper class
michael@0 21 * provides reference counting for the underlying data,
michael@0 22 * and direct pointers to data that would not otherwise
michael@0 23 * be accessible without ugly pointer arithmetic. The
michael@0 24 * wrapper does not attempt to provide any higher level
michael@0 25 * abstractions for the data itself.
michael@0 26 *
michael@0 27 * There will be only one instance of RBBIDataWrapper for any
michael@0 28 * set of RBBI run time data being shared by instances
michael@0 29 * (clones) of RulesBasedBreakIterator.
michael@0 30 */
michael@0 31
michael@0 32 #ifndef __RBBIDATA_H__
michael@0 33 #define __RBBIDATA_H__
michael@0 34
michael@0 35 #include "unicode/utypes.h"
michael@0 36 #include "unicode/udata.h"
michael@0 37 #include "udataswp.h"
michael@0 38
michael@0 39 /**
michael@0 40 * Swap RBBI data. See udataswp.h. Declared ahead of the __cplusplus-only section of this header, so C code including it sees this prototype too.
michael@0 41 * @internal
michael@0 42 */
michael@0 43 U_CAPI int32_t U_EXPORT2
michael@0 44 ubrk_swap(const UDataSwapper *ds,
michael@0 45 const void *inData, int32_t length, void *outData,
michael@0 46 UErrorCode *pErrorCode);
michael@0 47
michael@0 48 #ifdef __cplusplus
michael@0 49
michael@0 50 #include "unicode/uobject.h"
michael@0 51 #include "unicode/unistr.h"
michael@0 52 #include "umutex.h"
michael@0 53 #include "utrie.h"
michael@0 54
michael@0 55 U_NAMESPACE_BEGIN
michael@0 56
michael@0 57 /*
michael@0 58 * The following structs map exactly onto the raw data from the ICU common data file.
michael@0 59 */
michael@0 60 struct RBBIDataHeader {
michael@0 61 uint32_t fMagic; /* == 0xb1a0 (was misspelled "0xbla0", which is not valid hex) -- TODO confirm against the reader */
michael@0 62 uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
michael@0 63 /* if there is one associated with this data. */
michael@0 64 /* (version originates in rbbi, is copied to UDataInfo) */
michael@0 65 /* For ICU 3.2 and earlier, this field was */
michael@0 66 /* uint32_t fVersion */
michael@0 67 /* with a value of 1. */
michael@0 68 uint32_t fLength; /* Total length in bytes of this RBBI Data, */
michael@0 69 /* including all sections, not just the header. */
michael@0 70 uint32_t fCatCount; /* Number of character categories. */
michael@0 71
michael@0 72 /* */
michael@0 73 /* Offsets and sizes of each of the subsections within the RBBI data. */
michael@0 74 /* All offsets are bytes from the start of the RBBIDataHeader. */
michael@0 75 /* All sizes are in bytes. */
michael@0 76 /* */
michael@0 77 uint32_t fFTable; /* Offset to the forward state transition table. */
michael@0 78 uint32_t fFTableLen; /* Size in bytes of the forward table. */
michael@0 79 uint32_t fRTable; /* Offset to the reverse state transition table. */
michael@0 80 uint32_t fRTableLen; /* Size in bytes of the reverse table. */
michael@0 81 uint32_t fSFTable; /* Offset to the safe point forward transition table. */
michael@0 82 uint32_t fSFTableLen; /* Size in bytes of the safe point forward table. */
michael@0 83 uint32_t fSRTable; /* Offset to the safe point reverse transition table. */
michael@0 84 uint32_t fSRTableLen; /* Size in bytes of the safe point reverse table. */
michael@0 85 uint32_t fTrie; /* Offset to Trie data for character categories */
michael@0 86 uint32_t fTrieLen; /* Size in bytes of the Trie data. */
michael@0 87 uint32_t fRuleSource; /* Offset to the source for the break */
michael@0 88 uint32_t fRuleSourceLen; /* rules. Stored UChar *. Size in bytes. */
michael@0 89 uint32_t fStatusTable; /* Offset to the table of rule status values */
michael@0 90 uint32_t fStatusTableLen; /* Size in bytes of the rule status table. */
michael@0 91
michael@0 92 uint32_t fReserved[6]; /* Reserved for expansion */
michael@0 93
michael@0 94 };
michael@0 95
michael@0 96
michael@0 97
michael@0 98 struct RBBIStateTableRow { /* One row (one state) of a state transition table. */
michael@0 99 int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
michael@0 100 /* Value 0: not an accepting state. */
michael@0 101 /* -1: Unconditional Accepting state. */
michael@0 102 /* positive: Look-ahead match has completed. */
michael@0 103 /* Actual boundary position happened earlier */
michael@0 104 /* Value here == fLookAhead in earlier */
michael@0 105 /* state, at actual boundary pos. */
michael@0 106 int16_t fLookAhead; /* Non-zero if this row is for a state that */
michael@0 107 /* corresponds to a '/' in the rule source. */
michael@0 108 /* Value is the same as the fAccepting */
michael@0 109 /* value for the rule (which will appear */
michael@0 110 /* in a different state). */
michael@0 111 int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
michael@0 112 /* from a rule. Value is the index in the */
michael@0 113 /* StatusTable of the set of matching */
michael@0 114 /* tags (rule status values) */
michael@0 115 int16_t fReserved;
michael@0 116 uint16_t fNextState[2]; /* Next State, indexed by char category. */
michael@0 117 /* The declared size of two is a placeholder; */
michael@0 118 /* Array Size is actually fData->fHeader->fCatCount */
michael@0 119 /* CAUTION: see RBBITableBuilder::getTableSize() */
michael@0 120 /* before changing anything here. */
michael@0 121 };
michael@0 122
michael@0 123
michael@0 124 struct RBBIStateTable { /* Fixed fields that precede the rows of one state table. */
michael@0 125 uint32_t fNumStates; /* Number of states. */
michael@0 126 uint32_t fRowLen; /* Length of a state table row, in bytes. */
michael@0 127 uint32_t fFlags; /* Option Flags for this state table */
michael@0 128 uint32_t fReserved; /* reserved */
michael@0 129 char fTableData[4]; /* First RBBIStateTableRow begins here; rows are */
michael@0 130 /* fRowLen bytes each. (making it char[] simplifies ugly address */
michael@0 131 /* arithmetic for indexing variable length rows.) */
michael@0 132 };
michael@0 133
michael@0 134 typedef enum { /* NOTE(review): presumably bit values for RBBIStateTable::fFlags -- confirm. */
michael@0 135 RBBI_LOOKAHEAD_HARD_BREAK = 1,
michael@0 136 RBBI_BOF_REQUIRED = 2
michael@0 137 } RBBIStateTableFlags;
michael@0 138
michael@0 139
michael@0 140 /* */
michael@0 141 /* The reference counting wrapper class */
michael@0 142 /* */
michael@0 143 class RBBIDataWrapper : public UMemory {
michael@0 144 public:
michael@0 145 enum EDontAdopt { /* Tag type that selects the three-argument constructor below. */
michael@0 146 kDontAdopt
michael@0 147 };
michael@0 148 RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
michael@0 149 RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); /* NOTE(review): presumably wraps data without taking ownership -- confirm. */
michael@0 150 RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
michael@0 151 ~RBBIDataWrapper();
michael@0 152
michael@0 153 void init(const RBBIDataHeader *data, UErrorCode &status);
michael@0 154 RBBIDataWrapper *addReference(); /* NOTE(review): presumably increments fRefCount -- confirm. */
michael@0 155 void removeReference(); /* NOTE(review): presumably decrements fRefCount -- confirm. */
michael@0 156 UBool operator ==(const RBBIDataWrapper &other) const;
michael@0 157 int32_t hashCode();
michael@0 158 const UnicodeString &getRuleSourceString() const;
michael@0 159 #ifdef RBBI_DEBUG
michael@0 160 void printData();
michael@0 161 void printTable(const char *heading, const RBBIStateTable *table);
michael@0 162 #else /* Without RBBI_DEBUG, uses of these two names expand to nothing. */
michael@0 163 #define printData()
michael@0 164 #define printTable(heading, table)
michael@0 165 #endif /* NOTE(review): the two macros above are not scoped to this class; they apply to all code after this point. */
michael@0 166
michael@0 167 /* */
michael@0 168 /* Pointers to items within the data */
michael@0 169 /* */
michael@0 170 const RBBIDataHeader *fHeader;
michael@0 171 const RBBIStateTable *fForwardTable;
michael@0 172 const RBBIStateTable *fReverseTable;
michael@0 173 const RBBIStateTable *fSafeFwdTable;
michael@0 174 const RBBIStateTable *fSafeRevTable;
michael@0 175 const UChar *fRuleSource;
michael@0 176 const int32_t *fRuleStatusTable;
michael@0 177
michael@0 178 /* number of int32_t values in the rule status table. Used to sanity check indexing */
michael@0 179 int32_t fStatusMaxIdx;
michael@0 180
michael@0 181 UTrie fTrie; /* Held by value, unlike the pointer members above. */
michael@0 182
michael@0 183 private:
michael@0 184 u_atomic_int32_t fRefCount; /* NOTE(review): presumably the count managed by addReference()/removeReference() -- confirm. */
michael@0 185 UDataMemory *fUDataMem;
michael@0 186 UnicodeString fRuleString;
michael@0 187 UBool fDontFreeData; /* NOTE(review): presumably set by the kDontAdopt constructor -- confirm. */
michael@0 188
michael@0 189 RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
michael@0 190 RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
michael@0 191 };
michael@0 192
michael@0 193
michael@0 194
michael@0 195 U_NAMESPACE_END
michael@0 196
michael@0 197 #endif /* C++ */
michael@0 198
michael@0 199 #endif

mercurial