1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/rbbidata.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,199 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 1999-2013 International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: rbbidata.h 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* RBBI data formats Includes 1.17 +* 1.18 +* Structs that describe the format of the Binary RBBI data, 1.19 +* as it is stored in ICU's data file. 1.20 +* 1.21 +* RBBIDataWrapper - Instances of this class sit between the 1.22 +* raw data structs and the RulesBasedBreakIterator objects 1.23 +* that are created by applications. The wrapper class 1.24 +* provides reference counting for the underlying data, 1.25 +* and direct pointers to data that would not otherwise 1.26 +* be accessible without ugly pointer arithmetic. The 1.27 +* wrapper does not attempt to provide any higher level 1.28 +* abstractions for the data itself. 1.29 +* 1.30 +* There will be only one instance of RBBIDataWrapper for any 1.31 +* set of RBBI run time data being shared by instances 1.32 +* (clones) of RulesBasedBreakIterator. 1.33 +*/ 1.34 + 1.35 +#ifndef __RBBIDATA_H__ 1.36 +#define __RBBIDATA_H__ 1.37 + 1.38 +#include "unicode/utypes.h" 1.39 +#include "unicode/udata.h" 1.40 +#include "udataswp.h" 1.41 + 1.42 +/** 1.43 + * Swap RBBI data. See udataswp.h. 
1.44 + * @internal 1.45 + */ 1.46 +U_CAPI int32_t U_EXPORT2 1.47 +ubrk_swap(const UDataSwapper *ds, 1.48 + const void *inData, int32_t length, void *outData, 1.49 + UErrorCode *pErrorCode); 1.50 + 1.51 +#ifdef __cplusplus 1.52 + 1.53 +#include "unicode/uobject.h" 1.54 +#include "unicode/unistr.h" 1.55 +#include "umutex.h" 1.56 +#include "utrie.h" 1.57 + 1.58 +U_NAMESPACE_BEGIN 1.59 + 1.60 +/* 1.61 + * The following structs map exactly onto the raw data from ICU common data file. 1.62 + */ 1.63 +struct RBBIDataHeader { 1.64 + uint32_t fMagic; /* == 0xb1a0 (NOTE(review): comment previously read "0xbla0", which is not valid hex; confirm against the check in rbbidata.cpp) */ 1.65 + uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */ 1.66 + /* if there is one associated with this data. */ 1.67 + /* (version originates in rbbi, is copied to UDataInfo) */ 1.68 + /* For ICU 3.2 and earlier, this field was */ 1.69 + /* uint32_t fVersion */ 1.70 + /* with a value of 1. */ 1.71 + uint32_t fLength; /* Total length in bytes of this RBBI Data, */ 1.72 + /* including all sections, not just the header. */ 1.73 + uint32_t fCatCount; /* Number of character categories. */ 1.74 + 1.75 + /* */ 1.76 + /* Offsets and sizes of each of the subsections within the RBBI data. */ 1.77 + /* All offsets are bytes from the start of the RBBIDataHeader. */ 1.78 + /* All sizes are in bytes. */ 1.79 + /* */ 1.80 + uint32_t fFTable; /* Offset to the forward state transition table. */ 1.81 + uint32_t fFTableLen; 1.82 + uint32_t fRTable; /* Offset to the reverse state transition table. */ 1.83 + uint32_t fRTableLen; 1.84 + uint32_t fSFTable; /* Offset to the safe point forward transition table */ 1.85 + uint32_t fSFTableLen; 1.86 + uint32_t fSRTable; /* Offset to the safe point reverse transition table */ 1.87 + uint32_t fSRTableLen; 1.88 + uint32_t fTrie; /* Offset to Trie data for character categories */ 1.89 + uint32_t fTrieLen; 1.90 + uint32_t fRuleSource; /* Offset to the source for the break */ 1.91 + uint32_t fRuleSourceLen; /* rules. Stored UChar *. 
*/ 1.92 + uint32_t fStatusTable; /* Offset to the table of rule status values */ 1.93 + uint32_t fStatusTableLen; 1.94 + 1.95 + uint32_t fReserved[6]; /* Reserved for expansion */ 1.96 + 1.97 +}; 1.98 + 1.99 + 1.100 + 1.101 +struct RBBIStateTableRow { 1.102 + int16_t fAccepting; /* Non-zero if this row is for an accepting state. */ 1.103 + /* Value 0: not an accepting state. */ 1.104 + /* -1: Unconditional Accepting state. */ 1.105 + /* positive: Look-ahead match has completed. */ 1.106 + /* Actual boundary position happened earlier */ 1.107 + /* Value here == fLookAhead in earlier */ 1.108 + /* state, at actual boundary pos. */ 1.109 + int16_t fLookAhead; /* Non-zero if this row is for a state that */ 1.110 + /* corresponds to a '/' in the rule source. */ 1.111 + /* Value is the same as the fAccepting */ 1.112 + /* value for the rule (which will appear */ 1.113 + /* in a different state). */ 1.114 + int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */ 1.115 + /* from a rule. Value is the index in the */ 1.116 + /* StatusTable of the set of matching */ 1.117 + /* tags (rule status values) */ 1.118 + int16_t fReserved; 1.119 + uint16_t fNextState[2]; /* Next State, indexed by char category. */ 1.120 + /* This array does not have two elements */ 1.121 + /* Array Size is actually fData->fHeader->fCatCount */ 1.122 + /* CAUTION: see RBBITableBuilder::getTableSize() */ 1.123 + /* before changing anything here. */ 1.124 +}; 1.125 + 1.126 + 1.127 +struct RBBIStateTable { 1.128 + uint32_t fNumStates; /* Number of states. */ 1.129 + uint32_t fRowLen; /* Length of a state table row, in bytes. */ 1.130 + uint32_t fFlags; /* Option Flags for this state table */ 1.131 + uint32_t fReserved; /* reserved */ 1.132 + char fTableData[4]; /* First RBBIStateTableRow begins here. */ 1.133 + /* (making it char[] simplifies ugly address */ 1.134 + /* arithmetic for indexing variable length rows.) 
*/ 1.135 +}; 1.136 + 1.137 +typedef enum { 1.138 + RBBI_LOOKAHEAD_HARD_BREAK = 1, 1.139 + RBBI_BOF_REQUIRED = 2 1.140 +} RBBIStateTableFlags; 1.141 + 1.142 + 1.143 +/* */ 1.144 +/* The reference counting wrapper class */ 1.145 +/* */ 1.146 +class RBBIDataWrapper : public UMemory { 1.147 +public: 1.148 + enum EDontAdopt { 1.149 + kDontAdopt 1.150 + }; 1.151 + RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); 1.152 + RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); 1.153 + RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); 1.154 + ~RBBIDataWrapper(); 1.155 + 1.156 + void init(const RBBIDataHeader *data, UErrorCode &status); 1.157 + RBBIDataWrapper *addReference(); 1.158 + void removeReference(); 1.159 + UBool operator ==(const RBBIDataWrapper &other) const; 1.160 + int32_t hashCode(); 1.161 + const UnicodeString &getRuleSourceString() const; 1.162 +#ifdef RBBI_DEBUG 1.163 + void printData(); 1.164 + void printTable(const char *heading, const RBBIStateTable *table); 1.165 +#else /* RBBI_DEBUG not defined: the two debug print macros below expand to nothing */ 1.166 + #define printData() 1.167 + #define printTable(heading, table) 1.168 +#endif 1.169 + 1.170 + /* */ 1.171 + /* Pointers to items within the data */ 1.172 + /* */ 1.173 + const RBBIDataHeader *fHeader; 1.174 + const RBBIStateTable *fForwardTable; 1.175 + const RBBIStateTable *fReverseTable; 1.176 + const RBBIStateTable *fSafeFwdTable; 1.177 + const RBBIStateTable *fSafeRevTable; 1.178 + const UChar *fRuleSource; 1.179 + const int32_t *fRuleStatusTable; 1.180 + 1.181 + /* number of int32_t values in the rule status table. 
Used to sanity check indexing */ 1.182 + int32_t fStatusMaxIdx; 1.183 + 1.184 + UTrie fTrie; 1.185 + 1.186 +private: 1.187 + u_atomic_int32_t fRefCount; /* presumably the count adjusted by addReference()/removeReference() -- confirm in rbbidata.cpp */ 1.188 + UDataMemory *fUDataMem; 1.189 + UnicodeString fRuleString; 1.190 + UBool fDontFreeData; 1.191 + 1.192 + RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */ 1.193 + RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */ 1.194 +}; 1.195 + 1.196 + 1.197 + 1.198 +U_NAMESPACE_END 1.199 + 1.200 +#endif /* C++ */ 1.201 + 1.202 +#endif /* __RBBIDATA_H__ */