/*
*******************************************************************************
*   Copyright (C) 2010-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  bytestriebuilder.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*/

/**
 * \file
 * \brief C++ API: Builder for icu::BytesTrie
 */

20 #ifndef __BYTESTRIEBUILDER_H__ |
|
21 #define __BYTESTRIEBUILDER_H__ |
|
22 |
|
23 #include "unicode/utypes.h" |
|
24 #include "unicode/bytestrie.h" |
|
25 #include "unicode/stringpiece.h" |
|
26 #include "unicode/stringtriebuilder.h" |
|
27 |
|
28 U_NAMESPACE_BEGIN |
|
29 |
|
30 class BytesTrieElement; |
|
31 class CharString; |
|
32 |
|
33 /** |
|
34 * Builder class for BytesTrie. |
|
35 * |
|
36 * This class is not intended for public subclassing. |
|
37 * @stable ICU 4.8 |
|
38 */ |
|
39 class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { |
|
40 public: |
|
41 /** |
|
42 * Constructs an empty builder. |
|
43 * @param errorCode Standard ICU error code. |
|
44 * @stable ICU 4.8 |
|
45 */ |
|
46 BytesTrieBuilder(UErrorCode &errorCode); |
|
47 |
|
48 /** |
|
49 * Destructor. |
|
50 * @stable ICU 4.8 |
|
51 */ |
|
52 virtual ~BytesTrieBuilder(); |
|
53 |
|
54 /** |
|
55 * Adds a (byte sequence, value) pair. |
|
56 * The byte sequence must be unique. |
|
57 * The bytes will be copied; the builder does not keep |
|
58 * a reference to the input StringPiece or its data(). |
|
59 * @param s The input byte sequence. |
|
60 * @param value The value associated with this byte sequence. |
|
61 * @param errorCode Standard ICU error code. Its input value must |
|
62 * pass the U_SUCCESS() test, or else the function returns |
|
63 * immediately. Check for U_FAILURE() on output or use with |
|
64 * function chaining. (See User Guide for details.) |
|
65 * @return *this |
|
66 * @stable ICU 4.8 |
|
67 */ |
|
68 BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode); |
|
69 |
|
70 /** |
|
71 * Builds a BytesTrie for the add()ed data. |
|
72 * Once built, no further data can be add()ed until clear() is called. |
|
73 * |
|
74 * A BytesTrie cannot be empty. At least one (byte sequence, value) pair |
|
75 * must have been add()ed. |
|
76 * |
|
77 * This method passes ownership of the builder's internal result array to the new trie object. |
|
78 * Another call to any build() variant will re-serialize the trie. |
|
79 * After clear() has been called, a new array will be used as well. |
|
80 * @param buildOption Build option, see UStringTrieBuildOption. |
|
81 * @param errorCode Standard ICU error code. Its input value must |
|
82 * pass the U_SUCCESS() test, or else the function returns |
|
83 * immediately. Check for U_FAILURE() on output or use with |
|
84 * function chaining. (See User Guide for details.) |
|
85 * @return A new BytesTrie for the add()ed data. |
|
86 * @stable ICU 4.8 |
|
87 */ |
|
88 BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); |
|
89 |
|
90 /** |
|
91 * Builds a BytesTrie for the add()ed data and byte-serializes it. |
|
92 * Once built, no further data can be add()ed until clear() is called. |
|
93 * |
|
94 * A BytesTrie cannot be empty. At least one (byte sequence, value) pair |
|
95 * must have been add()ed. |
|
96 * |
|
97 * Multiple calls to buildStringPiece() return StringPieces referring to the |
|
98 * builder's same byte array, without rebuilding. |
|
99 * If buildStringPiece() is called after build(), the trie will be |
|
100 * re-serialized into a new array. |
|
101 * If build() is called after buildStringPiece(), the trie object will become |
|
102 * the owner of the previously returned array. |
|
103 * After clear() has been called, a new array will be used as well. |
|
104 * @param buildOption Build option, see UStringTrieBuildOption. |
|
105 * @param errorCode Standard ICU error code. Its input value must |
|
106 * pass the U_SUCCESS() test, or else the function returns |
|
107 * immediately. Check for U_FAILURE() on output or use with |
|
108 * function chaining. (See User Guide for details.) |
|
109 * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. |
|
110 * @stable ICU 4.8 |
|
111 */ |
|
112 StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); |
|
113 |
|
114 /** |
|
115 * Removes all (byte sequence, value) pairs. |
|
116 * New data can then be add()ed and a new trie can be built. |
|
117 * @return *this |
|
118 * @stable ICU 4.8 |
|
119 */ |
|
120 BytesTrieBuilder &clear(); |
|
121 |
|
122 private: |
|
123 BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor |
|
124 BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator |
|
125 |
|
126 void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); |
|
127 |
|
128 virtual int32_t getElementStringLength(int32_t i) const; |
|
129 virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const; |
|
130 virtual int32_t getElementValue(int32_t i) const; |
|
131 |
|
132 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; |
|
133 |
|
134 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; |
|
135 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; |
|
136 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const; |
|
137 |
|
138 virtual UBool matchNodesCanHaveValues() const { return FALSE; } |
|
139 |
|
140 virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; } |
|
141 virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; } |
|
142 virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; } |
|
143 |
|
144 #ifndef U_HIDE_INTERNAL_API |
|
145 /** |
|
146 * @internal |
|
147 */ |
|
148 class BTLinearMatchNode : public LinearMatchNode { |
|
149 public: |
|
150 BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); |
|
151 virtual UBool operator==(const Node &other) const; |
|
152 virtual void write(StringTrieBuilder &builder); |
|
153 private: |
|
154 const char *s; |
|
155 }; |
|
156 #endif /* U_HIDE_INTERNAL_API */ |
|
157 |
|
158 virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, |
|
159 Node *nextNode) const; |
|
160 |
|
161 UBool ensureCapacity(int32_t length); |
|
162 virtual int32_t write(int32_t byte); |
|
163 int32_t write(const char *b, int32_t length); |
|
164 virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length); |
|
165 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); |
|
166 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); |
|
167 virtual int32_t writeDeltaTo(int32_t jumpTarget); |
|
168 |
|
169 CharString *strings; // Pointer not object so we need not #include internal charstr.h. |
|
170 BytesTrieElement *elements; |
|
171 int32_t elementsCapacity; |
|
172 int32_t elementsLength; |
|
173 |
|
174 // Byte serialization of the trie. |
|
175 // Grows from the back: bytesLength measures from the end of the buffer! |
|
176 char *bytes; |
|
177 int32_t bytesCapacity; |
|
178 int32_t bytesLength; |
|
179 }; |
|
180 |
|
181 U_NAMESPACE_END |
|
182 |
|
183 #endif // __BYTESTRIEBUILDER_H__ |