/*
*******************************************************************************
*
*   Copyright (C) 2002-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  propsvec.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002feb22
*   created by: Markus W. Scherer
*
*   Store bits (Unicode character properties) in bit set vectors.
*/

#ifndef __UPROPSVEC_H__
#define __UPROPSVEC_H__

#include "unicode/utypes.h"
#include "utrie.h"
#include "utrie2.h"

U_CDECL_BEGIN

/**
 * Unicode Properties Vectors associated with code point ranges.
 *
 * Rows of uint32_t integers in a contiguous array store
 * the range limits and the properties vectors.
 *
 * Logically, each row has a certain number of uint32_t values,
 * which is set via the upvec_open() "columns" parameter.
 *
 * Internally, two additional columns are stored.
 * In each internal row,
 * row[0] contains the start code point and
 * row[1] contains the limit code point,
 * which is the start of the next range.
 *
 * Initially, there is only one "normal" row for
 * range [0..0x110000[ (start inclusive, limit exclusive) with values 0.
 * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
 *
 * It would be possible to store only one range boundary per row,
 * but self-contained rows make it possible to sort them by contents later.
 *
 * Only a forward declaration is given here, so the type is opaque to
 * users of this header and is handled exclusively through pointers.
 */
struct UPropsVectors;
typedef struct UPropsVectors UPropsVectors;

/*
 * Special pseudo code points for storing the initialValue and the errorValue,
 * which are used to initialize a UTrie2 or similar.
 *
 * They lie at and above 0x110000, the limit of the initial "normal" range.
 * UPVEC_MAX_CP is the highest pseudo code point currently defined;
 * it might grow over time, so do not rely on its value being stable.
 */
#define UPVEC_FIRST_SPECIAL_CP 0x110000 /* first pseudo code point */
#define UPVEC_INITIAL_VALUE_CP 0x110000 /* row storing the initialValue */
#define UPVEC_ERROR_VALUE_CP 0x110001   /* row storing the errorValue */
#define UPVEC_MAX_CP 0x110001           /* last pseudo code point */

/*
 * Special pseudo code point used in upvec_compact() signalling the end of
 * delivering special values and the beginning of delivering real ones.
 * Stable value, unlike UPVEC_MAX_CP which might grow over time.
 */
#define UPVEC_START_REAL_VALUES_CP 0x200000

69 /* |
|
70 * Open a UPropsVectors object. |
|
71 * @param columns Number of value integers (uint32_t) per row. |
|
72 */ |
|
73 U_CAPI UPropsVectors * U_EXPORT2 |
|
74 upvec_open(int32_t columns, UErrorCode *pErrorCode); |
|
75 |
|
76 U_CAPI void U_EXPORT2 |
|
77 upvec_close(UPropsVectors *pv); |
|
78 |
|
79 /* |
|
80 * In rows for code points [start..end], select the column, |
|
81 * reset the mask bits and set the value bits (ANDed with the mask). |
|
82 * |
|
83 * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). |
|
84 */ |
|
85 U_CAPI void U_EXPORT2 |
|
86 upvec_setValue(UPropsVectors *pv, |
|
87 UChar32 start, UChar32 end, |
|
88 int32_t column, |
|
89 uint32_t value, uint32_t mask, |
|
90 UErrorCode *pErrorCode); |
|
91 |
|
92 /* |
|
93 * Logically const but must not be used on the same pv concurrently! |
|
94 * Always returns 0 if called after upvec_compact(). |
|
95 */ |
|
96 U_CAPI uint32_t U_EXPORT2 |
|
97 upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); |
|
98 |
|
99 /* |
|
100 * pRangeStart and pRangeEnd can be NULL. |
|
101 * @return NULL if rowIndex out of range and for illegal arguments, |
|
102 * or if called after upvec_compact() |
|
103 */ |
|
104 U_CAPI uint32_t * U_EXPORT2 |
|
105 upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, |
|
106 UChar32 *pRangeStart, UChar32 *pRangeEnd); |
|
107 |
|
108 /* |
|
109 * Compact the vectors: |
|
110 * - modify the memory |
|
111 * - keep only unique vectors |
|
112 * - store them contiguously from the beginning of the memory |
|
113 * - for each (non-unique) row, call the handler function |
|
114 * |
|
115 * The handler's rowIndex is the index of the row in the compacted |
|
116 * memory block. |
|
117 * (Therefore, it starts at 0 increases in increments of the columns value.) |
|
118 * |
|
119 * In a first phase, only special values are delivered (each exactly once), |
|
120 * with start==end both equalling a special pseudo code point. |
|
121 * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP |
|
122 * where rowIndex is the length of the compacted array, |
|
123 * and the row is arbitrary (but not NULL). |
|
124 * Then, in the second phase, the handler is called for each row of real values. |
|
125 */ |
|
126 typedef void U_CALLCONV |
|
127 UPVecCompactHandler(void *context, |
|
128 UChar32 start, UChar32 end, |
|
129 int32_t rowIndex, uint32_t *row, int32_t columns, |
|
130 UErrorCode *pErrorCode); |
|
131 |
|
132 U_CAPI void U_EXPORT2 |
|
133 upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); |
|
134 |
|
135 /* |
|
136 * Get the vectors array after calling upvec_compact(). |
|
137 * The caller must not modify nor release the returned array. |
|
138 * Returns NULL if called before upvec_compact(). |
|
139 */ |
|
140 U_CAPI const uint32_t * U_EXPORT2 |
|
141 upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); |
|
142 |
|
143 /* |
|
144 * Get a clone of the vectors array after calling upvec_compact(). |
|
145 * The caller owns the returned array and must uprv_free() it. |
|
146 * Returns NULL if called before upvec_compact(). |
|
147 */ |
|
148 U_CAPI uint32_t * U_EXPORT2 |
|
149 upvec_cloneArray(const UPropsVectors *pv, |
|
150 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); |
|
151 |
|
152 /* |
|
153 * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted |
|
154 * vectors array, and freeze the trie. |
|
155 */ |
|
156 U_CAPI UTrie2 * U_EXPORT2 |
|
157 upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); |
|
158 |
|
159 struct UPVecToUTrie2Context { |
|
160 UTrie2 *trie; |
|
161 int32_t initialValue; |
|
162 int32_t errorValue; |
|
163 int32_t maxValue; |
|
164 }; |
|
165 typedef struct UPVecToUTrie2Context UPVecToUTrie2Context; |
|
166 |
|
167 /* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */ |
|
168 U_CAPI void U_CALLCONV |
|
169 upvec_compactToUTrie2Handler(void *context, |
|
170 UChar32 start, UChar32 end, |
|
171 int32_t rowIndex, uint32_t *row, int32_t columns, |
|
172 UErrorCode *pErrorCode); |
|
173 |
|
U_CDECL_END

#endif