|
1 /* |
|
2 ****************************************************************************** |
|
3 * |
|
4 * Copyright (C) 2001-2013, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ****************************************************************************** |
|
8 * file name: utrie2.cpp |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2008aug16 (starting from a copy of utrie.c) |
|
14 * created by: Markus W. Scherer |
|
15 * |
|
16 * This is a common implementation of a Unicode trie. |
|
17 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with |
|
18 * Unicode code points (0..0x10ffff). |
|
19 * This is the second common version of a Unicode trie (hence the name UTrie2). |
|
20 * See utrie2.h for a comparison. |
|
21 * |
|
22 * This file contains only the runtime and enumeration code, for read-only access. |
|
23 * See utrie2_builder.c for the builder code. |
|
24 */ |
|
25 #ifdef UTRIE2_DEBUG |
|
26 # include <stdio.h> |
|
27 #endif |
|
28 |
|
29 #include "unicode/utypes.h" |
|
30 #include "unicode/utf.h" |
|
31 #include "unicode/utf8.h" |
|
32 #include "unicode/utf16.h" |
|
33 #include "cmemory.h" |
|
34 #include "utrie2.h" |
|
35 #include "utrie2_impl.h" |
|
36 #include "uassert.h" |
|
37 |
|
38 /* Public UTrie2 API implementation ----------------------------------------- */ |
|
39 |
|
40 static uint32_t |
|
41 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) { |
|
42 int32_t i2, block; |
|
43 |
|
44 if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) { |
|
45 return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY]; |
|
46 } |
|
47 |
|
48 if(U_IS_LEAD(c) && fromLSCP) { |
|
49 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ |
|
50 (c>>UTRIE2_SHIFT_2); |
|
51 } else { |
|
52 i2=trie->index1[c>>UTRIE2_SHIFT_1]+ |
|
53 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); |
|
54 } |
|
55 block=trie->index2[i2]; |
|
56 return trie->data[block+(c&UTRIE2_DATA_MASK)]; |
|
57 } |
|
58 |
|
59 U_CAPI uint32_t U_EXPORT2 |
|
60 utrie2_get32(const UTrie2 *trie, UChar32 c) { |
|
61 if(trie->data16!=NULL) { |
|
62 return UTRIE2_GET16(trie, c); |
|
63 } else if(trie->data32!=NULL) { |
|
64 return UTRIE2_GET32(trie, c); |
|
65 } else if((uint32_t)c>0x10ffff) { |
|
66 return trie->errorValue; |
|
67 } else { |
|
68 return get32(trie->newTrie, c, TRUE); |
|
69 } |
|
70 } |
|
71 |
|
72 U_CAPI uint32_t U_EXPORT2 |
|
73 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) { |
|
74 if(!U_IS_LEAD(c)) { |
|
75 return trie->errorValue; |
|
76 } |
|
77 if(trie->data16!=NULL) { |
|
78 return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c); |
|
79 } else if(trie->data32!=NULL) { |
|
80 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); |
|
81 } else { |
|
82 return get32(trie->newTrie, c, FALSE); |
|
83 } |
|
84 } |
|
85 |
|
86 static inline int32_t |
|
87 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) { |
|
88 int32_t idx= |
|
89 _UTRIE2_INDEX_FROM_CP( |
|
90 trie, |
|
91 trie->data32==NULL ? trie->indexLength : 0, |
|
92 c); |
|
93 return (idx<<3)|i; |
|
94 } |
|
95 |
|
96 U_CAPI int32_t U_EXPORT2 |
|
97 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, |
|
98 const uint8_t *src, const uint8_t *limit) { |
|
99 int32_t i, length; |
|
100 i=0; |
|
101 /* support 64-bit pointers by avoiding cast of arbitrary difference */ |
|
102 if((limit-src)<=7) { |
|
103 length=(int32_t)(limit-src); |
|
104 } else { |
|
105 length=7; |
|
106 } |
|
107 c=utf8_nextCharSafeBody(src, &i, length, c, -1); |
|
108 return u8Index(trie, c, i); |
|
109 } |
|
110 |
|
111 U_CAPI int32_t U_EXPORT2 |
|
112 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, |
|
113 const uint8_t *start, const uint8_t *src) { |
|
114 int32_t i, length; |
|
115 /* support 64-bit pointers by avoiding cast of arbitrary difference */ |
|
116 if((src-start)<=7) { |
|
117 i=length=(int32_t)(src-start); |
|
118 } else { |
|
119 i=length=7; |
|
120 start=src-7; |
|
121 } |
|
122 c=utf8_prevCharSafeBody(start, 0, &i, c, -1); |
|
123 i=length-i; /* number of bytes read backward from src */ |
|
124 return u8Index(trie, c, i); |
|
125 } |
|
126 |
|
127 U_CAPI UTrie2 * U_EXPORT2 |
|
128 utrie2_openFromSerialized(UTrie2ValueBits valueBits, |
|
129 const void *data, int32_t length, int32_t *pActualLength, |
|
130 UErrorCode *pErrorCode) { |
|
131 const UTrie2Header *header; |
|
132 const uint16_t *p16; |
|
133 int32_t actualLength; |
|
134 |
|
135 UTrie2 tempTrie; |
|
136 UTrie2 *trie; |
|
137 |
|
138 if(U_FAILURE(*pErrorCode)) { |
|
139 return 0; |
|
140 } |
|
141 |
|
142 if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) || |
|
143 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits |
|
144 ) { |
|
145 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
146 return 0; |
|
147 } |
|
148 |
|
149 /* enough data for a trie header? */ |
|
150 if(length<(int32_t)sizeof(UTrie2Header)) { |
|
151 *pErrorCode=U_INVALID_FORMAT_ERROR; |
|
152 return 0; |
|
153 } |
|
154 |
|
155 /* check the signature */ |
|
156 header=(const UTrie2Header *)data; |
|
157 if(header->signature!=UTRIE2_SIG) { |
|
158 *pErrorCode=U_INVALID_FORMAT_ERROR; |
|
159 return 0; |
|
160 } |
|
161 |
|
162 /* get the options */ |
|
163 if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) { |
|
164 *pErrorCode=U_INVALID_FORMAT_ERROR; |
|
165 return 0; |
|
166 } |
|
167 |
|
168 /* get the length values and offsets */ |
|
169 uprv_memset(&tempTrie, 0, sizeof(tempTrie)); |
|
170 tempTrie.indexLength=header->indexLength; |
|
171 tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT; |
|
172 tempTrie.index2NullOffset=header->index2NullOffset; |
|
173 tempTrie.dataNullOffset=header->dataNullOffset; |
|
174 |
|
175 tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1; |
|
176 tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY; |
|
177 if(valueBits==UTRIE2_16_VALUE_BITS) { |
|
178 tempTrie.highValueIndex+=tempTrie.indexLength; |
|
179 } |
|
180 |
|
181 /* calculate the actual length */ |
|
182 actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2; |
|
183 if(valueBits==UTRIE2_16_VALUE_BITS) { |
|
184 actualLength+=tempTrie.dataLength*2; |
|
185 } else { |
|
186 actualLength+=tempTrie.dataLength*4; |
|
187 } |
|
188 if(length<actualLength) { |
|
189 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */ |
|
190 return 0; |
|
191 } |
|
192 |
|
193 /* allocate the trie */ |
|
194 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); |
|
195 if(trie==NULL) { |
|
196 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
197 return 0; |
|
198 } |
|
199 uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); |
|
200 trie->memory=(uint32_t *)data; |
|
201 trie->length=actualLength; |
|
202 trie->isMemoryOwned=FALSE; |
|
203 |
|
204 /* set the pointers to its index and data arrays */ |
|
205 p16=(const uint16_t *)(header+1); |
|
206 trie->index=p16; |
|
207 p16+=trie->indexLength; |
|
208 |
|
209 /* get the data */ |
|
210 switch(valueBits) { |
|
211 case UTRIE2_16_VALUE_BITS: |
|
212 trie->data16=p16; |
|
213 trie->data32=NULL; |
|
214 trie->initialValue=trie->index[trie->dataNullOffset]; |
|
215 trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET]; |
|
216 break; |
|
217 case UTRIE2_32_VALUE_BITS: |
|
218 trie->data16=NULL; |
|
219 trie->data32=(const uint32_t *)p16; |
|
220 trie->initialValue=trie->data32[trie->dataNullOffset]; |
|
221 trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET]; |
|
222 break; |
|
223 default: |
|
224 *pErrorCode=U_INVALID_FORMAT_ERROR; |
|
225 return 0; |
|
226 } |
|
227 |
|
228 if(pActualLength!=NULL) { |
|
229 *pActualLength=actualLength; |
|
230 } |
|
231 return trie; |
|
232 } |
|
233 |
|
234 U_CAPI UTrie2 * U_EXPORT2 |
|
235 utrie2_openDummy(UTrie2ValueBits valueBits, |
|
236 uint32_t initialValue, uint32_t errorValue, |
|
237 UErrorCode *pErrorCode) { |
|
238 UTrie2 *trie; |
|
239 UTrie2Header *header; |
|
240 uint32_t *p; |
|
241 uint16_t *dest16; |
|
242 int32_t indexLength, dataLength, length, i; |
|
243 int32_t dataMove; /* >0 if the data is moved to the end of the index array */ |
|
244 |
|
245 if(U_FAILURE(*pErrorCode)) { |
|
246 return 0; |
|
247 } |
|
248 |
|
249 if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) { |
|
250 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
251 return 0; |
|
252 } |
|
253 |
|
254 /* calculate the total length of the dummy trie data */ |
|
255 indexLength=UTRIE2_INDEX_1_OFFSET; |
|
256 dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY; |
|
257 length=(int32_t)sizeof(UTrie2Header)+indexLength*2; |
|
258 if(valueBits==UTRIE2_16_VALUE_BITS) { |
|
259 length+=dataLength*2; |
|
260 } else { |
|
261 length+=dataLength*4; |
|
262 } |
|
263 |
|
264 /* allocate the trie */ |
|
265 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); |
|
266 if(trie==NULL) { |
|
267 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
268 return 0; |
|
269 } |
|
270 uprv_memset(trie, 0, sizeof(UTrie2)); |
|
271 trie->memory=uprv_malloc(length); |
|
272 if(trie->memory==NULL) { |
|
273 uprv_free(trie); |
|
274 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
275 return 0; |
|
276 } |
|
277 trie->length=length; |
|
278 trie->isMemoryOwned=TRUE; |
|
279 |
|
280 /* set the UTrie2 fields */ |
|
281 if(valueBits==UTRIE2_16_VALUE_BITS) { |
|
282 dataMove=indexLength; |
|
283 } else { |
|
284 dataMove=0; |
|
285 } |
|
286 |
|
287 trie->indexLength=indexLength; |
|
288 trie->dataLength=dataLength; |
|
289 trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET; |
|
290 trie->dataNullOffset=(uint16_t)dataMove; |
|
291 trie->initialValue=initialValue; |
|
292 trie->errorValue=errorValue; |
|
293 trie->highStart=0; |
|
294 trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; |
|
295 |
|
296 /* set the header fields */ |
|
297 header=(UTrie2Header *)trie->memory; |
|
298 |
|
299 header->signature=UTRIE2_SIG; /* "Tri2" */ |
|
300 header->options=(uint16_t)valueBits; |
|
301 |
|
302 header->indexLength=(uint16_t)indexLength; |
|
303 header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT); |
|
304 header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET; |
|
305 header->dataNullOffset=(uint16_t)dataMove; |
|
306 header->shiftedHighStart=0; |
|
307 |
|
308 /* fill the index and data arrays */ |
|
309 dest16=(uint16_t *)(header+1); |
|
310 trie->index=dest16; |
|
311 |
|
312 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */ |
|
313 for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { |
|
314 *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */ |
|
315 } |
|
316 |
|
317 /* write UTF-8 2-byte index-2 values, not right-shifted */ |
|
318 for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ |
|
319 *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); |
|
320 } |
|
321 for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ |
|
322 *dest16++=(uint16_t)dataMove; |
|
323 } |
|
324 |
|
325 /* write the 16/32-bit data array */ |
|
326 switch(valueBits) { |
|
327 case UTRIE2_16_VALUE_BITS: |
|
328 /* write 16-bit data values */ |
|
329 trie->data16=dest16; |
|
330 trie->data32=NULL; |
|
331 for(i=0; i<0x80; ++i) { |
|
332 *dest16++=(uint16_t)initialValue; |
|
333 } |
|
334 for(; i<0xc0; ++i) { |
|
335 *dest16++=(uint16_t)errorValue; |
|
336 } |
|
337 /* highValue and reserved values */ |
|
338 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { |
|
339 *dest16++=(uint16_t)initialValue; |
|
340 } |
|
341 break; |
|
342 case UTRIE2_32_VALUE_BITS: |
|
343 /* write 32-bit data values */ |
|
344 p=(uint32_t *)dest16; |
|
345 trie->data16=NULL; |
|
346 trie->data32=p; |
|
347 for(i=0; i<0x80; ++i) { |
|
348 *p++=initialValue; |
|
349 } |
|
350 for(; i<0xc0; ++i) { |
|
351 *p++=errorValue; |
|
352 } |
|
353 /* highValue and reserved values */ |
|
354 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { |
|
355 *p++=initialValue; |
|
356 } |
|
357 break; |
|
358 default: |
|
359 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
360 return 0; |
|
361 } |
|
362 |
|
363 return trie; |
|
364 } |
|
365 |
|
366 U_CAPI void U_EXPORT2 |
|
367 utrie2_close(UTrie2 *trie) { |
|
368 if(trie!=NULL) { |
|
369 if(trie->isMemoryOwned) { |
|
370 uprv_free(trie->memory); |
|
371 } |
|
372 if(trie->newTrie!=NULL) { |
|
373 uprv_free(trie->newTrie->data); |
|
374 uprv_free(trie->newTrie); |
|
375 } |
|
376 uprv_free(trie); |
|
377 } |
|
378 } |
|
379 |
|
380 U_CAPI int32_t U_EXPORT2 |
|
381 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) { |
|
382 uint32_t signature; |
|
383 if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) { |
|
384 return 0; |
|
385 } |
|
386 signature=*(const uint32_t *)data; |
|
387 if(signature==UTRIE2_SIG) { |
|
388 return 2; |
|
389 } |
|
390 if(anyEndianOk && signature==UTRIE2_OE_SIG) { |
|
391 return 2; |
|
392 } |
|
393 if(signature==UTRIE_SIG) { |
|
394 return 1; |
|
395 } |
|
396 if(anyEndianOk && signature==UTRIE_OE_SIG) { |
|
397 return 1; |
|
398 } |
|
399 return 0; |
|
400 } |
|
401 |
|
402 U_CAPI int32_t U_EXPORT2 |
|
403 utrie2_swap(const UDataSwapper *ds, |
|
404 const void *inData, int32_t length, void *outData, |
|
405 UErrorCode *pErrorCode) { |
|
406 const UTrie2Header *inTrie; |
|
407 UTrie2Header trie; |
|
408 int32_t dataLength, size; |
|
409 UTrie2ValueBits valueBits; |
|
410 |
|
411 if(U_FAILURE(*pErrorCode)) { |
|
412 return 0; |
|
413 } |
|
414 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { |
|
415 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
416 return 0; |
|
417 } |
|
418 |
|
419 /* setup and swapping */ |
|
420 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { |
|
421 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
422 return 0; |
|
423 } |
|
424 |
|
425 inTrie=(const UTrie2Header *)inData; |
|
426 trie.signature=ds->readUInt32(inTrie->signature); |
|
427 trie.options=ds->readUInt16(inTrie->options); |
|
428 trie.indexLength=ds->readUInt16(inTrie->indexLength); |
|
429 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); |
|
430 |
|
431 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); |
|
432 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; |
|
433 |
|
434 if( trie.signature!=UTRIE2_SIG || |
|
435 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || |
|
436 trie.indexLength<UTRIE2_INDEX_1_OFFSET || |
|
437 dataLength<UTRIE2_DATA_START_OFFSET |
|
438 ) { |
|
439 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ |
|
440 return 0; |
|
441 } |
|
442 |
|
443 size=sizeof(UTrie2Header)+trie.indexLength*2; |
|
444 switch(valueBits) { |
|
445 case UTRIE2_16_VALUE_BITS: |
|
446 size+=dataLength*2; |
|
447 break; |
|
448 case UTRIE2_32_VALUE_BITS: |
|
449 size+=dataLength*4; |
|
450 break; |
|
451 default: |
|
452 *pErrorCode=U_INVALID_FORMAT_ERROR; |
|
453 return 0; |
|
454 } |
|
455 |
|
456 if(length>=0) { |
|
457 UTrie2Header *outTrie; |
|
458 |
|
459 if(length<size) { |
|
460 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
461 return 0; |
|
462 } |
|
463 |
|
464 outTrie=(UTrie2Header *)outData; |
|
465 |
|
466 /* swap the header */ |
|
467 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); |
|
468 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); |
|
469 |
|
470 /* swap the index and the data */ |
|
471 switch(valueBits) { |
|
472 case UTRIE2_16_VALUE_BITS: |
|
473 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); |
|
474 break; |
|
475 case UTRIE2_32_VALUE_BITS: |
|
476 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); |
|
477 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, |
|
478 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); |
|
479 break; |
|
480 default: |
|
481 *pErrorCode=U_INVALID_FORMAT_ERROR; |
|
482 return 0; |
|
483 } |
|
484 } |
|
485 |
|
486 return size; |
|
487 } |
|
488 |
|
489 // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c |
|
490 // to avoid a dependency from utrie2.cpp on utrie.c. |
|
491 |
|
492 /* enumeration -------------------------------------------------------------- */ |
|
493 |
|
494 #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b)) |
|
495 |
|
496 /* default UTrie2EnumValue() returns the input value itself */ |
|
497 static uint32_t U_CALLCONV |
|
498 enumSameValue(const void * /*context*/, uint32_t value) { |
|
499 return value; |
|
500 } |
|
501 |
|
502 /** |
|
503 * Enumerate all ranges of code points with the same relevant values. |
|
504 * The values are transformed from the raw trie entries by the enumValue function. |
|
505 * |
|
506 * Currently requires start<limit and both start and limit must be multiples |
|
507 * of UTRIE2_DATA_BLOCK_LENGTH. |
|
508 * |
|
509 * Optimizations: |
|
510 * - Skip a whole block if we know that it is filled with a single value, |
|
511 * and it is the same as we visited just before. |
|
512 * - Handle the null block specially because we know a priori that it is filled |
|
513 * with a single value. |
|
514 */ |
|
515 static void |
|
516 enumEitherTrie(const UTrie2 *trie, |
|
517 UChar32 start, UChar32 limit, |
|
518 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { |
|
519 const uint32_t *data32; |
|
520 const uint16_t *idx; |
|
521 |
|
522 uint32_t value, prevValue, initialValue; |
|
523 UChar32 c, prev, highStart; |
|
524 int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; |
|
525 |
|
526 if(enumRange==NULL) { |
|
527 return; |
|
528 } |
|
529 if(enumValue==NULL) { |
|
530 enumValue=enumSameValue; |
|
531 } |
|
532 |
|
533 if(trie->newTrie==NULL) { |
|
534 /* frozen trie */ |
|
535 idx=trie->index; |
|
536 U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */ |
|
537 data32=trie->data32; |
|
538 |
|
539 index2NullOffset=trie->index2NullOffset; |
|
540 nullBlock=trie->dataNullOffset; |
|
541 } else { |
|
542 /* unfrozen, mutable trie */ |
|
543 idx=NULL; |
|
544 data32=trie->newTrie->data; |
|
545 U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */ |
|
546 |
|
547 index2NullOffset=trie->newTrie->index2NullOffset; |
|
548 nullBlock=trie->newTrie->dataNullOffset; |
|
549 } |
|
550 |
|
551 highStart=trie->highStart; |
|
552 |
|
553 /* get the enumeration value that corresponds to an initial-value trie data entry */ |
|
554 initialValue=enumValue(context, trie->initialValue); |
|
555 |
|
556 /* set variables for previous range */ |
|
557 prevI2Block=-1; |
|
558 prevBlock=-1; |
|
559 prev=start; |
|
560 prevValue=0; |
|
561 |
|
562 /* enumerate index-2 blocks */ |
|
563 for(c=start; c<limit && c<highStart;) { |
|
564 /* Code point limit for iterating inside this i2Block. */ |
|
565 UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY; |
|
566 if(limit<tempLimit) { |
|
567 tempLimit=limit; |
|
568 } |
|
569 if(c<=0xffff) { |
|
570 if(!U_IS_SURROGATE(c)) { |
|
571 i2Block=c>>UTRIE2_SHIFT_2; |
|
572 } else if(U_IS_SURROGATE_LEAD(c)) { |
|
573 /* |
|
574 * Enumerate values for lead surrogate code points, not code units: |
|
575 * This special block has half the normal length. |
|
576 */ |
|
577 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET; |
|
578 tempLimit=MIN_VALUE(0xdc00, limit); |
|
579 } else { |
|
580 /* |
|
581 * Switch back to the normal part of the index-2 table. |
|
582 * Enumerate the second half of the surrogates block. |
|
583 */ |
|
584 i2Block=0xd800>>UTRIE2_SHIFT_2; |
|
585 tempLimit=MIN_VALUE(0xe000, limit); |
|
586 } |
|
587 } else { |
|
588 /* supplementary code points */ |
|
589 if(idx!=NULL) { |
|
590 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ |
|
591 (c>>UTRIE2_SHIFT_1)]; |
|
592 } else { |
|
593 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1]; |
|
594 } |
|
595 if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) { |
|
596 /* |
|
597 * The index-2 block is the same as the previous one, and filled with prevValue. |
|
598 * Only possible for supplementary code points because the linear-BMP index-2 |
|
599 * table creates unique i2Block values. |
|
600 */ |
|
601 c+=UTRIE2_CP_PER_INDEX_1_ENTRY; |
|
602 continue; |
|
603 } |
|
604 } |
|
605 prevI2Block=i2Block; |
|
606 if(i2Block==index2NullOffset) { |
|
607 /* this is the null index-2 block */ |
|
608 if(prevValue!=initialValue) { |
|
609 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { |
|
610 return; |
|
611 } |
|
612 prevBlock=nullBlock; |
|
613 prev=c; |
|
614 prevValue=initialValue; |
|
615 } |
|
616 c+=UTRIE2_CP_PER_INDEX_1_ENTRY; |
|
617 } else { |
|
618 /* enumerate data blocks for one index-2 block */ |
|
619 int32_t i2, i2Limit; |
|
620 i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; |
|
621 if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) { |
|
622 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; |
|
623 } else { |
|
624 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH; |
|
625 } |
|
626 for(; i2<i2Limit; ++i2) { |
|
627 if(idx!=NULL) { |
|
628 block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT; |
|
629 } else { |
|
630 block=trie->newTrie->index2[i2Block+i2]; |
|
631 } |
|
632 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) { |
|
633 /* the block is the same as the previous one, and filled with prevValue */ |
|
634 c+=UTRIE2_DATA_BLOCK_LENGTH; |
|
635 continue; |
|
636 } |
|
637 prevBlock=block; |
|
638 if(block==nullBlock) { |
|
639 /* this is the null data block */ |
|
640 if(prevValue!=initialValue) { |
|
641 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { |
|
642 return; |
|
643 } |
|
644 prev=c; |
|
645 prevValue=initialValue; |
|
646 } |
|
647 c+=UTRIE2_DATA_BLOCK_LENGTH; |
|
648 } else { |
|
649 for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) { |
|
650 value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); |
|
651 if(value!=prevValue) { |
|
652 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { |
|
653 return; |
|
654 } |
|
655 prev=c; |
|
656 prevValue=value; |
|
657 } |
|
658 ++c; |
|
659 } |
|
660 } |
|
661 } |
|
662 } |
|
663 } |
|
664 |
|
665 if(c>limit) { |
|
666 c=limit; /* could be higher if in the index2NullOffset */ |
|
667 } else if(c<limit) { |
|
668 /* c==highStart<limit */ |
|
669 uint32_t highValue; |
|
670 if(idx!=NULL) { |
|
671 highValue= |
|
672 data32!=NULL ? |
|
673 data32[trie->highValueIndex] : |
|
674 idx[trie->highValueIndex]; |
|
675 } else { |
|
676 highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY]; |
|
677 } |
|
678 value=enumValue(context, highValue); |
|
679 if(value!=prevValue) { |
|
680 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { |
|
681 return; |
|
682 } |
|
683 prev=c; |
|
684 prevValue=value; |
|
685 } |
|
686 c=limit; |
|
687 } |
|
688 |
|
689 /* deliver last range */ |
|
690 enumRange(context, prev, c-1, prevValue); |
|
691 } |
|
692 |
|
693 U_CAPI void U_EXPORT2 |
|
694 utrie2_enum(const UTrie2 *trie, |
|
695 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { |
|
696 enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context); |
|
697 } |
|
698 |
|
699 U_CAPI void U_EXPORT2 |
|
700 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, |
|
701 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, |
|
702 const void *context) { |
|
703 if(!U16_IS_LEAD(lead)) { |
|
704 return; |
|
705 } |
|
706 lead=(lead-0xd7c0)<<10; /* start code point */ |
|
707 enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context); |
|
708 } |
|
709 |
|
710 /* C++ convenience wrappers ------------------------------------------------- */ |
|
711 |
|
712 U_NAMESPACE_BEGIN |
|
713 |
|
714 uint16_t BackwardUTrie2StringIterator::previous16() { |
|
715 codePointLimit=codePointStart; |
|
716 if(start>=codePointStart) { |
|
717 codePoint=U_SENTINEL; |
|
718 return 0; |
|
719 } |
|
720 uint16_t result; |
|
721 UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); |
|
722 return result; |
|
723 } |
|
724 |
|
725 uint16_t ForwardUTrie2StringIterator::next16() { |
|
726 codePointStart=codePointLimit; |
|
727 if(codePointLimit==limit) { |
|
728 codePoint=U_SENTINEL; |
|
729 return 0; |
|
730 } |
|
731 uint16_t result; |
|
732 UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); |
|
733 return result; |
|
734 } |
|
735 |
|
736 U_NAMESPACE_END |