|
/*
*******************************************************************************
*
*   Copyright (C) 2005-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  swapimpl.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2005may05
*   created by: Markus W. Scherer
*
*   Data file swapping functions moved here from the common library
*   because some data is hardcoded in ICU4C and need not be swapped any more.
*   Moving the functions here simplifies testing (for code coverage) because
*   we need not jump through hoops (like adding snapshots of these files
*   to testdata).
*
*   The declarations for these functions remain in the internal header files
*   in icu/source/common/
*/
|
25 |
|
26 #include "unicode/utypes.h" |
|
27 #include "unicode/putil.h" |
|
28 #include "unicode/udata.h" |
|
29 |
|
30 /* Explicit include statement for std_string.h is needed |
|
31 * for compilation on certain platforms. (e.g. AIX/VACPP) |
|
32 */ |
|
33 #include "unicode/std_string.h" |
|
34 |
|
35 #include "cmemory.h" |
|
36 #include "cstring.h" |
|
37 #include "uinvchar.h" |
|
38 #include "uassert.h" |
|
39 #include "uarrsort.h" |
|
40 #include "ucmndata.h" |
|
41 #include "udataswp.h" |
|
42 |
|
43 /* swapping implementations in common */ |
|
44 |
|
45 #include "uresdata.h" |
|
46 #include "ucnv_io.h" |
|
47 #include "uprops.h" |
|
48 #include "ucase.h" |
|
49 #include "ubidi_props.h" |
|
50 #include "ucol_swp.h" |
|
51 #include "ucnv_bld.h" |
|
52 #include "unormimp.h" |
|
53 #include "normalizer2impl.h" |
|
54 #include "sprpimpl.h" |
|
55 #include "propname.h" |
|
56 #include "rbbidata.h" |
|
57 #include "utrie2.h" |
|
58 #include "dictionarydata.h" |
|
59 |
|
60 /* swapping implementations in i18n */ |
|
61 |
|
62 #if !UCONFIG_NO_NORMALIZATION |
|
63 #include "uspoof_impl.h" |
|
64 #endif |
|
65 |
|
66 U_NAMESPACE_USE |
|
67 |
|
68 /* definitions */ |
|
69 |
|
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves as a single
 * primary expression next to any operator (for example sizeof or postfix
 * operators).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
|
71 |
|
72 /* Unicode property (value) aliases data swapping --------------------------- */ |
|
73 |
|
74 static int32_t U_CALLCONV |
|
75 upname_swap(const UDataSwapper *ds, |
|
76 const void *inData, int32_t length, void *outData, |
|
77 UErrorCode *pErrorCode) { |
|
78 /* udata_swapDataHeader checks the arguments */ |
|
79 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
|
80 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
81 return 0; |
|
82 } |
|
83 |
|
84 /* check data format and format version */ |
|
85 const UDataInfo *pInfo= |
|
86 reinterpret_cast<const UDataInfo *>( |
|
87 static_cast<const char *>(inData)+4); |
|
88 if(!( |
|
89 pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */ |
|
90 pInfo->dataFormat[1]==0x6e && |
|
91 pInfo->dataFormat[2]==0x61 && |
|
92 pInfo->dataFormat[3]==0x6d && |
|
93 pInfo->formatVersion[0]==2 |
|
94 )) { |
|
95 udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n", |
|
96 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
97 pInfo->dataFormat[2], pInfo->dataFormat[3], |
|
98 pInfo->formatVersion[0]); |
|
99 *pErrorCode=U_UNSUPPORTED_ERROR; |
|
100 return 0; |
|
101 } |
|
102 |
|
103 const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize; |
|
104 uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize; |
|
105 |
|
106 if(length>=0) { |
|
107 length-=headerSize; |
|
108 // formatVersion 2 initially has indexes[8], 32 bytes. |
|
109 if(length<32) { |
|
110 udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n", |
|
111 (int)length); |
|
112 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
113 return 0; |
|
114 } |
|
115 } |
|
116 |
|
117 const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes); |
|
118 int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]); |
|
119 if(length>=0) { |
|
120 if(length<totalSize) { |
|
121 udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) " |
|
122 "for pnames.icu\n", |
|
123 (int)length, (int)totalSize); |
|
124 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
125 return 0; |
|
126 } |
|
127 |
|
128 int32_t numBytesIndexesAndValueMaps= |
|
129 udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]); |
|
130 |
|
131 // Swap the indexes[] and the valueMaps[]. |
|
132 ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode); |
|
133 |
|
134 // Copy the rest of the data. |
|
135 if(inBytes!=outBytes) { |
|
136 uprv_memcpy(outBytes+numBytesIndexesAndValueMaps, |
|
137 inBytes+numBytesIndexesAndValueMaps, |
|
138 totalSize-numBytesIndexesAndValueMaps); |
|
139 } |
|
140 |
|
141 // We need not swap anything else: |
|
142 // |
|
143 // The ByteTries are already byte-serialized, and are fixed on ASCII. |
|
144 // (On an EBCDIC machine, the input string is converted to lowercase ASCII |
|
145 // while matching.) |
|
146 // |
|
147 // The name groups are mostly invariant characters, but since we only |
|
148 // generate, and keep in subversion, ASCII versions of pnames.icu, |
|
149 // and since only ICU4J uses the pnames.icu data file |
|
150 // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files, |
|
151 // we just copy those bytes too. |
|
152 } |
|
153 |
|
154 return headerSize+totalSize; |
|
155 } |
|
156 |
|
157 /* Unicode properties data swapping ----------------------------------------- */ |
|
158 |
|
159 static int32_t U_CALLCONV |
|
160 uprops_swap(const UDataSwapper *ds, |
|
161 const void *inData, int32_t length, void *outData, |
|
162 UErrorCode *pErrorCode) { |
|
163 const UDataInfo *pInfo; |
|
164 int32_t headerSize, i; |
|
165 |
|
166 int32_t dataIndexes[UPROPS_INDEX_COUNT]; |
|
167 const int32_t *inData32; |
|
168 |
|
169 /* udata_swapDataHeader checks the arguments */ |
|
170 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
|
171 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
172 return 0; |
|
173 } |
|
174 |
|
175 /* check data format and format version */ |
|
176 pInfo=(const UDataInfo *)((const char *)inData+4); |
|
177 if(!( |
|
178 pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */ |
|
179 pInfo->dataFormat[1]==0x50 && |
|
180 pInfo->dataFormat[2]==0x72 && |
|
181 pInfo->dataFormat[3]==0x6f && |
|
182 (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) && |
|
183 (pInfo->formatVersion[0]>=7 || |
|
184 (pInfo->formatVersion[2]==UTRIE_SHIFT && |
|
185 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT)) |
|
186 )) { |
|
187 udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n", |
|
188 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
189 pInfo->dataFormat[2], pInfo->dataFormat[3], |
|
190 pInfo->formatVersion[0]); |
|
191 *pErrorCode=U_UNSUPPORTED_ERROR; |
|
192 return 0; |
|
193 } |
|
194 |
|
195 /* the properties file must contain at least the indexes array */ |
|
196 if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) { |
|
197 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", |
|
198 length-headerSize); |
|
199 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
200 return 0; |
|
201 } |
|
202 |
|
203 /* read the indexes */ |
|
204 inData32=(const int32_t *)((const char *)inData+headerSize); |
|
205 for(i=0; i<UPROPS_INDEX_COUNT; ++i) { |
|
206 dataIndexes[i]=udata_readInt32(ds, inData32[i]); |
|
207 } |
|
208 |
|
209 /* |
|
210 * comments are copied from the data format description in genprops/store.c |
|
211 * indexes[] constants are in uprops.h |
|
212 */ |
|
213 int32_t dataTop; |
|
214 if(length>=0) { |
|
215 int32_t *outData32; |
|
216 |
|
217 /* |
|
218 * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size. |
|
219 * In earlier formatVersions, it is 0 and a lower dataIndexes entry |
|
220 * has the top of the last item. |
|
221 */ |
|
222 for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {} |
|
223 |
|
224 if((length-headerSize)<(4*dataTop)) { |
|
225 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", |
|
226 length-headerSize); |
|
227 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
228 return 0; |
|
229 } |
|
230 |
|
231 outData32=(int32_t *)((char *)outData+headerSize); |
|
232 |
|
233 /* copy everything for inaccessible data (padding) */ |
|
234 if(inData32!=outData32) { |
|
235 uprv_memcpy(outData32, inData32, 4*dataTop); |
|
236 } |
|
237 |
|
238 /* swap the indexes[16] */ |
|
239 ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode); |
|
240 |
|
241 /* |
|
242 * swap the main properties UTrie |
|
243 * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) |
|
244 */ |
|
245 utrie2_swapAnyVersion(ds, |
|
246 inData32+UPROPS_INDEX_COUNT, |
|
247 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), |
|
248 outData32+UPROPS_INDEX_COUNT, |
|
249 pErrorCode); |
|
250 |
|
251 /* |
|
252 * swap the properties and exceptions words |
|
253 * P const uint32_t props32[i1-i0]; |
|
254 * E const uint32_t exceptions[i2-i1]; |
|
255 */ |
|
256 ds->swapArray32(ds, |
|
257 inData32+dataIndexes[UPROPS_PROPS32_INDEX], |
|
258 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]), |
|
259 outData32+dataIndexes[UPROPS_PROPS32_INDEX], |
|
260 pErrorCode); |
|
261 |
|
262 /* |
|
263 * swap the UChars |
|
264 * U const UChar uchars[2*(i3-i2)]; |
|
265 */ |
|
266 ds->swapArray16(ds, |
|
267 inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], |
|
268 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]), |
|
269 outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], |
|
270 pErrorCode); |
|
271 |
|
272 /* |
|
273 * swap the additional UTrie |
|
274 * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties |
|
275 */ |
|
276 utrie2_swapAnyVersion(ds, |
|
277 inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], |
|
278 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), |
|
279 outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], |
|
280 pErrorCode); |
|
281 |
|
282 /* |
|
283 * swap the properties vectors |
|
284 * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4]; |
|
285 */ |
|
286 ds->swapArray32(ds, |
|
287 inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], |
|
288 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]), |
|
289 outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], |
|
290 pErrorCode); |
|
291 |
|
292 // swap the Script_Extensions data |
|
293 // SCX const uint16_t scriptExtensions[2*(i7-i6)]; |
|
294 ds->swapArray16(ds, |
|
295 inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], |
|
296 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]), |
|
297 outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], |
|
298 pErrorCode); |
|
299 } |
|
300 |
|
301 /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */ |
|
302 return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7]; |
|
303 } |
|
304 |
|
305 /* Unicode case mapping data swapping --------------------------------------- */ |
|
306 |
|
307 static int32_t U_CALLCONV |
|
308 ucase_swap(const UDataSwapper *ds, |
|
309 const void *inData, int32_t length, void *outData, |
|
310 UErrorCode *pErrorCode) { |
|
311 const UDataInfo *pInfo; |
|
312 int32_t headerSize; |
|
313 |
|
314 const uint8_t *inBytes; |
|
315 uint8_t *outBytes; |
|
316 |
|
317 const int32_t *inIndexes; |
|
318 int32_t indexes[16]; |
|
319 |
|
320 int32_t i, offset, count, size; |
|
321 |
|
322 /* udata_swapDataHeader checks the arguments */ |
|
323 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
|
324 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
325 return 0; |
|
326 } |
|
327 |
|
328 /* check data format and format version */ |
|
329 pInfo=(const UDataInfo *)((const char *)inData+4); |
|
330 if(!( |
|
331 pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */ |
|
332 pInfo->dataFormat[1]==UCASE_FMT_1 && |
|
333 pInfo->dataFormat[2]==UCASE_FMT_2 && |
|
334 pInfo->dataFormat[3]==UCASE_FMT_3 && |
|
335 ((pInfo->formatVersion[0]==1 && |
|
336 pInfo->formatVersion[2]==UTRIE_SHIFT && |
|
337 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || |
|
338 pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3) |
|
339 )) { |
|
340 udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", |
|
341 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
342 pInfo->dataFormat[2], pInfo->dataFormat[3], |
|
343 pInfo->formatVersion[0]); |
|
344 *pErrorCode=U_UNSUPPORTED_ERROR; |
|
345 return 0; |
|
346 } |
|
347 |
|
348 inBytes=(const uint8_t *)inData+headerSize; |
|
349 outBytes=(uint8_t *)outData+headerSize; |
|
350 |
|
351 inIndexes=(const int32_t *)inBytes; |
|
352 |
|
353 if(length>=0) { |
|
354 length-=headerSize; |
|
355 if(length<16*4) { |
|
356 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n", |
|
357 length); |
|
358 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
359 return 0; |
|
360 } |
|
361 } |
|
362 |
|
363 /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */ |
|
364 for(i=0; i<16; ++i) { |
|
365 indexes[i]=udata_readInt32(ds, inIndexes[i]); |
|
366 } |
|
367 |
|
368 /* get the total length of the data */ |
|
369 size=indexes[UCASE_IX_LENGTH]; |
|
370 |
|
371 if(length>=0) { |
|
372 if(length<size) { |
|
373 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n", |
|
374 length); |
|
375 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
376 return 0; |
|
377 } |
|
378 |
|
379 /* copy the data for inaccessible bytes */ |
|
380 if(inBytes!=outBytes) { |
|
381 uprv_memcpy(outBytes, inBytes, size); |
|
382 } |
|
383 |
|
384 offset=0; |
|
385 |
|
386 /* swap the int32_t indexes[] */ |
|
387 count=indexes[UCASE_IX_INDEX_TOP]*4; |
|
388 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); |
|
389 offset+=count; |
|
390 |
|
391 /* swap the UTrie */ |
|
392 count=indexes[UCASE_IX_TRIE_SIZE]; |
|
393 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
|
394 offset+=count; |
|
395 |
|
396 /* swap the uint16_t exceptions[] and unfold[] */ |
|
397 count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2; |
|
398 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
|
399 offset+=count; |
|
400 |
|
401 U_ASSERT(offset==size); |
|
402 } |
|
403 |
|
404 return headerSize+size; |
|
405 } |
|
406 |
|
407 /* Unicode bidi/shaping data swapping --------------------------------------- */ |
|
408 |
|
409 static int32_t U_CALLCONV |
|
410 ubidi_swap(const UDataSwapper *ds, |
|
411 const void *inData, int32_t length, void *outData, |
|
412 UErrorCode *pErrorCode) { |
|
413 const UDataInfo *pInfo; |
|
414 int32_t headerSize; |
|
415 |
|
416 const uint8_t *inBytes; |
|
417 uint8_t *outBytes; |
|
418 |
|
419 const int32_t *inIndexes; |
|
420 int32_t indexes[16]; |
|
421 |
|
422 int32_t i, offset, count, size; |
|
423 |
|
424 /* udata_swapDataHeader checks the arguments */ |
|
425 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
|
426 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
427 return 0; |
|
428 } |
|
429 |
|
430 /* check data format and format version */ |
|
431 pInfo=(const UDataInfo *)((const char *)inData+4); |
|
432 if(!( |
|
433 pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */ |
|
434 pInfo->dataFormat[1]==UBIDI_FMT_1 && |
|
435 pInfo->dataFormat[2]==UBIDI_FMT_2 && |
|
436 pInfo->dataFormat[3]==UBIDI_FMT_3 && |
|
437 ((pInfo->formatVersion[0]==1 && |
|
438 pInfo->formatVersion[2]==UTRIE_SHIFT && |
|
439 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || |
|
440 pInfo->formatVersion[0]==2) |
|
441 )) { |
|
442 udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n", |
|
443 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
444 pInfo->dataFormat[2], pInfo->dataFormat[3], |
|
445 pInfo->formatVersion[0]); |
|
446 *pErrorCode=U_UNSUPPORTED_ERROR; |
|
447 return 0; |
|
448 } |
|
449 |
|
450 inBytes=(const uint8_t *)inData+headerSize; |
|
451 outBytes=(uint8_t *)outData+headerSize; |
|
452 |
|
453 inIndexes=(const int32_t *)inBytes; |
|
454 |
|
455 if(length>=0) { |
|
456 length-=headerSize; |
|
457 if(length<16*4) { |
|
458 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n", |
|
459 length); |
|
460 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
461 return 0; |
|
462 } |
|
463 } |
|
464 |
|
465 /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */ |
|
466 for(i=0; i<16; ++i) { |
|
467 indexes[i]=udata_readInt32(ds, inIndexes[i]); |
|
468 } |
|
469 |
|
470 /* get the total length of the data */ |
|
471 size=indexes[UBIDI_IX_LENGTH]; |
|
472 |
|
473 if(length>=0) { |
|
474 if(length<size) { |
|
475 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n", |
|
476 length); |
|
477 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
478 return 0; |
|
479 } |
|
480 |
|
481 /* copy the data for inaccessible bytes */ |
|
482 if(inBytes!=outBytes) { |
|
483 uprv_memcpy(outBytes, inBytes, size); |
|
484 } |
|
485 |
|
486 offset=0; |
|
487 |
|
488 /* swap the int32_t indexes[] */ |
|
489 count=indexes[UBIDI_IX_INDEX_TOP]*4; |
|
490 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); |
|
491 offset+=count; |
|
492 |
|
493 /* swap the UTrie */ |
|
494 count=indexes[UBIDI_IX_TRIE_SIZE]; |
|
495 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
|
496 offset+=count; |
|
497 |
|
498 /* swap the uint32_t mirrors[] */ |
|
499 count=indexes[UBIDI_IX_MIRROR_LENGTH]*4; |
|
500 ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
|
501 offset+=count; |
|
502 |
|
503 /* just skip the uint8_t jgArray[] */ |
|
504 count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START]; |
|
505 offset+=count; |
|
506 |
|
507 U_ASSERT(offset==size); |
|
508 } |
|
509 |
|
510 return headerSize+size; |
|
511 } |
|
512 |
|
513 /* Unicode normalization data swapping -------------------------------------- */ |
|
514 |
|
515 #if !UCONFIG_NO_NORMALIZATION |
|
516 |
|
517 static int32_t U_CALLCONV |
|
518 unorm_swap(const UDataSwapper *ds, |
|
519 const void *inData, int32_t length, void *outData, |
|
520 UErrorCode *pErrorCode) { |
|
521 const UDataInfo *pInfo; |
|
522 int32_t headerSize; |
|
523 |
|
524 const uint8_t *inBytes; |
|
525 uint8_t *outBytes; |
|
526 |
|
527 const int32_t *inIndexes; |
|
528 int32_t indexes[32]; |
|
529 |
|
530 int32_t i, offset, count, size; |
|
531 |
|
532 /* udata_swapDataHeader checks the arguments */ |
|
533 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
|
534 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
535 return 0; |
|
536 } |
|
537 |
|
538 /* check data format and format version */ |
|
539 pInfo=(const UDataInfo *)((const char *)inData+4); |
|
540 if(!( |
|
541 pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */ |
|
542 pInfo->dataFormat[1]==0x6f && |
|
543 pInfo->dataFormat[2]==0x72 && |
|
544 pInfo->dataFormat[3]==0x6d && |
|
545 pInfo->formatVersion[0]==2 |
|
546 )) { |
|
547 udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n", |
|
548 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
549 pInfo->dataFormat[2], pInfo->dataFormat[3], |
|
550 pInfo->formatVersion[0]); |
|
551 *pErrorCode=U_UNSUPPORTED_ERROR; |
|
552 return 0; |
|
553 } |
|
554 |
|
555 inBytes=(const uint8_t *)inData+headerSize; |
|
556 outBytes=(uint8_t *)outData+headerSize; |
|
557 |
|
558 inIndexes=(const int32_t *)inBytes; |
|
559 |
|
560 if(length>=0) { |
|
561 length-=headerSize; |
|
562 if(length<32*4) { |
|
563 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n", |
|
564 length); |
|
565 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
566 return 0; |
|
567 } |
|
568 } |
|
569 |
|
570 /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */ |
|
571 for(i=0; i<32; ++i) { |
|
572 indexes[i]=udata_readInt32(ds, inIndexes[i]); |
|
573 } |
|
574 |
|
575 /* calculate the total length of the data */ |
|
576 size= |
|
577 32*4+ /* size of indexes[] */ |
|
578 indexes[_NORM_INDEX_TRIE_SIZE]+ |
|
579 indexes[_NORM_INDEX_UCHAR_COUNT]*2+ |
|
580 indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+ |
|
581 indexes[_NORM_INDEX_FCD_TRIE_SIZE]+ |
|
582 indexes[_NORM_INDEX_AUX_TRIE_SIZE]+ |
|
583 indexes[_NORM_INDEX_CANON_SET_COUNT]*2; |
|
584 |
|
585 if(length>=0) { |
|
586 if(length<size) { |
|
587 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n", |
|
588 length); |
|
589 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
590 return 0; |
|
591 } |
|
592 |
|
593 /* copy the data for inaccessible bytes */ |
|
594 if(inBytes!=outBytes) { |
|
595 uprv_memcpy(outBytes, inBytes, size); |
|
596 } |
|
597 |
|
598 offset=0; |
|
599 |
|
600 /* swap the indexes[] */ |
|
601 count=32*4; |
|
602 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); |
|
603 offset+=count; |
|
604 |
|
605 /* swap the main UTrie */ |
|
606 count=indexes[_NORM_INDEX_TRIE_SIZE]; |
|
607 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
|
608 offset+=count; |
|
609 |
|
610 /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */ |
|
611 count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2; |
|
612 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
|
613 offset+=count; |
|
614 |
|
615 /* swap the FCD UTrie */ |
|
616 count=indexes[_NORM_INDEX_FCD_TRIE_SIZE]; |
|
617 if(count!=0) { |
|
618 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
|
619 offset+=count; |
|
620 } |
|
621 |
|
622 /* swap the aux UTrie */ |
|
623 count=indexes[_NORM_INDEX_AUX_TRIE_SIZE]; |
|
624 if(count!=0) { |
|
625 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
|
626 offset+=count; |
|
627 } |
|
628 |
|
629 /* swap the uint16_t combiningTable[] */ |
|
630 count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2; |
|
631 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
|
632 offset+=count; |
|
633 } |
|
634 |
|
635 return headerSize+size; |
|
636 } |
|
637 |
|
638 #endif |
|
639 |
|
640 /* Swap 'Test' data from gentest */ |
|
641 static int32_t U_CALLCONV |
|
642 test_swap(const UDataSwapper *ds, |
|
643 const void *inData, int32_t length, void *outData, |
|
644 UErrorCode *pErrorCode) { |
|
645 const UDataInfo *pInfo; |
|
646 int32_t headerSize; |
|
647 |
|
648 const uint8_t *inBytes; |
|
649 uint8_t *outBytes; |
|
650 |
|
651 int32_t offset; |
|
652 |
|
653 /* udata_swapDataHeader checks the arguments */ |
|
654 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
|
655 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
656 udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL"); |
|
657 return 0; |
|
658 } |
|
659 |
|
660 /* check data format and format version */ |
|
661 pInfo=(const UDataInfo *)((const char *)inData+4); |
|
662 if(!( |
|
663 pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */ |
|
664 pInfo->dataFormat[1]==0x65 && |
|
665 pInfo->dataFormat[2]==0x73 && |
|
666 pInfo->dataFormat[3]==0x74 && |
|
667 pInfo->formatVersion[0]==1 |
|
668 )) { |
|
669 udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n", |
|
670 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
671 pInfo->dataFormat[2], pInfo->dataFormat[3], |
|
672 pInfo->formatVersion[0]); |
|
673 *pErrorCode=U_UNSUPPORTED_ERROR; |
|
674 return 0; |
|
675 } |
|
676 |
|
677 inBytes=(const uint8_t *)inData+headerSize; |
|
678 outBytes=(uint8_t *)outData+headerSize; |
|
679 |
|
680 int32_t size16 = 2; // 16bit plus padding |
|
681 int32_t sizeStr = 5; // 4 char inv-str plus null |
|
682 int32_t size = size16 + sizeStr; |
|
683 |
|
684 if(length>=0) { |
|
685 if(length<size) { |
|
686 udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n", |
|
687 length, size); |
|
688 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
689 return 0; |
|
690 } |
|
691 |
|
692 offset =0; |
|
693 /* swap a 1 entry array */ |
|
694 ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode); |
|
695 offset+=size16; |
|
696 ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode); |
|
697 } |
|
698 |
|
699 return headerSize+size; |
|
700 } |
|
701 |
|
702 /* swap any data (except a .dat package) ------------------------------------ */ |
|
703 |
|
704 static const struct { |
|
705 uint8_t dataFormat[4]; |
|
706 UDataSwapFn *swapFn; |
|
707 } swapFns[]={ |
|
708 { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ |
|
709 #if !UCONFIG_NO_LEGACY_CONVERSION |
|
710 { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ |
|
711 #endif |
|
712 #if !UCONFIG_NO_CONVERSION |
|
713 { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ |
|
714 #endif |
|
715 #if !UCONFIG_NO_IDNA |
|
716 { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ |
|
717 #endif |
|
718 /* insert data formats here, descending by expected frequency of occurrence */ |
|
719 { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ |
|
720 |
|
721 { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, |
|
722 ucase_swap }, /* dataFormat="cAsE" */ |
|
723 |
|
724 { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, |
|
725 ubidi_swap }, /* dataFormat="BiDi" */ |
|
726 |
|
727 #if !UCONFIG_NO_NORMALIZATION |
|
728 { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ |
|
729 { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ |
|
730 #endif |
|
731 #if !UCONFIG_NO_COLLATION |
|
732 { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ |
|
733 { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ |
|
734 #endif |
|
735 #if !UCONFIG_NO_BREAK_ITERATION |
|
736 { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ |
|
737 { { 0x44, 0x69, 0x63, 0x74 }, udict_swap }, /* dataFormat="Dict" */ |
|
738 #endif |
|
739 { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ |
|
740 { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */ |
|
741 #if !UCONFIG_NO_NORMALIZATION |
|
742 { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */ |
|
743 #endif |
|
744 { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */ |
|
745 }; |
|
746 |
|
747 U_CAPI int32_t U_EXPORT2 |
|
748 udata_swap(const UDataSwapper *ds, |
|
749 const void *inData, int32_t length, void *outData, |
|
750 UErrorCode *pErrorCode) { |
|
751 char dataFormatChars[4]; |
|
752 const UDataInfo *pInfo; |
|
753 int32_t i, swappedLength; |
|
754 |
|
755 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
756 return 0; |
|
757 } |
|
758 |
|
759 /* |
|
760 * Preflight the header first; checks for illegal arguments, too. |
|
761 * Do not swap the header right away because the format-specific swapper |
|
762 * will swap it, get the headerSize again, and also use the header |
|
763 * information. Otherwise we would have to pass some of the information |
|
764 * and not be able to use the UDataSwapFn signature. |
|
765 */ |
|
766 udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); |
|
767 |
|
768 /* |
|
769 * If we wanted udata_swap() to also handle non-loadable data like a UTrie, |
|
770 * then we could check here for further known magic values and structures. |
|
771 */ |
|
772 if(U_FAILURE(*pErrorCode)) { |
|
773 return 0; /* the data format was not recognized */ |
|
774 } |
|
775 |
|
776 pInfo=(const UDataInfo *)((const char *)inData+4); |
|
777 |
|
778 { |
|
779 /* convert the data format from ASCII to Unicode to the system charset */ |
|
780 UChar u[4]={ |
|
781 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
782 pInfo->dataFormat[2], pInfo->dataFormat[3] |
|
783 }; |
|
784 |
|
785 if(uprv_isInvariantUString(u, 4)) { |
|
786 u_UCharsToChars(u, dataFormatChars, 4); |
|
787 } else { |
|
788 dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; |
|
789 } |
|
790 } |
|
791 |
|
792 /* dispatch to the swap function for the dataFormat */ |
|
793 for(i=0; i<LENGTHOF(swapFns); ++i) { |
|
794 if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { |
|
795 swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); |
|
796 |
|
797 if(U_FAILURE(*pErrorCode)) { |
|
798 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", |
|
799 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
800 pInfo->dataFormat[2], pInfo->dataFormat[3], |
|
801 dataFormatChars[0], dataFormatChars[1], |
|
802 dataFormatChars[2], dataFormatChars[3], |
|
803 u_errorName(*pErrorCode)); |
|
804 } else if(swappedLength<(length-15)) { |
|
805 /* swapped less than expected */ |
|
806 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", |
|
807 swappedLength, length, |
|
808 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
809 pInfo->dataFormat[2], pInfo->dataFormat[3], |
|
810 dataFormatChars[0], dataFormatChars[1], |
|
811 dataFormatChars[2], dataFormatChars[3], |
|
812 u_errorName(*pErrorCode)); |
|
813 } |
|
814 |
|
815 return swappedLength; |
|
816 } |
|
817 } |
|
818 |
|
819 /* the dataFormat was not recognized */ |
|
820 udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", |
|
821 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
822 pInfo->dataFormat[2], pInfo->dataFormat[3], |
|
823 dataFormatChars[0], dataFormatChars[1], |
|
824 dataFormatChars[2], dataFormatChars[3]); |
|
825 |
|
826 *pErrorCode=U_UNSUPPORTED_ERROR; |
|
827 return 0; |
|
828 } |