1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/tools/toolutil/swapimpl.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,828 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2005-2012, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: swapimpl.cpp 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2005may05 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* Data file swapping functions moved here from the common library 1.20 +* because some data is hardcoded in ICU4C and needs not be swapped any more. 1.21 +* Moving the functions here simplifies testing (for code coverage) because 1.22 +* we need not jump through hoops (like adding snapshots of these files 1.23 +* to testdata). 1.24 +* 1.25 +* The declarations for these functions remain in the internal header files 1.26 +* in icu/source/common/ 1.27 +*/ 1.28 + 1.29 +#include "unicode/utypes.h" 1.30 +#include "unicode/putil.h" 1.31 +#include "unicode/udata.h" 1.32 + 1.33 +/* Explicit include statement for std_string.h is needed 1.34 + * for compilation on certain platforms. (e.g. AIX/VACPP) 1.35 + */ 1.36 +#include "unicode/std_string.h" 1.37 + 1.38 +#include "cmemory.h" 1.39 +#include "cstring.h" 1.40 +#include "uinvchar.h" 1.41 +#include "uassert.h" 1.42 +#include "uarrsort.h" 1.43 +#include "ucmndata.h" 1.44 +#include "udataswp.h" 1.45 + 1.46 +/* swapping implementations in common */ 1.47 + 1.48 +#include "uresdata.h" 1.49 +#include "ucnv_io.h" 1.50 +#include "uprops.h" 1.51 +#include "ucase.h" 1.52 +#include "ubidi_props.h" 1.53 +#include "ucol_swp.h" 1.54 +#include "ucnv_bld.h" 1.55 +#include "unormimp.h" 1.56 +#include "normalizer2impl.h" 1.57 +#include "sprpimpl.h" 1.58 +#include "propname.h" 1.59 +#include "rbbidata.h" 1.60 +#include "utrie2.h" 1.61 +#include "dictionarydata.h" 1.62 + 1.63 +/* swapping implementations in i18n */ 1.64 + 1.65 +#if !UCONFIG_NO_NORMALIZATION 1.66 +#include "uspoof_impl.h" 1.67 +#endif 1.68 + 1.69 +U_NAMESPACE_USE 1.70 + 1.71 +/* definitions */ 1.72 + 1.73 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 1.74 + 1.75 +/* Unicode property (value) aliases data swapping --------------------------- */ 1.76 + 1.77 +static int32_t U_CALLCONV 1.78 +upname_swap(const UDataSwapper *ds, 1.79 + const void *inData, int32_t length, void *outData, 1.80 + UErrorCode *pErrorCode) { 1.81 + /* udata_swapDataHeader checks the arguments */ 1.82 + int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1.83 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.84 + return 0; 1.85 + } 1.86 + 1.87 + /* check data format and format version */ 1.88 + const UDataInfo *pInfo= 1.89 + reinterpret_cast<const UDataInfo *>( 1.90 + static_cast<const char *>(inData)+4); 1.91 + if(!( 1.92 + pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */ 1.93 + pInfo->dataFormat[1]==0x6e && 1.94 + pInfo->dataFormat[2]==0x61 && 1.95 + pInfo->dataFormat[3]==0x6d && 1.96 + pInfo->formatVersion[0]==2 1.97 + )) { 1.98 + udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n", 1.99 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.100 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.101 + pInfo->formatVersion[0]); 1.102 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.103 + return 0; 1.104 + } 1.105 + 1.106 + const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize; 1.107 + uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize; 1.108 + 1.109 + if(length>=0) { 1.110 + length-=headerSize; 1.111 + // formatVersion 2 initially has indexes[8], 32 bytes. 1.112 + if(length<32) { 1.113 + udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n", 1.114 + (int)length); 1.115 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.116 + return 0; 1.117 + } 1.118 + } 1.119 + 1.120 + const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes); 1.121 + int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]); 1.122 + if(length>=0) { 1.123 + if(length<totalSize) { 1.124 + udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) " 1.125 + "for pnames.icu\n", 1.126 + (int)length, (int)totalSize); 1.127 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.128 + return 0; 1.129 + } 1.130 + 1.131 + int32_t numBytesIndexesAndValueMaps= 1.132 + udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]); 1.133 + 1.134 + // Swap the indexes[] and the valueMaps[]. 1.135 + ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode); 1.136 + 1.137 + // Copy the rest of the data. 1.138 + if(inBytes!=outBytes) { 1.139 + uprv_memcpy(outBytes+numBytesIndexesAndValueMaps, 1.140 + inBytes+numBytesIndexesAndValueMaps, 1.141 + totalSize-numBytesIndexesAndValueMaps); 1.142 + } 1.143 + 1.144 + // We need not swap anything else: 1.145 + // 1.146 + // The ByteTries are already byte-serialized, and are fixed on ASCII. 1.147 + // (On an EBCDIC machine, the input string is converted to lowercase ASCII 1.148 + // while matching.) 1.149 + // 1.150 + // The name groups are mostly invariant characters, but since we only 1.151 + // generate, and keep in subversion, ASCII versions of pnames.icu, 1.152 + // and since only ICU4J uses the pnames.icu data file 1.153 + // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files, 1.154 + // we just copy those bytes too. 1.155 + } 1.156 + 1.157 + return headerSize+totalSize; 1.158 +} 1.159 + 1.160 +/* Unicode properties data swapping ----------------------------------------- */ 1.161 + 1.162 +static int32_t U_CALLCONV 1.163 +uprops_swap(const UDataSwapper *ds, 1.164 + const void *inData, int32_t length, void *outData, 1.165 + UErrorCode *pErrorCode) { 1.166 + const UDataInfo *pInfo; 1.167 + int32_t headerSize, i; 1.168 + 1.169 + int32_t dataIndexes[UPROPS_INDEX_COUNT]; 1.170 + const int32_t *inData32; 1.171 + 1.172 + /* udata_swapDataHeader checks the arguments */ 1.173 + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1.174 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.175 + return 0; 1.176 + } 1.177 + 1.178 + /* check data format and format version */ 1.179 + pInfo=(const UDataInfo *)((const char *)inData+4); 1.180 + if(!( 1.181 + pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */ 1.182 + pInfo->dataFormat[1]==0x50 && 1.183 + pInfo->dataFormat[2]==0x72 && 1.184 + pInfo->dataFormat[3]==0x6f && 1.185 + (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) && 1.186 + (pInfo->formatVersion[0]>=7 || 1.187 + (pInfo->formatVersion[2]==UTRIE_SHIFT && 1.188 + pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT)) 1.189 + )) { 1.190 + udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n", 1.191 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.192 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.193 + pInfo->formatVersion[0]); 1.194 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.195 + return 0; 1.196 + } 1.197 + 1.198 + /* the properties file must contain at least the indexes array */ 1.199 + if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) { 1.200 + udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", 1.201 + length-headerSize); 1.202 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.203 + return 0; 1.204 + } 1.205 + 1.206 + /* read the indexes */ 1.207 + inData32=(const int32_t *)((const char *)inData+headerSize); 1.208 + for(i=0; i<UPROPS_INDEX_COUNT; ++i) { 1.209 + dataIndexes[i]=udata_readInt32(ds, inData32[i]); 1.210 + } 1.211 + 1.212 + /* 1.213 + * comments are copied from the data format description in genprops/store.c 1.214 + * indexes[] constants are in uprops.h 1.215 + */ 1.216 + int32_t dataTop; 1.217 + if(length>=0) { 1.218 + int32_t *outData32; 1.219 + 1.220 + /* 1.221 + * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size. 1.222 + * In earlier formatVersions, it is 0 and a lower dataIndexes entry 1.223 + * has the top of the last item. 1.224 + */ 1.225 + for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {} 1.226 + 1.227 + if((length-headerSize)<(4*dataTop)) { 1.228 + udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", 1.229 + length-headerSize); 1.230 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.231 + return 0; 1.232 + } 1.233 + 1.234 + outData32=(int32_t *)((char *)outData+headerSize); 1.235 + 1.236 + /* copy everything for inaccessible data (padding) */ 1.237 + if(inData32!=outData32) { 1.238 + uprv_memcpy(outData32, inData32, 4*dataTop); 1.239 + } 1.240 + 1.241 + /* swap the indexes[16] */ 1.242 + ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode); 1.243 + 1.244 + /* 1.245 + * swap the main properties UTrie 1.246 + * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) 1.247 + */ 1.248 + utrie2_swapAnyVersion(ds, 1.249 + inData32+UPROPS_INDEX_COUNT, 1.250 + 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), 1.251 + outData32+UPROPS_INDEX_COUNT, 1.252 + pErrorCode); 1.253 + 1.254 + /* 1.255 + * swap the properties and exceptions words 1.256 + * P const uint32_t props32[i1-i0]; 1.257 + * E const uint32_t exceptions[i2-i1]; 1.258 + */ 1.259 + ds->swapArray32(ds, 1.260 + inData32+dataIndexes[UPROPS_PROPS32_INDEX], 1.261 + 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]), 1.262 + outData32+dataIndexes[UPROPS_PROPS32_INDEX], 1.263 + pErrorCode); 1.264 + 1.265 + /* 1.266 + * swap the UChars 1.267 + * U const UChar uchars[2*(i3-i2)]; 1.268 + */ 1.269 + ds->swapArray16(ds, 1.270 + inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], 1.271 + 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]), 1.272 + outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], 1.273 + pErrorCode); 1.274 + 1.275 + /* 1.276 + * swap the additional UTrie 1.277 + * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties 1.278 + */ 1.279 + utrie2_swapAnyVersion(ds, 1.280 + inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 1.281 + 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), 1.282 + outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 1.283 + pErrorCode); 1.284 + 1.285 + /* 1.286 + * swap the properties vectors 1.287 + * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4]; 1.288 + */ 1.289 + ds->swapArray32(ds, 1.290 + inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], 1.291 + 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]), 1.292 + outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], 1.293 + pErrorCode); 1.294 + 1.295 + // swap the Script_Extensions data 1.296 + // SCX const uint16_t scriptExtensions[2*(i7-i6)]; 1.297 + ds->swapArray16(ds, 1.298 + inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], 1.299 + 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]), 1.300 + outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], 1.301 + pErrorCode); 1.302 + } 1.303 + 1.304 + /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */ 1.305 + return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7]; 1.306 +} 1.307 + 1.308 +/* Unicode case mapping data swapping --------------------------------------- */ 1.309 + 1.310 +static int32_t U_CALLCONV 1.311 +ucase_swap(const UDataSwapper *ds, 1.312 + const void *inData, int32_t length, void *outData, 1.313 + UErrorCode *pErrorCode) { 1.314 + const UDataInfo *pInfo; 1.315 + int32_t headerSize; 1.316 + 1.317 + const uint8_t *inBytes; 1.318 + uint8_t *outBytes; 1.319 + 1.320 + const int32_t *inIndexes; 1.321 + int32_t indexes[16]; 1.322 + 1.323 + int32_t i, offset, count, size; 1.324 + 1.325 + /* udata_swapDataHeader checks the arguments */ 1.326 + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1.327 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.328 + return 0; 1.329 + } 1.330 + 1.331 + /* check data format and format version */ 1.332 + pInfo=(const UDataInfo *)((const char *)inData+4); 1.333 + if(!( 1.334 + pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */ 1.335 + pInfo->dataFormat[1]==UCASE_FMT_1 && 1.336 + pInfo->dataFormat[2]==UCASE_FMT_2 && 1.337 + pInfo->dataFormat[3]==UCASE_FMT_3 && 1.338 + ((pInfo->formatVersion[0]==1 && 1.339 + pInfo->formatVersion[2]==UTRIE_SHIFT && 1.340 + pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || 1.341 + pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3) 1.342 + )) { 1.343 + udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", 1.344 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.345 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.346 + pInfo->formatVersion[0]); 1.347 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.348 + return 0; 1.349 + } 1.350 + 1.351 + inBytes=(const uint8_t *)inData+headerSize; 1.352 + outBytes=(uint8_t *)outData+headerSize; 1.353 + 1.354 + inIndexes=(const int32_t *)inBytes; 1.355 + 1.356 + if(length>=0) { 1.357 + length-=headerSize; 1.358 + if(length<16*4) { 1.359 + udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n", 1.360 + length); 1.361 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.362 + return 0; 1.363 + } 1.364 + } 1.365 + 1.366 + /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */ 1.367 + for(i=0; i<16; ++i) { 1.368 + indexes[i]=udata_readInt32(ds, inIndexes[i]); 1.369 + } 1.370 + 1.371 + /* get the total length of the data */ 1.372 + size=indexes[UCASE_IX_LENGTH]; 1.373 + 1.374 + if(length>=0) { 1.375 + if(length<size) { 1.376 + udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n", 1.377 + length); 1.378 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.379 + return 0; 1.380 + } 1.381 + 1.382 + /* copy the data for inaccessible bytes */ 1.383 + if(inBytes!=outBytes) { 1.384 + uprv_memcpy(outBytes, inBytes, size); 1.385 + } 1.386 + 1.387 + offset=0; 1.388 + 1.389 + /* swap the int32_t indexes[] */ 1.390 + count=indexes[UCASE_IX_INDEX_TOP]*4; 1.391 + ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 1.392 + offset+=count; 1.393 + 1.394 + /* swap the UTrie */ 1.395 + count=indexes[UCASE_IX_TRIE_SIZE]; 1.396 + utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.397 + offset+=count; 1.398 + 1.399 + /* swap the uint16_t exceptions[] and unfold[] */ 1.400 + count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2; 1.401 + ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.402 + offset+=count; 1.403 + 1.404 + U_ASSERT(offset==size); 1.405 + } 1.406 + 1.407 + return headerSize+size; 1.408 +} 1.409 + 1.410 +/* Unicode bidi/shaping data swapping --------------------------------------- */ 1.411 + 1.412 +static int32_t U_CALLCONV 1.413 +ubidi_swap(const UDataSwapper *ds, 1.414 + const void *inData, int32_t length, void *outData, 1.415 + UErrorCode *pErrorCode) { 1.416 + const UDataInfo *pInfo; 1.417 + int32_t headerSize; 1.418 + 1.419 + const uint8_t *inBytes; 1.420 + uint8_t *outBytes; 1.421 + 1.422 + const int32_t *inIndexes; 1.423 + int32_t indexes[16]; 1.424 + 1.425 + int32_t i, offset, count, size; 1.426 + 1.427 + /* udata_swapDataHeader checks the arguments */ 1.428 + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1.429 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.430 + return 0; 1.431 + } 1.432 + 1.433 + /* check data format and format version */ 1.434 + pInfo=(const UDataInfo *)((const char *)inData+4); 1.435 + if(!( 1.436 + pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */ 1.437 + pInfo->dataFormat[1]==UBIDI_FMT_1 && 1.438 + pInfo->dataFormat[2]==UBIDI_FMT_2 && 1.439 + pInfo->dataFormat[3]==UBIDI_FMT_3 && 1.440 + ((pInfo->formatVersion[0]==1 && 1.441 + pInfo->formatVersion[2]==UTRIE_SHIFT && 1.442 + pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || 1.443 + pInfo->formatVersion[0]==2) 1.444 + )) { 1.445 + udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n", 1.446 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.447 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.448 + pInfo->formatVersion[0]); 1.449 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.450 + return 0; 1.451 + } 1.452 + 1.453 + inBytes=(const uint8_t *)inData+headerSize; 1.454 + outBytes=(uint8_t *)outData+headerSize; 1.455 + 1.456 + inIndexes=(const int32_t *)inBytes; 1.457 + 1.458 + if(length>=0) { 1.459 + length-=headerSize; 1.460 + if(length<16*4) { 1.461 + udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n", 1.462 + length); 1.463 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.464 + return 0; 1.465 + } 1.466 + } 1.467 + 1.468 + /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */ 1.469 + for(i=0; i<16; ++i) { 1.470 + indexes[i]=udata_readInt32(ds, inIndexes[i]); 1.471 + } 1.472 + 1.473 + /* get the total length of the data */ 1.474 + size=indexes[UBIDI_IX_LENGTH]; 1.475 + 1.476 + if(length>=0) { 1.477 + if(length<size) { 1.478 + udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n", 1.479 + length); 1.480 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.481 + return 0; 1.482 + } 1.483 + 1.484 + /* copy the data for inaccessible bytes */ 1.485 + if(inBytes!=outBytes) { 1.486 + uprv_memcpy(outBytes, inBytes, size); 1.487 + } 1.488 + 1.489 + offset=0; 1.490 + 1.491 + /* swap the int32_t indexes[] */ 1.492 + count=indexes[UBIDI_IX_INDEX_TOP]*4; 1.493 + ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 1.494 + offset+=count; 1.495 + 1.496 + /* swap the UTrie */ 1.497 + count=indexes[UBIDI_IX_TRIE_SIZE]; 1.498 + utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.499 + offset+=count; 1.500 + 1.501 + /* swap the uint32_t mirrors[] */ 1.502 + count=indexes[UBIDI_IX_MIRROR_LENGTH]*4; 1.503 + ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.504 + offset+=count; 1.505 + 1.506 + /* just skip the uint8_t jgArray[] */ 1.507 + count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START]; 1.508 + offset+=count; 1.509 + 1.510 + U_ASSERT(offset==size); 1.511 + } 1.512 + 1.513 + return headerSize+size; 1.514 +} 1.515 + 1.516 +/* Unicode normalization data swapping -------------------------------------- */ 1.517 + 1.518 +#if !UCONFIG_NO_NORMALIZATION 1.519 + 1.520 +static int32_t U_CALLCONV 1.521 +unorm_swap(const UDataSwapper *ds, 1.522 + const void *inData, int32_t length, void *outData, 1.523 + UErrorCode *pErrorCode) { 1.524 + const UDataInfo *pInfo; 1.525 + int32_t headerSize; 1.526 + 1.527 + const uint8_t *inBytes; 1.528 + uint8_t *outBytes; 1.529 + 1.530 + const int32_t *inIndexes; 1.531 + int32_t indexes[32]; 1.532 + 1.533 + int32_t i, offset, count, size; 1.534 + 1.535 + /* udata_swapDataHeader checks the arguments */ 1.536 + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1.537 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.538 + return 0; 1.539 + } 1.540 + 1.541 + /* check data format and format version */ 1.542 + pInfo=(const UDataInfo *)((const char *)inData+4); 1.543 + if(!( 1.544 + pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */ 1.545 + pInfo->dataFormat[1]==0x6f && 1.546 + pInfo->dataFormat[2]==0x72 && 1.547 + pInfo->dataFormat[3]==0x6d && 1.548 + pInfo->formatVersion[0]==2 1.549 + )) { 1.550 + udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n", 1.551 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.552 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.553 + pInfo->formatVersion[0]); 1.554 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.555 + return 0; 1.556 + } 1.557 + 1.558 + inBytes=(const uint8_t *)inData+headerSize; 1.559 + outBytes=(uint8_t *)outData+headerSize; 1.560 + 1.561 + inIndexes=(const int32_t *)inBytes; 1.562 + 1.563 + if(length>=0) { 1.564 + length-=headerSize; 1.565 + if(length<32*4) { 1.566 + udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n", 1.567 + length); 1.568 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.569 + return 0; 1.570 + } 1.571 + } 1.572 + 1.573 + /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */ 1.574 + for(i=0; i<32; ++i) { 1.575 + indexes[i]=udata_readInt32(ds, inIndexes[i]); 1.576 + } 1.577 + 1.578 + /* calculate the total length of the data */ 1.579 + size= 1.580 + 32*4+ /* size of indexes[] */ 1.581 + indexes[_NORM_INDEX_TRIE_SIZE]+ 1.582 + indexes[_NORM_INDEX_UCHAR_COUNT]*2+ 1.583 + indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+ 1.584 + indexes[_NORM_INDEX_FCD_TRIE_SIZE]+ 1.585 + indexes[_NORM_INDEX_AUX_TRIE_SIZE]+ 1.586 + indexes[_NORM_INDEX_CANON_SET_COUNT]*2; 1.587 + 1.588 + if(length>=0) { 1.589 + if(length<size) { 1.590 + udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n", 1.591 + length); 1.592 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.593 + return 0; 1.594 + } 1.595 + 1.596 + /* copy the data for inaccessible bytes */ 1.597 + if(inBytes!=outBytes) { 1.598 + uprv_memcpy(outBytes, inBytes, size); 1.599 + } 1.600 + 1.601 + offset=0; 1.602 + 1.603 + /* swap the indexes[] */ 1.604 + count=32*4; 1.605 + ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 1.606 + offset+=count; 1.607 + 1.608 + /* swap the main UTrie */ 1.609 + count=indexes[_NORM_INDEX_TRIE_SIZE]; 1.610 + utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.611 + offset+=count; 1.612 + 1.613 + /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */ 1.614 + count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2; 1.615 + ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.616 + offset+=count; 1.617 + 1.618 + /* swap the FCD UTrie */ 1.619 + count=indexes[_NORM_INDEX_FCD_TRIE_SIZE]; 1.620 + if(count!=0) { 1.621 + utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.622 + offset+=count; 1.623 + } 1.624 + 1.625 + /* swap the aux UTrie */ 1.626 + count=indexes[_NORM_INDEX_AUX_TRIE_SIZE]; 1.627 + if(count!=0) { 1.628 + utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.629 + offset+=count; 1.630 + } 1.631 + 1.632 + /* swap the uint16_t combiningTable[] */ 1.633 + count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2; 1.634 + ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 1.635 + offset+=count; 1.636 + } 1.637 + 1.638 + return headerSize+size; 1.639 +} 1.640 + 1.641 +#endif 1.642 + 1.643 +/* Swap 'Test' data from gentest */ 1.644 +static int32_t U_CALLCONV 1.645 +test_swap(const UDataSwapper *ds, 1.646 + const void *inData, int32_t length, void *outData, 1.647 + UErrorCode *pErrorCode) { 1.648 + const UDataInfo *pInfo; 1.649 + int32_t headerSize; 1.650 + 1.651 + const uint8_t *inBytes; 1.652 + uint8_t *outBytes; 1.653 + 1.654 + int32_t offset; 1.655 + 1.656 + /* udata_swapDataHeader checks the arguments */ 1.657 + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1.658 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.659 + udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL"); 1.660 + return 0; 1.661 + } 1.662 + 1.663 + /* check data format and format version */ 1.664 + pInfo=(const UDataInfo *)((const char *)inData+4); 1.665 + if(!( 1.666 + pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */ 1.667 + pInfo->dataFormat[1]==0x65 && 1.668 + pInfo->dataFormat[2]==0x73 && 1.669 + pInfo->dataFormat[3]==0x74 && 1.670 + pInfo->formatVersion[0]==1 1.671 + )) { 1.672 + udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n", 1.673 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.674 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.675 + pInfo->formatVersion[0]); 1.676 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.677 + return 0; 1.678 + } 1.679 + 1.680 + inBytes=(const uint8_t *)inData+headerSize; 1.681 + outBytes=(uint8_t *)outData+headerSize; 1.682 + 1.683 + int32_t size16 = 2; // 16bit plus padding 1.684 + int32_t sizeStr = 5; // 4 char inv-str plus null 1.685 + int32_t size = size16 + sizeStr; 1.686 + 1.687 + if(length>=0) { 1.688 + if(length<size) { 1.689 + udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n", 1.690 + length, size); 1.691 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.692 + return 0; 1.693 + } 1.694 + 1.695 + offset =0; 1.696 + /* swap a 1 entry array */ 1.697 + ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode); 1.698 + offset+=size16; 1.699 + ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode); 1.700 + } 1.701 + 1.702 + return headerSize+size; 1.703 +} 1.704 + 1.705 +/* swap any data (except a .dat package) ------------------------------------ */ 1.706 + 1.707 +static const struct { 1.708 + uint8_t dataFormat[4]; 1.709 + UDataSwapFn *swapFn; 1.710 +} swapFns[]={ 1.711 + { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ 1.712 +#if !UCONFIG_NO_LEGACY_CONVERSION 1.713 + { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ 1.714 +#endif 1.715 +#if !UCONFIG_NO_CONVERSION 1.716 + { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ 1.717 +#endif 1.718 +#if !UCONFIG_NO_IDNA 1.719 + { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ 1.720 +#endif 1.721 + /* insert data formats here, descending by expected frequency of occurrence */ 1.722 + { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ 1.723 + 1.724 + { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, 1.725 + ucase_swap }, /* dataFormat="cAsE" */ 1.726 + 1.727 + { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, 1.728 + ubidi_swap }, /* dataFormat="BiDi" */ 1.729 + 1.730 +#if !UCONFIG_NO_NORMALIZATION 1.731 + { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ 1.732 + { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ 1.733 +#endif 1.734 +#if !UCONFIG_NO_COLLATION 1.735 + { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ 1.736 + { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ 1.737 +#endif 1.738 +#if !UCONFIG_NO_BREAK_ITERATION 1.739 + { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ 1.740 + { { 0x44, 0x69, 0x63, 0x74 }, udict_swap }, /* dataFormat="Dict" */ 1.741 +#endif 1.742 + { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ 1.743 + { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */ 1.744 +#if !UCONFIG_NO_NORMALIZATION 1.745 + { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */ 1.746 +#endif 1.747 + { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */ 1.748 +}; 1.749 + 1.750 +U_CAPI int32_t U_EXPORT2 1.751 +udata_swap(const UDataSwapper *ds, 1.752 + const void *inData, int32_t length, void *outData, 1.753 + UErrorCode *pErrorCode) { 1.754 + char dataFormatChars[4]; 1.755 + const UDataInfo *pInfo; 1.756 + int32_t i, swappedLength; 1.757 + 1.758 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.759 + return 0; 1.760 + } 1.761 + 1.762 + /* 1.763 + * Preflight the header first; checks for illegal arguments, too. 1.764 + * Do not swap the header right away because the format-specific swapper 1.765 + * will swap it, get the headerSize again, and also use the header 1.766 + * information. Otherwise we would have to pass some of the information 1.767 + * and not be able to use the UDataSwapFn signature. 1.768 + */ 1.769 + udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); 1.770 + 1.771 + /* 1.772 + * If we wanted udata_swap() to also handle non-loadable data like a UTrie, 1.773 + * then we could check here for further known magic values and structures. 1.774 + */ 1.775 + if(U_FAILURE(*pErrorCode)) { 1.776 + return 0; /* the data format was not recognized */ 1.777 + } 1.778 + 1.779 + pInfo=(const UDataInfo *)((const char *)inData+4); 1.780 + 1.781 + { 1.782 + /* convert the data format from ASCII to Unicode to the system charset */ 1.783 + UChar u[4]={ 1.784 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.785 + pInfo->dataFormat[2], pInfo->dataFormat[3] 1.786 + }; 1.787 + 1.788 + if(uprv_isInvariantUString(u, 4)) { 1.789 + u_UCharsToChars(u, dataFormatChars, 4); 1.790 + } else { 1.791 + dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; 1.792 + } 1.793 + } 1.794 + 1.795 + /* dispatch to the swap function for the dataFormat */ 1.796 + for(i=0; i<LENGTHOF(swapFns); ++i) { 1.797 + if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { 1.798 + swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); 1.799 + 1.800 + if(U_FAILURE(*pErrorCode)) { 1.801 + udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", 1.802 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.803 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.804 + dataFormatChars[0], dataFormatChars[1], 1.805 + dataFormatChars[2], dataFormatChars[3], 1.806 + u_errorName(*pErrorCode)); 1.807 + } else if(swappedLength<(length-15)) { 1.808 + /* swapped less than expected */ 1.809 + udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", 1.810 + swappedLength, length, 1.811 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.812 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.813 + dataFormatChars[0], dataFormatChars[1], 1.814 + dataFormatChars[2], dataFormatChars[3], 1.815 + u_errorName(*pErrorCode)); 1.816 + } 1.817 + 1.818 + return swappedLength; 1.819 + } 1.820 + } 1.821 + 1.822 + /* the dataFormat was not recognized */ 1.823 + udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", 1.824 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.825 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.826 + dataFormatChars[0], dataFormatChars[1], 1.827 + dataFormatChars[2], dataFormatChars[3]); 1.828 + 1.829 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.830 + return 0; 1.831 +}