1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucol_swp.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,467 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2003-2012, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: ucol_swp.cpp 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2003sep10 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* Swap collation binaries. 1.20 +*/ 1.21 + 1.22 +#include "unicode/udata.h" /* UDataInfo */ 1.23 +#include "utrie.h" 1.24 +#include "udataswp.h" 1.25 +#include "cmemory.h" 1.26 +#include "ucol_data.h" 1.27 +#include "ucol_swp.h" 1.28 + 1.29 +/* swapping ----------------------------------------------------------------- */ 1.30 + 1.31 +/* 1.32 + * This performs data swapping for a folded trie (see utrie.c for details). 1.33 + */ 1.34 + 1.35 +U_CAPI int32_t U_EXPORT2 1.36 +utrie_swap(const UDataSwapper *ds, 1.37 + const void *inData, int32_t length, void *outData, 1.38 + UErrorCode *pErrorCode) { 1.39 + const UTrieHeader *inTrie; 1.40 + UTrieHeader trie; 1.41 + int32_t size; 1.42 + UBool dataIs32; 1.43 + 1.44 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.45 + return 0; 1.46 + } 1.47 + if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { 1.48 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.49 + return 0; 1.50 + } 1.51 + 1.52 + /* setup and swapping */ 1.53 + if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) { 1.54 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.55 + return 0; 1.56 + } 1.57 + 1.58 + inTrie=(const UTrieHeader *)inData; 1.59 + trie.signature=ds->readUInt32(inTrie->signature); 1.60 + trie.options=ds->readUInt32(inTrie->options); 1.61 + trie.indexLength=udata_readInt32(ds, inTrie->indexLength); 1.62 + trie.dataLength=udata_readInt32(ds, inTrie->dataLength); 1.63 + 1.64 + if( trie.signature!=0x54726965 || 1.65 + (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || 1.66 + ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || 1.67 + trie.indexLength<UTRIE_BMP_INDEX_LENGTH || 1.68 + (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 || 1.69 + trie.dataLength<UTRIE_DATA_BLOCK_LENGTH || 1.70 + (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 || 1.71 + ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100)) 1.72 + ) { 1.73 + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ 1.74 + return 0; 1.75 + } 1.76 + 1.77 + dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0); 1.78 + size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2); 1.79 + 1.80 + if(length>=0) { 1.81 + UTrieHeader *outTrie; 1.82 + 1.83 + if(length<size) { 1.84 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.85 + return 0; 1.86 + } 1.87 + 1.88 + outTrie=(UTrieHeader *)outData; 1.89 + 1.90 + /* swap the header */ 1.91 + ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); 1.92 + 1.93 + /* swap the index and the data */ 1.94 + if(dataIs32) { 1.95 + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); 1.96 + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, 1.97 + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); 1.98 + } else { 1.99 + ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); 1.100 + } 1.101 + } 1.102 + 1.103 + return size; 1.104 +} 1.105 + 1.106 +#if !UCONFIG_NO_COLLATION 1.107 + 1.108 +/* Modified copy of the beginning of ucol_swapBinary(). */ 1.109 +U_CAPI UBool U_EXPORT2 1.110 +ucol_looksLikeCollationBinary(const UDataSwapper *ds, 1.111 + const void *inData, int32_t length) { 1.112 + const UCATableHeader *inHeader; 1.113 + UCATableHeader header; 1.114 + 1.115 + if(ds==NULL || inData==NULL || length<-1) { 1.116 + return FALSE; 1.117 + } 1.118 + 1.119 + inHeader=(const UCATableHeader *)inData; 1.120 + 1.121 + /* 1.122 + * The collation binary must contain at least the UCATableHeader, 1.123 + * starting with its size field. 1.124 + * sizeof(UCATableHeader)==42*4 in ICU 2.8 1.125 + * check the length against the header size before reading the size field 1.126 + */ 1.127 + uprv_memset(&header, 0, sizeof(header)); 1.128 + if(length<0) { 1.129 + header.size=udata_readInt32(ds, inHeader->size); 1.130 + } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { 1.131 + return FALSE; 1.132 + } 1.133 + 1.134 + header.magic=ds->readUInt32(inHeader->magic); 1.135 + if(!( 1.136 + header.magic==UCOL_HEADER_MAGIC && 1.137 + inHeader->formatVersion[0]==3 /*&& 1.138 + inHeader->formatVersion[1]>=0*/ 1.139 + )) { 1.140 + return FALSE; 1.141 + } 1.142 + 1.143 + if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) { 1.144 + return FALSE; 1.145 + } 1.146 + 1.147 + return TRUE; 1.148 +} 1.149 + 1.150 +/* swap a header-less collation binary, inside a resource bundle or ucadata.icu */ 1.151 +U_CAPI int32_t U_EXPORT2 1.152 +ucol_swapBinary(const UDataSwapper *ds, 1.153 + const void *inData, int32_t length, void *outData, 1.154 + UErrorCode *pErrorCode) { 1.155 + const uint8_t *inBytes; 1.156 + uint8_t *outBytes; 1.157 + 1.158 + const UCATableHeader *inHeader; 1.159 + UCATableHeader *outHeader; 1.160 + UCATableHeader header; 1.161 + 1.162 + uint32_t count; 1.163 + 1.164 + /* argument checking in case we were not called from ucol_swap() */ 1.165 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.166 + return 0; 1.167 + } 1.168 + if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { 1.169 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.170 + return 0; 1.171 + } 1.172 + 1.173 + inBytes=(const uint8_t *)inData; 1.174 + outBytes=(uint8_t *)outData; 1.175 + 1.176 + inHeader=(const UCATableHeader *)inData; 1.177 + outHeader=(UCATableHeader *)outData; 1.178 + 1.179 + /* 1.180 + * The collation binary must contain at least the UCATableHeader, 1.181 + * starting with its size field. 1.182 + * sizeof(UCATableHeader)==42*4 in ICU 2.8 1.183 + * check the length against the header size before reading the size field 1.184 + */ 1.185 + uprv_memset(&header, 0, sizeof(header)); 1.186 + if(length<0) { 1.187 + header.size=udata_readInt32(ds, inHeader->size); 1.188 + } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { 1.189 + udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n", 1.190 + length); 1.191 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.192 + return 0; 1.193 + } 1.194 + 1.195 + header.magic=ds->readUInt32(inHeader->magic); 1.196 + if(!( 1.197 + header.magic==UCOL_HEADER_MAGIC && 1.198 + inHeader->formatVersion[0]==3 /*&& 1.199 + inHeader->formatVersion[1]>=0*/ 1.200 + )) { 1.201 + udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n", 1.202 + header.magic, 1.203 + inHeader->formatVersion[0], inHeader->formatVersion[1]); 1.204 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.205 + return 0; 1.206 + } 1.207 + 1.208 + if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) { 1.209 + udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n", 1.210 + inHeader->isBigEndian, inHeader->charSetFamily); 1.211 + *pErrorCode=U_INVALID_FORMAT_ERROR; 1.212 + return 0; 1.213 + } 1.214 + 1.215 + if(length>=0) { 1.216 + /* copy everything, takes care of data that needs no swapping */ 1.217 + if(inBytes!=outBytes) { 1.218 + uprv_memcpy(outBytes, inBytes, header.size); 1.219 + } 1.220 + 1.221 + /* swap the necessary pieces in the order of their occurrence in the data */ 1.222 + 1.223 + /* read more of the UCATableHeader (the size field was read above) */ 1.224 + header.options= ds->readUInt32(inHeader->options); 1.225 + header.UCAConsts= ds->readUInt32(inHeader->UCAConsts); 1.226 + header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos); 1.227 + header.mappingPosition= ds->readUInt32(inHeader->mappingPosition); 1.228 + header.expansion= ds->readUInt32(inHeader->expansion); 1.229 + header.contractionIndex= ds->readUInt32(inHeader->contractionIndex); 1.230 + header.contractionCEs= ds->readUInt32(inHeader->contractionCEs); 1.231 + header.contractionSize= ds->readUInt32(inHeader->contractionSize); 1.232 + header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE); 1.233 + header.expansionCESize= ds->readUInt32(inHeader->expansionCESize); 1.234 + header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount); 1.235 + header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize); 1.236 + header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte); 1.237 + header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript); 1.238 + 1.239 + /* swap the 32-bit integers in the header */ 1.240 + ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader), 1.241 + outHeader, pErrorCode); 1.242 + ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript), 1.243 + &(outHeader->scriptToLeadByte), pErrorCode); 1.244 + /* set the output platform properties */ 1.245 + outHeader->isBigEndian=ds->outIsBigEndian; 1.246 + outHeader->charSetFamily=ds->outCharset; 1.247 + 1.248 + /* swap the options */ 1.249 + if(header.options!=0) { 1.250 + ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options, 1.251 + outBytes+header.options, pErrorCode); 1.252 + } 1.253 + 1.254 + /* swap the expansions */ 1.255 + if(header.mappingPosition!=0 && header.expansion!=0) { 1.256 + if(header.contractionIndex!=0) { 1.257 + /* expansions bounded by contractions */ 1.258 + count=header.contractionIndex-header.expansion; 1.259 + } else { 1.260 + /* no contractions: expansions bounded by the main trie */ 1.261 + count=header.mappingPosition-header.expansion; 1.262 + } 1.263 + ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count, 1.264 + outBytes+header.expansion, pErrorCode); 1.265 + } 1.266 + 1.267 + /* swap the contractions */ 1.268 + if(header.contractionSize!=0) { 1.269 + /* contractionIndex: UChar[] */ 1.270 + ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2, 1.271 + outBytes+header.contractionIndex, pErrorCode); 1.272 + 1.273 + /* contractionCEs: CEs[] */ 1.274 + ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4, 1.275 + outBytes+header.contractionCEs, pErrorCode); 1.276 + } 1.277 + 1.278 + /* swap the main trie */ 1.279 + if(header.mappingPosition!=0) { 1.280 + count=header.endExpansionCE-header.mappingPosition; 1.281 + utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count, 1.282 + outBytes+header.mappingPosition, pErrorCode); 1.283 + } 1.284 + 1.285 + /* swap the max expansion table */ 1.286 + if(header.endExpansionCECount!=0) { 1.287 + ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4, 1.288 + outBytes+header.endExpansionCE, pErrorCode); 1.289 + } 1.290 + 1.291 + /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */ 1.292 + 1.293 + /* swap UCA constants */ 1.294 + if(header.UCAConsts!=0) { 1.295 + /* 1.296 + * if UCAConsts!=0 then contractionUCACombos because we are swapping 1.297 + * the UCA data file, and we know that the UCA contains contractions 1.298 + */ 1.299 + count=header.contractionUCACombos-header.UCAConsts; 1.300 + ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts, 1.301 + outBytes+header.UCAConsts, pErrorCode); 1.302 + } 1.303 + 1.304 + /* swap UCA contractions */ 1.305 + if(header.contractionUCACombosSize!=0) { 1.306 + count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR; 1.307 + ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count, 1.308 + outBytes+header.contractionUCACombos, pErrorCode); 1.309 + } 1.310 + 1.311 + /* swap the script to lead bytes */ 1.312 + if(header.scriptToLeadByte!=0) { 1.313 + int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16 1.314 + int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16 1.315 + ds->swapArray16(ds, inBytes+header.scriptToLeadByte, 1.316 + 4 + (4 * indexCount) + (2 * dataCount), 1.317 + outBytes+header.scriptToLeadByte, pErrorCode); 1.318 + } 1.319 + 1.320 + /* swap the lead byte to scripts */ 1.321 + if(header.leadByteToScript!=0) { 1.322 + int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16 1.323 + int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16 1.324 + ds->swapArray16(ds, inBytes+header.leadByteToScript, 1.325 + 4 + (2 * indexCount) + (2 * dataCount), 1.326 + outBytes+header.leadByteToScript, pErrorCode); 1.327 + } 1.328 + } 1.329 + 1.330 + return header.size; 1.331 +} 1.332 + 1.333 +/* swap ICU collation data like ucadata.icu */ 1.334 +U_CAPI int32_t U_EXPORT2 1.335 +ucol_swap(const UDataSwapper *ds, 1.336 + const void *inData, int32_t length, void *outData, 1.337 + UErrorCode *pErrorCode) { 1.338 + 1.339 + const UDataInfo *pInfo; 1.340 + int32_t headerSize, collationSize; 1.341 + 1.342 + /* udata_swapDataHeader checks the arguments */ 1.343 + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1.344 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.345 + return 0; 1.346 + } 1.347 + 1.348 + /* check data format and format version */ 1.349 + pInfo=(const UDataInfo *)((const char *)inData+4); 1.350 + if(!( 1.351 + pInfo->dataFormat[0]==0x55 && /* dataFormat="UCol" */ 1.352 + pInfo->dataFormat[1]==0x43 && 1.353 + pInfo->dataFormat[2]==0x6f && 1.354 + pInfo->dataFormat[3]==0x6c && 1.355 + pInfo->formatVersion[0]==3 /*&& 1.356 + pInfo->formatVersion[1]>=0*/ 1.357 + )) { 1.358 + udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n", 1.359 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.360 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.361 + pInfo->formatVersion[0], pInfo->formatVersion[1]); 1.362 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.363 + return 0; 1.364 + } 1.365 + 1.366 + collationSize=ucol_swapBinary(ds, 1.367 + (const char *)inData+headerSize, 1.368 + length>=0 ? length-headerSize : -1, 1.369 + (char *)outData+headerSize, 1.370 + pErrorCode); 1.371 + if(U_SUCCESS(*pErrorCode)) { 1.372 + return headerSize+collationSize; 1.373 + } else { 1.374 + return 0; 1.375 + } 1.376 +} 1.377 + 1.378 +/* swap inverse UCA collation data (invuca.icu) */ 1.379 +U_CAPI int32_t U_EXPORT2 1.380 +ucol_swapInverseUCA(const UDataSwapper *ds, 1.381 + const void *inData, int32_t length, void *outData, 1.382 + UErrorCode *pErrorCode) { 1.383 + const UDataInfo *pInfo; 1.384 + int32_t headerSize; 1.385 + 1.386 + const uint8_t *inBytes; 1.387 + uint8_t *outBytes; 1.388 + 1.389 + const InverseUCATableHeader *inHeader; 1.390 + InverseUCATableHeader *outHeader; 1.391 + InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} }; 1.392 + 1.393 + /* udata_swapDataHeader checks the arguments */ 1.394 + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1.395 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.396 + return 0; 1.397 + } 1.398 + 1.399 + /* check data format and format version */ 1.400 + pInfo=(const UDataInfo *)((const char *)inData+4); 1.401 + if(!( 1.402 + pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */ 1.403 + pInfo->dataFormat[1]==0x6e && 1.404 + pInfo->dataFormat[2]==0x76 && 1.405 + pInfo->dataFormat[3]==0x43 && 1.406 + pInfo->formatVersion[0]==2 && 1.407 + pInfo->formatVersion[1]>=1 1.408 + )) { 1.409 + udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n", 1.410 + pInfo->dataFormat[0], pInfo->dataFormat[1], 1.411 + pInfo->dataFormat[2], pInfo->dataFormat[3], 1.412 + pInfo->formatVersion[0], pInfo->formatVersion[1]); 1.413 + *pErrorCode=U_UNSUPPORTED_ERROR; 1.414 + return 0; 1.415 + } 1.416 + 1.417 + inBytes=(const uint8_t *)inData+headerSize; 1.418 + outBytes=(uint8_t *)outData+headerSize; 1.419 + 1.420 + inHeader=(const InverseUCATableHeader *)inBytes; 1.421 + outHeader=(InverseUCATableHeader *)outBytes; 1.422 + 1.423 + /* 1.424 + * The inverse UCA collation binary must contain at least the InverseUCATableHeader, 1.425 + * starting with its size field. 1.426 + * sizeof(UCATableHeader)==8*4 in ICU 2.8 1.427 + * check the length against the header size before reading the size field 1.428 + */ 1.429 + if(length<0) { 1.430 + header.byteSize=udata_readInt32(ds, inHeader->byteSize); 1.431 + } else if( 1.432 + ((length-headerSize)<(8*4) || 1.433 + (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize))) 1.434 + ) { 1.435 + udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n", 1.436 + length); 1.437 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.438 + return 0; 1.439 + } 1.440 + 1.441 + if(length>=0) { 1.442 + /* copy everything, takes care of data that needs no swapping */ 1.443 + if(inBytes!=outBytes) { 1.444 + uprv_memcpy(outBytes, inBytes, header.byteSize); 1.445 + } 1.446 + 1.447 + /* swap the necessary pieces in the order of their occurrence in the data */ 1.448 + 1.449 + /* read more of the InverseUCATableHeader (the byteSize field was read above) */ 1.450 + header.tableSize= ds->readUInt32(inHeader->tableSize); 1.451 + header.contsSize= ds->readUInt32(inHeader->contsSize); 1.452 + header.table= ds->readUInt32(inHeader->table); 1.453 + header.conts= ds->readUInt32(inHeader->conts); 1.454 + 1.455 + /* swap the 32-bit integers in the header */ 1.456 + ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode); 1.457 + 1.458 + /* swap the inverse table; tableSize counts uint32_t[3] rows */ 1.459 + ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4, 1.460 + outBytes+header.table, pErrorCode); 1.461 + 1.462 + /* swap the continuation table; contsSize counts UChars */ 1.463 + ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR, 1.464 + outBytes+header.conts, pErrorCode); 1.465 + } 1.466 + 1.467 + return headerSize+header.byteSize; 1.468 +} 1.469 + 1.470 +#endif /* #if !UCONFIG_NO_COLLATION */