michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2003-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: ucol_swp.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2003sep10 michael@0: * created by: Markus W. Scherer michael@0: * michael@0: * Swap collation binaries. michael@0: */ michael@0: michael@0: #include "unicode/udata.h" /* UDataInfo */ michael@0: #include "utrie.h" michael@0: #include "udataswp.h" michael@0: #include "cmemory.h" michael@0: #include "ucol_data.h" michael@0: #include "ucol_swp.h" michael@0: michael@0: /* swapping ----------------------------------------------------------------- */ michael@0: michael@0: /* michael@0: * This performs data swapping for a folded trie (see utrie.c for details). michael@0: */ michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: utrie_swap(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: const UTrieHeader *inTrie; michael@0: UTrieHeader trie; michael@0: int32_t size; michael@0: UBool dataIs32; michael@0: michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* setup and swapping */ michael@0: if(length>=0 && (uint32_t)lengthreadUInt32(inTrie->signature); michael@0: trie.options=ds->readUInt32(inTrie->options); michael@0: trie.indexLength=udata_readInt32(ds, inTrie->indexLength); michael@0: trie.dataLength=udata_readInt32(ds, inTrie->dataLength); michael@0: michael@0: if( trie.signature!=0x54726965 || michael@0: (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || michael@0: ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || michael@0: trie.indexLength=0) { michael@0: UTrieHeader *outTrie; michael@0: michael@0: if(lengthswapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); michael@0: michael@0: /* swap the index and the data */ michael@0: if(dataIs32) { michael@0: ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); michael@0: ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, michael@0: (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); michael@0: } else { michael@0: ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); michael@0: } michael@0: } michael@0: michael@0: return size; michael@0: } michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: /* Modified copy of the beginning of ucol_swapBinary(). */ michael@0: U_CAPI UBool U_EXPORT2 michael@0: ucol_looksLikeCollationBinary(const UDataSwapper *ds, michael@0: const void *inData, int32_t length) { michael@0: const UCATableHeader *inHeader; michael@0: UCATableHeader header; michael@0: michael@0: if(ds==NULL || inData==NULL || length<-1) { michael@0: return FALSE; michael@0: } michael@0: michael@0: inHeader=(const UCATableHeader *)inData; michael@0: michael@0: /* michael@0: * The collation binary must contain at least the UCATableHeader, michael@0: * starting with its size field. michael@0: * sizeof(UCATableHeader)==42*4 in ICU 2.8 michael@0: * check the length against the header size before reading the size field michael@0: */ michael@0: uprv_memset(&header, 0, sizeof(header)); michael@0: if(length<0) { michael@0: header.size=udata_readInt32(ds, inHeader->size); michael@0: } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { michael@0: return FALSE; michael@0: } michael@0: michael@0: header.magic=ds->readUInt32(inHeader->magic); michael@0: if(!( michael@0: header.magic==UCOL_HEADER_MAGIC && michael@0: inHeader->formatVersion[0]==3 /*&& michael@0: inHeader->formatVersion[1]>=0*/ michael@0: )) { michael@0: return FALSE; michael@0: } michael@0: michael@0: if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) { michael@0: return FALSE; michael@0: } michael@0: michael@0: return TRUE; michael@0: } michael@0: michael@0: /* swap a header-less collation binary, inside a resource bundle or ucadata.icu */ michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucol_swapBinary(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: const uint8_t *inBytes; michael@0: uint8_t *outBytes; michael@0: michael@0: const UCATableHeader *inHeader; michael@0: UCATableHeader *outHeader; michael@0: UCATableHeader header; michael@0: michael@0: uint32_t count; michael@0: michael@0: /* argument checking in case we were not called from ucol_swap() */ michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: inBytes=(const uint8_t *)inData; michael@0: outBytes=(uint8_t *)outData; michael@0: michael@0: inHeader=(const UCATableHeader *)inData; michael@0: outHeader=(UCATableHeader *)outData; michael@0: michael@0: /* michael@0: * The collation binary must contain at least the UCATableHeader, michael@0: * starting with its size field. michael@0: * sizeof(UCATableHeader)==42*4 in ICU 2.8 michael@0: * check the length against the header size before reading the size field michael@0: */ michael@0: uprv_memset(&header, 0, sizeof(header)); michael@0: if(length<0) { michael@0: header.size=udata_readInt32(ds, inHeader->size); michael@0: } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { michael@0: udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n", michael@0: length); michael@0: *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: header.magic=ds->readUInt32(inHeader->magic); michael@0: if(!( michael@0: header.magic==UCOL_HEADER_MAGIC && michael@0: inHeader->formatVersion[0]==3 /*&& michael@0: inHeader->formatVersion[1]>=0*/ michael@0: )) { michael@0: udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n", michael@0: header.magic, michael@0: inHeader->formatVersion[0], inHeader->formatVersion[1]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) { michael@0: udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n", michael@0: inHeader->isBigEndian, inHeader->charSetFamily); michael@0: *pErrorCode=U_INVALID_FORMAT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: if(length>=0) { michael@0: /* copy everything, takes care of data that needs no swapping */ michael@0: if(inBytes!=outBytes) { michael@0: uprv_memcpy(outBytes, inBytes, header.size); michael@0: } michael@0: michael@0: /* swap the necessary pieces in the order of their occurrence in the data */ michael@0: michael@0: /* read more of the UCATableHeader (the size field was read above) */ michael@0: header.options= ds->readUInt32(inHeader->options); michael@0: header.UCAConsts= ds->readUInt32(inHeader->UCAConsts); michael@0: header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos); michael@0: header.mappingPosition= ds->readUInt32(inHeader->mappingPosition); michael@0: header.expansion= ds->readUInt32(inHeader->expansion); michael@0: header.contractionIndex= ds->readUInt32(inHeader->contractionIndex); michael@0: header.contractionCEs= ds->readUInt32(inHeader->contractionCEs); michael@0: header.contractionSize= ds->readUInt32(inHeader->contractionSize); michael@0: header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE); michael@0: header.expansionCESize= ds->readUInt32(inHeader->expansionCESize); michael@0: header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount); michael@0: header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize); michael@0: header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte); michael@0: header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript); michael@0: michael@0: /* swap the 32-bit integers in the header */ michael@0: ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader), michael@0: outHeader, pErrorCode); michael@0: ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript), michael@0: &(outHeader->scriptToLeadByte), pErrorCode); michael@0: /* set the output platform properties */ michael@0: outHeader->isBigEndian=ds->outIsBigEndian; michael@0: outHeader->charSetFamily=ds->outCharset; michael@0: michael@0: /* swap the options */ michael@0: if(header.options!=0) { michael@0: ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options, michael@0: outBytes+header.options, pErrorCode); michael@0: } michael@0: michael@0: /* swap the expansions */ michael@0: if(header.mappingPosition!=0 && header.expansion!=0) { michael@0: if(header.contractionIndex!=0) { michael@0: /* expansions bounded by contractions */ michael@0: count=header.contractionIndex-header.expansion; michael@0: } else { michael@0: /* no contractions: expansions bounded by the main trie */ michael@0: count=header.mappingPosition-header.expansion; michael@0: } michael@0: ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count, michael@0: outBytes+header.expansion, pErrorCode); michael@0: } michael@0: michael@0: /* swap the contractions */ michael@0: if(header.contractionSize!=0) { michael@0: /* contractionIndex: UChar[] */ michael@0: ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2, michael@0: outBytes+header.contractionIndex, pErrorCode); michael@0: michael@0: /* contractionCEs: CEs[] */ michael@0: ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4, michael@0: outBytes+header.contractionCEs, pErrorCode); michael@0: } michael@0: michael@0: /* swap the main trie */ michael@0: if(header.mappingPosition!=0) { michael@0: count=header.endExpansionCE-header.mappingPosition; michael@0: utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count, michael@0: outBytes+header.mappingPosition, pErrorCode); michael@0: } michael@0: michael@0: /* swap the max expansion table */ michael@0: if(header.endExpansionCECount!=0) { michael@0: ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4, michael@0: outBytes+header.endExpansionCE, pErrorCode); michael@0: } michael@0: michael@0: /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */ michael@0: michael@0: /* swap UCA constants */ michael@0: if(header.UCAConsts!=0) { michael@0: /* michael@0: * if UCAConsts!=0 then contractionUCACombos because we are swapping michael@0: * the UCA data file, and we know that the UCA contains contractions michael@0: */ michael@0: count=header.contractionUCACombos-header.UCAConsts; michael@0: ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts, michael@0: outBytes+header.UCAConsts, pErrorCode); michael@0: } michael@0: michael@0: /* swap UCA contractions */ michael@0: if(header.contractionUCACombosSize!=0) { michael@0: count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR; michael@0: ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count, michael@0: outBytes+header.contractionUCACombos, pErrorCode); michael@0: } michael@0: michael@0: /* swap the script to lead bytes */ michael@0: if(header.scriptToLeadByte!=0) { michael@0: int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16 michael@0: int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16 michael@0: ds->swapArray16(ds, inBytes+header.scriptToLeadByte, michael@0: 4 + (4 * indexCount) + (2 * dataCount), michael@0: outBytes+header.scriptToLeadByte, pErrorCode); michael@0: } michael@0: michael@0: /* swap the lead byte to scripts */ michael@0: if(header.leadByteToScript!=0) { michael@0: int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16 michael@0: int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16 michael@0: ds->swapArray16(ds, inBytes+header.leadByteToScript, michael@0: 4 + (2 * indexCount) + (2 * dataCount), michael@0: outBytes+header.leadByteToScript, pErrorCode); michael@0: } michael@0: } michael@0: michael@0: return header.size; michael@0: } michael@0: michael@0: /* swap ICU collation data like ucadata.icu */ michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucol_swap(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: michael@0: const UDataInfo *pInfo; michael@0: int32_t headerSize, collationSize; michael@0: michael@0: /* udata_swapDataHeader checks the arguments */ michael@0: headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* check data format and format version */ michael@0: pInfo=(const UDataInfo *)((const char *)inData+4); michael@0: if(!( michael@0: pInfo->dataFormat[0]==0x55 && /* dataFormat="UCol" */ michael@0: pInfo->dataFormat[1]==0x43 && michael@0: pInfo->dataFormat[2]==0x6f && michael@0: pInfo->dataFormat[3]==0x6c && michael@0: pInfo->formatVersion[0]==3 /*&& michael@0: pInfo->formatVersion[1]>=0*/ michael@0: )) { michael@0: udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0], pInfo->formatVersion[1]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: collationSize=ucol_swapBinary(ds, michael@0: (const char *)inData+headerSize, michael@0: length>=0 ? length-headerSize : -1, michael@0: (char *)outData+headerSize, michael@0: pErrorCode); michael@0: if(U_SUCCESS(*pErrorCode)) { michael@0: return headerSize+collationSize; michael@0: } else { michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: /* swap inverse UCA collation data (invuca.icu) */ michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucol_swapInverseUCA(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: const UDataInfo *pInfo; michael@0: int32_t headerSize; michael@0: michael@0: const uint8_t *inBytes; michael@0: uint8_t *outBytes; michael@0: michael@0: const InverseUCATableHeader *inHeader; michael@0: InverseUCATableHeader *outHeader; michael@0: InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} }; michael@0: michael@0: /* udata_swapDataHeader checks the arguments */ michael@0: headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* check data format and format version */ michael@0: pInfo=(const UDataInfo *)((const char *)inData+4); michael@0: if(!( michael@0: pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */ michael@0: pInfo->dataFormat[1]==0x6e && michael@0: pInfo->dataFormat[2]==0x76 && michael@0: pInfo->dataFormat[3]==0x43 && michael@0: pInfo->formatVersion[0]==2 && michael@0: pInfo->formatVersion[1]>=1 michael@0: )) { michael@0: udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0], pInfo->formatVersion[1]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: inBytes=(const uint8_t *)inData+headerSize; michael@0: outBytes=(uint8_t *)outData+headerSize; michael@0: michael@0: inHeader=(const InverseUCATableHeader *)inBytes; michael@0: outHeader=(InverseUCATableHeader *)outBytes; michael@0: michael@0: /* michael@0: * The inverse UCA collation binary must contain at least the InverseUCATableHeader, michael@0: * starting with its size field. michael@0: * sizeof(UCATableHeader)==8*4 in ICU 2.8 michael@0: * check the length against the header size before reading the size field michael@0: */ michael@0: if(length<0) { michael@0: header.byteSize=udata_readInt32(ds, inHeader->byteSize); michael@0: } else if( michael@0: ((length-headerSize)<(8*4) || michael@0: (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize))) michael@0: ) { michael@0: udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n", michael@0: length); michael@0: *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: if(length>=0) { michael@0: /* copy everything, takes care of data that needs no swapping */ michael@0: if(inBytes!=outBytes) { michael@0: uprv_memcpy(outBytes, inBytes, header.byteSize); michael@0: } michael@0: michael@0: /* swap the necessary pieces in the order of their occurrence in the data */ michael@0: michael@0: /* read more of the InverseUCATableHeader (the byteSize field was read above) */ michael@0: header.tableSize= ds->readUInt32(inHeader->tableSize); michael@0: header.contsSize= ds->readUInt32(inHeader->contsSize); michael@0: header.table= ds->readUInt32(inHeader->table); michael@0: header.conts= ds->readUInt32(inHeader->conts); michael@0: michael@0: /* swap the 32-bit integers in the header */ michael@0: ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode); michael@0: michael@0: /* swap the inverse table; tableSize counts uint32_t[3] rows */ michael@0: ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4, michael@0: outBytes+header.table, pErrorCode); michael@0: michael@0: /* swap the continuation table; contsSize counts UChars */ michael@0: ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR, michael@0: outBytes+header.conts, pErrorCode); michael@0: } michael@0: michael@0: return headerSize+header.byteSize; michael@0: } michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */