michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2005-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: swapimpl.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2005may05 michael@0: * created by: Markus W. Scherer michael@0: * michael@0: * Data file swapping functions moved here from the common library michael@0: * because some data is hardcoded in ICU4C and needs not be swapped any more. michael@0: * Moving the functions here simplifies testing (for code coverage) because michael@0: * we need not jump through hoops (like adding snapshots of these files michael@0: * to testdata). michael@0: * michael@0: * The declarations for these functions remain in the internal header files michael@0: * in icu/source/common/ michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/putil.h" michael@0: #include "unicode/udata.h" michael@0: michael@0: /* Explicit include statement for std_string.h is needed michael@0: * for compilation on certain platforms. (e.g. AIX/VACPP) michael@0: */ michael@0: #include "unicode/std_string.h" michael@0: michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "uinvchar.h" michael@0: #include "uassert.h" michael@0: #include "uarrsort.h" michael@0: #include "ucmndata.h" michael@0: #include "udataswp.h" michael@0: michael@0: /* swapping implementations in common */ michael@0: michael@0: #include "uresdata.h" michael@0: #include "ucnv_io.h" michael@0: #include "uprops.h" michael@0: #include "ucase.h" michael@0: #include "ubidi_props.h" michael@0: #include "ucol_swp.h" michael@0: #include "ucnv_bld.h" michael@0: #include "unormimp.h" michael@0: #include "normalizer2impl.h" michael@0: #include "sprpimpl.h" michael@0: #include "propname.h" michael@0: #include "rbbidata.h" michael@0: #include "utrie2.h" michael@0: #include "dictionarydata.h" michael@0: michael@0: /* swapping implementations in i18n */ michael@0: michael@0: #if !UCONFIG_NO_NORMALIZATION michael@0: #include "uspoof_impl.h" michael@0: #endif michael@0: michael@0: U_NAMESPACE_USE michael@0: michael@0: /* definitions */ michael@0: michael@0: #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) michael@0: michael@0: /* Unicode property (value) aliases data swapping --------------------------- */ michael@0: michael@0: static int32_t U_CALLCONV michael@0: upname_swap(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: /* udata_swapDataHeader checks the arguments */ michael@0: int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* check data format and format version */ michael@0: const UDataInfo *pInfo= michael@0: reinterpret_cast( michael@0: static_cast(inData)+4); michael@0: if(!( michael@0: pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */ michael@0: pInfo->dataFormat[1]==0x6e && michael@0: pInfo->dataFormat[2]==0x61 && michael@0: pInfo->dataFormat[3]==0x6d && michael@0: pInfo->formatVersion[0]==2 michael@0: )) { michael@0: udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: const uint8_t *inBytes=static_cast(inData)+headerSize; michael@0: uint8_t *outBytes=static_cast(outData)+headerSize; michael@0: michael@0: if(length>=0) { michael@0: length-=headerSize; michael@0: // formatVersion 2 initially has indexes[8], 32 bytes. michael@0: if(length<32) { michael@0: udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n", michael@0: (int)length); michael@0: *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: const int32_t *inIndexes=reinterpret_cast(inBytes); michael@0: int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]); michael@0: if(length>=0) { michael@0: if(lengthswapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode); michael@0: michael@0: // Copy the rest of the data. michael@0: if(inBytes!=outBytes) { michael@0: uprv_memcpy(outBytes+numBytesIndexesAndValueMaps, michael@0: inBytes+numBytesIndexesAndValueMaps, michael@0: totalSize-numBytesIndexesAndValueMaps); michael@0: } michael@0: michael@0: // We need not swap anything else: michael@0: // michael@0: // The ByteTries are already byte-serialized, and are fixed on ASCII. michael@0: // (On an EBCDIC machine, the input string is converted to lowercase ASCII michael@0: // while matching.) michael@0: // michael@0: // The name groups are mostly invariant characters, but since we only michael@0: // generate, and keep in subversion, ASCII versions of pnames.icu, michael@0: // and since only ICU4J uses the pnames.icu data file michael@0: // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files, michael@0: // we just copy those bytes too. michael@0: } michael@0: michael@0: return headerSize+totalSize; michael@0: } michael@0: michael@0: /* Unicode properties data swapping ----------------------------------------- */ michael@0: michael@0: static int32_t U_CALLCONV michael@0: uprops_swap(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: const UDataInfo *pInfo; michael@0: int32_t headerSize, i; michael@0: michael@0: int32_t dataIndexes[UPROPS_INDEX_COUNT]; michael@0: const int32_t *inData32; michael@0: michael@0: /* udata_swapDataHeader checks the arguments */ michael@0: headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* check data format and format version */ michael@0: pInfo=(const UDataInfo *)((const char *)inData+4); michael@0: if(!( michael@0: pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */ michael@0: pInfo->dataFormat[1]==0x50 && michael@0: pInfo->dataFormat[2]==0x72 && michael@0: pInfo->dataFormat[3]==0x6f && michael@0: (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) && michael@0: (pInfo->formatVersion[0]>=7 || michael@0: (pInfo->formatVersion[2]==UTRIE_SHIFT && michael@0: pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT)) michael@0: )) { michael@0: udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* the properties file must contain at least the indexes array */ michael@0: if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) { michael@0: udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", michael@0: length-headerSize); michael@0: *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* read the indexes */ michael@0: inData32=(const int32_t *)((const char *)inData+headerSize); michael@0: for(i=0; i=0) { michael@0: int32_t *outData32; michael@0: michael@0: /* michael@0: * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size. michael@0: * In earlier formatVersions, it is 0 and a lower dataIndexes entry michael@0: * has the top of the last item. michael@0: */ michael@0: for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {} michael@0: michael@0: if((length-headerSize)<(4*dataTop)) { michael@0: udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", michael@0: length-headerSize); michael@0: *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: outData32=(int32_t *)((char *)outData+headerSize); michael@0: michael@0: /* copy everything for inaccessible data (padding) */ michael@0: if(inData32!=outData32) { michael@0: uprv_memcpy(outData32, inData32, 4*dataTop); michael@0: } michael@0: michael@0: /* swap the indexes[16] */ michael@0: ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode); michael@0: michael@0: /* michael@0: * swap the main properties UTrie michael@0: * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) michael@0: */ michael@0: utrie2_swapAnyVersion(ds, michael@0: inData32+UPROPS_INDEX_COUNT, michael@0: 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), michael@0: outData32+UPROPS_INDEX_COUNT, michael@0: pErrorCode); michael@0: michael@0: /* michael@0: * swap the properties and exceptions words michael@0: * P const uint32_t props32[i1-i0]; michael@0: * E const uint32_t exceptions[i2-i1]; michael@0: */ michael@0: ds->swapArray32(ds, michael@0: inData32+dataIndexes[UPROPS_PROPS32_INDEX], michael@0: 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]), michael@0: outData32+dataIndexes[UPROPS_PROPS32_INDEX], michael@0: pErrorCode); michael@0: michael@0: /* michael@0: * swap the UChars michael@0: * U const UChar uchars[2*(i3-i2)]; michael@0: */ michael@0: ds->swapArray16(ds, michael@0: inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], michael@0: 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]), michael@0: outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], michael@0: pErrorCode); michael@0: michael@0: /* michael@0: * swap the additional UTrie michael@0: * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties michael@0: */ michael@0: utrie2_swapAnyVersion(ds, michael@0: inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], michael@0: 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), michael@0: outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], michael@0: pErrorCode); michael@0: michael@0: /* michael@0: * swap the properties vectors michael@0: * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4]; michael@0: */ michael@0: ds->swapArray32(ds, michael@0: inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], michael@0: 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]), michael@0: outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], michael@0: pErrorCode); michael@0: michael@0: // swap the Script_Extensions data michael@0: // SCX const uint16_t scriptExtensions[2*(i7-i6)]; michael@0: ds->swapArray16(ds, michael@0: inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], michael@0: 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]), michael@0: outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], michael@0: pErrorCode); michael@0: } michael@0: michael@0: /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */ michael@0: return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7]; michael@0: } michael@0: michael@0: /* Unicode case mapping data swapping --------------------------------------- */ michael@0: michael@0: static int32_t U_CALLCONV michael@0: ucase_swap(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: const UDataInfo *pInfo; michael@0: int32_t headerSize; michael@0: michael@0: const uint8_t *inBytes; michael@0: uint8_t *outBytes; michael@0: michael@0: const int32_t *inIndexes; michael@0: int32_t indexes[16]; michael@0: michael@0: int32_t i, offset, count, size; michael@0: michael@0: /* udata_swapDataHeader checks the arguments */ michael@0: headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* check data format and format version */ michael@0: pInfo=(const UDataInfo *)((const char *)inData+4); michael@0: if(!( michael@0: pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */ michael@0: pInfo->dataFormat[1]==UCASE_FMT_1 && michael@0: pInfo->dataFormat[2]==UCASE_FMT_2 && michael@0: pInfo->dataFormat[3]==UCASE_FMT_3 && michael@0: ((pInfo->formatVersion[0]==1 && michael@0: pInfo->formatVersion[2]==UTRIE_SHIFT && michael@0: pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || michael@0: pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3) michael@0: )) { michael@0: udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: inBytes=(const uint8_t *)inData+headerSize; michael@0: outBytes=(uint8_t *)outData+headerSize; michael@0: michael@0: inIndexes=(const int32_t *)inBytes; michael@0: michael@0: if(length>=0) { michael@0: length-=headerSize; michael@0: if(length<16*4) { michael@0: udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n", michael@0: length); michael@0: *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */ michael@0: for(i=0; i<16; ++i) { michael@0: indexes[i]=udata_readInt32(ds, inIndexes[i]); michael@0: } michael@0: michael@0: /* get the total length of the data */ michael@0: size=indexes[UCASE_IX_LENGTH]; michael@0: michael@0: if(length>=0) { michael@0: if(lengthswapArray32(ds, inBytes, count, outBytes, pErrorCode); michael@0: offset+=count; michael@0: michael@0: /* swap the UTrie */ michael@0: count=indexes[UCASE_IX_TRIE_SIZE]; michael@0: utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); michael@0: offset+=count; michael@0: michael@0: /* swap the uint16_t exceptions[] and unfold[] */ michael@0: count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2; michael@0: ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); michael@0: offset+=count; michael@0: michael@0: U_ASSERT(offset==size); michael@0: } michael@0: michael@0: return headerSize+size; michael@0: } michael@0: michael@0: /* Unicode bidi/shaping data swapping --------------------------------------- */ michael@0: michael@0: static int32_t U_CALLCONV michael@0: ubidi_swap(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: const UDataInfo *pInfo; michael@0: int32_t headerSize; michael@0: michael@0: const uint8_t *inBytes; michael@0: uint8_t *outBytes; michael@0: michael@0: const int32_t *inIndexes; michael@0: int32_t indexes[16]; michael@0: michael@0: int32_t i, offset, count, size; michael@0: michael@0: /* udata_swapDataHeader checks the arguments */ michael@0: headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* check data format and format version */ michael@0: pInfo=(const UDataInfo *)((const char *)inData+4); michael@0: if(!( michael@0: pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */ michael@0: pInfo->dataFormat[1]==UBIDI_FMT_1 && michael@0: pInfo->dataFormat[2]==UBIDI_FMT_2 && michael@0: pInfo->dataFormat[3]==UBIDI_FMT_3 && michael@0: ((pInfo->formatVersion[0]==1 && michael@0: pInfo->formatVersion[2]==UTRIE_SHIFT && michael@0: pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || michael@0: pInfo->formatVersion[0]==2) michael@0: )) { michael@0: udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: inBytes=(const uint8_t *)inData+headerSize; michael@0: outBytes=(uint8_t *)outData+headerSize; michael@0: michael@0: inIndexes=(const int32_t *)inBytes; michael@0: michael@0: if(length>=0) { michael@0: length-=headerSize; michael@0: if(length<16*4) { michael@0: udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n", michael@0: length); michael@0: *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */ michael@0: for(i=0; i<16; ++i) { michael@0: indexes[i]=udata_readInt32(ds, inIndexes[i]); michael@0: } michael@0: michael@0: /* get the total length of the data */ michael@0: size=indexes[UBIDI_IX_LENGTH]; michael@0: michael@0: if(length>=0) { michael@0: if(lengthswapArray32(ds, inBytes, count, outBytes, pErrorCode); michael@0: offset+=count; michael@0: michael@0: /* swap the UTrie */ michael@0: count=indexes[UBIDI_IX_TRIE_SIZE]; michael@0: utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); michael@0: offset+=count; michael@0: michael@0: /* swap the uint32_t mirrors[] */ michael@0: count=indexes[UBIDI_IX_MIRROR_LENGTH]*4; michael@0: ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode); michael@0: offset+=count; michael@0: michael@0: /* just skip the uint8_t jgArray[] */ michael@0: count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START]; michael@0: offset+=count; michael@0: michael@0: U_ASSERT(offset==size); michael@0: } michael@0: michael@0: return headerSize+size; michael@0: } michael@0: michael@0: /* Unicode normalization data swapping -------------------------------------- */ michael@0: michael@0: #if !UCONFIG_NO_NORMALIZATION michael@0: michael@0: static int32_t U_CALLCONV michael@0: unorm_swap(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: const UDataInfo *pInfo; michael@0: int32_t headerSize; michael@0: michael@0: const uint8_t *inBytes; michael@0: uint8_t *outBytes; michael@0: michael@0: const int32_t *inIndexes; michael@0: int32_t indexes[32]; michael@0: michael@0: int32_t i, offset, count, size; michael@0: michael@0: /* udata_swapDataHeader checks the arguments */ michael@0: headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* check data format and format version */ michael@0: pInfo=(const UDataInfo *)((const char *)inData+4); michael@0: if(!( michael@0: pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */ michael@0: pInfo->dataFormat[1]==0x6f && michael@0: pInfo->dataFormat[2]==0x72 && michael@0: pInfo->dataFormat[3]==0x6d && michael@0: pInfo->formatVersion[0]==2 michael@0: )) { michael@0: udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: inBytes=(const uint8_t *)inData+headerSize; michael@0: outBytes=(uint8_t *)outData+headerSize; michael@0: michael@0: inIndexes=(const int32_t *)inBytes; michael@0: michael@0: if(length>=0) { michael@0: length-=headerSize; michael@0: if(length<32*4) { michael@0: udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n", michael@0: length); michael@0: *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */ michael@0: for(i=0; i<32; ++i) { michael@0: indexes[i]=udata_readInt32(ds, inIndexes[i]); michael@0: } michael@0: michael@0: /* calculate the total length of the data */ michael@0: size= michael@0: 32*4+ /* size of indexes[] */ michael@0: indexes[_NORM_INDEX_TRIE_SIZE]+ michael@0: indexes[_NORM_INDEX_UCHAR_COUNT]*2+ michael@0: indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+ michael@0: indexes[_NORM_INDEX_FCD_TRIE_SIZE]+ michael@0: indexes[_NORM_INDEX_AUX_TRIE_SIZE]+ michael@0: indexes[_NORM_INDEX_CANON_SET_COUNT]*2; michael@0: michael@0: if(length>=0) { michael@0: if(lengthswapArray32(ds, inBytes, count, outBytes, pErrorCode); michael@0: offset+=count; michael@0: michael@0: /* swap the main UTrie */ michael@0: count=indexes[_NORM_INDEX_TRIE_SIZE]; michael@0: utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); michael@0: offset+=count; michael@0: michael@0: /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */ michael@0: count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2; michael@0: ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); michael@0: offset+=count; michael@0: michael@0: /* swap the FCD UTrie */ michael@0: count=indexes[_NORM_INDEX_FCD_TRIE_SIZE]; michael@0: if(count!=0) { michael@0: utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); michael@0: offset+=count; michael@0: } michael@0: michael@0: /* swap the aux UTrie */ michael@0: count=indexes[_NORM_INDEX_AUX_TRIE_SIZE]; michael@0: if(count!=0) { michael@0: utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); michael@0: offset+=count; michael@0: } michael@0: michael@0: /* swap the uint16_t combiningTable[] */ michael@0: count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2; michael@0: ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); michael@0: offset+=count; michael@0: } michael@0: michael@0: return headerSize+size; michael@0: } michael@0: michael@0: #endif michael@0: michael@0: /* Swap 'Test' data from gentest */ michael@0: static int32_t U_CALLCONV michael@0: test_swap(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: const UDataInfo *pInfo; michael@0: int32_t headerSize; michael@0: michael@0: const uint8_t *inBytes; michael@0: uint8_t *outBytes; michael@0: michael@0: int32_t offset; michael@0: michael@0: /* udata_swapDataHeader checks the arguments */ michael@0: headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL"); michael@0: return 0; michael@0: } michael@0: michael@0: /* check data format and format version */ michael@0: pInfo=(const UDataInfo *)((const char *)inData+4); michael@0: if(!( michael@0: pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */ michael@0: pInfo->dataFormat[1]==0x65 && michael@0: pInfo->dataFormat[2]==0x73 && michael@0: pInfo->dataFormat[3]==0x74 && michael@0: pInfo->formatVersion[0]==1 michael@0: )) { michael@0: udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: inBytes=(const uint8_t *)inData+headerSize; michael@0: outBytes=(uint8_t *)outData+headerSize; michael@0: michael@0: int32_t size16 = 2; // 16bit plus padding michael@0: int32_t sizeStr = 5; // 4 char inv-str plus null michael@0: int32_t size = size16 + sizeStr; michael@0: michael@0: if(length>=0) { michael@0: if(lengthswapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode); michael@0: offset+=size16; michael@0: ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode); michael@0: } michael@0: michael@0: return headerSize+size; michael@0: } michael@0: michael@0: /* swap any data (except a .dat package) ------------------------------------ */ michael@0: michael@0: static const struct { michael@0: uint8_t dataFormat[4]; michael@0: UDataSwapFn *swapFn; michael@0: } swapFns[]={ michael@0: { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ michael@0: #if !UCONFIG_NO_LEGACY_CONVERSION michael@0: { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ michael@0: #endif michael@0: #if !UCONFIG_NO_CONVERSION michael@0: { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ michael@0: #endif michael@0: #if !UCONFIG_NO_IDNA michael@0: { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ michael@0: #endif michael@0: /* insert data formats here, descending by expected frequency of occurrence */ michael@0: { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ michael@0: michael@0: { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, michael@0: ucase_swap }, /* dataFormat="cAsE" */ michael@0: michael@0: { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, michael@0: ubidi_swap }, /* dataFormat="BiDi" */ michael@0: michael@0: #if !UCONFIG_NO_NORMALIZATION michael@0: { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ michael@0: { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ michael@0: #endif michael@0: #if !UCONFIG_NO_COLLATION michael@0: { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ michael@0: { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ michael@0: #endif michael@0: #if !UCONFIG_NO_BREAK_ITERATION michael@0: { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ michael@0: { { 0x44, 0x69, 0x63, 0x74 }, udict_swap }, /* dataFormat="Dict" */ michael@0: #endif michael@0: { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ michael@0: { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */ michael@0: #if !UCONFIG_NO_NORMALIZATION michael@0: { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */ michael@0: #endif michael@0: { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */ michael@0: }; michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: udata_swap(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: char dataFormatChars[4]; michael@0: const UDataInfo *pInfo; michael@0: int32_t i, swappedLength; michael@0: michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* michael@0: * Preflight the header first; checks for illegal arguments, too. michael@0: * Do not swap the header right away because the format-specific swapper michael@0: * will swap it, get the headerSize again, and also use the header michael@0: * information. Otherwise we would have to pass some of the information michael@0: * and not be able to use the UDataSwapFn signature. michael@0: */ michael@0: udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); michael@0: michael@0: /* michael@0: * If we wanted udata_swap() to also handle non-loadable data like a UTrie, michael@0: * then we could check here for further known magic values and structures. michael@0: */ michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; /* the data format was not recognized */ michael@0: } michael@0: michael@0: pInfo=(const UDataInfo *)((const char *)inData+4); michael@0: michael@0: { michael@0: /* convert the data format from ASCII to Unicode to the system charset */ michael@0: UChar u[4]={ michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3] michael@0: }; michael@0: michael@0: if(uprv_isInvariantUString(u, 4)) { michael@0: u_UCharsToChars(u, dataFormatChars, 4); michael@0: } else { michael@0: dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; michael@0: } michael@0: } michael@0: michael@0: /* dispatch to the swap function for the dataFormat */ michael@0: for(i=0; idataFormat, 4)) { michael@0: swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); michael@0: michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: dataFormatChars[0], dataFormatChars[1], michael@0: dataFormatChars[2], dataFormatChars[3], michael@0: u_errorName(*pErrorCode)); michael@0: } else if(swappedLength<(length-15)) { michael@0: /* swapped less than expected */ michael@0: udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", michael@0: swappedLength, length, michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: dataFormatChars[0], dataFormatChars[1], michael@0: dataFormatChars[2], dataFormatChars[3], michael@0: u_errorName(*pErrorCode)); michael@0: } michael@0: michael@0: return swappedLength; michael@0: } michael@0: } michael@0: michael@0: /* the dataFormat was not recognized */ michael@0: udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: dataFormatChars[0], dataFormatChars[1], michael@0: dataFormatChars[2], dataFormatChars[3]); michael@0: michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: }