intl/icu/source/tools/toolutil/swapimpl.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/toolutil/swapimpl.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,828 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2005-2012, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  swapimpl.cpp
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2005may05
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   Data file swapping functions moved here from the common library
    1.20 +*   because some data is hardcoded in ICU4C and needs not be swapped any more.
    1.21 +*   Moving the functions here simplifies testing (for code coverage) because
    1.22 +*   we need not jump through hoops (like adding snapshots of these files
    1.23 +*   to testdata).
    1.24 +*
    1.25 +*   The declarations for these functions remain in the internal header files
    1.26 +*   in icu/source/common/
    1.27 +*/
    1.28 +
    1.29 +#include "unicode/utypes.h"
    1.30 +#include "unicode/putil.h"
    1.31 +#include "unicode/udata.h"
    1.32 +
    1.33 +/* Explicit include statement for std_string.h is needed
    1.34 + * for compilation on certain platforms. (e.g. AIX/VACPP)
    1.35 + */
    1.36 +#include "unicode/std_string.h"
    1.37 +
    1.38 +#include "cmemory.h"
    1.39 +#include "cstring.h"
    1.40 +#include "uinvchar.h"
    1.41 +#include "uassert.h"
    1.42 +#include "uarrsort.h"
    1.43 +#include "ucmndata.h"
    1.44 +#include "udataswp.h"
    1.45 +
    1.46 +/* swapping implementations in common */
    1.47 +
    1.48 +#include "uresdata.h"
    1.49 +#include "ucnv_io.h"
    1.50 +#include "uprops.h"
    1.51 +#include "ucase.h"
    1.52 +#include "ubidi_props.h"
    1.53 +#include "ucol_swp.h"
    1.54 +#include "ucnv_bld.h"
    1.55 +#include "unormimp.h"
    1.56 +#include "normalizer2impl.h"
    1.57 +#include "sprpimpl.h"
    1.58 +#include "propname.h"
    1.59 +#include "rbbidata.h"
    1.60 +#include "utrie2.h"
    1.61 +#include "dictionarydata.h"
    1.62 +
    1.63 +/* swapping implementations in i18n */
    1.64 +
    1.65 +#if !UCONFIG_NO_NORMALIZATION
    1.66 +#include "uspoof_impl.h"
    1.67 +#endif
    1.68 +
    1.69 +U_NAMESPACE_USE
    1.70 +
    1.71 +/* definitions */
    1.72 +
    1.73 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    1.74 +
    1.75 +/* Unicode property (value) aliases data swapping --------------------------- */
    1.76 +
    1.77 +static int32_t U_CALLCONV
    1.78 +upname_swap(const UDataSwapper *ds,
    1.79 +            const void *inData, int32_t length, void *outData,
    1.80 +            UErrorCode *pErrorCode) {
    1.81 +    /* udata_swapDataHeader checks the arguments */
    1.82 +    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    1.83 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    1.84 +        return 0;
    1.85 +    }
    1.86 +
    1.87 +    /* check data format and format version */
    1.88 +    const UDataInfo *pInfo=
    1.89 +        reinterpret_cast<const UDataInfo *>(
    1.90 +            static_cast<const char *>(inData)+4);
    1.91 +    if(!(
    1.92 +        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
    1.93 +        pInfo->dataFormat[1]==0x6e &&
    1.94 +        pInfo->dataFormat[2]==0x61 &&
    1.95 +        pInfo->dataFormat[3]==0x6d &&
    1.96 +        pInfo->formatVersion[0]==2
    1.97 +    )) {
    1.98 +        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
    1.99 +                         pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.100 +                         pInfo->dataFormat[2], pInfo->dataFormat[3],
   1.101 +                         pInfo->formatVersion[0]);
   1.102 +        *pErrorCode=U_UNSUPPORTED_ERROR;
   1.103 +        return 0;
   1.104 +    }
   1.105 +
   1.106 +    const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
   1.107 +    uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;
   1.108 +
   1.109 +    if(length>=0) {
   1.110 +        length-=headerSize;
   1.111 +        // formatVersion 2 initially has indexes[8], 32 bytes.
   1.112 +        if(length<32) {
   1.113 +            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
   1.114 +                             (int)length);
   1.115 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.116 +            return 0;
   1.117 +        }
   1.118 +    }
   1.119 +
   1.120 +    const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
   1.121 +    int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
   1.122 +    if(length>=0) {
   1.123 +        if(length<totalSize) {
   1.124 +            udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
   1.125 +                             "for pnames.icu\n",
   1.126 +                             (int)length, (int)totalSize);
   1.127 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.128 +            return 0;
   1.129 +        }
   1.130 +
   1.131 +        int32_t numBytesIndexesAndValueMaps=
   1.132 +            udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
   1.133 +
   1.134 +        // Swap the indexes[] and the valueMaps[].
   1.135 +        ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
   1.136 +
   1.137 +        // Copy the rest of the data.
   1.138 +        if(inBytes!=outBytes) {
   1.139 +            uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
   1.140 +                        inBytes+numBytesIndexesAndValueMaps,
   1.141 +                        totalSize-numBytesIndexesAndValueMaps);
   1.142 +        }
   1.143 +
   1.144 +        // We need not swap anything else:
   1.145 +        //
   1.146 +        // The ByteTries are already byte-serialized, and are fixed on ASCII.
   1.147 +        // (On an EBCDIC machine, the input string is converted to lowercase ASCII
   1.148 +        // while matching.)
   1.149 +        //
   1.150 +        // The name groups are mostly invariant characters, but since we only
   1.151 +        // generate, and keep in subversion, ASCII versions of pnames.icu,
   1.152 +        // and since only ICU4J uses the pnames.icu data file
   1.153 +        // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
   1.154 +        // we just copy those bytes too.
   1.155 +    }
   1.156 +
   1.157 +    return headerSize+totalSize;
   1.158 +}
   1.159 +
   1.160 +/* Unicode properties data swapping ----------------------------------------- */
   1.161 +
   1.162 +static int32_t U_CALLCONV
   1.163 +uprops_swap(const UDataSwapper *ds,
   1.164 +            const void *inData, int32_t length, void *outData,
   1.165 +            UErrorCode *pErrorCode) {
   1.166 +    const UDataInfo *pInfo;
   1.167 +    int32_t headerSize, i;
   1.168 +
   1.169 +    int32_t dataIndexes[UPROPS_INDEX_COUNT];
   1.170 +    const int32_t *inData32;
   1.171 +
   1.172 +    /* udata_swapDataHeader checks the arguments */
   1.173 +    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1.174 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1.175 +        return 0;
   1.176 +    }
   1.177 +
   1.178 +    /* check data format and format version */
   1.179 +    pInfo=(const UDataInfo *)((const char *)inData+4);
   1.180 +    if(!(
   1.181 +        pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
   1.182 +        pInfo->dataFormat[1]==0x50 &&
   1.183 +        pInfo->dataFormat[2]==0x72 &&
   1.184 +        pInfo->dataFormat[3]==0x6f &&
   1.185 +        (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
   1.186 +        (pInfo->formatVersion[0]>=7 ||
   1.187 +            (pInfo->formatVersion[2]==UTRIE_SHIFT &&
   1.188 +             pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
   1.189 +    )) {
   1.190 +        udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
   1.191 +                         pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.192 +                         pInfo->dataFormat[2], pInfo->dataFormat[3],
   1.193 +                         pInfo->formatVersion[0]);
   1.194 +        *pErrorCode=U_UNSUPPORTED_ERROR;
   1.195 +        return 0;
   1.196 +    }
   1.197 +
   1.198 +    /* the properties file must contain at least the indexes array */
   1.199 +    if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
   1.200 +        udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
   1.201 +                         length-headerSize);
   1.202 +        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.203 +        return 0;
   1.204 +    }
   1.205 +
   1.206 +    /* read the indexes */
   1.207 +    inData32=(const int32_t *)((const char *)inData+headerSize);
   1.208 +    for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
   1.209 +        dataIndexes[i]=udata_readInt32(ds, inData32[i]);
   1.210 +    }
   1.211 +
   1.212 +    /*
   1.213 +     * comments are copied from the data format description in genprops/store.c
   1.214 +     * indexes[] constants are in uprops.h
   1.215 +     */
   1.216 +    int32_t dataTop;
   1.217 +    if(length>=0) {
   1.218 +        int32_t *outData32;
   1.219 +
   1.220 +        /*
   1.221 +         * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
   1.222 +         * In earlier formatVersions, it is 0 and a lower dataIndexes entry
   1.223 +         * has the top of the last item.
   1.224 +         */
   1.225 +        for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
   1.226 +
   1.227 +        if((length-headerSize)<(4*dataTop)) {
   1.228 +            udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
   1.229 +                             length-headerSize);
   1.230 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.231 +            return 0;
   1.232 +        }
   1.233 +
   1.234 +        outData32=(int32_t *)((char *)outData+headerSize);
   1.235 +
   1.236 +        /* copy everything for inaccessible data (padding) */
   1.237 +        if(inData32!=outData32) {
   1.238 +            uprv_memcpy(outData32, inData32, 4*dataTop);
   1.239 +        }
   1.240 +
   1.241 +        /* swap the indexes[16] */
   1.242 +        ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
   1.243 +
   1.244 +        /*
   1.245 +         * swap the main properties UTrie
   1.246 +         * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
   1.247 +         */
   1.248 +        utrie2_swapAnyVersion(ds,
   1.249 +            inData32+UPROPS_INDEX_COUNT,
   1.250 +            4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
   1.251 +            outData32+UPROPS_INDEX_COUNT,
   1.252 +            pErrorCode);
   1.253 +
   1.254 +        /*
   1.255 +         * swap the properties and exceptions words
   1.256 +         * P  const uint32_t props32[i1-i0];
   1.257 +         * E  const uint32_t exceptions[i2-i1];
   1.258 +         */
   1.259 +        ds->swapArray32(ds,
   1.260 +            inData32+dataIndexes[UPROPS_PROPS32_INDEX],
   1.261 +            4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
   1.262 +            outData32+dataIndexes[UPROPS_PROPS32_INDEX],
   1.263 +            pErrorCode);
   1.264 +
   1.265 +        /*
   1.266 +         * swap the UChars
   1.267 +         * U  const UChar uchars[2*(i3-i2)];
   1.268 +         */
   1.269 +        ds->swapArray16(ds,
   1.270 +            inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
   1.271 +            4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
   1.272 +            outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
   1.273 +            pErrorCode);
   1.274 +
   1.275 +        /*
   1.276 +         * swap the additional UTrie
   1.277 +         * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
   1.278 +         */
   1.279 +        utrie2_swapAnyVersion(ds,
   1.280 +            inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
   1.281 +            4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
   1.282 +            outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
   1.283 +            pErrorCode);
   1.284 +
   1.285 +        /*
   1.286 +         * swap the properties vectors
   1.287 +         * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
   1.288 +         */
   1.289 +        ds->swapArray32(ds,
   1.290 +            inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
   1.291 +            4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
   1.292 +            outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
   1.293 +            pErrorCode);
   1.294 +
   1.295 +        // swap the Script_Extensions data
   1.296 +        // SCX const uint16_t scriptExtensions[2*(i7-i6)];
   1.297 +        ds->swapArray16(ds,
   1.298 +            inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
   1.299 +            4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
   1.300 +            outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
   1.301 +            pErrorCode);
   1.302 +    }
   1.303 +
   1.304 +    /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
   1.305 +    return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
   1.306 +}
   1.307 +
   1.308 +/* Unicode case mapping data swapping --------------------------------------- */
   1.309 +
   1.310 +static int32_t U_CALLCONV
   1.311 +ucase_swap(const UDataSwapper *ds,
   1.312 +           const void *inData, int32_t length, void *outData,
   1.313 +           UErrorCode *pErrorCode) {
   1.314 +    const UDataInfo *pInfo;
   1.315 +    int32_t headerSize;
   1.316 +
   1.317 +    const uint8_t *inBytes;
   1.318 +    uint8_t *outBytes;
   1.319 +
   1.320 +    const int32_t *inIndexes;
   1.321 +    int32_t indexes[16];
   1.322 +
   1.323 +    int32_t i, offset, count, size;
   1.324 +
   1.325 +    /* udata_swapDataHeader checks the arguments */
   1.326 +    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1.327 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1.328 +        return 0;
   1.329 +    }
   1.330 +
   1.331 +    /* check data format and format version */
   1.332 +    pInfo=(const UDataInfo *)((const char *)inData+4);
   1.333 +    if(!(
   1.334 +        pInfo->dataFormat[0]==UCASE_FMT_0 &&    /* dataFormat="cAsE" */
   1.335 +        pInfo->dataFormat[1]==UCASE_FMT_1 &&
   1.336 +        pInfo->dataFormat[2]==UCASE_FMT_2 &&
   1.337 +        pInfo->dataFormat[3]==UCASE_FMT_3 &&
   1.338 +        ((pInfo->formatVersion[0]==1 &&
   1.339 +          pInfo->formatVersion[2]==UTRIE_SHIFT &&
   1.340 +          pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
   1.341 +         pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3)
   1.342 +    )) {
   1.343 +        udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
   1.344 +                         pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.345 +                         pInfo->dataFormat[2], pInfo->dataFormat[3],
   1.346 +                         pInfo->formatVersion[0]);
   1.347 +        *pErrorCode=U_UNSUPPORTED_ERROR;
   1.348 +        return 0;
   1.349 +    }
   1.350 +
   1.351 +    inBytes=(const uint8_t *)inData+headerSize;
   1.352 +    outBytes=(uint8_t *)outData+headerSize;
   1.353 +
   1.354 +    inIndexes=(const int32_t *)inBytes;
   1.355 +
   1.356 +    if(length>=0) {
   1.357 +        length-=headerSize;
   1.358 +        if(length<16*4) {
   1.359 +            udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
   1.360 +                             length);
   1.361 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.362 +            return 0;
   1.363 +        }
   1.364 +    }
   1.365 +
   1.366 +    /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
   1.367 +    for(i=0; i<16; ++i) {
   1.368 +        indexes[i]=udata_readInt32(ds, inIndexes[i]);
   1.369 +    }
   1.370 +
   1.371 +    /* get the total length of the data */
   1.372 +    size=indexes[UCASE_IX_LENGTH];
   1.373 +
   1.374 +    if(length>=0) {
   1.375 +        if(length<size) {
   1.376 +            udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
   1.377 +                             length);
   1.378 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.379 +            return 0;
   1.380 +        }
   1.381 +
   1.382 +        /* copy the data for inaccessible bytes */
   1.383 +        if(inBytes!=outBytes) {
   1.384 +            uprv_memcpy(outBytes, inBytes, size);
   1.385 +        }
   1.386 +
   1.387 +        offset=0;
   1.388 +
   1.389 +        /* swap the int32_t indexes[] */
   1.390 +        count=indexes[UCASE_IX_INDEX_TOP]*4;
   1.391 +        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
   1.392 +        offset+=count;
   1.393 +
   1.394 +        /* swap the UTrie */
   1.395 +        count=indexes[UCASE_IX_TRIE_SIZE];
   1.396 +        utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   1.397 +        offset+=count;
   1.398 +
   1.399 +        /* swap the uint16_t exceptions[] and unfold[] */
   1.400 +        count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
   1.401 +        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   1.402 +        offset+=count;
   1.403 +
   1.404 +        U_ASSERT(offset==size);
   1.405 +    }
   1.406 +
   1.407 +    return headerSize+size;
   1.408 +}
   1.409 +
   1.410 +/* Unicode bidi/shaping data swapping --------------------------------------- */
   1.411 +
   1.412 +static int32_t U_CALLCONV
   1.413 +ubidi_swap(const UDataSwapper *ds,
   1.414 +           const void *inData, int32_t length, void *outData,
   1.415 +           UErrorCode *pErrorCode) {
   1.416 +    const UDataInfo *pInfo;
   1.417 +    int32_t headerSize;
   1.418 +
   1.419 +    const uint8_t *inBytes;
   1.420 +    uint8_t *outBytes;
   1.421 +
   1.422 +    const int32_t *inIndexes;
   1.423 +    int32_t indexes[16];
   1.424 +
   1.425 +    int32_t i, offset, count, size;
   1.426 +
   1.427 +    /* udata_swapDataHeader checks the arguments */
   1.428 +    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1.429 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1.430 +        return 0;
   1.431 +    }
   1.432 +
   1.433 +    /* check data format and format version */
   1.434 +    pInfo=(const UDataInfo *)((const char *)inData+4);
   1.435 +    if(!(
   1.436 +        pInfo->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
   1.437 +        pInfo->dataFormat[1]==UBIDI_FMT_1 &&
   1.438 +        pInfo->dataFormat[2]==UBIDI_FMT_2 &&
   1.439 +        pInfo->dataFormat[3]==UBIDI_FMT_3 &&
   1.440 +        ((pInfo->formatVersion[0]==1 &&
   1.441 +          pInfo->formatVersion[2]==UTRIE_SHIFT &&
   1.442 +          pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
   1.443 +         pInfo->formatVersion[0]==2)
   1.444 +    )) {
   1.445 +        udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
   1.446 +                         pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.447 +                         pInfo->dataFormat[2], pInfo->dataFormat[3],
   1.448 +                         pInfo->formatVersion[0]);
   1.449 +        *pErrorCode=U_UNSUPPORTED_ERROR;
   1.450 +        return 0;
   1.451 +    }
   1.452 +
   1.453 +    inBytes=(const uint8_t *)inData+headerSize;
   1.454 +    outBytes=(uint8_t *)outData+headerSize;
   1.455 +
   1.456 +    inIndexes=(const int32_t *)inBytes;
   1.457 +
   1.458 +    if(length>=0) {
   1.459 +        length-=headerSize;
   1.460 +        if(length<16*4) {
   1.461 +            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
   1.462 +                             length);
   1.463 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.464 +            return 0;
   1.465 +        }
   1.466 +    }
   1.467 +
   1.468 +    /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
   1.469 +    for(i=0; i<16; ++i) {
   1.470 +        indexes[i]=udata_readInt32(ds, inIndexes[i]);
   1.471 +    }
   1.472 +
   1.473 +    /* get the total length of the data */
   1.474 +    size=indexes[UBIDI_IX_LENGTH];
   1.475 +
   1.476 +    if(length>=0) {
   1.477 +        if(length<size) {
   1.478 +            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
   1.479 +                             length);
   1.480 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.481 +            return 0;
   1.482 +        }
   1.483 +
   1.484 +        /* copy the data for inaccessible bytes */
   1.485 +        if(inBytes!=outBytes) {
   1.486 +            uprv_memcpy(outBytes, inBytes, size);
   1.487 +        }
   1.488 +
   1.489 +        offset=0;
   1.490 +
   1.491 +        /* swap the int32_t indexes[] */
   1.492 +        count=indexes[UBIDI_IX_INDEX_TOP]*4;
   1.493 +        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
   1.494 +        offset+=count;
   1.495 +
   1.496 +        /* swap the UTrie */
   1.497 +        count=indexes[UBIDI_IX_TRIE_SIZE];
   1.498 +        utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   1.499 +        offset+=count;
   1.500 +
   1.501 +        /* swap the uint32_t mirrors[] */
   1.502 +        count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
   1.503 +        ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   1.504 +        offset+=count;
   1.505 +
   1.506 +        /* just skip the uint8_t jgArray[] */
   1.507 +        count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
   1.508 +        offset+=count;
   1.509 +
   1.510 +        U_ASSERT(offset==size);
   1.511 +    }
   1.512 +
   1.513 +    return headerSize+size;
   1.514 +}
   1.515 +
   1.516 +/* Unicode normalization data swapping -------------------------------------- */
   1.517 +
   1.518 +#if !UCONFIG_NO_NORMALIZATION
   1.519 +
   1.520 +static int32_t U_CALLCONV
   1.521 +unorm_swap(const UDataSwapper *ds,
   1.522 +           const void *inData, int32_t length, void *outData,
   1.523 +           UErrorCode *pErrorCode) {
   1.524 +    const UDataInfo *pInfo;
   1.525 +    int32_t headerSize;
   1.526 +
   1.527 +    const uint8_t *inBytes;
   1.528 +    uint8_t *outBytes;
   1.529 +
   1.530 +    const int32_t *inIndexes;
   1.531 +    int32_t indexes[32];
   1.532 +
   1.533 +    int32_t i, offset, count, size;
   1.534 +
   1.535 +    /* udata_swapDataHeader checks the arguments */
   1.536 +    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1.537 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1.538 +        return 0;
   1.539 +    }
   1.540 +
   1.541 +    /* check data format and format version */
   1.542 +    pInfo=(const UDataInfo *)((const char *)inData+4);
   1.543 +    if(!(
   1.544 +        pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
   1.545 +        pInfo->dataFormat[1]==0x6f &&
   1.546 +        pInfo->dataFormat[2]==0x72 &&
   1.547 +        pInfo->dataFormat[3]==0x6d &&
   1.548 +        pInfo->formatVersion[0]==2
   1.549 +    )) {
   1.550 +        udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
   1.551 +                         pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.552 +                         pInfo->dataFormat[2], pInfo->dataFormat[3],
   1.553 +                         pInfo->formatVersion[0]);
   1.554 +        *pErrorCode=U_UNSUPPORTED_ERROR;
   1.555 +        return 0;
   1.556 +    }
   1.557 +
   1.558 +    inBytes=(const uint8_t *)inData+headerSize;
   1.559 +    outBytes=(uint8_t *)outData+headerSize;
   1.560 +
   1.561 +    inIndexes=(const int32_t *)inBytes;
   1.562 +
   1.563 +    if(length>=0) {
   1.564 +        length-=headerSize;
   1.565 +        if(length<32*4) {
   1.566 +            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
   1.567 +                             length);
   1.568 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.569 +            return 0;
   1.570 +        }
   1.571 +    }
   1.572 +
   1.573 +    /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
   1.574 +    for(i=0; i<32; ++i) {
   1.575 +        indexes[i]=udata_readInt32(ds, inIndexes[i]);
   1.576 +    }
   1.577 +
   1.578 +    /* calculate the total length of the data */
   1.579 +    size=
   1.580 +        32*4+ /* size of indexes[] */
   1.581 +        indexes[_NORM_INDEX_TRIE_SIZE]+
   1.582 +        indexes[_NORM_INDEX_UCHAR_COUNT]*2+
   1.583 +        indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
   1.584 +        indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
   1.585 +        indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
   1.586 +        indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
   1.587 +
   1.588 +    if(length>=0) {
   1.589 +        if(length<size) {
   1.590 +            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
   1.591 +                             length);
   1.592 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.593 +            return 0;
   1.594 +        }
   1.595 +
   1.596 +        /* copy the data for inaccessible bytes */
   1.597 +        if(inBytes!=outBytes) {
   1.598 +            uprv_memcpy(outBytes, inBytes, size);
   1.599 +        }
   1.600 +
   1.601 +        offset=0;
   1.602 +
   1.603 +        /* swap the indexes[] */
   1.604 +        count=32*4;
   1.605 +        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
   1.606 +        offset+=count;
   1.607 +
   1.608 +        /* swap the main UTrie */
   1.609 +        count=indexes[_NORM_INDEX_TRIE_SIZE];
   1.610 +        utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   1.611 +        offset+=count;
   1.612 +
   1.613 +        /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
   1.614 +        count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
   1.615 +        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   1.616 +        offset+=count;
   1.617 +
   1.618 +        /* swap the FCD UTrie */
   1.619 +        count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
   1.620 +        if(count!=0) {
   1.621 +            utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   1.622 +            offset+=count;
   1.623 +        }
   1.624 +
   1.625 +        /* swap the aux UTrie */
   1.626 +        count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
   1.627 +        if(count!=0) {
   1.628 +            utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   1.629 +            offset+=count;
   1.630 +        }
   1.631 +
   1.632 +        /* swap the uint16_t combiningTable[] */
   1.633 +        count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
   1.634 +        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   1.635 +        offset+=count;
   1.636 +    }
   1.637 +
   1.638 +    return headerSize+size;
   1.639 +}
   1.640 +
   1.641 +#endif
   1.642 +
   1.643 +/* Swap 'Test' data from gentest */
   1.644 +static int32_t U_CALLCONV
   1.645 +test_swap(const UDataSwapper *ds,
   1.646 +           const void *inData, int32_t length, void *outData,
   1.647 +           UErrorCode *pErrorCode) {
   1.648 +    const UDataInfo *pInfo;
   1.649 +    int32_t headerSize;
   1.650 +
   1.651 +    const uint8_t *inBytes;
   1.652 +    uint8_t *outBytes;
   1.653 +
   1.654 +    int32_t offset;
   1.655 +
   1.656 +    /* udata_swapDataHeader checks the arguments */
   1.657 +    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1.658 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1.659 +        udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL");
   1.660 +        return 0;
   1.661 +    }
   1.662 +
   1.663 +    /* check data format and format version */
   1.664 +    pInfo=(const UDataInfo *)((const char *)inData+4);
   1.665 +    if(!(
   1.666 +        pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Norm" */
   1.667 +        pInfo->dataFormat[1]==0x65 &&
   1.668 +        pInfo->dataFormat[2]==0x73 &&
   1.669 +        pInfo->dataFormat[3]==0x74 &&
   1.670 +        pInfo->formatVersion[0]==1
   1.671 +    )) {
   1.672 +        udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
   1.673 +                         pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.674 +                         pInfo->dataFormat[2], pInfo->dataFormat[3],
   1.675 +                         pInfo->formatVersion[0]);
   1.676 +        *pErrorCode=U_UNSUPPORTED_ERROR;
   1.677 +        return 0;
   1.678 +    }
   1.679 +
   1.680 +    inBytes=(const uint8_t *)inData+headerSize;
   1.681 +    outBytes=(uint8_t *)outData+headerSize;
   1.682 +
   1.683 +    int32_t size16 = 2; // 16bit plus padding
   1.684 +    int32_t sizeStr = 5; // 4 char inv-str plus null
   1.685 +    int32_t size = size16 + sizeStr;
   1.686 +
   1.687 +    if(length>=0) {
   1.688 +        if(length<size) {
   1.689 +            udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
   1.690 +                             length, size);
   1.691 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.692 +            return 0;
   1.693 +        }
   1.694 +
   1.695 +	offset =0;
   1.696 +	/* swap a 1 entry array */
   1.697 +        ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
   1.698 +	offset+=size16;
   1.699 +	ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
   1.700 +    }
   1.701 +
   1.702 +    return headerSize+size;
   1.703 +}
   1.704 +
   1.705 +/* swap any data (except a .dat package) ------------------------------------ */
   1.706 +
   1.707 +static const struct {
   1.708 +    uint8_t dataFormat[4];
   1.709 +    UDataSwapFn *swapFn;
   1.710 +} swapFns[]={
   1.711 +    { { 0x52, 0x65, 0x73, 0x42 }, ures_swap },          /* dataFormat="ResB" */
   1.712 +#if !UCONFIG_NO_LEGACY_CONVERSION
   1.713 +    { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap },          /* dataFormat="cnvt" */
   1.714 +#endif
   1.715 +#if !UCONFIG_NO_CONVERSION
   1.716 +    { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases },   /* dataFormat="CvAl" */
   1.717 +#endif
   1.718 +#if !UCONFIG_NO_IDNA
   1.719 +    { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap },        /* dataFormat="SPRP" */
   1.720 +#endif
   1.721 +    /* insert data formats here, descending by expected frequency of occurrence */
   1.722 +    { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap },        /* dataFormat="UPro" */
   1.723 +
   1.724 +    { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
   1.725 +                                  ucase_swap },         /* dataFormat="cAsE" */
   1.726 +
   1.727 +    { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
   1.728 +                                  ubidi_swap },         /* dataFormat="BiDi" */
   1.729 +
   1.730 +#if !UCONFIG_NO_NORMALIZATION
   1.731 +    { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap },         /* dataFormat="Norm" */
   1.732 +    { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap },        /* dataFormat="Nrm2" */
   1.733 +#endif
   1.734 +#if !UCONFIG_NO_COLLATION
   1.735 +    { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap },          /* dataFormat="UCol" */
   1.736 +    { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
   1.737 +#endif
   1.738 +#if !UCONFIG_NO_BREAK_ITERATION
   1.739 +    { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap },          /* dataFormat="Brk " */
   1.740 +    { { 0x44, 0x69, 0x63, 0x74 }, udict_swap },         /* dataFormat="Dict" */
   1.741 +#endif
   1.742 +    { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap },        /* dataFormat="pnam" */
   1.743 +    { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames },    /* dataFormat="unam" */
   1.744 +#if !UCONFIG_NO_NORMALIZATION
   1.745 +    { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap },         /* dataFormat="Cfu " */
   1.746 +#endif
   1.747 +    { { 0x54, 0x65, 0x73, 0x74 }, test_swap }            /* dataFormat="Test" */
   1.748 +};
   1.749 +
   1.750 +U_CAPI int32_t U_EXPORT2
   1.751 +udata_swap(const UDataSwapper *ds,
   1.752 +           const void *inData, int32_t length, void *outData,
   1.753 +           UErrorCode *pErrorCode) {
   1.754 +    char dataFormatChars[4];
   1.755 +    const UDataInfo *pInfo;
   1.756 +    int32_t i, swappedLength;
   1.757 +
   1.758 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1.759 +        return 0;
   1.760 +    }
   1.761 +
   1.762 +    /*
   1.763 +     * Preflight the header first; checks for illegal arguments, too.
   1.764 +     * Do not swap the header right away because the format-specific swapper
   1.765 +     * will swap it, get the headerSize again, and also use the header
   1.766 +     * information. Otherwise we would have to pass some of the information
   1.767 +     * and not be able to use the UDataSwapFn signature.
   1.768 +     */
   1.769 +    udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
   1.770 +
   1.771 +    /*
   1.772 +     * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
   1.773 +     * then we could check here for further known magic values and structures.
   1.774 +     */
   1.775 +    if(U_FAILURE(*pErrorCode)) {
   1.776 +        return 0; /* the data format was not recognized */
   1.777 +    }
   1.778 +
   1.779 +    pInfo=(const UDataInfo *)((const char *)inData+4);
   1.780 +
   1.781 +    {
   1.782 +        /* convert the data format from ASCII to Unicode to the system charset */
   1.783 +        UChar u[4]={
   1.784 +             pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.785 +             pInfo->dataFormat[2], pInfo->dataFormat[3]
   1.786 +        };
   1.787 +
   1.788 +        if(uprv_isInvariantUString(u, 4)) {
   1.789 +            u_UCharsToChars(u, dataFormatChars, 4);
   1.790 +        } else {
   1.791 +            dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
   1.792 +        }
   1.793 +    }
   1.794 +
   1.795 +    /* dispatch to the swap function for the dataFormat */
   1.796 +    for(i=0; i<LENGTHOF(swapFns); ++i) {
   1.797 +        if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
   1.798 +            swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
   1.799 +
   1.800 +            if(U_FAILURE(*pErrorCode)) {
   1.801 +                udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
   1.802 +                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.803 +                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
   1.804 +                                 dataFormatChars[0], dataFormatChars[1],
   1.805 +                                 dataFormatChars[2], dataFormatChars[3],
   1.806 +                                 u_errorName(*pErrorCode));
   1.807 +            } else if(swappedLength<(length-15)) {
   1.808 +                /* swapped less than expected */
   1.809 +                udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
   1.810 +                                 swappedLength, length,
   1.811 +                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.812 +                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
   1.813 +                                 dataFormatChars[0], dataFormatChars[1],
   1.814 +                                 dataFormatChars[2], dataFormatChars[3],
   1.815 +                                 u_errorName(*pErrorCode));
   1.816 +            }
   1.817 +
   1.818 +            return swappedLength;
   1.819 +        }
   1.820 +    }
   1.821 +
   1.822 +    /* the dataFormat was not recognized */
   1.823 +    udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
   1.824 +                     pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.825 +                     pInfo->dataFormat[2], pInfo->dataFormat[3],
   1.826 +                     dataFormatChars[0], dataFormatChars[1],
   1.827 +                     dataFormatChars[2], dataFormatChars[3]);
   1.828 +
   1.829 +    *pErrorCode=U_UNSUPPORTED_ERROR;
   1.830 +    return 0;
   1.831 +}

mercurial