intl/icu/source/tools/icuswap/icuswap.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/icuswap/icuswap.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,648 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2003-2007, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  icuswap.cpp
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2003aug08
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   This tool takes an ICU data file and "swaps" it, that is, changes its
    1.20 +*   platform properties between big-/little-endianness and ASCII/EBCDIC charset
    1.21 +*   families.
    1.22 +*   The modified data file is written to a new file.
    1.23 +*   Useful as an install-time tool for shipping only one flavor of ICU data
    1.24 +*   and preparing data files for the target platform.
    1.25 +*   Will not work with data DLLs (shared libraries).
    1.26 +*/
    1.27 +
    1.28 +#include "unicode/utypes.h"
    1.29 +#include "unicode/putil.h"
    1.30 +#include "unicode/udata.h"
    1.31 +#include "cmemory.h"
    1.32 +#include "cstring.h"
    1.33 +#include "uinvchar.h"
    1.34 +#include "uarrsort.h"
    1.35 +#include "ucmndata.h"
    1.36 +#include "udataswp.h"
    1.37 +#include "swapimpl.h"
    1.38 +#include "toolutil.h"
    1.39 +#include "uoptions.h"
    1.40 +
    1.41 +#include <stdio.h>
    1.42 +#include <stdlib.h>
    1.43 +#include <string.h>
    1.44 +
    1.45 +/* definitions */
    1.46 +
    1.47 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    1.48 +#define DEFAULT_PADDING_LENGTH 15
    1.49 +
    1.50 +static UOption options[]={
    1.51 +    UOPTION_HELP_H,
    1.52 +    UOPTION_HELP_QUESTION_MARK,
    1.53 +    UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG)
    1.54 +};
    1.55 +
    1.56 +enum {
    1.57 +    OPT_HELP_H,
    1.58 +    OPT_HELP_QUESTION_MARK,
    1.59 +    OPT_OUT_TYPE
    1.60 +};
    1.61 +
    1.62 +static int32_t
    1.63 +fileSize(FILE *f) {
    1.64 +    int32_t size;
    1.65 +
    1.66 +    fseek(f, 0, SEEK_END);
    1.67 +    size=(int32_t)ftell(f);
    1.68 +    fseek(f, 0, SEEK_SET);
    1.69 +    return size;
    1.70 +}
    1.71 +
    1.72 +/**
    1.73 + * Swap an ICU .dat package, including swapping of enclosed items.
    1.74 + */
    1.75 +U_CFUNC int32_t U_CALLCONV
    1.76 +udata_swapPackage(const char *inFilename, const char *outFilename,
    1.77 +                  const UDataSwapper *ds,
    1.78 +                  const void *inData, int32_t length, void *outData,
    1.79 +                  UErrorCode *pErrorCode);
    1.80 +
    1.81 +U_CDECL_BEGIN
    1.82 +static void U_CALLCONV
    1.83 +printError(void *context, const char *fmt, va_list args) {
    1.84 +    vfprintf((FILE *)context, fmt, args);
    1.85 +}
    1.86 +U_CDECL_END
    1.87 +
    1.88 +static int
    1.89 +printUsage(const char *pname, UBool ishelp) {
    1.90 +    fprintf(stderr,
    1.91 +            "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n",
    1.92 +            ishelp ? 'U' : 'u', pname);
    1.93 +    if(ishelp) {
    1.94 +        fprintf(stderr,
    1.95 +              "\nOptions: -h, -?, --help    print this message and exit\n"
    1.96 +                "         Read the input file, swap its platform properties according\n"
    1.97 +                "         to the -t or --type option, and write the result to the output file.\n"
    1.98 +                "         -tl               change to little-endian/ASCII charset family\n"
    1.99 +                "         -tb               change to big-endian/ASCII charset family\n"
   1.100 +                "         -te               change to big-endian/EBCDIC charset family\n");
   1.101 +    }
   1.102 +
   1.103 +    return !ishelp;
   1.104 +}
   1.105 +
   1.106 +extern int
   1.107 +main(int argc, char *argv[]) {
   1.108 +    FILE *in, *out;
   1.109 +    const char *pname;
   1.110 +    char *data;
   1.111 +    int32_t length;
   1.112 +    UBool ishelp;
   1.113 +    int rc;
   1.114 +
   1.115 +    UDataSwapper *ds;
   1.116 +    const UDataInfo *pInfo;
   1.117 +    UErrorCode errorCode;
   1.118 +    uint8_t outCharset;
   1.119 +    UBool outIsBigEndian;
   1.120 +
   1.121 +    U_MAIN_INIT_ARGS(argc, argv);
   1.122 +
   1.123 +    fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n");
   1.124 +
   1.125 +    /* get the program basename */
   1.126 +    pname=strrchr(argv[0], U_FILE_SEP_CHAR);
   1.127 +    if(pname==NULL) {
   1.128 +        pname=strrchr(argv[0], '/');
   1.129 +    }
   1.130 +    if(pname!=NULL) {
   1.131 +        ++pname;
   1.132 +    } else {
   1.133 +        pname=argv[0];
   1.134 +    }
   1.135 +
   1.136 +    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
   1.137 +    ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
   1.138 +    if(ishelp || argc!=3) {
   1.139 +        return printUsage(pname, ishelp);
   1.140 +    }
   1.141 +
   1.142 +    /* parse the output type option */
   1.143 +    data=(char *)options[OPT_OUT_TYPE].value;
   1.144 +    if(data[0]==0 || data[1]!=0) {
   1.145 +        /* the type must be exactly one letter */
   1.146 +        return printUsage(pname, FALSE);
   1.147 +    }
   1.148 +    switch(data[0]) {
   1.149 +    case 'l':
   1.150 +        outIsBigEndian=FALSE;
   1.151 +        outCharset=U_ASCII_FAMILY;
   1.152 +        break;
   1.153 +    case 'b':
   1.154 +        outIsBigEndian=TRUE;
   1.155 +        outCharset=U_ASCII_FAMILY;
   1.156 +        break;
   1.157 +    case 'e':
   1.158 +        outIsBigEndian=TRUE;
   1.159 +        outCharset=U_EBCDIC_FAMILY;
   1.160 +        break;
   1.161 +    default:
   1.162 +        return printUsage(pname, FALSE);
   1.163 +    }
   1.164 +
   1.165 +    in=out=NULL;
   1.166 +    data=NULL;
   1.167 +
   1.168 +    /* open the input file, get its length, allocate memory for it, read the file */
   1.169 +    in=fopen(argv[1], "rb");
   1.170 +    if(in==NULL) {
   1.171 +        fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]);
   1.172 +        rc=2;
   1.173 +        goto done;
   1.174 +    }
   1.175 +
   1.176 +    length=fileSize(in);
   1.177 +    if(length<DEFAULT_PADDING_LENGTH) {
   1.178 +        fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]);
   1.179 +        rc=2;
   1.180 +        goto done;
   1.181 +    }
   1.182 +
   1.183 +    /*
   1.184 +     * +15: udata_swapPackage() may need to add a few padding bytes to the
   1.185 +     * last item if charset swapping is done,
   1.186 +     * because the last item may be resorted into the middle and then needs
   1.187 +     * additional padding bytes
   1.188 +     */
   1.189 +    data=(char *)malloc(length+DEFAULT_PADDING_LENGTH);
   1.190 +    if(data==NULL) {
   1.191 +        fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]);
   1.192 +        rc=2;
   1.193 +        goto done;
   1.194 +    }
   1.195 +
   1.196 +    /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
   1.197 +    uprv_memset(data+length-DEFAULT_PADDING_LENGTH, 0xaa, DEFAULT_PADDING_LENGTH);
   1.198 +
   1.199 +    if(length!=(int32_t)fread(data, 1, length, in)) {
   1.200 +        fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]);
   1.201 +        rc=3;
   1.202 +        goto done;
   1.203 +    }
   1.204 +
   1.205 +    fclose(in);
   1.206 +    in=NULL;
   1.207 +
   1.208 +    /* swap the data in-place */
   1.209 +    errorCode=U_ZERO_ERROR;
   1.210 +    ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode);
   1.211 +    if(U_FAILURE(errorCode)) {
   1.212 +        fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
   1.213 +                pname, argv[1], u_errorName(errorCode));
   1.214 +        rc=4;
   1.215 +        goto done;
   1.216 +    }
   1.217 +
   1.218 +    ds->printError=printError;
   1.219 +    ds->printErrorContext=stderr;
   1.220 +
   1.221 +    /* speculative cast, protected by the following length check */
   1.222 +    pInfo=(const UDataInfo *)((const char *)data+4);
   1.223 +
   1.224 +    if( length>=20 &&
   1.225 +        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
   1.226 +        pInfo->dataFormat[1]==0x6d &&
   1.227 +        pInfo->dataFormat[2]==0x6e &&
   1.228 +        pInfo->dataFormat[3]==0x44
   1.229 +    ) {
   1.230 +        /*
   1.231 +         * swap the .dat package
   1.232 +         * udata_swapPackage() needs to rename ToC name entries from the old package
   1.233 +         * name to the new one.
   1.234 +         * We pass it the filenames, and udata_swapPackage() will extract the
   1.235 +         * package names.
   1.236 +         */
   1.237 +        length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode);
   1.238 +        udata_closeSwapper(ds);
   1.239 +        if(U_FAILURE(errorCode)) {
   1.240 +            fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n",
   1.241 +                    pname, argv[1], u_errorName(errorCode));
   1.242 +            rc=4;
   1.243 +            goto done;
   1.244 +        }
   1.245 +    } else {
   1.246 +        /* swap the data, which is not a .dat package */
   1.247 +        length=udata_swap(ds, data, length, data, &errorCode);
   1.248 +        udata_closeSwapper(ds);
   1.249 +        if(U_FAILURE(errorCode)) {
   1.250 +            fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n",
   1.251 +                    pname, argv[1], u_errorName(errorCode));
   1.252 +            rc=4;
   1.253 +            goto done;
   1.254 +        }
   1.255 +    }
   1.256 +
   1.257 +    out=fopen(argv[2], "wb");
   1.258 +    if(out==NULL) {
   1.259 +        fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]);
   1.260 +        rc=5;
   1.261 +        goto done;
   1.262 +    }
   1.263 +
   1.264 +    if(length!=(int32_t)fwrite(data, 1, length, out)) {
   1.265 +        fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]);
   1.266 +        rc=6;
   1.267 +        goto done;
   1.268 +    }
   1.269 +
   1.270 +    fclose(out);
   1.271 +    out=NULL;
   1.272 +
   1.273 +    /* all done */
   1.274 +    rc=0;
   1.275 +
   1.276 +done:
   1.277 +    if(in!=NULL) {
   1.278 +        fclose(in);
   1.279 +    }
   1.280 +    if(out!=NULL) {
   1.281 +        fclose(out);
   1.282 +    }
   1.283 +    if(data!=NULL) {
   1.284 +        free(data);
   1.285 +    }
   1.286 +    return rc;
   1.287 +}
   1.288 +
   1.289 +/* swap .dat package files -------------------------------------------------- */
   1.290 +
   1.291 +static int32_t
   1.292 +extractPackageName(const UDataSwapper *ds, const char *filename,
   1.293 +                   char pkg[], int32_t capacity,
   1.294 +                   UErrorCode *pErrorCode) {
   1.295 +    const char *basename;
   1.296 +    int32_t len;
   1.297 +
   1.298 +    if(U_FAILURE(*pErrorCode)) {
   1.299 +        return 0;
   1.300 +    }
   1.301 +
   1.302 +    basename=findBasename(filename);
   1.303 +    len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */
   1.304 +
   1.305 +    if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) {
   1.306 +        udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
   1.307 +                         basename);
   1.308 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.309 +        return 0;
   1.310 +    }
   1.311 +
   1.312 +    if(len>=capacity) {
   1.313 +        udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
   1.314 +                         (long)capacity);
   1.315 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.316 +        return 0;
   1.317 +    }
   1.318 +
   1.319 +    uprv_memcpy(pkg, basename, len);
   1.320 +    pkg[len]=0;
   1.321 +    return len;
   1.322 +}
   1.323 +
   1.324 +struct ToCEntry {
   1.325 +    uint32_t nameOffset, inOffset, outOffset, length;
   1.326 +};
   1.327 +
   1.328 +U_CDECL_BEGIN
   1.329 +static int32_t U_CALLCONV
   1.330 +compareToCEntries(const void *context, const void *left, const void *right) {
   1.331 +    const char *chars=(const char *)context;
   1.332 +    return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset,
   1.333 +                                chars+((const ToCEntry *)right)->nameOffset);
   1.334 +}
   1.335 +U_CDECL_END
   1.336 +
   1.337 +U_CFUNC int32_t U_CALLCONV
   1.338 +udata_swapPackage(const char *inFilename, const char *outFilename,
   1.339 +                  const UDataSwapper *ds,
   1.340 +                  const void *inData, int32_t length, void *outData,
   1.341 +                  UErrorCode *pErrorCode) {
   1.342 +    const UDataInfo *pInfo;
   1.343 +    int32_t headerSize;
   1.344 +
   1.345 +    const uint8_t *inBytes;
   1.346 +    uint8_t *outBytes;
   1.347 +
   1.348 +    uint32_t itemCount, offset, i;
   1.349 +    int32_t itemLength;
   1.350 +
   1.351 +    const UDataOffsetTOCEntry *inEntries;
   1.352 +    UDataOffsetTOCEntry *outEntries;
   1.353 +
   1.354 +    ToCEntry *table;
   1.355 +
   1.356 +    char inPkgName[32], outPkgName[32];
   1.357 +    int32_t inPkgNameLength, outPkgNameLength;
   1.358 +
   1.359 +    /* udata_swapDataHeader checks the arguments */
   1.360 +    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1.361 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1.362 +        return 0;
   1.363 +    }
   1.364 +
   1.365 +    /* check data format and format version */
   1.366 +    pInfo=(const UDataInfo *)((const char *)inData+4);
   1.367 +    if(!(
   1.368 +        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
   1.369 +        pInfo->dataFormat[1]==0x6d &&
   1.370 +        pInfo->dataFormat[2]==0x6e &&
   1.371 +        pInfo->dataFormat[3]==0x44 &&
   1.372 +        pInfo->formatVersion[0]==1
   1.373 +    )) {
   1.374 +        udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
   1.375 +                         pInfo->dataFormat[0], pInfo->dataFormat[1],
   1.376 +                         pInfo->dataFormat[2], pInfo->dataFormat[3],
   1.377 +                         pInfo->formatVersion[0]);
   1.378 +        *pErrorCode=U_UNSUPPORTED_ERROR;
   1.379 +        return 0;
   1.380 +    }
   1.381 +
   1.382 +    /*
   1.383 +     * We need to change the ToC name entries so that they have the correct
   1.384 +     * package name prefix.
   1.385 +     * Extract the package names from the in/out filenames.
   1.386 +     */
   1.387 +    inPkgNameLength=extractPackageName(
   1.388 +                        ds, inFilename,
   1.389 +                        inPkgName, (int32_t)sizeof(inPkgName),
   1.390 +                        pErrorCode);
   1.391 +    outPkgNameLength=extractPackageName(
   1.392 +                        ds, outFilename,
   1.393 +                        outPkgName, (int32_t)sizeof(outPkgName),
   1.394 +                        pErrorCode);
   1.395 +    if(U_FAILURE(*pErrorCode)) {
   1.396 +        return 0;
   1.397 +    }
   1.398 +
   1.399 +    /*
   1.400 +     * It is possible to work with inPkgNameLength!=outPkgNameLength,
   1.401 +     * but then the length of the data file would change more significantly,
   1.402 +     * which we are not currently prepared for.
   1.403 +     */
   1.404 +    if(inPkgNameLength!=outPkgNameLength) {
   1.405 +        udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
   1.406 +                         inPkgName, outPkgName);
   1.407 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1.408 +        return 0;
   1.409 +    }
   1.410 +
   1.411 +    inBytes=(const uint8_t *)inData+headerSize;
   1.412 +    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
   1.413 +
   1.414 +    if(length<0) {
   1.415 +        /* preflighting */
   1.416 +        itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
   1.417 +        if(itemCount==0) {
   1.418 +            /* no items: count only the item count and return */
   1.419 +            return headerSize+4;
   1.420 +        }
   1.421 +
   1.422 +        /* read the last item's offset and preflight it */
   1.423 +        offset=ds->readUInt32(inEntries[itemCount-1].dataOffset);
   1.424 +        itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode);
   1.425 +
   1.426 +        if(U_SUCCESS(*pErrorCode)) {
   1.427 +            return headerSize+offset+(uint32_t)itemLength;
   1.428 +        } else {
   1.429 +            return 0;
   1.430 +        }
   1.431 +    } else {
   1.432 +        /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
   1.433 +        length-=headerSize;
   1.434 +        if(length<4) {
   1.435 +            /* itemCount does not fit */
   1.436 +            offset=0xffffffff;
   1.437 +            itemCount=0; /* make compilers happy */
   1.438 +        } else {
   1.439 +            itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
   1.440 +            if(itemCount==0) {
   1.441 +                offset=4;
   1.442 +            } else if((uint32_t)length<(4+8*itemCount)) {
   1.443 +                /* ToC table does not fit */
   1.444 +                offset=0xffffffff;
   1.445 +            } else {
   1.446 +                /* offset of the last item plus at least 20 bytes for its header */
   1.447 +                offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset);
   1.448 +            }
   1.449 +        }
   1.450 +        if((uint32_t)length<offset) {
   1.451 +            udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n",
   1.452 +                             length);
   1.453 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1.454 +            return 0;
   1.455 +        }
   1.456 +
   1.457 +        outBytes=(uint8_t *)outData+headerSize;
   1.458 +
   1.459 +        /* swap the item count */
   1.460 +        ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode);
   1.461 +
   1.462 +        if(itemCount==0) {
   1.463 +            /* no items: just return now */
   1.464 +            return headerSize+4;
   1.465 +        }
   1.466 +
   1.467 +        /* swap the item name strings */
   1.468 +        offset=4+8*itemCount;
   1.469 +        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset);
   1.470 +        udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode);
   1.471 +        if(U_FAILURE(*pErrorCode)) {
   1.472 +            udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n");
   1.473 +            return 0;
   1.474 +        }
   1.475 +        /* keep offset and itemLength in case we allocate and copy the strings below */
   1.476 +
   1.477 +        /* swap the package names into the output charset */
   1.478 +        if(ds->outCharset!=U_CHARSET_FAMILY) {
   1.479 +            UDataSwapper *ds2;
   1.480 +            ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode);
   1.481 +            ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode);
   1.482 +            ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode);
   1.483 +            udata_closeSwapper(ds2);
   1.484 +            if(U_FAILURE(*pErrorCode)) {
   1.485 +                udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n");
   1.486 +            }
   1.487 +        }
   1.488 +
   1.489 +        /* change the prefix of each ToC entry name from the old to the new package name */
   1.490 +        {
   1.491 +            char *entryName;
   1.492 +
   1.493 +            for(i=0; i<itemCount; ++i) {
   1.494 +                entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset);
   1.495 +
   1.496 +                if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) {
   1.497 +                    uprv_memcpy(entryName, outPkgName, inPkgNameLength);
   1.498 +                } else {
   1.499 +                    udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
   1.500 +                                     (long)i);
   1.501 +                    *pErrorCode=U_INVALID_FORMAT_ERROR;
   1.502 +                    return 0;
   1.503 +                }
   1.504 +            }
   1.505 +        }
   1.506 +
   1.507 +        /*
   1.508 +         * Allocate the ToC table and, if necessary, a temporary buffer for
   1.509 +         * pseudo-in-place swapping.
   1.510 +         *
   1.511 +         * We cannot swap in-place because:
   1.512 +         *
   1.513 +         * 1. If the swapping of an item fails mid-way, then in-place swapping
   1.514 +         * has destroyed its data.
   1.515 +         * Out-of-place swapping allows us to then copy its original data.
   1.516 +         *
   1.517 +         * 2. If swapping changes the charset family, then we must resort
   1.518 +         * not only the ToC table but also the data items themselves.
   1.519 +         * This requires a permutation and is best done with separate in/out
   1.520 +         * buffers.
   1.521 +         *
   1.522 +         * We swapped the strings above to avoid the malloc below if string swapping fails.
   1.523 +         */
   1.524 +        if(inData==outData) {
   1.525 +            /* +15: prepare for extra padding of a newly-last item */
   1.526 +            table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH);
   1.527 +            if(table!=NULL) {
   1.528 +                outBytes=(uint8_t *)(table+itemCount);
   1.529 +
   1.530 +                /* copy the item count and the swapped strings */
   1.531 +                uprv_memcpy(outBytes, inBytes, 4);
   1.532 +                uprv_memcpy(outBytes+offset, inBytes+offset, itemLength);
   1.533 +            }
   1.534 +        } else {
   1.535 +            table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry));
   1.536 +        }
   1.537 +        if(table==NULL) {
   1.538 +            udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n",
   1.539 +                             inData==outData ?
   1.540 +                                 itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH :
   1.541 +                                 itemCount*sizeof(ToCEntry));
   1.542 +            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1.543 +            return 0;
   1.544 +        }
   1.545 +        outEntries=(UDataOffsetTOCEntry *)(outBytes+4);
   1.546 +
   1.547 +        /* read the ToC table */
   1.548 +        for(i=0; i<itemCount; ++i) {
   1.549 +            table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset);
   1.550 +            table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset);
   1.551 +            if(i>0) {
   1.552 +                table[i-1].length=table[i].inOffset-table[i-1].inOffset;
   1.553 +            }
   1.554 +        }
   1.555 +        table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset;
   1.556 +
   1.557 +        if(ds->inCharset==ds->outCharset) {
   1.558 +            /* no charset swapping, no resorting: keep item offsets the same */
   1.559 +            for(i=0; i<itemCount; ++i) {
   1.560 +                table[i].outOffset=table[i].inOffset;
   1.561 +            }
   1.562 +        } else {
   1.563 +            /* charset swapping: resort items by their swapped names */
   1.564 +
   1.565 +            /*
   1.566 +             * Before the actual sorting, we need to make sure that each item
   1.567 +             * has a length that is a multiple of 16 bytes so that all items
   1.568 +             * are 16-aligned.
   1.569 +             * Only the old last item may be missing up to 15 padding bytes.
   1.570 +             * Add padding bytes for it.
   1.571 +             * Since the icuswap main() function has already allocated enough
   1.572 +             * input buffer space and set the last 15 bytes there to 0xaa,
   1.573 +             * we only need to increase the total data length and the length
   1.574 +             * of the last item here.
   1.575 +             */
   1.576 +            if((length&0xf)!=0) {
   1.577 +                int32_t delta=16-(length&0xf);
   1.578 +                length+=delta;
   1.579 +                table[itemCount-1].length+=(uint32_t)delta;
   1.580 +            }
   1.581 +
   1.582 +            /* Save the offset before we sort the TOC. */
   1.583 +            offset=table[0].inOffset;
   1.584 +            /* sort the TOC entries */
   1.585 +            uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry),
   1.586 +                           compareToCEntries, outBytes, FALSE, pErrorCode);
   1.587 +
   1.588 +            /*
   1.589 +             * Note: Before sorting, the inOffset values were in order.
   1.590 +             * Now the outOffset values are in order.
   1.591 +             */
   1.592 +
   1.593 +            /* assign outOffset values */
   1.594 +            for(i=0; i<itemCount; ++i) {
   1.595 +                table[i].outOffset=offset;
   1.596 +                offset+=table[i].length;
   1.597 +            }
   1.598 +        }
   1.599 +
   1.600 +        /* write the output ToC table */
   1.601 +        for(i=0; i<itemCount; ++i) {
   1.602 +            ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset);
   1.603 +            ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset);
   1.604 +        }
   1.605 +
   1.606 +        /* swap each data item */
   1.607 +        for(i=0; i<itemCount; ++i) {
   1.608 +            /* first copy the item bytes to make sure that unreachable bytes are copied */ 
   1.609 +            uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
   1.610 +
   1.611 +            /* swap the item */
   1.612 +            udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length,
   1.613 +                          outBytes+table[i].outOffset, pErrorCode);
   1.614 +
   1.615 +            if(U_FAILURE(*pErrorCode)) {
   1.616 +                if(ds->outCharset==U_CHARSET_FAMILY) {
   1.617 +                    udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
   1.618 +                                         "    at inOffset 0x%x length 0x%x - %s\n"
   1.619 +                                         "    the data item will be copied, not swapped\n\n",
   1.620 +                                     (char *)outBytes+table[i].nameOffset,
   1.621 +                                     table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
   1.622 +                } else {
   1.623 +                    udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n"
   1.624 +                                         "    at inOffset 0x%x length 0x%x - %s\n"
   1.625 +                                         "    the data item will be copied, not swapped\n\n",
   1.626 +                                     table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
   1.627 +                }
   1.628 +                /* reset the error code, copy the data item, and continue */
   1.629 +                *pErrorCode=U_ZERO_ERROR;
   1.630 +                uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
   1.631 +            }
   1.632 +        }
   1.633 +
   1.634 +        if(inData==outData) {
   1.635 +            /* copy the data from the temporary buffer to the in-place buffer */
   1.636 +            uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length);
   1.637 +        }
   1.638 +        uprv_free(table);
   1.639 +
   1.640 +        return headerSize+length;
   1.641 +    }
   1.642 +}
   1.643 +
   1.644 +/*
   1.645 + * Hey, Emacs, please set the following:
   1.646 + *
   1.647 + * Local Variables:
   1.648 + * indent-tabs-mode: nil
   1.649 + * End:
   1.650 + *
   1.651 + */

mercurial