intl/icu/source/tools/icuswap/icuswap.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2003-2007, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: icuswap.cpp
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2003aug08
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * This tool takes an ICU data file and "swaps" it, that is, changes its
michael@0 17 * platform properties between big-/little-endianness and ASCII/EBCDIC charset
michael@0 18 * families.
michael@0 19 * The modified data file is written to a new file.
michael@0 20 * Useful as an install-time tool for shipping only one flavor of ICU data
michael@0 21 * and preparing data files for the target platform.
michael@0 22 * Will not work with data DLLs (shared libraries).
michael@0 23 */
michael@0 24
michael@0 25 #include "unicode/utypes.h"
michael@0 26 #include "unicode/putil.h"
michael@0 27 #include "unicode/udata.h"
michael@0 28 #include "cmemory.h"
michael@0 29 #include "cstring.h"
michael@0 30 #include "uinvchar.h"
michael@0 31 #include "uarrsort.h"
michael@0 32 #include "ucmndata.h"
michael@0 33 #include "udataswp.h"
michael@0 34 #include "swapimpl.h"
michael@0 35 #include "toolutil.h"
michael@0 36 #include "uoptions.h"
michael@0 37
michael@0 38 #include <stdio.h>
michael@0 39 #include <stdlib.h>
michael@0 40 #include <string.h>
michael@0 41
michael@0 42 /* definitions */
michael@0 43
/*
 * LENGTHOF(array): number of elements of a true array (not a pointer),
 * as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves like a
 * single primary expression wherever it is used (for example as the operand
 * of sizeof or next to a postfix operator).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/*
 * Number of spare bytes that main() reserves after the input data so that
 * udata_swapPackage() can pad the last item up to a multiple of 16 bytes.
 */
#define DEFAULT_PADDING_LENGTH 15
michael@0 46
michael@0 47 static UOption options[]={
michael@0 48 UOPTION_HELP_H,
michael@0 49 UOPTION_HELP_QUESTION_MARK,
michael@0 50 UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG)
michael@0 51 };
michael@0 52
michael@0 53 enum {
michael@0 54 OPT_HELP_H,
michael@0 55 OPT_HELP_QUESTION_MARK,
michael@0 56 OPT_OUT_TYPE
michael@0 57 };
michael@0 58
/**
 * Determine the size of an open, seekable stream and rewind it.
 *
 * @param f stream to measure; on success its position is reset to the start
 * @return the size in bytes, or -1 if f is NULL, if seeking or telling fails,
 *         or if the size does not fit into an int32_t
 */
static int32_t
fileSize(FILE *f) {
    long size;

    if(f==NULL || fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }

    /* ftell() returns -1L on failure */
    size=ftell(f);

    if(fseek(f, 0, SEEK_SET)!=0 || size<0 || size>0x7fffffffL) {
        /* do not silently truncate large files to a bogus 32-bit length */
        return -1;
    }
    return (int32_t)size;
}
michael@0 68
michael@0 69 /**
michael@0 70 * Swap an ICU .dat package, including swapping of enclosed items.
michael@0 71 */
michael@0 72 U_CFUNC int32_t U_CALLCONV
michael@0 73 udata_swapPackage(const char *inFilename, const char *outFilename,
michael@0 74 const UDataSwapper *ds,
michael@0 75 const void *inData, int32_t length, void *outData,
michael@0 76 UErrorCode *pErrorCode);
michael@0 77
michael@0 78 U_CDECL_BEGIN
michael@0 79 static void U_CALLCONV
michael@0 80 printError(void *context, const char *fmt, va_list args) {
michael@0 81 vfprintf((FILE *)context, fmt, args);
michael@0 82 }
michael@0 83 U_CDECL_END
michael@0 84
michael@0 85 static int
michael@0 86 printUsage(const char *pname, UBool ishelp) {
michael@0 87 fprintf(stderr,
michael@0 88 "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n",
michael@0 89 ishelp ? 'U' : 'u', pname);
michael@0 90 if(ishelp) {
michael@0 91 fprintf(stderr,
michael@0 92 "\nOptions: -h, -?, --help print this message and exit\n"
michael@0 93 " Read the input file, swap its platform properties according\n"
michael@0 94 " to the -t or --type option, and write the result to the output file.\n"
michael@0 95 " -tl change to little-endian/ASCII charset family\n"
michael@0 96 " -tb change to big-endian/ASCII charset family\n"
michael@0 97 " -te change to big-endian/EBCDIC charset family\n");
michael@0 98 }
michael@0 99
michael@0 100 return !ishelp;
michael@0 101 }
michael@0 102
michael@0 103 extern int
michael@0 104 main(int argc, char *argv[]) {
michael@0 105 FILE *in, *out;
michael@0 106 const char *pname;
michael@0 107 char *data;
michael@0 108 int32_t length;
michael@0 109 UBool ishelp;
michael@0 110 int rc;
michael@0 111
michael@0 112 UDataSwapper *ds;
michael@0 113 const UDataInfo *pInfo;
michael@0 114 UErrorCode errorCode;
michael@0 115 uint8_t outCharset;
michael@0 116 UBool outIsBigEndian;
michael@0 117
michael@0 118 U_MAIN_INIT_ARGS(argc, argv);
michael@0 119
michael@0 120 fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n");
michael@0 121
michael@0 122 /* get the program basename */
michael@0 123 pname=strrchr(argv[0], U_FILE_SEP_CHAR);
michael@0 124 if(pname==NULL) {
michael@0 125 pname=strrchr(argv[0], '/');
michael@0 126 }
michael@0 127 if(pname!=NULL) {
michael@0 128 ++pname;
michael@0 129 } else {
michael@0 130 pname=argv[0];
michael@0 131 }
michael@0 132
michael@0 133 argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
michael@0 134 ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
michael@0 135 if(ishelp || argc!=3) {
michael@0 136 return printUsage(pname, ishelp);
michael@0 137 }
michael@0 138
michael@0 139 /* parse the output type option */
michael@0 140 data=(char *)options[OPT_OUT_TYPE].value;
michael@0 141 if(data[0]==0 || data[1]!=0) {
michael@0 142 /* the type must be exactly one letter */
michael@0 143 return printUsage(pname, FALSE);
michael@0 144 }
michael@0 145 switch(data[0]) {
michael@0 146 case 'l':
michael@0 147 outIsBigEndian=FALSE;
michael@0 148 outCharset=U_ASCII_FAMILY;
michael@0 149 break;
michael@0 150 case 'b':
michael@0 151 outIsBigEndian=TRUE;
michael@0 152 outCharset=U_ASCII_FAMILY;
michael@0 153 break;
michael@0 154 case 'e':
michael@0 155 outIsBigEndian=TRUE;
michael@0 156 outCharset=U_EBCDIC_FAMILY;
michael@0 157 break;
michael@0 158 default:
michael@0 159 return printUsage(pname, FALSE);
michael@0 160 }
michael@0 161
michael@0 162 in=out=NULL;
michael@0 163 data=NULL;
michael@0 164
michael@0 165 /* open the input file, get its length, allocate memory for it, read the file */
michael@0 166 in=fopen(argv[1], "rb");
michael@0 167 if(in==NULL) {
michael@0 168 fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]);
michael@0 169 rc=2;
michael@0 170 goto done;
michael@0 171 }
michael@0 172
michael@0 173 length=fileSize(in);
michael@0 174 if(length<DEFAULT_PADDING_LENGTH) {
michael@0 175 fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]);
michael@0 176 rc=2;
michael@0 177 goto done;
michael@0 178 }
michael@0 179
michael@0 180 /*
michael@0 181 * +15: udata_swapPackage() may need to add a few padding bytes to the
michael@0 182 * last item if charset swapping is done,
michael@0 183 * because the last item may be resorted into the middle and then needs
michael@0 184 * additional padding bytes
michael@0 185 */
michael@0 186 data=(char *)malloc(length+DEFAULT_PADDING_LENGTH);
michael@0 187 if(data==NULL) {
michael@0 188 fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]);
michael@0 189 rc=2;
michael@0 190 goto done;
michael@0 191 }
michael@0 192
michael@0 193 /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */
michael@0 194 uprv_memset(data+length-DEFAULT_PADDING_LENGTH, 0xaa, DEFAULT_PADDING_LENGTH);
michael@0 195
michael@0 196 if(length!=(int32_t)fread(data, 1, length, in)) {
michael@0 197 fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]);
michael@0 198 rc=3;
michael@0 199 goto done;
michael@0 200 }
michael@0 201
michael@0 202 fclose(in);
michael@0 203 in=NULL;
michael@0 204
michael@0 205 /* swap the data in-place */
michael@0 206 errorCode=U_ZERO_ERROR;
michael@0 207 ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode);
michael@0 208 if(U_FAILURE(errorCode)) {
michael@0 209 fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n",
michael@0 210 pname, argv[1], u_errorName(errorCode));
michael@0 211 rc=4;
michael@0 212 goto done;
michael@0 213 }
michael@0 214
michael@0 215 ds->printError=printError;
michael@0 216 ds->printErrorContext=stderr;
michael@0 217
michael@0 218 /* speculative cast, protected by the following length check */
michael@0 219 pInfo=(const UDataInfo *)((const char *)data+4);
michael@0 220
michael@0 221 if( length>=20 &&
michael@0 222 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
michael@0 223 pInfo->dataFormat[1]==0x6d &&
michael@0 224 pInfo->dataFormat[2]==0x6e &&
michael@0 225 pInfo->dataFormat[3]==0x44
michael@0 226 ) {
michael@0 227 /*
michael@0 228 * swap the .dat package
michael@0 229 * udata_swapPackage() needs to rename ToC name entries from the old package
michael@0 230 * name to the new one.
michael@0 231 * We pass it the filenames, and udata_swapPackage() will extract the
michael@0 232 * package names.
michael@0 233 */
michael@0 234 length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode);
michael@0 235 udata_closeSwapper(ds);
michael@0 236 if(U_FAILURE(errorCode)) {
michael@0 237 fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n",
michael@0 238 pname, argv[1], u_errorName(errorCode));
michael@0 239 rc=4;
michael@0 240 goto done;
michael@0 241 }
michael@0 242 } else {
michael@0 243 /* swap the data, which is not a .dat package */
michael@0 244 length=udata_swap(ds, data, length, data, &errorCode);
michael@0 245 udata_closeSwapper(ds);
michael@0 246 if(U_FAILURE(errorCode)) {
michael@0 247 fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n",
michael@0 248 pname, argv[1], u_errorName(errorCode));
michael@0 249 rc=4;
michael@0 250 goto done;
michael@0 251 }
michael@0 252 }
michael@0 253
michael@0 254 out=fopen(argv[2], "wb");
michael@0 255 if(out==NULL) {
michael@0 256 fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]);
michael@0 257 rc=5;
michael@0 258 goto done;
michael@0 259 }
michael@0 260
michael@0 261 if(length!=(int32_t)fwrite(data, 1, length, out)) {
michael@0 262 fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]);
michael@0 263 rc=6;
michael@0 264 goto done;
michael@0 265 }
michael@0 266
michael@0 267 fclose(out);
michael@0 268 out=NULL;
michael@0 269
michael@0 270 /* all done */
michael@0 271 rc=0;
michael@0 272
michael@0 273 done:
michael@0 274 if(in!=NULL) {
michael@0 275 fclose(in);
michael@0 276 }
michael@0 277 if(out!=NULL) {
michael@0 278 fclose(out);
michael@0 279 }
michael@0 280 if(data!=NULL) {
michael@0 281 free(data);
michael@0 282 }
michael@0 283 return rc;
michael@0 284 }
michael@0 285
michael@0 286 /* swap .dat package files -------------------------------------------------- */
michael@0 287
michael@0 288 static int32_t
michael@0 289 extractPackageName(const UDataSwapper *ds, const char *filename,
michael@0 290 char pkg[], int32_t capacity,
michael@0 291 UErrorCode *pErrorCode) {
michael@0 292 const char *basename;
michael@0 293 int32_t len;
michael@0 294
michael@0 295 if(U_FAILURE(*pErrorCode)) {
michael@0 296 return 0;
michael@0 297 }
michael@0 298
michael@0 299 basename=findBasename(filename);
michael@0 300 len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */
michael@0 301
michael@0 302 if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) {
michael@0 303 udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n",
michael@0 304 basename);
michael@0 305 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 306 return 0;
michael@0 307 }
michael@0 308
michael@0 309 if(len>=capacity) {
michael@0 310 udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n",
michael@0 311 (long)capacity);
michael@0 312 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 313 return 0;
michael@0 314 }
michael@0 315
michael@0 316 uprv_memcpy(pkg, basename, len);
michael@0 317 pkg[len]=0;
michael@0 318 return len;
michael@0 319 }
michael@0 320
/*
 * Working copy of one table-of-contents entry, in platform byte order,
 * as used by udata_swapPackage().
 */
struct ToCEntry {
    uint32_t nameOffset;    /* offset of the item name */
    uint32_t inOffset;      /* offset of the item data in the input */
    uint32_t outOffset;     /* offset of the item data in the output */
    uint32_t length;        /* number of bytes of item data */
};
michael@0 324
michael@0 325 U_CDECL_BEGIN
michael@0 326 static int32_t U_CALLCONV
michael@0 327 compareToCEntries(const void *context, const void *left, const void *right) {
michael@0 328 const char *chars=(const char *)context;
michael@0 329 return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset,
michael@0 330 chars+((const ToCEntry *)right)->nameOffset);
michael@0 331 }
michael@0 332 U_CDECL_END
michael@0 333
michael@0 334 U_CFUNC int32_t U_CALLCONV
michael@0 335 udata_swapPackage(const char *inFilename, const char *outFilename,
michael@0 336 const UDataSwapper *ds,
michael@0 337 const void *inData, int32_t length, void *outData,
michael@0 338 UErrorCode *pErrorCode) {
michael@0 339 const UDataInfo *pInfo;
michael@0 340 int32_t headerSize;
michael@0 341
michael@0 342 const uint8_t *inBytes;
michael@0 343 uint8_t *outBytes;
michael@0 344
michael@0 345 uint32_t itemCount, offset, i;
michael@0 346 int32_t itemLength;
michael@0 347
michael@0 348 const UDataOffsetTOCEntry *inEntries;
michael@0 349 UDataOffsetTOCEntry *outEntries;
michael@0 350
michael@0 351 ToCEntry *table;
michael@0 352
michael@0 353 char inPkgName[32], outPkgName[32];
michael@0 354 int32_t inPkgNameLength, outPkgNameLength;
michael@0 355
michael@0 356 /* udata_swapDataHeader checks the arguments */
michael@0 357 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
michael@0 358 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 359 return 0;
michael@0 360 }
michael@0 361
michael@0 362 /* check data format and format version */
michael@0 363 pInfo=(const UDataInfo *)((const char *)inData+4);
michael@0 364 if(!(
michael@0 365 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */
michael@0 366 pInfo->dataFormat[1]==0x6d &&
michael@0 367 pInfo->dataFormat[2]==0x6e &&
michael@0 368 pInfo->dataFormat[3]==0x44 &&
michael@0 369 pInfo->formatVersion[0]==1
michael@0 370 )) {
michael@0 371 udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
michael@0 372 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 373 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 374 pInfo->formatVersion[0]);
michael@0 375 *pErrorCode=U_UNSUPPORTED_ERROR;
michael@0 376 return 0;
michael@0 377 }
michael@0 378
michael@0 379 /*
michael@0 380 * We need to change the ToC name entries so that they have the correct
michael@0 381 * package name prefix.
michael@0 382 * Extract the package names from the in/out filenames.
michael@0 383 */
michael@0 384 inPkgNameLength=extractPackageName(
michael@0 385 ds, inFilename,
michael@0 386 inPkgName, (int32_t)sizeof(inPkgName),
michael@0 387 pErrorCode);
michael@0 388 outPkgNameLength=extractPackageName(
michael@0 389 ds, outFilename,
michael@0 390 outPkgName, (int32_t)sizeof(outPkgName),
michael@0 391 pErrorCode);
michael@0 392 if(U_FAILURE(*pErrorCode)) {
michael@0 393 return 0;
michael@0 394 }
michael@0 395
michael@0 396 /*
michael@0 397 * It is possible to work with inPkgNameLength!=outPkgNameLength,
michael@0 398 * but then the length of the data file would change more significantly,
michael@0 399 * which we are not currently prepared for.
michael@0 400 */
michael@0 401 if(inPkgNameLength!=outPkgNameLength) {
michael@0 402 udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n",
michael@0 403 inPkgName, outPkgName);
michael@0 404 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 405 return 0;
michael@0 406 }
michael@0 407
michael@0 408 inBytes=(const uint8_t *)inData+headerSize;
michael@0 409 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
michael@0 410
michael@0 411 if(length<0) {
michael@0 412 /* preflighting */
michael@0 413 itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
michael@0 414 if(itemCount==0) {
michael@0 415 /* no items: count only the item count and return */
michael@0 416 return headerSize+4;
michael@0 417 }
michael@0 418
michael@0 419 /* read the last item's offset and preflight it */
michael@0 420 offset=ds->readUInt32(inEntries[itemCount-1].dataOffset);
michael@0 421 itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode);
michael@0 422
michael@0 423 if(U_SUCCESS(*pErrorCode)) {
michael@0 424 return headerSize+offset+(uint32_t)itemLength;
michael@0 425 } else {
michael@0 426 return 0;
michael@0 427 }
michael@0 428 } else {
michael@0 429 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
michael@0 430 length-=headerSize;
michael@0 431 if(length<4) {
michael@0 432 /* itemCount does not fit */
michael@0 433 offset=0xffffffff;
michael@0 434 itemCount=0; /* make compilers happy */
michael@0 435 } else {
michael@0 436 itemCount=ds->readUInt32(*(const uint32_t *)inBytes);
michael@0 437 if(itemCount==0) {
michael@0 438 offset=4;
michael@0 439 } else if((uint32_t)length<(4+8*itemCount)) {
michael@0 440 /* ToC table does not fit */
michael@0 441 offset=0xffffffff;
michael@0 442 } else {
michael@0 443 /* offset of the last item plus at least 20 bytes for its header */
michael@0 444 offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset);
michael@0 445 }
michael@0 446 }
michael@0 447 if((uint32_t)length<offset) {
michael@0 448 udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n",
michael@0 449 length);
michael@0 450 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 451 return 0;
michael@0 452 }
michael@0 453
michael@0 454 outBytes=(uint8_t *)outData+headerSize;
michael@0 455
michael@0 456 /* swap the item count */
michael@0 457 ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode);
michael@0 458
michael@0 459 if(itemCount==0) {
michael@0 460 /* no items: just return now */
michael@0 461 return headerSize+4;
michael@0 462 }
michael@0 463
michael@0 464 /* swap the item name strings */
michael@0 465 offset=4+8*itemCount;
michael@0 466 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset);
michael@0 467 udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode);
michael@0 468 if(U_FAILURE(*pErrorCode)) {
michael@0 469 udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n");
michael@0 470 return 0;
michael@0 471 }
michael@0 472 /* keep offset and itemLength in case we allocate and copy the strings below */
michael@0 473
michael@0 474 /* swap the package names into the output charset */
michael@0 475 if(ds->outCharset!=U_CHARSET_FAMILY) {
michael@0 476 UDataSwapper *ds2;
michael@0 477 ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode);
michael@0 478 ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode);
michael@0 479 ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode);
michael@0 480 udata_closeSwapper(ds2);
michael@0 481 if(U_FAILURE(*pErrorCode)) {
michael@0 482 udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n");
michael@0 483 }
michael@0 484 }
michael@0 485
michael@0 486 /* change the prefix of each ToC entry name from the old to the new package name */
michael@0 487 {
michael@0 488 char *entryName;
michael@0 489
michael@0 490 for(i=0; i<itemCount; ++i) {
michael@0 491 entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset);
michael@0 492
michael@0 493 if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) {
michael@0 494 uprv_memcpy(entryName, outPkgName, inPkgNameLength);
michael@0 495 } else {
michael@0 496 udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n",
michael@0 497 (long)i);
michael@0 498 *pErrorCode=U_INVALID_FORMAT_ERROR;
michael@0 499 return 0;
michael@0 500 }
michael@0 501 }
michael@0 502 }
michael@0 503
michael@0 504 /*
michael@0 505 * Allocate the ToC table and, if necessary, a temporary buffer for
michael@0 506 * pseudo-in-place swapping.
michael@0 507 *
michael@0 508 * We cannot swap in-place because:
michael@0 509 *
michael@0 510 * 1. If the swapping of an item fails mid-way, then in-place swapping
michael@0 511 * has destroyed its data.
michael@0 512 * Out-of-place swapping allows us to then copy its original data.
michael@0 513 *
michael@0 514 * 2. If swapping changes the charset family, then we must resort
michael@0 515 * not only the ToC table but also the data items themselves.
michael@0 516 * This requires a permutation and is best done with separate in/out
michael@0 517 * buffers.
michael@0 518 *
michael@0 519 * We swapped the strings above to avoid the malloc below if string swapping fails.
michael@0 520 */
michael@0 521 if(inData==outData) {
michael@0 522 /* +15: prepare for extra padding of a newly-last item */
michael@0 523 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH);
michael@0 524 if(table!=NULL) {
michael@0 525 outBytes=(uint8_t *)(table+itemCount);
michael@0 526
michael@0 527 /* copy the item count and the swapped strings */
michael@0 528 uprv_memcpy(outBytes, inBytes, 4);
michael@0 529 uprv_memcpy(outBytes+offset, inBytes+offset, itemLength);
michael@0 530 }
michael@0 531 } else {
michael@0 532 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry));
michael@0 533 }
michael@0 534 if(table==NULL) {
michael@0 535 udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n",
michael@0 536 inData==outData ?
michael@0 537 itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH :
michael@0 538 itemCount*sizeof(ToCEntry));
michael@0 539 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 540 return 0;
michael@0 541 }
michael@0 542 outEntries=(UDataOffsetTOCEntry *)(outBytes+4);
michael@0 543
michael@0 544 /* read the ToC table */
michael@0 545 for(i=0; i<itemCount; ++i) {
michael@0 546 table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset);
michael@0 547 table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset);
michael@0 548 if(i>0) {
michael@0 549 table[i-1].length=table[i].inOffset-table[i-1].inOffset;
michael@0 550 }
michael@0 551 }
michael@0 552 table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset;
michael@0 553
michael@0 554 if(ds->inCharset==ds->outCharset) {
michael@0 555 /* no charset swapping, no resorting: keep item offsets the same */
michael@0 556 for(i=0; i<itemCount; ++i) {
michael@0 557 table[i].outOffset=table[i].inOffset;
michael@0 558 }
michael@0 559 } else {
michael@0 560 /* charset swapping: resort items by their swapped names */
michael@0 561
michael@0 562 /*
michael@0 563 * Before the actual sorting, we need to make sure that each item
michael@0 564 * has a length that is a multiple of 16 bytes so that all items
michael@0 565 * are 16-aligned.
michael@0 566 * Only the old last item may be missing up to 15 padding bytes.
michael@0 567 * Add padding bytes for it.
michael@0 568 * Since the icuswap main() function has already allocated enough
michael@0 569 * input buffer space and set the last 15 bytes there to 0xaa,
michael@0 570 * we only need to increase the total data length and the length
michael@0 571 * of the last item here.
michael@0 572 */
michael@0 573 if((length&0xf)!=0) {
michael@0 574 int32_t delta=16-(length&0xf);
michael@0 575 length+=delta;
michael@0 576 table[itemCount-1].length+=(uint32_t)delta;
michael@0 577 }
michael@0 578
michael@0 579 /* Save the offset before we sort the TOC. */
michael@0 580 offset=table[0].inOffset;
michael@0 581 /* sort the TOC entries */
michael@0 582 uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry),
michael@0 583 compareToCEntries, outBytes, FALSE, pErrorCode);
michael@0 584
michael@0 585 /*
michael@0 586 * Note: Before sorting, the inOffset values were in order.
michael@0 587 * Now the outOffset values are in order.
michael@0 588 */
michael@0 589
michael@0 590 /* assign outOffset values */
michael@0 591 for(i=0; i<itemCount; ++i) {
michael@0 592 table[i].outOffset=offset;
michael@0 593 offset+=table[i].length;
michael@0 594 }
michael@0 595 }
michael@0 596
michael@0 597 /* write the output ToC table */
michael@0 598 for(i=0; i<itemCount; ++i) {
michael@0 599 ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset);
michael@0 600 ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset);
michael@0 601 }
michael@0 602
michael@0 603 /* swap each data item */
michael@0 604 for(i=0; i<itemCount; ++i) {
michael@0 605 /* first copy the item bytes to make sure that unreachable bytes are copied */
michael@0 606 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
michael@0 607
michael@0 608 /* swap the item */
michael@0 609 udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length,
michael@0 610 outBytes+table[i].outOffset, pErrorCode);
michael@0 611
michael@0 612 if(U_FAILURE(*pErrorCode)) {
michael@0 613 if(ds->outCharset==U_CHARSET_FAMILY) {
michael@0 614 udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n"
michael@0 615 " at inOffset 0x%x length 0x%x - %s\n"
michael@0 616 " the data item will be copied, not swapped\n\n",
michael@0 617 (char *)outBytes+table[i].nameOffset,
michael@0 618 table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
michael@0 619 } else {
michael@0 620 udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n"
michael@0 621 " at inOffset 0x%x length 0x%x - %s\n"
michael@0 622 " the data item will be copied, not swapped\n\n",
michael@0 623 table[i].inOffset, table[i].length, u_errorName(*pErrorCode));
michael@0 624 }
michael@0 625 /* reset the error code, copy the data item, and continue */
michael@0 626 *pErrorCode=U_ZERO_ERROR;
michael@0 627 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length);
michael@0 628 }
michael@0 629 }
michael@0 630
michael@0 631 if(inData==outData) {
michael@0 632 /* copy the data from the temporary buffer to the in-place buffer */
michael@0 633 uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length);
michael@0 634 }
michael@0 635 uprv_free(table);
michael@0 636
michael@0 637 return headerSize+length;
michael@0 638 }
michael@0 639 }
michael@0 640
michael@0 641 /*
michael@0 642 * Hey, Emacs, please set the following:
michael@0 643 *
michael@0 644 * Local Variables:
michael@0 645 * indent-tabs-mode: nil
michael@0 646 * End:
michael@0 647 *
michael@0 648 */

mercurial