Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2003-2007, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: icuswap.cpp |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2003aug08 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | * |
michael@0 | 16 | * This tool takes an ICU data file and "swaps" it, that is, changes its |
michael@0 | 17 | * platform properties between big-/little-endianness and ASCII/EBCDIC charset |
michael@0 | 18 | * families. |
michael@0 | 19 | * The modified data file is written to a new file. |
michael@0 | 20 | * Useful as an install-time tool for shipping only one flavor of ICU data |
michael@0 | 21 | * and preparing data files for the target platform. |
michael@0 | 22 | * Will not work with data DLLs (shared libraries). |
michael@0 | 23 | */ |
michael@0 | 24 | |
michael@0 | 25 | #include "unicode/utypes.h" |
michael@0 | 26 | #include "unicode/putil.h" |
michael@0 | 27 | #include "unicode/udata.h" |
michael@0 | 28 | #include "cmemory.h" |
michael@0 | 29 | #include "cstring.h" |
michael@0 | 30 | #include "uinvchar.h" |
michael@0 | 31 | #include "uarrsort.h" |
michael@0 | 32 | #include "ucmndata.h" |
michael@0 | 33 | #include "udataswp.h" |
michael@0 | 34 | #include "swapimpl.h" |
michael@0 | 35 | #include "toolutil.h" |
michael@0 | 36 | #include "uoptions.h" |
michael@0 | 37 | |
michael@0 | 38 | #include <stdio.h> |
michael@0 | 39 | #include <stdlib.h> |
michael@0 | 40 | #include <string.h> |
michael@0 | 41 | |
michael@0 | 42 | /* definitions */ |
michael@0 | 43 | |
/* Number of elements of a real array (not a pointer), as an int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
/*
 * Number of spare bytes that main() allocates beyond the input file contents;
 * main() also rejects input files shorter than this.
 */
#define DEFAULT_PADDING_LENGTH 15

/*
 * Command-line options passed to u_parseArgs().
 * The entries are indexed by the OPT_* constants of the enum that follows,
 * so the two lists must stay in the same order.
 */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG)
};

/* Indexes into options[]. */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_OUT_TYPE
};
michael@0 | 58 | |
/**
 * Determine the size of an open file and rewind it to its beginning.
 *
 * @param f an open, seekable stream
 * @return the file size in bytes, or -1 if the stream is NULL, seeking or
 *         telling fails, or the size does not fit into an int32_t
 */
static int32_t
fileSize(FILE *f) {
    long size;

    if(f==NULL || fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    size=ftell(f);
    /* always try to rewind, even if ftell() failed */
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }
    /* ftell() reports errors as -1; a long may be wider than int32_t */
    if(size<0 || size>0x7fffffffL) {
        return -1;
    }
    return (int32_t)size;
}
michael@0 | 68 | |
/**
 * Swap an ICU .dat package, including swapping of enclosed items.
 * Forward declaration; the function is defined further down in this file.
 *
 * @param inFilename  name of the input file; its basename must end with ".dat"
 *                    and supplies the input package name
 * @param outFilename name of the output file; its basename must end with ".dat"
 *                    and supplies the output package name
 * @param ds          swapper describing the input and output platform properties
 * @param inData      the package data, starting with its data header
 * @param length      number of bytes at inData, or negative for preflighting
 * @param outData     destination buffer; may be the same pointer as inData
 * @param pErrorCode  in/out ICU error code
 * @return the length of the swapped package, or 0 on failure
 */
U_CFUNC int32_t U_CALLCONV
udata_swapPackage(const char *inFilename, const char *outFilename,
                  const UDataSwapper *ds,
                  const void *inData, int32_t length, void *outData,
                  UErrorCode *pErrorCode);
michael@0 | 77 | |
michael@0 | 78 | U_CDECL_BEGIN |
michael@0 | 79 | static void U_CALLCONV |
michael@0 | 80 | printError(void *context, const char *fmt, va_list args) { |
michael@0 | 81 | vfprintf((FILE *)context, fmt, args); |
michael@0 | 82 | } |
michael@0 | 83 | U_CDECL_END |
michael@0 | 84 | |
michael@0 | 85 | static int |
michael@0 | 86 | printUsage(const char *pname, UBool ishelp) { |
michael@0 | 87 | fprintf(stderr, |
michael@0 | 88 | "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n", |
michael@0 | 89 | ishelp ? 'U' : 'u', pname); |
michael@0 | 90 | if(ishelp) { |
michael@0 | 91 | fprintf(stderr, |
michael@0 | 92 | "\nOptions: -h, -?, --help print this message and exit\n" |
michael@0 | 93 | " Read the input file, swap its platform properties according\n" |
michael@0 | 94 | " to the -t or --type option, and write the result to the output file.\n" |
michael@0 | 95 | " -tl change to little-endian/ASCII charset family\n" |
michael@0 | 96 | " -tb change to big-endian/ASCII charset family\n" |
michael@0 | 97 | " -te change to big-endian/EBCDIC charset family\n"); |
michael@0 | 98 | } |
michael@0 | 99 | |
michael@0 | 100 | return !ishelp; |
michael@0 | 101 | } |
michael@0 | 102 | |
michael@0 | 103 | extern int |
michael@0 | 104 | main(int argc, char *argv[]) { |
michael@0 | 105 | FILE *in, *out; |
michael@0 | 106 | const char *pname; |
michael@0 | 107 | char *data; |
michael@0 | 108 | int32_t length; |
michael@0 | 109 | UBool ishelp; |
michael@0 | 110 | int rc; |
michael@0 | 111 | |
michael@0 | 112 | UDataSwapper *ds; |
michael@0 | 113 | const UDataInfo *pInfo; |
michael@0 | 114 | UErrorCode errorCode; |
michael@0 | 115 | uint8_t outCharset; |
michael@0 | 116 | UBool outIsBigEndian; |
michael@0 | 117 | |
michael@0 | 118 | U_MAIN_INIT_ARGS(argc, argv); |
michael@0 | 119 | |
michael@0 | 120 | fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n"); |
michael@0 | 121 | |
michael@0 | 122 | /* get the program basename */ |
michael@0 | 123 | pname=strrchr(argv[0], U_FILE_SEP_CHAR); |
michael@0 | 124 | if(pname==NULL) { |
michael@0 | 125 | pname=strrchr(argv[0], '/'); |
michael@0 | 126 | } |
michael@0 | 127 | if(pname!=NULL) { |
michael@0 | 128 | ++pname; |
michael@0 | 129 | } else { |
michael@0 | 130 | pname=argv[0]; |
michael@0 | 131 | } |
michael@0 | 132 | |
michael@0 | 133 | argc=u_parseArgs(argc, argv, LENGTHOF(options), options); |
michael@0 | 134 | ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur; |
michael@0 | 135 | if(ishelp || argc!=3) { |
michael@0 | 136 | return printUsage(pname, ishelp); |
michael@0 | 137 | } |
michael@0 | 138 | |
michael@0 | 139 | /* parse the output type option */ |
michael@0 | 140 | data=(char *)options[OPT_OUT_TYPE].value; |
michael@0 | 141 | if(data[0]==0 || data[1]!=0) { |
michael@0 | 142 | /* the type must be exactly one letter */ |
michael@0 | 143 | return printUsage(pname, FALSE); |
michael@0 | 144 | } |
michael@0 | 145 | switch(data[0]) { |
michael@0 | 146 | case 'l': |
michael@0 | 147 | outIsBigEndian=FALSE; |
michael@0 | 148 | outCharset=U_ASCII_FAMILY; |
michael@0 | 149 | break; |
michael@0 | 150 | case 'b': |
michael@0 | 151 | outIsBigEndian=TRUE; |
michael@0 | 152 | outCharset=U_ASCII_FAMILY; |
michael@0 | 153 | break; |
michael@0 | 154 | case 'e': |
michael@0 | 155 | outIsBigEndian=TRUE; |
michael@0 | 156 | outCharset=U_EBCDIC_FAMILY; |
michael@0 | 157 | break; |
michael@0 | 158 | default: |
michael@0 | 159 | return printUsage(pname, FALSE); |
michael@0 | 160 | } |
michael@0 | 161 | |
michael@0 | 162 | in=out=NULL; |
michael@0 | 163 | data=NULL; |
michael@0 | 164 | |
michael@0 | 165 | /* open the input file, get its length, allocate memory for it, read the file */ |
michael@0 | 166 | in=fopen(argv[1], "rb"); |
michael@0 | 167 | if(in==NULL) { |
michael@0 | 168 | fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]); |
michael@0 | 169 | rc=2; |
michael@0 | 170 | goto done; |
michael@0 | 171 | } |
michael@0 | 172 | |
michael@0 | 173 | length=fileSize(in); |
michael@0 | 174 | if(length<DEFAULT_PADDING_LENGTH) { |
michael@0 | 175 | fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]); |
michael@0 | 176 | rc=2; |
michael@0 | 177 | goto done; |
michael@0 | 178 | } |
michael@0 | 179 | |
michael@0 | 180 | /* |
michael@0 | 181 | * +15: udata_swapPackage() may need to add a few padding bytes to the |
michael@0 | 182 | * last item if charset swapping is done, |
michael@0 | 183 | * because the last item may be resorted into the middle and then needs |
michael@0 | 184 | * additional padding bytes |
michael@0 | 185 | */ |
michael@0 | 186 | data=(char *)malloc(length+DEFAULT_PADDING_LENGTH); |
michael@0 | 187 | if(data==NULL) { |
michael@0 | 188 | fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]); |
michael@0 | 189 | rc=2; |
michael@0 | 190 | goto done; |
michael@0 | 191 | } |
michael@0 | 192 | |
michael@0 | 193 | /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */ |
michael@0 | 194 | uprv_memset(data+length-DEFAULT_PADDING_LENGTH, 0xaa, DEFAULT_PADDING_LENGTH); |
michael@0 | 195 | |
michael@0 | 196 | if(length!=(int32_t)fread(data, 1, length, in)) { |
michael@0 | 197 | fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]); |
michael@0 | 198 | rc=3; |
michael@0 | 199 | goto done; |
michael@0 | 200 | } |
michael@0 | 201 | |
michael@0 | 202 | fclose(in); |
michael@0 | 203 | in=NULL; |
michael@0 | 204 | |
michael@0 | 205 | /* swap the data in-place */ |
michael@0 | 206 | errorCode=U_ZERO_ERROR; |
michael@0 | 207 | ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode); |
michael@0 | 208 | if(U_FAILURE(errorCode)) { |
michael@0 | 209 | fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n", |
michael@0 | 210 | pname, argv[1], u_errorName(errorCode)); |
michael@0 | 211 | rc=4; |
michael@0 | 212 | goto done; |
michael@0 | 213 | } |
michael@0 | 214 | |
michael@0 | 215 | ds->printError=printError; |
michael@0 | 216 | ds->printErrorContext=stderr; |
michael@0 | 217 | |
michael@0 | 218 | /* speculative cast, protected by the following length check */ |
michael@0 | 219 | pInfo=(const UDataInfo *)((const char *)data+4); |
michael@0 | 220 | |
michael@0 | 221 | if( length>=20 && |
michael@0 | 222 | pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ |
michael@0 | 223 | pInfo->dataFormat[1]==0x6d && |
michael@0 | 224 | pInfo->dataFormat[2]==0x6e && |
michael@0 | 225 | pInfo->dataFormat[3]==0x44 |
michael@0 | 226 | ) { |
michael@0 | 227 | /* |
michael@0 | 228 | * swap the .dat package |
michael@0 | 229 | * udata_swapPackage() needs to rename ToC name entries from the old package |
michael@0 | 230 | * name to the new one. |
michael@0 | 231 | * We pass it the filenames, and udata_swapPackage() will extract the |
michael@0 | 232 | * package names. |
michael@0 | 233 | */ |
michael@0 | 234 | length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode); |
michael@0 | 235 | udata_closeSwapper(ds); |
michael@0 | 236 | if(U_FAILURE(errorCode)) { |
michael@0 | 237 | fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n", |
michael@0 | 238 | pname, argv[1], u_errorName(errorCode)); |
michael@0 | 239 | rc=4; |
michael@0 | 240 | goto done; |
michael@0 | 241 | } |
michael@0 | 242 | } else { |
michael@0 | 243 | /* swap the data, which is not a .dat package */ |
michael@0 | 244 | length=udata_swap(ds, data, length, data, &errorCode); |
michael@0 | 245 | udata_closeSwapper(ds); |
michael@0 | 246 | if(U_FAILURE(errorCode)) { |
michael@0 | 247 | fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n", |
michael@0 | 248 | pname, argv[1], u_errorName(errorCode)); |
michael@0 | 249 | rc=4; |
michael@0 | 250 | goto done; |
michael@0 | 251 | } |
michael@0 | 252 | } |
michael@0 | 253 | |
michael@0 | 254 | out=fopen(argv[2], "wb"); |
michael@0 | 255 | if(out==NULL) { |
michael@0 | 256 | fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]); |
michael@0 | 257 | rc=5; |
michael@0 | 258 | goto done; |
michael@0 | 259 | } |
michael@0 | 260 | |
michael@0 | 261 | if(length!=(int32_t)fwrite(data, 1, length, out)) { |
michael@0 | 262 | fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]); |
michael@0 | 263 | rc=6; |
michael@0 | 264 | goto done; |
michael@0 | 265 | } |
michael@0 | 266 | |
michael@0 | 267 | fclose(out); |
michael@0 | 268 | out=NULL; |
michael@0 | 269 | |
michael@0 | 270 | /* all done */ |
michael@0 | 271 | rc=0; |
michael@0 | 272 | |
michael@0 | 273 | done: |
michael@0 | 274 | if(in!=NULL) { |
michael@0 | 275 | fclose(in); |
michael@0 | 276 | } |
michael@0 | 277 | if(out!=NULL) { |
michael@0 | 278 | fclose(out); |
michael@0 | 279 | } |
michael@0 | 280 | if(data!=NULL) { |
michael@0 | 281 | free(data); |
michael@0 | 282 | } |
michael@0 | 283 | return rc; |
michael@0 | 284 | } |
michael@0 | 285 | |
michael@0 | 286 | /* swap .dat package files -------------------------------------------------- */ |
michael@0 | 287 | |
michael@0 | 288 | static int32_t |
michael@0 | 289 | extractPackageName(const UDataSwapper *ds, const char *filename, |
michael@0 | 290 | char pkg[], int32_t capacity, |
michael@0 | 291 | UErrorCode *pErrorCode) { |
michael@0 | 292 | const char *basename; |
michael@0 | 293 | int32_t len; |
michael@0 | 294 | |
michael@0 | 295 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 296 | return 0; |
michael@0 | 297 | } |
michael@0 | 298 | |
michael@0 | 299 | basename=findBasename(filename); |
michael@0 | 300 | len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */ |
michael@0 | 301 | |
michael@0 | 302 | if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) { |
michael@0 | 303 | udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n", |
michael@0 | 304 | basename); |
michael@0 | 305 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 306 | return 0; |
michael@0 | 307 | } |
michael@0 | 308 | |
michael@0 | 309 | if(len>=capacity) { |
michael@0 | 310 | udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n", |
michael@0 | 311 | (long)capacity); |
michael@0 | 312 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 313 | return 0; |
michael@0 | 314 | } |
michael@0 | 315 | |
michael@0 | 316 | uprv_memcpy(pkg, basename, len); |
michael@0 | 317 | pkg[len]=0; |
michael@0 | 318 | return len; |
michael@0 | 319 | } |
michael@0 | 320 | |
/*
 * Working copy of one package table-of-contents entry, with the values
 * already read via the swapper's readUInt32().
 * nameOffset: offset of the item name
 * inOffset:   offset of the item data in the input
 * outOffset:  offset of the item data in the output
 * length:     number of bytes of the item data
 * udata_swapPackage() applies all offsets relative to the first byte after
 * the package's data header.
 */
struct ToCEntry {
    uint32_t nameOffset, inOffset, outOffset, length;
};
michael@0 | 324 | |
michael@0 | 325 | U_CDECL_BEGIN |
michael@0 | 326 | static int32_t U_CALLCONV |
michael@0 | 327 | compareToCEntries(const void *context, const void *left, const void *right) { |
michael@0 | 328 | const char *chars=(const char *)context; |
michael@0 | 329 | return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset, |
michael@0 | 330 | chars+((const ToCEntry *)right)->nameOffset); |
michael@0 | 331 | } |
michael@0 | 332 | U_CDECL_END |
michael@0 | 333 | |
michael@0 | 334 | U_CFUNC int32_t U_CALLCONV |
michael@0 | 335 | udata_swapPackage(const char *inFilename, const char *outFilename, |
michael@0 | 336 | const UDataSwapper *ds, |
michael@0 | 337 | const void *inData, int32_t length, void *outData, |
michael@0 | 338 | UErrorCode *pErrorCode) { |
michael@0 | 339 | const UDataInfo *pInfo; |
michael@0 | 340 | int32_t headerSize; |
michael@0 | 341 | |
michael@0 | 342 | const uint8_t *inBytes; |
michael@0 | 343 | uint8_t *outBytes; |
michael@0 | 344 | |
michael@0 | 345 | uint32_t itemCount, offset, i; |
michael@0 | 346 | int32_t itemLength; |
michael@0 | 347 | |
michael@0 | 348 | const UDataOffsetTOCEntry *inEntries; |
michael@0 | 349 | UDataOffsetTOCEntry *outEntries; |
michael@0 | 350 | |
michael@0 | 351 | ToCEntry *table; |
michael@0 | 352 | |
michael@0 | 353 | char inPkgName[32], outPkgName[32]; |
michael@0 | 354 | int32_t inPkgNameLength, outPkgNameLength; |
michael@0 | 355 | |
michael@0 | 356 | /* udata_swapDataHeader checks the arguments */ |
michael@0 | 357 | headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
michael@0 | 358 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 359 | return 0; |
michael@0 | 360 | } |
michael@0 | 361 | |
michael@0 | 362 | /* check data format and format version */ |
michael@0 | 363 | pInfo=(const UDataInfo *)((const char *)inData+4); |
michael@0 | 364 | if(!( |
michael@0 | 365 | pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ |
michael@0 | 366 | pInfo->dataFormat[1]==0x6d && |
michael@0 | 367 | pInfo->dataFormat[2]==0x6e && |
michael@0 | 368 | pInfo->dataFormat[3]==0x44 && |
michael@0 | 369 | pInfo->formatVersion[0]==1 |
michael@0 | 370 | )) { |
michael@0 | 371 | udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", |
michael@0 | 372 | pInfo->dataFormat[0], pInfo->dataFormat[1], |
michael@0 | 373 | pInfo->dataFormat[2], pInfo->dataFormat[3], |
michael@0 | 374 | pInfo->formatVersion[0]); |
michael@0 | 375 | *pErrorCode=U_UNSUPPORTED_ERROR; |
michael@0 | 376 | return 0; |
michael@0 | 377 | } |
michael@0 | 378 | |
michael@0 | 379 | /* |
michael@0 | 380 | * We need to change the ToC name entries so that they have the correct |
michael@0 | 381 | * package name prefix. |
michael@0 | 382 | * Extract the package names from the in/out filenames. |
michael@0 | 383 | */ |
michael@0 | 384 | inPkgNameLength=extractPackageName( |
michael@0 | 385 | ds, inFilename, |
michael@0 | 386 | inPkgName, (int32_t)sizeof(inPkgName), |
michael@0 | 387 | pErrorCode); |
michael@0 | 388 | outPkgNameLength=extractPackageName( |
michael@0 | 389 | ds, outFilename, |
michael@0 | 390 | outPkgName, (int32_t)sizeof(outPkgName), |
michael@0 | 391 | pErrorCode); |
michael@0 | 392 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 393 | return 0; |
michael@0 | 394 | } |
michael@0 | 395 | |
michael@0 | 396 | /* |
michael@0 | 397 | * It is possible to work with inPkgNameLength!=outPkgNameLength, |
michael@0 | 398 | * but then the length of the data file would change more significantly, |
michael@0 | 399 | * which we are not currently prepared for. |
michael@0 | 400 | */ |
michael@0 | 401 | if(inPkgNameLength!=outPkgNameLength) { |
michael@0 | 402 | udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n", |
michael@0 | 403 | inPkgName, outPkgName); |
michael@0 | 404 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 405 | return 0; |
michael@0 | 406 | } |
michael@0 | 407 | |
michael@0 | 408 | inBytes=(const uint8_t *)inData+headerSize; |
michael@0 | 409 | inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); |
michael@0 | 410 | |
michael@0 | 411 | if(length<0) { |
michael@0 | 412 | /* preflighting */ |
michael@0 | 413 | itemCount=ds->readUInt32(*(const uint32_t *)inBytes); |
michael@0 | 414 | if(itemCount==0) { |
michael@0 | 415 | /* no items: count only the item count and return */ |
michael@0 | 416 | return headerSize+4; |
michael@0 | 417 | } |
michael@0 | 418 | |
michael@0 | 419 | /* read the last item's offset and preflight it */ |
michael@0 | 420 | offset=ds->readUInt32(inEntries[itemCount-1].dataOffset); |
michael@0 | 421 | itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode); |
michael@0 | 422 | |
michael@0 | 423 | if(U_SUCCESS(*pErrorCode)) { |
michael@0 | 424 | return headerSize+offset+(uint32_t)itemLength; |
michael@0 | 425 | } else { |
michael@0 | 426 | return 0; |
michael@0 | 427 | } |
michael@0 | 428 | } else { |
michael@0 | 429 | /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ |
michael@0 | 430 | length-=headerSize; |
michael@0 | 431 | if(length<4) { |
michael@0 | 432 | /* itemCount does not fit */ |
michael@0 | 433 | offset=0xffffffff; |
michael@0 | 434 | itemCount=0; /* make compilers happy */ |
michael@0 | 435 | } else { |
michael@0 | 436 | itemCount=ds->readUInt32(*(const uint32_t *)inBytes); |
michael@0 | 437 | if(itemCount==0) { |
michael@0 | 438 | offset=4; |
michael@0 | 439 | } else if((uint32_t)length<(4+8*itemCount)) { |
michael@0 | 440 | /* ToC table does not fit */ |
michael@0 | 441 | offset=0xffffffff; |
michael@0 | 442 | } else { |
michael@0 | 443 | /* offset of the last item plus at least 20 bytes for its header */ |
michael@0 | 444 | offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset); |
michael@0 | 445 | } |
michael@0 | 446 | } |
michael@0 | 447 | if((uint32_t)length<offset) { |
michael@0 | 448 | udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n", |
michael@0 | 449 | length); |
michael@0 | 450 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
michael@0 | 451 | return 0; |
michael@0 | 452 | } |
michael@0 | 453 | |
michael@0 | 454 | outBytes=(uint8_t *)outData+headerSize; |
michael@0 | 455 | |
michael@0 | 456 | /* swap the item count */ |
michael@0 | 457 | ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode); |
michael@0 | 458 | |
michael@0 | 459 | if(itemCount==0) { |
michael@0 | 460 | /* no items: just return now */ |
michael@0 | 461 | return headerSize+4; |
michael@0 | 462 | } |
michael@0 | 463 | |
michael@0 | 464 | /* swap the item name strings */ |
michael@0 | 465 | offset=4+8*itemCount; |
michael@0 | 466 | itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset); |
michael@0 | 467 | udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode); |
michael@0 | 468 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 469 | udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n"); |
michael@0 | 470 | return 0; |
michael@0 | 471 | } |
michael@0 | 472 | /* keep offset and itemLength in case we allocate and copy the strings below */ |
michael@0 | 473 | |
michael@0 | 474 | /* swap the package names into the output charset */ |
michael@0 | 475 | if(ds->outCharset!=U_CHARSET_FAMILY) { |
michael@0 | 476 | UDataSwapper *ds2; |
michael@0 | 477 | ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode); |
michael@0 | 478 | ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode); |
michael@0 | 479 | ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode); |
michael@0 | 480 | udata_closeSwapper(ds2); |
michael@0 | 481 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 482 | udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n"); |
michael@0 | 483 | } |
michael@0 | 484 | } |
michael@0 | 485 | |
michael@0 | 486 | /* change the prefix of each ToC entry name from the old to the new package name */ |
michael@0 | 487 | { |
michael@0 | 488 | char *entryName; |
michael@0 | 489 | |
michael@0 | 490 | for(i=0; i<itemCount; ++i) { |
michael@0 | 491 | entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset); |
michael@0 | 492 | |
michael@0 | 493 | if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) { |
michael@0 | 494 | uprv_memcpy(entryName, outPkgName, inPkgNameLength); |
michael@0 | 495 | } else { |
michael@0 | 496 | udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n", |
michael@0 | 497 | (long)i); |
michael@0 | 498 | *pErrorCode=U_INVALID_FORMAT_ERROR; |
michael@0 | 499 | return 0; |
michael@0 | 500 | } |
michael@0 | 501 | } |
michael@0 | 502 | } |
michael@0 | 503 | |
michael@0 | 504 | /* |
michael@0 | 505 | * Allocate the ToC table and, if necessary, a temporary buffer for |
michael@0 | 506 | * pseudo-in-place swapping. |
michael@0 | 507 | * |
michael@0 | 508 | * We cannot swap in-place because: |
michael@0 | 509 | * |
michael@0 | 510 | * 1. If the swapping of an item fails mid-way, then in-place swapping |
michael@0 | 511 | * has destroyed its data. |
michael@0 | 512 | * Out-of-place swapping allows us to then copy its original data. |
michael@0 | 513 | * |
michael@0 | 514 | * 2. If swapping changes the charset family, then we must resort |
michael@0 | 515 | * not only the ToC table but also the data items themselves. |
michael@0 | 516 | * This requires a permutation and is best done with separate in/out |
michael@0 | 517 | * buffers. |
michael@0 | 518 | * |
michael@0 | 519 | * We swapped the strings above to avoid the malloc below if string swapping fails. |
michael@0 | 520 | */ |
michael@0 | 521 | if(inData==outData) { |
michael@0 | 522 | /* +15: prepare for extra padding of a newly-last item */ |
michael@0 | 523 | table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH); |
michael@0 | 524 | if(table!=NULL) { |
michael@0 | 525 | outBytes=(uint8_t *)(table+itemCount); |
michael@0 | 526 | |
michael@0 | 527 | /* copy the item count and the swapped strings */ |
michael@0 | 528 | uprv_memcpy(outBytes, inBytes, 4); |
michael@0 | 529 | uprv_memcpy(outBytes+offset, inBytes+offset, itemLength); |
michael@0 | 530 | } |
michael@0 | 531 | } else { |
michael@0 | 532 | table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)); |
michael@0 | 533 | } |
michael@0 | 534 | if(table==NULL) { |
michael@0 | 535 | udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n", |
michael@0 | 536 | inData==outData ? |
michael@0 | 537 | itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH : |
michael@0 | 538 | itemCount*sizeof(ToCEntry)); |
michael@0 | 539 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 540 | return 0; |
michael@0 | 541 | } |
michael@0 | 542 | outEntries=(UDataOffsetTOCEntry *)(outBytes+4); |
michael@0 | 543 | |
michael@0 | 544 | /* read the ToC table */ |
michael@0 | 545 | for(i=0; i<itemCount; ++i) { |
michael@0 | 546 | table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset); |
michael@0 | 547 | table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset); |
michael@0 | 548 | if(i>0) { |
michael@0 | 549 | table[i-1].length=table[i].inOffset-table[i-1].inOffset; |
michael@0 | 550 | } |
michael@0 | 551 | } |
michael@0 | 552 | table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset; |
michael@0 | 553 | |
michael@0 | 554 | if(ds->inCharset==ds->outCharset) { |
michael@0 | 555 | /* no charset swapping, no resorting: keep item offsets the same */ |
michael@0 | 556 | for(i=0; i<itemCount; ++i) { |
michael@0 | 557 | table[i].outOffset=table[i].inOffset; |
michael@0 | 558 | } |
michael@0 | 559 | } else { |
michael@0 | 560 | /* charset swapping: resort items by their swapped names */ |
michael@0 | 561 | |
michael@0 | 562 | /* |
michael@0 | 563 | * Before the actual sorting, we need to make sure that each item |
michael@0 | 564 | * has a length that is a multiple of 16 bytes so that all items |
michael@0 | 565 | * are 16-aligned. |
michael@0 | 566 | * Only the old last item may be missing up to 15 padding bytes. |
michael@0 | 567 | * Add padding bytes for it. |
michael@0 | 568 | * Since the icuswap main() function has already allocated enough |
michael@0 | 569 | * input buffer space and set the last 15 bytes there to 0xaa, |
michael@0 | 570 | * we only need to increase the total data length and the length |
michael@0 | 571 | * of the last item here. |
michael@0 | 572 | */ |
michael@0 | 573 | if((length&0xf)!=0) { |
michael@0 | 574 | int32_t delta=16-(length&0xf); |
michael@0 | 575 | length+=delta; |
michael@0 | 576 | table[itemCount-1].length+=(uint32_t)delta; |
michael@0 | 577 | } |
michael@0 | 578 | |
michael@0 | 579 | /* Save the offset before we sort the TOC. */ |
michael@0 | 580 | offset=table[0].inOffset; |
michael@0 | 581 | /* sort the TOC entries */ |
michael@0 | 582 | uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry), |
michael@0 | 583 | compareToCEntries, outBytes, FALSE, pErrorCode); |
michael@0 | 584 | |
michael@0 | 585 | /* |
michael@0 | 586 | * Note: Before sorting, the inOffset values were in order. |
michael@0 | 587 | * Now the outOffset values are in order. |
michael@0 | 588 | */ |
michael@0 | 589 | |
michael@0 | 590 | /* assign outOffset values */ |
michael@0 | 591 | for(i=0; i<itemCount; ++i) { |
michael@0 | 592 | table[i].outOffset=offset; |
michael@0 | 593 | offset+=table[i].length; |
michael@0 | 594 | } |
michael@0 | 595 | } |
michael@0 | 596 | |
michael@0 | 597 | /* write the output ToC table */ |
michael@0 | 598 | for(i=0; i<itemCount; ++i) { |
michael@0 | 599 | ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset); |
michael@0 | 600 | ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset); |
michael@0 | 601 | } |
michael@0 | 602 | |
michael@0 | 603 | /* swap each data item */ |
michael@0 | 604 | for(i=0; i<itemCount; ++i) { |
michael@0 | 605 | /* first copy the item bytes to make sure that unreachable bytes are copied */ |
michael@0 | 606 | uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); |
michael@0 | 607 | |
michael@0 | 608 | /* swap the item */ |
michael@0 | 609 | udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length, |
michael@0 | 610 | outBytes+table[i].outOffset, pErrorCode); |
michael@0 | 611 | |
michael@0 | 612 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 613 | if(ds->outCharset==U_CHARSET_FAMILY) { |
michael@0 | 614 | udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n" |
michael@0 | 615 | " at inOffset 0x%x length 0x%x - %s\n" |
michael@0 | 616 | " the data item will be copied, not swapped\n\n", |
michael@0 | 617 | (char *)outBytes+table[i].nameOffset, |
michael@0 | 618 | table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); |
michael@0 | 619 | } else { |
michael@0 | 620 | udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n" |
michael@0 | 621 | " at inOffset 0x%x length 0x%x - %s\n" |
michael@0 | 622 | " the data item will be copied, not swapped\n\n", |
michael@0 | 623 | table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); |
michael@0 | 624 | } |
michael@0 | 625 | /* reset the error code, copy the data item, and continue */ |
michael@0 | 626 | *pErrorCode=U_ZERO_ERROR; |
michael@0 | 627 | uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); |
michael@0 | 628 | } |
michael@0 | 629 | } |
michael@0 | 630 | |
michael@0 | 631 | if(inData==outData) { |
michael@0 | 632 | /* copy the data from the temporary buffer to the in-place buffer */ |
michael@0 | 633 | uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length); |
michael@0 | 634 | } |
michael@0 | 635 | uprv_free(table); |
michael@0 | 636 | |
michael@0 | 637 | return headerSize+length; |
michael@0 | 638 | } |
michael@0 | 639 | } |
michael@0 | 640 | |
michael@0 | 641 | /* |
michael@0 | 642 | * Hey, Emacs, please set the following: |
michael@0 | 643 | * |
michael@0 | 644 | * Local Variables: |
michael@0 | 645 | * indent-tabs-mode: nil |
michael@0 | 646 | * End: |
michael@0 | 647 | * |
michael@0 | 648 | */ |