/*
*******************************************************************************
*
* Copyright (C) 2003-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: icuswap.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003aug08
* created by: Markus W. Scherer
*
* This tool takes an ICU data file and "swaps" it, that is, changes its
* platform properties between big-/little-endianness and ASCII/EBCDIC charset
* families.
* The modified data file is written to a new file.
* Useful as an install-time tool for shipping only one flavor of ICU data
* and preparing data files for the target platform.
* Will not work with data DLLs (shared libraries).
michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/putil.h" michael@0: #include "unicode/udata.h" michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "uinvchar.h" michael@0: #include "uarrsort.h" michael@0: #include "ucmndata.h" michael@0: #include "udataswp.h" michael@0: #include "swapimpl.h" michael@0: #include "toolutil.h" michael@0: #include "uoptions.h" michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: /* definitions */ michael@0: michael@0: #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) michael@0: #define DEFAULT_PADDING_LENGTH 15 michael@0: michael@0: static UOption options[]={ michael@0: UOPTION_HELP_H, michael@0: UOPTION_HELP_QUESTION_MARK, michael@0: UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG) michael@0: }; michael@0: michael@0: enum { michael@0: OPT_HELP_H, michael@0: OPT_HELP_QUESTION_MARK, michael@0: OPT_OUT_TYPE michael@0: }; michael@0: michael@0: static int32_t michael@0: fileSize(FILE *f) { michael@0: int32_t size; michael@0: michael@0: fseek(f, 0, SEEK_END); michael@0: size=(int32_t)ftell(f); michael@0: fseek(f, 0, SEEK_SET); michael@0: return size; michael@0: } michael@0: michael@0: /** michael@0: * Swap an ICU .dat package, including swapping of enclosed items. 
michael@0: */ michael@0: U_CFUNC int32_t U_CALLCONV michael@0: udata_swapPackage(const char *inFilename, const char *outFilename, michael@0: const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode); michael@0: michael@0: U_CDECL_BEGIN michael@0: static void U_CALLCONV michael@0: printError(void *context, const char *fmt, va_list args) { michael@0: vfprintf((FILE *)context, fmt, args); michael@0: } michael@0: U_CDECL_END michael@0: michael@0: static int michael@0: printUsage(const char *pname, UBool ishelp) { michael@0: fprintf(stderr, michael@0: "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n", michael@0: ishelp ? 'U' : 'u', pname); michael@0: if(ishelp) { michael@0: fprintf(stderr, michael@0: "\nOptions: -h, -?, --help print this message and exit\n" michael@0: " Read the input file, swap its platform properties according\n" michael@0: " to the -t or --type option, and write the result to the output file.\n" michael@0: " -tl change to little-endian/ASCII charset family\n" michael@0: " -tb change to big-endian/ASCII charset family\n" michael@0: " -te change to big-endian/EBCDIC charset family\n"); michael@0: } michael@0: michael@0: return !ishelp; michael@0: } michael@0: michael@0: extern int michael@0: main(int argc, char *argv[]) { michael@0: FILE *in, *out; michael@0: const char *pname; michael@0: char *data; michael@0: int32_t length; michael@0: UBool ishelp; michael@0: int rc; michael@0: michael@0: UDataSwapper *ds; michael@0: const UDataInfo *pInfo; michael@0: UErrorCode errorCode; michael@0: uint8_t outCharset; michael@0: UBool outIsBigEndian; michael@0: michael@0: U_MAIN_INIT_ARGS(argc, argv); michael@0: michael@0: fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n"); michael@0: michael@0: /* get the program basename */ michael@0: pname=strrchr(argv[0], U_FILE_SEP_CHAR); 
michael@0: if(pname==NULL) { michael@0: pname=strrchr(argv[0], '/'); michael@0: } michael@0: if(pname!=NULL) { michael@0: ++pname; michael@0: } else { michael@0: pname=argv[0]; michael@0: } michael@0: michael@0: argc=u_parseArgs(argc, argv, LENGTHOF(options), options); michael@0: ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur; michael@0: if(ishelp || argc!=3) { michael@0: return printUsage(pname, ishelp); michael@0: } michael@0: michael@0: /* parse the output type option */ michael@0: data=(char *)options[OPT_OUT_TYPE].value; michael@0: if(data[0]==0 || data[1]!=0) { michael@0: /* the type must be exactly one letter */ michael@0: return printUsage(pname, FALSE); michael@0: } michael@0: switch(data[0]) { michael@0: case 'l': michael@0: outIsBigEndian=FALSE; michael@0: outCharset=U_ASCII_FAMILY; michael@0: break; michael@0: case 'b': michael@0: outIsBigEndian=TRUE; michael@0: outCharset=U_ASCII_FAMILY; michael@0: break; michael@0: case 'e': michael@0: outIsBigEndian=TRUE; michael@0: outCharset=U_EBCDIC_FAMILY; michael@0: break; michael@0: default: michael@0: return printUsage(pname, FALSE); michael@0: } michael@0: michael@0: in=out=NULL; michael@0: data=NULL; michael@0: michael@0: /* open the input file, get its length, allocate memory for it, read the file */ michael@0: in=fopen(argv[1], "rb"); michael@0: if(in==NULL) { michael@0: fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]); michael@0: rc=2; michael@0: goto done; michael@0: } michael@0: michael@0: length=fileSize(in); michael@0: if(lengthprintError=printError; michael@0: ds->printErrorContext=stderr; michael@0: michael@0: /* speculative cast, protected by the following length check */ michael@0: pInfo=(const UDataInfo *)((const char *)data+4); michael@0: michael@0: if( length>=20 && michael@0: pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ michael@0: pInfo->dataFormat[1]==0x6d && michael@0: pInfo->dataFormat[2]==0x6e && michael@0: 
pInfo->dataFormat[3]==0x44 michael@0: ) { michael@0: /* michael@0: * swap the .dat package michael@0: * udata_swapPackage() needs to rename ToC name entries from the old package michael@0: * name to the new one. michael@0: * We pass it the filenames, and udata_swapPackage() will extract the michael@0: * package names. michael@0: */ michael@0: length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode); michael@0: udata_closeSwapper(ds); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n", michael@0: pname, argv[1], u_errorName(errorCode)); michael@0: rc=4; michael@0: goto done; michael@0: } michael@0: } else { michael@0: /* swap the data, which is not a .dat package */ michael@0: length=udata_swap(ds, data, length, data, &errorCode); michael@0: udata_closeSwapper(ds); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n", michael@0: pname, argv[1], u_errorName(errorCode)); michael@0: rc=4; michael@0: goto done; michael@0: } michael@0: } michael@0: michael@0: out=fopen(argv[2], "wb"); michael@0: if(out==NULL) { michael@0: fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]); michael@0: rc=5; michael@0: goto done; michael@0: } michael@0: michael@0: if(length!=(int32_t)fwrite(data, 1, length, out)) { michael@0: fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]); michael@0: rc=6; michael@0: goto done; michael@0: } michael@0: michael@0: fclose(out); michael@0: out=NULL; michael@0: michael@0: /* all done */ michael@0: rc=0; michael@0: michael@0: done: michael@0: if(in!=NULL) { michael@0: fclose(in); michael@0: } michael@0: if(out!=NULL) { michael@0: fclose(out); michael@0: } michael@0: if(data!=NULL) { michael@0: free(data); michael@0: } michael@0: return rc; michael@0: } michael@0: michael@0: /* swap .dat package files -------------------------------------------------- */ michael@0: michael@0: static int32_t 
michael@0: extractPackageName(const UDataSwapper *ds, const char *filename, michael@0: char pkg[], int32_t capacity, michael@0: UErrorCode *pErrorCode) { michael@0: const char *basename; michael@0: int32_t len; michael@0: michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: basename=findBasename(filename); michael@0: len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */ michael@0: michael@0: if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) { michael@0: udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n", michael@0: basename); michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: if(len>=capacity) { michael@0: udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n", michael@0: (long)capacity); michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: uprv_memcpy(pkg, basename, len); michael@0: pkg[len]=0; michael@0: return len; michael@0: } michael@0: michael@0: struct ToCEntry { michael@0: uint32_t nameOffset, inOffset, outOffset, length; michael@0: }; michael@0: michael@0: U_CDECL_BEGIN michael@0: static int32_t U_CALLCONV michael@0: compareToCEntries(const void *context, const void *left, const void *right) { michael@0: const char *chars=(const char *)context; michael@0: return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset, michael@0: chars+((const ToCEntry *)right)->nameOffset); michael@0: } michael@0: U_CDECL_END michael@0: michael@0: U_CFUNC int32_t U_CALLCONV michael@0: udata_swapPackage(const char *inFilename, const char *outFilename, michael@0: const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: const UDataInfo *pInfo; michael@0: int32_t headerSize; michael@0: michael@0: const uint8_t *inBytes; 
michael@0: uint8_t *outBytes; michael@0: michael@0: uint32_t itemCount, offset, i; michael@0: int32_t itemLength; michael@0: michael@0: const UDataOffsetTOCEntry *inEntries; michael@0: UDataOffsetTOCEntry *outEntries; michael@0: michael@0: ToCEntry *table; michael@0: michael@0: char inPkgName[32], outPkgName[32]; michael@0: int32_t inPkgNameLength, outPkgNameLength; michael@0: michael@0: /* udata_swapDataHeader checks the arguments */ michael@0: headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* check data format and format version */ michael@0: pInfo=(const UDataInfo *)((const char *)inData+4); michael@0: if(!( michael@0: pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ michael@0: pInfo->dataFormat[1]==0x6d && michael@0: pInfo->dataFormat[2]==0x6e && michael@0: pInfo->dataFormat[3]==0x44 && michael@0: pInfo->formatVersion[0]==1 michael@0: )) { michael@0: udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* michael@0: * We need to change the ToC name entries so that they have the correct michael@0: * package name prefix. michael@0: * Extract the package names from the in/out filenames. 
michael@0: */ michael@0: inPkgNameLength=extractPackageName( michael@0: ds, inFilename, michael@0: inPkgName, (int32_t)sizeof(inPkgName), michael@0: pErrorCode); michael@0: outPkgNameLength=extractPackageName( michael@0: ds, outFilename, michael@0: outPkgName, (int32_t)sizeof(outPkgName), michael@0: pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* michael@0: * It is possible to work with inPkgNameLength!=outPkgNameLength, michael@0: * but then the length of the data file would change more significantly, michael@0: * which we are not currently prepared for. michael@0: */ michael@0: if(inPkgNameLength!=outPkgNameLength) { michael@0: udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n", michael@0: inPkgName, outPkgName); michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: inBytes=(const uint8_t *)inData+headerSize; michael@0: inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); michael@0: michael@0: if(length<0) { michael@0: /* preflighting */ michael@0: itemCount=ds->readUInt32(*(const uint32_t *)inBytes); michael@0: if(itemCount==0) { michael@0: /* no items: count only the item count and return */ michael@0: return headerSize+4; michael@0: } michael@0: michael@0: /* read the last item's offset and preflight it */ michael@0: offset=ds->readUInt32(inEntries[itemCount-1].dataOffset); michael@0: itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode); michael@0: michael@0: if(U_SUCCESS(*pErrorCode)) { michael@0: return headerSize+offset+(uint32_t)itemLength; michael@0: } else { michael@0: return 0; michael@0: } michael@0: } else { michael@0: /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ michael@0: length-=headerSize; michael@0: if(length<4) { michael@0: /* itemCount does not fit */ michael@0: offset=0xffffffff; michael@0: itemCount=0; /* make 
compilers happy */ michael@0: } else { michael@0: itemCount=ds->readUInt32(*(const uint32_t *)inBytes); michael@0: if(itemCount==0) { michael@0: offset=4; michael@0: } else if((uint32_t)length<(4+8*itemCount)) { michael@0: /* ToC table does not fit */ michael@0: offset=0xffffffff; michael@0: } else { michael@0: /* offset of the last item plus at least 20 bytes for its header */ michael@0: offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset); michael@0: } michael@0: } michael@0: if((uint32_t)lengthswapArray32(ds, inBytes, 4, outBytes, pErrorCode); michael@0: michael@0: if(itemCount==0) { michael@0: /* no items: just return now */ michael@0: return headerSize+4; michael@0: } michael@0: michael@0: /* swap the item name strings */ michael@0: offset=4+8*itemCount; michael@0: itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset); michael@0: udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n"); michael@0: return 0; michael@0: } michael@0: /* keep offset and itemLength in case we allocate and copy the strings below */ michael@0: michael@0: /* swap the package names into the output charset */ michael@0: if(ds->outCharset!=U_CHARSET_FAMILY) { michael@0: UDataSwapper *ds2; michael@0: ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode); michael@0: ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode); michael@0: ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode); michael@0: udata_closeSwapper(ds2); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n"); michael@0: } michael@0: } michael@0: michael@0: /* change the prefix of each ToC entry name from the old to the new package name */ michael@0: { michael@0: char *entryName; 
michael@0: michael@0: for(i=0; ireadUInt32(inEntries[i].nameOffset); michael@0: michael@0: if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) { michael@0: uprv_memcpy(entryName, outPkgName, inPkgNameLength); michael@0: } else { michael@0: udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n", michael@0: (long)i); michael@0: *pErrorCode=U_INVALID_FORMAT_ERROR; michael@0: return 0; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * Allocate the ToC table and, if necessary, a temporary buffer for michael@0: * pseudo-in-place swapping. michael@0: * michael@0: * We cannot swap in-place because: michael@0: * michael@0: * 1. If the swapping of an item fails mid-way, then in-place swapping michael@0: * has destroyed its data. michael@0: * Out-of-place swapping allows us to then copy its original data. michael@0: * michael@0: * 2. If swapping changes the charset family, then we must resort michael@0: * not only the ToC table but also the data items themselves. michael@0: * This requires a permutation and is best done with separate in/out michael@0: * buffers. michael@0: * michael@0: * We swapped the strings above to avoid the malloc below if string swapping fails. 
michael@0: */ michael@0: if(inData==outData) { michael@0: /* +15: prepare for extra padding of a newly-last item */ michael@0: table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH); michael@0: if(table!=NULL) { michael@0: outBytes=(uint8_t *)(table+itemCount); michael@0: michael@0: /* copy the item count and the swapped strings */ michael@0: uprv_memcpy(outBytes, inBytes, 4); michael@0: uprv_memcpy(outBytes+offset, inBytes+offset, itemLength); michael@0: } michael@0: } else { michael@0: table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)); michael@0: } michael@0: if(table==NULL) { michael@0: udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n", michael@0: inData==outData ? michael@0: itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH : michael@0: itemCount*sizeof(ToCEntry)); michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return 0; michael@0: } michael@0: outEntries=(UDataOffsetTOCEntry *)(outBytes+4); michael@0: michael@0: /* read the ToC table */ michael@0: for(i=0; ireadUInt32(inEntries[i].nameOffset); michael@0: table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset); michael@0: if(i>0) { michael@0: table[i-1].length=table[i].inOffset-table[i-1].inOffset; michael@0: } michael@0: } michael@0: table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset; michael@0: michael@0: if(ds->inCharset==ds->outCharset) { michael@0: /* no charset swapping, no resorting: keep item offsets the same */ michael@0: for(i=0; iwriteUInt32(&outEntries[i].nameOffset, table[i].nameOffset); michael@0: ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset); michael@0: } michael@0: michael@0: /* swap each data item */ michael@0: for(i=0; ioutCharset==U_CHARSET_FAMILY) { michael@0: udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n" michael@0: " at inOffset 0x%x length 0x%x - %s\n" michael@0: " the data item will be copied, not swapped\n\n", michael@0: 
(char *)outBytes+table[i].nameOffset, michael@0: table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); michael@0: } else { michael@0: udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n" michael@0: " at inOffset 0x%x length 0x%x - %s\n" michael@0: " the data item will be copied, not swapped\n\n", michael@0: table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); michael@0: } michael@0: /* reset the error code, copy the data item, and continue */ michael@0: *pErrorCode=U_ZERO_ERROR; michael@0: uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); michael@0: } michael@0: } michael@0: michael@0: if(inData==outData) { michael@0: /* copy the data from the temporary buffer to the in-place buffer */ michael@0: uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length); michael@0: } michael@0: uprv_free(table); michael@0: michael@0: return headerSize+length; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * Hey, Emacs, please set the following: michael@0: * michael@0: * Local Variables: michael@0: * indent-tabs-mode: nil michael@0: * End: michael@0: * michael@0: */