michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 1999-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: package.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2005aug25 michael@0: * created by: Markus W. Scherer michael@0: * michael@0: * Read, modify, and write ICU .dat data package files. michael@0: * This is an integral part of the icupkg tool, moved to the toolutil library michael@0: * because parts of tool implementations tend to be later shared by michael@0: * other tools. michael@0: * Subsumes functionality and implementation code from michael@0: * gencmn, decmn, and icuswap tools. michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/putil.h" michael@0: #include "unicode/udata.h" michael@0: #include "cstring.h" michael@0: #include "uarrsort.h" michael@0: #include "ucmndata.h" michael@0: #include "udataswp.h" michael@0: #include "swapimpl.h" michael@0: #include "toolutil.h" michael@0: #include "package.h" michael@0: #include "cmemory.h" michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: michael@0: static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */ michael@0: michael@0: // general definitions ----------------------------------------------------- *** michael@0: michael@0: #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) michael@0: michael@0: /* UDataInfo cf. 
udata.h */ michael@0: static const UDataInfo dataInfo={ michael@0: (uint16_t)sizeof(UDataInfo), michael@0: 0, michael@0: michael@0: U_IS_BIG_ENDIAN, michael@0: U_CHARSET_FAMILY, michael@0: (uint8_t)sizeof(UChar), michael@0: 0, michael@0: michael@0: {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ michael@0: {1, 0, 0, 0}, /* formatVersion */ michael@0: {3, 0, 0, 0} /* dataVersion */ michael@0: }; michael@0: michael@0: U_CDECL_BEGIN michael@0: static void U_CALLCONV michael@0: printPackageError(void *context, const char *fmt, va_list args) { michael@0: vfprintf((FILE *)context, fmt, args); michael@0: } michael@0: U_CDECL_END michael@0: michael@0: static uint16_t michael@0: readSwapUInt16(uint16_t x) { michael@0: return (uint16_t)((x<<8)|(x>>8)); michael@0: } michael@0: michael@0: // platform types ---------------------------------------------------------- *** michael@0: michael@0: static const char *types="lb?e"; michael@0: michael@0: enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; michael@0: michael@0: static inline int32_t michael@0: makeTypeEnum(uint8_t charset, UBool isBigEndian) { michael@0: return 2*(int32_t)charset+isBigEndian; michael@0: } michael@0: michael@0: static inline int32_t michael@0: makeTypeEnum(char type) { michael@0: return michael@0: type == 'l' ? TYPE_L : michael@0: type == 'b' ? TYPE_B : michael@0: type == 'e' ? 
TYPE_E : michael@0: -1; michael@0: } michael@0: michael@0: static inline char michael@0: makeTypeLetter(uint8_t charset, UBool isBigEndian) { michael@0: return types[makeTypeEnum(charset, isBigEndian)]; michael@0: } michael@0: michael@0: static inline char michael@0: makeTypeLetter(int32_t typeEnum) { michael@0: return types[typeEnum]; michael@0: } michael@0: michael@0: static void michael@0: makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { michael@0: int32_t typeEnum=makeTypeEnum(type); michael@0: charset=(uint8_t)(typeEnum>>1); michael@0: isBigEndian=(UBool)(typeEnum&1); michael@0: } michael@0: michael@0: U_CFUNC const UDataInfo * michael@0: getDataInfo(const uint8_t *data, int32_t length, michael@0: int32_t &infoLength, int32_t &headerLength, michael@0: UErrorCode *pErrorCode) { michael@0: const DataHeader *pHeader; michael@0: const UDataInfo *pInfo; michael@0: michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return NULL; michael@0: } michael@0: if( data==NULL || michael@0: (length>=0 && length<(int32_t)sizeof(DataHeader)) michael@0: ) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: pHeader=(const DataHeader *)data; michael@0: pInfo=&pHeader->info; michael@0: if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || michael@0: pHeader->dataHeader.magic1!=0xda || michael@0: pHeader->dataHeader.magic2!=0x27 || michael@0: pInfo->sizeofUChar!=2 michael@0: ) { michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { michael@0: headerLength=pHeader->dataHeader.headerSize; michael@0: infoLength=pInfo->size; michael@0: } else { michael@0: headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); michael@0: infoLength=readSwapUInt16(pInfo->size); michael@0: } michael@0: michael@0: if( headerLength<(int32_t)sizeof(DataHeader) || michael@0: infoLength<(int32_t)sizeof(UDataInfo) || 
michael@0: headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || michael@0: (length>=0 && lengthcharsetFamily, (UBool)pInfo->isBigEndian); michael@0: } michael@0: michael@0: // file handling ----------------------------------------------------------- *** michael@0: michael@0: static void michael@0: extractPackageName(const char *filename, michael@0: char pkg[], int32_t capacity) { michael@0: const char *basename; michael@0: int32_t len; michael@0: michael@0: basename=findBasename(filename); michael@0: len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ michael@0: michael@0: if(len<=0 || 0!=strcmp(basename+len, ".dat")) { michael@0: fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", michael@0: basename); michael@0: exit(U_ILLEGAL_ARGUMENT_ERROR); michael@0: } michael@0: michael@0: if(len>=capacity) { michael@0: fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", michael@0: basename, (long)capacity); michael@0: exit(U_ILLEGAL_ARGUMENT_ERROR); michael@0: } michael@0: michael@0: memcpy(pkg, basename, len); michael@0: pkg[len]=0; michael@0: } michael@0: michael@0: static int32_t michael@0: getFileLength(FILE *f) { michael@0: int32_t length; michael@0: michael@0: fseek(f, 0, SEEK_END); michael@0: length=(int32_t)ftell(f); michael@0: fseek(f, 0, SEEK_SET); michael@0: return length; michael@0: } michael@0: michael@0: /* michael@0: * Turn tree separators and alternate file separators into normal file separators. 
michael@0: */ michael@0: #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR michael@0: #define treeToPath(s) michael@0: #else michael@0: static void michael@0: treeToPath(char *s) { michael@0: char *t; michael@0: michael@0: for(t=s; *t!=0; ++t) { michael@0: if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { michael@0: *t=U_FILE_SEP_CHAR; michael@0: } michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: /* michael@0: * Turn file separators into tree separators. michael@0: */ michael@0: #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR michael@0: #define pathToTree(s) michael@0: #else michael@0: static void michael@0: pathToTree(char *s) { michael@0: char *t; michael@0: michael@0: for(t=s; *t!=0; ++t) { michael@0: if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { michael@0: *t=U_TREE_ENTRY_SEP_CHAR; michael@0: } michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: /* michael@0: * Prepend the path (if any) to the name and run the name through treeToName(). 
michael@0: */ michael@0: static void michael@0: makeFullFilename(const char *path, const char *name, michael@0: char *filename, int32_t capacity) { michael@0: char *s; michael@0: michael@0: // prepend the path unless NULL or empty michael@0: if(path!=NULL && path[0]!=0) { michael@0: if((int32_t)(strlen(path)+1)>=capacity) { michael@0: fprintf(stderr, "pathname too long: \"%s\"\n", path); michael@0: exit(U_BUFFER_OVERFLOW_ERROR); michael@0: } michael@0: strcpy(filename, path); michael@0: michael@0: // make sure the path ends with a file separator michael@0: s=strchr(filename, 0); michael@0: if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { michael@0: *s++=U_FILE_SEP_CHAR; michael@0: } michael@0: } else { michael@0: s=filename; michael@0: } michael@0: michael@0: // turn the name into a filename, turn tree separators into file separators michael@0: if((int32_t)((s-filename)+strlen(name))>=capacity) { michael@0: fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); michael@0: exit(U_BUFFER_OVERFLOW_ERROR); michael@0: } michael@0: strcpy(s, name); michael@0: treeToPath(s); michael@0: } michael@0: michael@0: static void michael@0: makeFullFilenameAndDirs(const char *path, const char *name, michael@0: char *filename, int32_t capacity) { michael@0: char *sep; michael@0: UErrorCode errorCode; michael@0: michael@0: makeFullFilename(path, name, filename, capacity); michael@0: michael@0: // make tree directories michael@0: errorCode=U_ZERO_ERROR; michael@0: sep=strchr(filename, 0)-strlen(name); michael@0: while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { michael@0: if(sep!=filename) { michael@0: *sep=0; // truncate temporarily michael@0: uprv_mkdir(filename, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: } michael@0: *sep++=U_FILE_SEP_CHAR; // restore file separator character michael@0: } 
michael@0: } michael@0: michael@0: static uint8_t * michael@0: readFile(const char *path, const char *name, int32_t &length, char &type) { michael@0: char filename[1024]; michael@0: FILE *file; michael@0: uint8_t *data; michael@0: UErrorCode errorCode; michael@0: int32_t fileLength, typeEnum; michael@0: michael@0: makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); michael@0: michael@0: /* open the input file, get its length, allocate memory for it, read the file */ michael@0: file=fopen(filename, "rb"); michael@0: if(file==NULL) { michael@0: fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: michael@0: /* get the file length */ michael@0: fileLength=getFileLength(file); michael@0: if(ferror(file) || fileLength<=0) { michael@0: fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); michael@0: fclose(file); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: michael@0: /* allocate the buffer, pad to multiple of 16 */ michael@0: length=(fileLength+0xf)&~0xf; michael@0: data=(uint8_t *)uprv_malloc(length); michael@0: if(data==NULL) { michael@0: fclose(file); michael@0: fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: michael@0: /* read the file */ michael@0: if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) { michael@0: fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); michael@0: fclose(file); michael@0: free(data); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: michael@0: /* pad the file to a multiple of 16 using the usual padding byte */ michael@0: if(fileLengthname, ((Item *)right)->name); michael@0: } michael@0: michael@0: U_CDECL_END michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: Package::Package() michael@0: : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) { michael@0: inPkgName[0]=0; michael@0: 
pkgPrefix[0]=0; michael@0: inData=NULL; michael@0: inLength=0; michael@0: inCharset=U_CHARSET_FAMILY; michael@0: inIsBigEndian=U_IS_BIG_ENDIAN; michael@0: michael@0: itemCount=0; michael@0: itemMax=0; michael@0: items=NULL; michael@0: michael@0: inStringTop=outStringTop=0; michael@0: michael@0: matchMode=0; michael@0: findPrefix=findSuffix=NULL; michael@0: findPrefixLength=findSuffixLength=0; michael@0: findNextIndex=-1; michael@0: michael@0: // create a header for an empty package michael@0: DataHeader *pHeader; michael@0: pHeader=(DataHeader *)header; michael@0: pHeader->dataHeader.magic1=0xda; michael@0: pHeader->dataHeader.magic2=0x27; michael@0: memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); michael@0: headerLength=(int32_t)(4+sizeof(dataInfo)); michael@0: if(headerLength&0xf) { michael@0: /* NUL-pad the header to a multiple of 16 */ michael@0: int32_t length=(headerLength+0xf)&~0xf; michael@0: memset(header+headerLength, 0, length-headerLength); michael@0: headerLength=length; michael@0: } michael@0: pHeader->dataHeader.headerSize=(uint16_t)headerLength; michael@0: } michael@0: michael@0: Package::~Package() { michael@0: int32_t idx; michael@0: michael@0: free(inData); michael@0: michael@0: for(idx=0; idx=sizeof(pkgPrefix)) { michael@0: fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p); michael@0: exit(U_ILLEGAL_ARGUMENT_ERROR); michael@0: } michael@0: strcpy(pkgPrefix, p); michael@0: } michael@0: michael@0: void michael@0: Package::readPackage(const char *filename) { michael@0: UDataSwapper *ds; michael@0: const UDataInfo *pInfo; michael@0: UErrorCode errorCode; michael@0: michael@0: const uint8_t *inBytes; michael@0: michael@0: int32_t length, offset, i; michael@0: int32_t itemLength, typeEnum; michael@0: char type; michael@0: michael@0: const UDataOffsetTOCEntry *inEntries; michael@0: michael@0: extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); michael@0: michael@0: /* read the file */ michael@0: inData=readFile(NULL, 
filename, inLength, type); michael@0: length=inLength; michael@0: michael@0: /* michael@0: * swap the header - even if the swapping itself is a no-op michael@0: * because it tells us the header length michael@0: */ michael@0: errorCode=U_ZERO_ERROR; michael@0: makeTypeProps(type, inCharset, inIsBigEndian); michael@0: ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", michael@0: filename, u_errorName(errorCode)); michael@0: exit(errorCode); michael@0: } michael@0: michael@0: ds->printError=printPackageError; michael@0: ds->printErrorContext=stderr; michael@0: michael@0: headerLength=sizeof(header); michael@0: if(lengthdataFormat[0]==0x43 && /* dataFormat="CmnD" */ michael@0: pInfo->dataFormat[1]==0x6d && michael@0: pInfo->dataFormat[2]==0x6e && michael@0: pInfo->dataFormat[3]==0x44 && michael@0: pInfo->formatVersion[0]==1 michael@0: )) { michael@0: fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0]); michael@0: exit(U_UNSUPPORTED_ERROR); michael@0: } michael@0: inIsBigEndian=(UBool)pInfo->isBigEndian; michael@0: inCharset=pInfo->charsetFamily; michael@0: michael@0: inBytes=(const uint8_t *)inData+headerLength; michael@0: inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); michael@0: michael@0: /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ michael@0: length-=headerLength; michael@0: if(length<4) { michael@0: /* itemCount does not fit */ michael@0: offset=0x7fffffff; michael@0: } else { michael@0: itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); michael@0: setItemCapacity(itemCount); /* resize so there's space */ michael@0: 
if(itemCount==0) { michael@0: offset=4; michael@0: } else if(length<(4+8*itemCount)) { michael@0: /* ToC table does not fit */ michael@0: offset=0x7fffffff; michael@0: } else { michael@0: /* offset of the last item plus at least 20 bytes for its header */ michael@0: offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); michael@0: } michael@0: } michael@0: if(lengthitemMax) { michael@0: fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax); michael@0: exit(U_BUFFER_OVERFLOW_ERROR); michael@0: } michael@0: michael@0: /* swap the item name strings */ michael@0: int32_t stringsOffset=4+8*itemCount; michael@0: itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; michael@0: michael@0: // don't include padding bytes at the end of the item names michael@0: while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { michael@0: --itemLength; michael@0: } michael@0: michael@0: if((inStringTop+itemLength)>STRING_STORE_SIZE) { michael@0: fprintf(stderr, "icupkg: total length of item name strings too long\n"); michael@0: exit(U_BUFFER_OVERFLOW_ERROR); michael@0: } michael@0: michael@0: inItemStrings=inStrings+inStringTop; michael@0: ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); michael@0: exit(U_INVALID_FORMAT_ERROR); michael@0: } michael@0: inStringTop+=itemLength; michael@0: michael@0: // reset the Item entries michael@0: memset(items, 0, itemCount*sizeof(Item)); michael@0: michael@0: /* michael@0: * Get the common prefix of the items. michael@0: * New-style ICU .dat packages use tree separators ('/') between package names, michael@0: * tree names, and item names, michael@0: * while old-style ICU .dat packages (before multi-tree support) michael@0: * use an underscore ('_') between package and item names. 
michael@0: */ michael@0: offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; michael@0: s=inItemStrings+offset; // name of the first entry michael@0: int32_t prefixLength; michael@0: if(doAutoPrefix) { michael@0: // Use the first entry's prefix. Must be a new-style package. michael@0: const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR); michael@0: if(prefixLimit==NULL) { michael@0: fprintf(stderr, michael@0: "icupkg: --auto_toc_prefix[_with_type] but " michael@0: "the first entry \"%s\" does not contain a '%c'\n", michael@0: s, U_TREE_ENTRY_SEP_CHAR); michael@0: exit(U_INVALID_FORMAT_ERROR); michael@0: } michael@0: prefixLength=(int32_t)(prefixLimit-s); michael@0: if(prefixLength==0 || prefixLength>=LENGTHOF(pkgPrefix)) { michael@0: fprintf(stderr, michael@0: "icupkg: --auto_toc_prefix[_with_type] but " michael@0: "the prefix of the first entry \"%s\" is empty or too long\n", michael@0: s); michael@0: exit(U_INVALID_FORMAT_ERROR); michael@0: } michael@0: if(prefixEndsWithType && s[prefixLength-1]!=type) { michael@0: fprintf(stderr, michael@0: "icupkg: --auto_toc_prefix_with_type but " michael@0: "the prefix of the first entry \"%s\" does not end with '%c'\n", michael@0: s, type); michael@0: exit(U_INVALID_FORMAT_ERROR); michael@0: } michael@0: memcpy(pkgPrefix, s, prefixLength); michael@0: memcpy(prefix, s, ++prefixLength); // include the / michael@0: } else { michael@0: // Use the package basename as prefix. 
michael@0: int32_t inPkgNameLength=strlen(inPkgName); michael@0: memcpy(prefix, inPkgName, inPkgNameLength); michael@0: prefixLength=inPkgNameLength; michael@0: michael@0: if( (int32_t)strlen(s)>=(inPkgNameLength+2) && michael@0: 0==memcmp(s, inPkgName, inPkgNameLength) && michael@0: s[inPkgNameLength]=='_' michael@0: ) { michael@0: // old-style .dat package michael@0: prefix[prefixLength++]='_'; michael@0: } else { michael@0: // new-style .dat package michael@0: prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; michael@0: // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR michael@0: // then the test in the loop below will fail michael@0: } michael@0: } michael@0: prefix[prefixLength]=0; michael@0: michael@0: /* read the ToC table */ michael@0: for(i=0; ireadUInt32(inEntries[i].nameOffset)-stringsOffset; michael@0: s=inItemStrings+offset; michael@0: if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { michael@0: fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", michael@0: s, prefix); michael@0: exit(U_INVALID_FORMAT_ERROR); michael@0: } michael@0: items[i].name=s+prefixLength; michael@0: michael@0: // set the item's data michael@0: items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); michael@0: if(i>0) { michael@0: items[i-1].length=(int32_t)(items[i].data-items[i-1].data); michael@0: michael@0: // set the previous item's platform type michael@0: typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); michael@0: if(typeEnum<0 || U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); michael@0: exit(U_INVALID_FORMAT_ERROR); michael@0: } michael@0: items[i-1].type=makeTypeLetter(typeEnum); michael@0: } michael@0: items[i].isDataOwned=FALSE; michael@0: } michael@0: // set the last item's length michael@0: items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 
michael@0: michael@0: // set the last item's platform type michael@0: typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); michael@0: if(typeEnum<0 || U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); michael@0: exit(U_INVALID_FORMAT_ERROR); michael@0: } michael@0: items[itemCount-1].type=makeTypeLetter(typeEnum); michael@0: michael@0: if(type!=U_ICUDATA_TYPE_LETTER[0]) { michael@0: // sort the item names for the local charset michael@0: sortItems(); michael@0: } michael@0: } michael@0: michael@0: udata_closeSwapper(ds); michael@0: } michael@0: michael@0: char michael@0: Package::getInType() { michael@0: return makeTypeLetter(inCharset, inIsBigEndian); michael@0: } michael@0: michael@0: void michael@0: Package::writePackage(const char *filename, char outType, const char *comment) { michael@0: char prefix[MAX_PKG_NAME_LENGTH+4]; michael@0: UDataOffsetTOCEntry entry; michael@0: UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; michael@0: FILE *file; michael@0: Item *pItem; michael@0: char *name; michael@0: UErrorCode errorCode; michael@0: int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; michael@0: uint8_t outCharset; michael@0: UBool outIsBigEndian; michael@0: michael@0: extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); michael@0: michael@0: // if there is an explicit comment, then use it, else use what's in the current header michael@0: if(comment!=NULL) { michael@0: /* get the header size minus the current comment */ michael@0: DataHeader *pHeader; michael@0: int32_t length; michael@0: michael@0: pHeader=(DataHeader *)header; michael@0: headerLength=4+pHeader->info.size; michael@0: length=(int32_t)strlen(comment); michael@0: if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { michael@0: fprintf(stderr, "icupkg: comment too long\n"); michael@0: exit(U_BUFFER_OVERFLOW_ERROR); michael@0: } 
michael@0: memcpy(header+headerLength, comment, length+1); michael@0: headerLength+=length; michael@0: if(headerLength&0xf) { michael@0: /* NUL-pad the header to a multiple of 16 */ michael@0: length=(headerLength+0xf)&~0xf; michael@0: memset(header+headerLength, 0, length-headerLength); michael@0: headerLength=length; michael@0: } michael@0: pHeader->dataHeader.headerSize=(uint16_t)headerLength; michael@0: } michael@0: michael@0: makeTypeProps(outType, outCharset, outIsBigEndian); michael@0: michael@0: // open (TYPE_COUNT-2) swappers michael@0: // one is a no-op for local type==outType michael@0: // one type (TYPE_LE) is bogus michael@0: errorCode=U_ZERO_ERROR; michael@0: i=makeTypeEnum(outType); michael@0: ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); michael@0: ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); michael@0: ds[TYPE_LE]=NULL; michael@0: ds[TYPE_E]= i==TYPE_E ? 
NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); michael@0: exit(errorCode); michael@0: } michael@0: for(i=0; iprintError=printPackageError; michael@0: ds[i]->printErrorContext=stderr; michael@0: } michael@0: } michael@0: michael@0: dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; michael@0: michael@0: // create the file and write its contents michael@0: file=fopen(filename, "wb"); michael@0: if(file==NULL) { michael@0: fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: michael@0: // swap and write the header michael@0: if(dsLocalToOut!=NULL) { michael@0: udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); michael@0: exit(errorCode); michael@0: } michael@0: } michael@0: length=(int32_t)fwrite(header, 1, headerLength, file); michael@0: if(length!=headerLength) { michael@0: fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: michael@0: // prepare and swap the package name with a tree separator michael@0: // for prepending to item names michael@0: if(pkgPrefix[0]==0) { michael@0: prefixLength=(int32_t)strlen(prefix); michael@0: } else { michael@0: prefixLength=(int32_t)strlen(pkgPrefix); michael@0: memcpy(prefix, pkgPrefix, prefixLength); michael@0: if(prefixEndsWithType) { michael@0: prefix[prefixLength-1]=outType; michael@0: } michael@0: } michael@0: prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; michael@0: prefix[prefixLength]=0; michael@0: if(dsLocalToOut!=NULL) { michael@0: dsLocalToOut->swapInvChars(dsLocalToOut, prefix, 
prefixLength, prefix, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); michael@0: exit(errorCode); michael@0: } michael@0: michael@0: // swap and sort the item names (sorting needs to be done in the output charset) michael@0: dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); michael@0: exit(errorCode); michael@0: } michael@0: sortItems(); michael@0: } michael@0: michael@0: // create the output item names in sorted order, with the package name prepended to each michael@0: for(i=0; iswapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); michael@0: exit(errorCode); michael@0: } michael@0: } michael@0: length=(int32_t)fwrite(&outInt32, 1, 4, file); michael@0: if(length!=4) { michael@0: fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: michael@0: // then write the item entries (and collect the maxItemLength) michael@0: maxItemLength=0; michael@0: for(i=0; iswapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); michael@0: exit(errorCode); michael@0: } michael@0: } michael@0: length=(int32_t)fwrite(&entry, 1, 8, file); michael@0: if(length!=8) { michael@0: fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: michael@0: length=items[i].length; michael@0: 
if(length>maxItemLength) { michael@0: maxItemLength=length; michael@0: } michael@0: offset+=length; michael@0: } michael@0: michael@0: // write the item names michael@0: length=(int32_t)fwrite(outStrings, 1, outStringTop, file); michael@0: if(length!=outStringTop) { michael@0: fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: michael@0: // write the items michael@0: for(pItem=items, i=0; itype); michael@0: if(ds[type]!=NULL) { michael@0: // swap each item from its platform properties to the desired ones michael@0: udata_swap( michael@0: ds[type], michael@0: pItem->data, pItem->length, pItem->data, michael@0: &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); michael@0: exit(errorCode); michael@0: } michael@0: } michael@0: length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); michael@0: if(length!=pItem->length) { michael@0: fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: } michael@0: michael@0: if(ferror(file)) { michael@0: fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: michael@0: fclose(file); michael@0: for(i=0; i=0) { michael@0: /* NOTE(review): the text between the loop header above and the comparison below appears to have been lost in extraction; the search code that follows presumably belongs to Package::findItem(name, length) - restore it from the upstream file before relying on it. */ result=strncmp(name, items[i].name, length); michael@0: } else { michael@0: result=strcmp(name, items[i].name); michael@0: } michael@0: michael@0: if(result==0) { michael@0: /* found */ michael@0: if(length>=0) { michael@0: /* michael@0: * if we compared just prefixes, then we may need to back up michael@0: * to the first item with this prefix michael@0: */ michael@0: while(i>0 && 0==strncmp(name, items[i-1].name, length)) { michael@0: --i; michael@0: } michael@0: } michael@0: return i; michael@0: } else if(result<0) { 
michael@0: limit=i; michael@0: } else /* result>0 */ { michael@0: start=i+1; michael@0: } michael@0: } michael@0: michael@0: return ~start; /* not found, return binary-not of the insertion point */ michael@0: } michael@0: michael@0: /* Starts a search for items whose names match pattern. The pattern may contain at most one asterisk wildcard: the text before it becomes findPrefix and the text after it becomes findSuffix; without a wildcard the whole pattern is the prefix. The pattern pointer itself is stored (not copied) and is read again by findNextItem(). Sets findNextIndex to -1 for a NULL or empty pattern, to 0 when the prefix is empty, and otherwise to the result of findItem(findPrefix, findPrefixLength), which other callers in this file treat as not found when negative. Prints a syntax error and exits with U_PARSE_ERROR if a second wildcard is present. */ void michael@0: Package::findItems(const char *pattern) { michael@0: const char *wild; michael@0: michael@0: if(pattern==NULL || *pattern==0) { michael@0: findNextIndex=-1; michael@0: return; michael@0: } michael@0: michael@0: findPrefix=pattern; michael@0: findSuffix=NULL; michael@0: findSuffixLength=0; michael@0: michael@0: wild=strchr(pattern, '*'); michael@0: if(wild==NULL) { michael@0: // no wildcard michael@0: findPrefixLength=(int32_t)strlen(pattern); michael@0: } else { michael@0: // one wildcard michael@0: findPrefixLength=(int32_t)(wild-pattern); michael@0: findSuffix=wild+1; michael@0: findSuffixLength=(int32_t)strlen(findSuffix); michael@0: if(NULL!=strchr(findSuffix, '*')) { michael@0: // two or more wildcards michael@0: fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); michael@0: exit(U_PARSE_ERROR); michael@0: } michael@0: } michael@0: michael@0: if(findPrefixLength==0) { michael@0: findNextIndex=0; michael@0: } else { michael@0: findNextIndex=findItem(findPrefix, findPrefixLength); michael@0: } michael@0: } michael@0: michael@0: /* Yields the next matching item index for the pattern set up by findItems(); callers in this file loop until the result is negative. Returns -1 at once when findNextIndex is negative. Leaves the loop as soon as a name no longer starts with findPrefix, and skips names that do not end with findSuffix. NOTE(review): part of the loop header and the tail of this function appear to be missing from this copy. */ int32_t michael@0: Package::findNextItem() { michael@0: const char *name, *middle, *treeSep; michael@0: int32_t idx, nameLength, middleLength; michael@0: michael@0: if(findNextIndex<0) { michael@0: return -1; michael@0: } michael@0: michael@0: while(findNextIndex0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { michael@0: // left the range of names with this prefix michael@0: break; michael@0: } michael@0: middle=name+findPrefixLength; michael@0: middleLength=nameLength-findPrefixLength-findSuffixLength; michael@0: if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { michael@0: // suffix does not 
match michael@0: continue; michael@0: } michael@0: // prefix & suffix match michael@0: michael@0: if(matchMode&MATCH_NOSLASH) { michael@0: treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); michael@0: if(treeSep!=NULL && (treeSep-middle)name, pItem->data, pItem->length, FALSE, pItem->type); michael@0: } michael@0: } michael@0: michael@0: /* Removes the item at index idx; a negative idx is ignored. Data owned by the item (isDataOwned) is released with free(). NOTE(review): the code that moves the following items up, and the beginning of the function after this one, appear to be missing from this copy. */ void michael@0: Package::removeItem(int32_t idx) { michael@0: if(idx>=0) { michael@0: // remove the item michael@0: if(items[idx].isDataOwned) { michael@0: free(items[idx].data); michael@0: } michael@0: michael@0: // move the following items up michael@0: if((idx+1)=0) { michael@0: removeItem(idx); michael@0: } michael@0: } michael@0: michael@0: /* Walks the items of listPkg and acts on each one by name. NOTE(review): the loop condition and the call made per item are truncated in this copy - presumably each named item is removed; confirm against upstream. */ void michael@0: Package::removeItems(const Package &listPkg) { michael@0: const Item *pItem; michael@0: int32_t i; michael@0: michael@0: for(pItem=listPkg.items, i=0; iname); michael@0: } michael@0: } michael@0: michael@0: /* Writes the data of item idx to a file whose path is built by makeFullFilenameAndDirs() from filesPath and outName. Does nothing when idx is outside [0, itemCount). When outType is nonzero and differs from the item type, the item data is first converted in place with a UDataSwapper and the item type is set to outType, so the in-memory item is changed as well. Any swapper, file creation or write failure prints a message to stderr and exits the process. */ void michael@0: Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { michael@0: char filename[1024]; michael@0: UDataSwapper *ds; michael@0: FILE *file; michael@0: Item *pItem; michael@0: int32_t fileLength; michael@0: uint8_t itemCharset, outCharset; michael@0: UBool itemIsBigEndian, outIsBigEndian; michael@0: michael@0: if(idx<0 || itemCount<=idx) { michael@0: return; michael@0: } michael@0: pItem=items+idx; michael@0: michael@0: // swap the data to the outType michael@0: // outType==0: don't swap michael@0: if(outType!=0 && pItem->type!=outType) { michael@0: // open the swapper michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); michael@0: makeTypeProps(outType, outCharset, outIsBigEndian); michael@0: ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", michael@0: (long)idx, u_errorName(errorCode)); michael@0: 
exit(errorCode); michael@0: } michael@0: michael@0: ds->printError=printPackageError; michael@0: ds->printErrorContext=stderr; michael@0: michael@0: // swap the item from its platform properties to the desired ones michael@0: udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); michael@0: exit(errorCode); michael@0: } michael@0: udata_closeSwapper(ds); michael@0: pItem->type=outType; michael@0: } michael@0: michael@0: // create the file and write its contents michael@0: makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); michael@0: file=fopen(filename, "wb"); michael@0: if(file==NULL) { michael@0: fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); michael@0: michael@0: if(ferror(file) || fileLength!=pItem->length) { michael@0: fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: fclose(file); michael@0: } michael@0: michael@0: /* Convenience overload: extracts item idx under its own name. NOTE(review): items[idx].name is read here before the range check performed by the four-argument overload, so idx must already be a valid index. */ void michael@0: Package::extractItem(const char *filesPath, int32_t idx, char outType) { michael@0: extractItem(filesPath, items[idx].name, idx, outType); michael@0: } michael@0: michael@0: /* Extracts every item matching pattern, iterating with findItems() and findNextItem() until a negative index is returned. */ void michael@0: Package::extractItems(const char *filesPath, const char *pattern, char outType) { michael@0: int32_t idx; michael@0: michael@0: findItems(pattern); michael@0: while((idx=findNextItem())>=0) { michael@0: extractItem(filesPath, idx, outType); michael@0: } michael@0: } michael@0: michael@0: /* Variant driven by the items of listPkg. NOTE(review): the loop condition and the start of the per-item call are truncated in this copy; only its trailing arguments (an item name and outType) are visible. */ void michael@0: Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { michael@0: const Item *pItem; michael@0: int32_t i; michael@0: michael@0: for(pItem=listPkg.items, i=0; iname, outType); michael@0: } 
michael@0: } michael@0: michael@0: /* Returns the current value of itemCount. */ int32_t michael@0: Package::getItemCount() const { michael@0: return itemCount; michael@0: } michael@0: michael@0: /* Returns a pointer to item idx, or NULL when idx is outside [0, itemCount). */ const Item * michael@0: Package::getItem(int32_t idx) const { michael@0: if (0 <= idx && idx < itemCount) { michael@0: return &items[idx]; michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: /* Dependency callback: context is cast to a Package pointer; when targetName is not found in that package, its isMissingItems flag is set and the missing dependency of itemName is reported on stderr. */ void michael@0: Package::checkDependency(void *context, const char *itemName, const char *targetName) { michael@0: // check dependency: make sure the target item is in the package michael@0: Package *me=(Package *)context; michael@0: if(me->findItem(targetName)<0) { michael@0: me->isMissingItems=TRUE; michael@0: fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); michael@0: } michael@0: } michael@0: michael@0: /* Clears isMissingItems, runs enumDependencies() with checkDependency as the callback, and returns TRUE only if no missing item was flagged. */ UBool michael@0: Package::checkDependencies() { michael@0: isMissingItems=FALSE; michael@0: enumDependencies(this, checkDependency); michael@0: return (UBool)!isMissingItems; michael@0: } michael@0: michael@0: /* NOTE(review): the body of this function is truncated in this copy; the statements after the loop header below seem to be the tail of a different function that checks a string-store limit, updates inStringTop or outStringTop and returns a pointer. Restore from upstream. */ void michael@0: Package::enumDependencies(void *context, CheckDependency check) { michael@0: int32_t i; michael@0: michael@0: for(i=0; iSTRING_STORE_SIZE) { michael@0: fprintf(stderr, "icupkg: string storage overflow\n"); michael@0: exit(U_BUFFER_OVERFLOW_ERROR); michael@0: } michael@0: if(in) { michael@0: inStringTop=top; michael@0: } else { michael@0: outStringTop=top; michael@0: } michael@0: return p; michael@0: } michael@0: michael@0: /* Sorts the items array with uprv_sortArray() using compareItems; prints an error and exits if sorting fails. */ void michael@0: Package::sortItems() { michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); michael@0: exit(errorCode); michael@0: } michael@0: } michael@0: michael@0: /* Grows the items array so it can hold max entries; returns without change when max does not exceed itemMax. Allocates a new array with uprv_malloc(), copies the existing itemCount entries, frees the old array, and exits with U_MEMORY_ALLOCATION_ERROR when the allocation fails. */ void Package::setItemCapacity(int32_t max) michael@0: { michael@0: if(max<=itemMax) { michael@0: return; michael@0: } michael@0: Item 
*newItems = (Item*)uprv_malloc(max * sizeof(items[0])); michael@0: Item *oldItems = items; michael@0: if(newItems == NULL) { michael@0: fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n", michael@0: (unsigned long)max*sizeof(items[0]), max); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: if(items && itemCount>0) { michael@0: uprv_memcpy(newItems, items, itemCount*sizeof(items[0])); michael@0: } michael@0: itemMax = max; michael@0: items = newItems; michael@0: uprv_free(oldItems); michael@0: } michael@0: michael@0: /* Makes room for one more item: when itemCount+1 exceeds itemMax, raises the capacity to itemCount+kItemsChunk. */ void Package::ensureItemCapacity() michael@0: { michael@0: if((itemCount+1)>itemMax) { michael@0: setItemCapacity(itemCount+kItemsChunk); michael@0: } michael@0: } michael@0: michael@0: U_NAMESPACE_END