diff -r 000000000000 -r 6474c204b198 intl/icu/source/tools/toolutil/pkg_gencmn.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/intl/icu/source/tools/toolutil/pkg_gencmn.c Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,571 @@ +/****************************************************************************** + * Copyright (C) 2008-2012, International Business Machines + * Corporation and others. All Rights Reserved. + ******************************************************************************* + */ +#include "unicode/utypes.h" + +#include +#include +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "cmemory.h" +#include "cstring.h" +#include "filestrm.h" +#include "toolutil.h" +#include "unicode/uclean.h" +#include "unewdata.h" +#include "putilimp.h" +#include "pkg_gencmn.h" + +#define STRING_STORE_SIZE 200000 + +#define COMMON_DATA_NAME U_ICUDATA_NAME +#define DATA_TYPE "dat" + +/* ICU package data file format (.dat files) ------------------------------- *** + +Description of the data format after the usual ICU data file header +(UDataInfo etc.). + +Format version 1 + +A .dat package file contains a simple Table of Contents of item names, +followed by the items themselves: + +1. ToC table + +uint32_t count; - number of items +UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item: + uint32_t nameOffset; - offset of the item name + uint32_t dataOffset; - offset of the item data +both are byte offsets from the beginning of the data + +2. item name strings + +All item names are stored as char * strings in one block between the ToC table +and the data items. + +3. data items + +The data items are stored following the item names block. +Each data item is 16-aligned. +The data items are stored in the sorted order of their names. + +Therefore, the top of the name strings block is the offset of the first item, +the length of the last item is the difference between its offset and +the .dat file length, and the length of all previous items is the difference +between its offset and the next one. + +----------------------------------------------------------------------------- */ + +/* UDataInfo cf. udata.h */ +static const UDataInfo dataInfo={ + sizeof(UDataInfo), + 0, + + U_IS_BIG_ENDIAN, + U_CHARSET_FAMILY, + sizeof(UChar), + 0, + + {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ + {1, 0, 0, 0}, /* formatVersion */ + {3, 0, 0, 0} /* dataVersion */ +}; + +static uint32_t maxSize; + +static char stringStore[STRING_STORE_SIZE]; +static uint32_t stringTop=0, basenameTotal=0; + +typedef struct { + char *pathname, *basename; + uint32_t basenameLength, basenameOffset, fileSize, fileOffset; +} File; + +#define CHUNK_FILE_COUNT 256 +static File *files = NULL; +static uint32_t fileCount=0; +static uint32_t fileMax = 0; + + +static char *symPrefix = NULL; + +#define LINE_BUFFER_SIZE 512 +/* prototypes --------------------------------------------------------------- */ + +static void +addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose); + +static char * +allocString(uint32_t length); + +static int +compareFiles(const void *file1, const void *file2); + +static char * +pathToFullPath(const char *path, const char *source); + +/* map non-tree separator (such as '\') to tree separator ('/') inplace. */ +static void +fixDirToTreePath(char *s); +/* -------------------------------------------------------------------------- */ + +U_CAPI void U_EXPORT2 +createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight, + const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) { + static char buffer[4096]; + char *line; + char *linePtr; + char *s = NULL; + UErrorCode errorCode=U_ZERO_ERROR; + uint32_t i, fileOffset, basenameOffset, length, nread; + FileStream *in, *file; + + line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE); + if (line == NULL) { + fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE); + exit(U_MEMORY_ALLOCATION_ERROR); + } + + linePtr = line; + + maxSize = max_size; + + if (destDir == NULL) { + destDir = u_getDataDirectory(); + } + if (name == NULL) { + name = COMMON_DATA_NAME; + } + if (type == NULL) { + type = DATA_TYPE; + } + if (source == NULL) { + source = "."; + } + + if (dataFile == NULL) { + in = T_FileStream_stdin(); + } else { + in = T_FileStream_open(dataFile, "r"); + if(in == NULL) { + fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile); + exit(U_FILE_ACCESS_ERROR); + } + } + + if (verbose) { + if(sourceTOC) { + printf("generating %s_%s.c (table of contents source file)\n", name, type); + } else { + printf("generating %s.%s (common data file with table of contents)\n", name, type); + } + } + + /* read the list of files and get their lengths */ + while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr), + LINE_BUFFER_SIZE))!=NULL) { + /* remove trailing newline characters and parse space separated items */ + if (s != NULL && *s != 0) { + line=s; + } else { + s=line; + } + while(*s!=0) { + if(*s==' ') { + *s=0; + ++s; + break; + } else if(*s=='\r' || *s=='\n') { + *s=0; + break; + } + ++s; + } + + /* check for comment */ + + if (*line == '#') { + continue; + } + + /* add the file */ +#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) + { + char *t; + while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { + *t = U_FILE_SEP_CHAR; + } + } +#endif + addFile(getLongPathname(line), name, source, sourceTOC, verbose); + } + + uprv_free(linePtr); + + if(in!=T_FileStream_stdin()) { + T_FileStream_close(in); + } + + if(fileCount==0) { + fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "" : dataFile); + return; + } + + /* sort the files by basename */ + qsort(files, fileCount, sizeof(File), compareFiles); + + if(!sourceTOC) { + UNewDataMemory *out; + + /* determine the offsets of all basenames and files in this common one */ + basenameOffset=4+8*fileCount; + fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; + for(i=0; ifilename && *(s-1)!=U_FILE_SEP_CHAR) { + *s++=U_FILE_SEP_CHAR; + } + uprv_strcpy(s, name); + if(*(type)!=0) { + s+=uprv_strlen(s); + *s++='_'; + uprv_strcpy(s, type); + } + s+=uprv_strlen(s); + uprv_strcpy(s, ".c"); + + /* open the output file */ + out=T_FileStream_open(filename, "w"); + if (gencmnFileName != NULL) { + uprv_strcpy(gencmnFileName, filename); + } + if(out==NULL) { + fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename); + exit(U_FILE_ACCESS_ERROR); + } + + /* write the source file */ + sprintf(buffer, + "/*\n" + " * ICU common data table of contents for %s.%s\n" + " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" + " */\n\n" + "#include \"unicode/utypes.h\"\n" + "#include \"unicode/udata.h\"\n" + "\n" + "/* external symbol declarations for data (%d files) */\n", + name, type, fileCount); + T_FileStream_writeLine(out, buffer); + + sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname); + T_FileStream_writeLine(out, buffer); + for(i=1; imaxSize) { + if (verbose) { + printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize); + } + return; + } + files[fileCount].fileSize=length; + } else { + char *t; + /* get and store the basename */ + /* need to include the package name */ + length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); + s=allocString(length); + uprv_strcpy(s, name); + uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); + uprv_strcat(s, filename); + fixDirToTreePath(s); + files[fileCount].basename=s; + /* turn the basename into an entry point name and store in the pathname field */ + t=files[fileCount].pathname=allocString(length); + while(--length>0) { + if(*s=='.' || *s=='-' || *s=='/') { + *t='_'; + } else { + *t=*s; + } + ++s; + ++t; + } + *t=0; + } + ++fileCount; +} + +static char * +allocString(uint32_t length) { + uint32_t top=stringTop+length; + char *p; + + if(top>STRING_STORE_SIZE) { + fprintf(stderr, "gencmn: out of memory\n"); + exit(U_MEMORY_ALLOCATION_ERROR); + } + p=stringStore+stringTop; + stringTop=top; + return p; +} + +static char * +pathToFullPath(const char *path, const char *source) { + int32_t length; + int32_t newLength; + char *fullPath; + int32_t n; + + length = (uint32_t)(uprv_strlen(path) + 1); + newLength = (length + 1 + (int32_t)uprv_strlen(source)); + fullPath = uprv_malloc(newLength); + if(source != NULL) { + uprv_strcpy(fullPath, source); + uprv_strcat(fullPath, U_FILE_SEP_STRING); + } else { + fullPath[0] = 0; + } + n = (int32_t)uprv_strlen(fullPath); + fullPath[n] = 0; /* Suppress compiler warning for unused variable n */ + /* when conditional code below is not compiled. */ + uprv_strcat(fullPath, path); + +#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) +#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) + /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ + for(;fullPath[n];n++) { + if(fullPath[n] == U_FILE_ALT_SEP_CHAR) { + fullPath[n] = U_FILE_SEP_CHAR; + } + } +#endif +#endif +#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) + /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ + for(;fullPath[n];n++) { + if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) { + fullPath[n] = U_FILE_SEP_CHAR; + } + } +#endif + return fullPath; +} + +static int +compareFiles(const void *file1, const void *file2) { + /* sort by basename */ + return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename); +} + +static void +fixDirToTreePath(char *s) +{ +#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)) + char *t; +#endif +#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) + for(t=s;t=uprv_strchr(t,U_FILE_SEP_CHAR);) { + *t = U_TREE_ENTRY_SEP_CHAR; + } +#endif +#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) + for(t=s;t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR);) { + *t = U_TREE_ENTRY_SEP_CHAR; + } +#endif +}