michael@0: /****************************************************************************** michael@0: * Copyright (C) 2008-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: */ michael@0: #include "unicode/utypes.h" michael@0: michael@0: #include michael@0: #include michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/putil.h" michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "filestrm.h" michael@0: #include "toolutil.h" michael@0: #include "unicode/uclean.h" michael@0: #include "unewdata.h" michael@0: #include "putilimp.h" michael@0: #include "pkg_gencmn.h" michael@0: michael@0: #define STRING_STORE_SIZE 200000 michael@0: michael@0: #define COMMON_DATA_NAME U_ICUDATA_NAME michael@0: #define DATA_TYPE "dat" michael@0: michael@0: /* ICU package data file format (.dat files) ------------------------------- *** michael@0: michael@0: Description of the data format after the usual ICU data file header michael@0: (UDataInfo etc.). michael@0: michael@0: Format version 1 michael@0: michael@0: A .dat package file contains a simple Table of Contents of item names, michael@0: followed by the items themselves: michael@0: michael@0: 1. ToC table michael@0: michael@0: uint32_t count; - number of items michael@0: UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item: michael@0: uint32_t nameOffset; - offset of the item name michael@0: uint32_t dataOffset; - offset of the item data michael@0: both are byte offsets from the beginning of the data michael@0: michael@0: 2. item name strings michael@0: michael@0: All item names are stored as char * strings in one block between the ToC table michael@0: and the data items. michael@0: michael@0: 3. data items michael@0: michael@0: The data items are stored following the item names block. michael@0: Each data item is 16-aligned. michael@0: The data items are stored in the sorted order of their names. michael@0: michael@0: Therefore, the top of the name strings block is the offset of the first item, michael@0: the length of the last item is the difference between its offset and michael@0: the .dat file length, and the length of all previous items is the difference michael@0: between its offset and the next one. michael@0: michael@0: ----------------------------------------------------------------------------- */ michael@0: michael@0: /* UDataInfo cf. udata.h */ michael@0: static const UDataInfo dataInfo={ michael@0: sizeof(UDataInfo), michael@0: 0, michael@0: michael@0: U_IS_BIG_ENDIAN, michael@0: U_CHARSET_FAMILY, michael@0: sizeof(UChar), michael@0: 0, michael@0: michael@0: {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ michael@0: {1, 0, 0, 0}, /* formatVersion */ michael@0: {3, 0, 0, 0} /* dataVersion */ michael@0: }; michael@0: michael@0: static uint32_t maxSize; michael@0: michael@0: static char stringStore[STRING_STORE_SIZE]; michael@0: static uint32_t stringTop=0, basenameTotal=0; michael@0: michael@0: typedef struct { michael@0: char *pathname, *basename; michael@0: uint32_t basenameLength, basenameOffset, fileSize, fileOffset; michael@0: } File; michael@0: michael@0: #define CHUNK_FILE_COUNT 256 michael@0: static File *files = NULL; michael@0: static uint32_t fileCount=0; michael@0: static uint32_t fileMax = 0; michael@0: michael@0: michael@0: static char *symPrefix = NULL; michael@0: michael@0: #define LINE_BUFFER_SIZE 512 michael@0: /* prototypes --------------------------------------------------------------- */ michael@0: michael@0: static void michael@0: addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose); michael@0: michael@0: static char * michael@0: allocString(uint32_t length); michael@0: michael@0: static int michael@0: compareFiles(const void *file1, const void *file2); michael@0: michael@0: static char * michael@0: pathToFullPath(const char *path, const char *source); michael@0: michael@0: /* map non-tree separator (such as '\') to tree separator ('/') inplace. */ michael@0: static void michael@0: fixDirToTreePath(char *s); michael@0: /* -------------------------------------------------------------------------- */ michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight, michael@0: const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) { michael@0: static char buffer[4096]; michael@0: char *line; michael@0: char *linePtr; michael@0: char *s = NULL; michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: uint32_t i, fileOffset, basenameOffset, length, nread; michael@0: FileStream *in, *file; michael@0: michael@0: line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE); michael@0: if (line == NULL) { michael@0: fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: michael@0: linePtr = line; michael@0: michael@0: maxSize = max_size; michael@0: michael@0: if (destDir == NULL) { michael@0: destDir = u_getDataDirectory(); michael@0: } michael@0: if (name == NULL) { michael@0: name = COMMON_DATA_NAME; michael@0: } michael@0: if (type == NULL) { michael@0: type = DATA_TYPE; michael@0: } michael@0: if (source == NULL) { michael@0: source = "."; michael@0: } michael@0: michael@0: if (dataFile == NULL) { michael@0: in = T_FileStream_stdin(); michael@0: } else { michael@0: in = T_FileStream_open(dataFile, "r"); michael@0: if(in == NULL) { michael@0: fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: } michael@0: michael@0: if (verbose) { michael@0: if(sourceTOC) { michael@0: printf("generating %s_%s.c (table of contents source file)\n", name, type); michael@0: } else { michael@0: printf("generating %s.%s (common data file with table of contents)\n", name, type); michael@0: } michael@0: } michael@0: michael@0: /* read the list of files and get their lengths */ michael@0: while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr), michael@0: LINE_BUFFER_SIZE))!=NULL) { michael@0: /* remove trailing newline characters and parse space separated items */ michael@0: if (s != NULL && *s != 0) { michael@0: line=s; michael@0: } else { michael@0: s=line; michael@0: } michael@0: while(*s!=0) { michael@0: if(*s==' ') { michael@0: *s=0; michael@0: ++s; michael@0: break; michael@0: } else if(*s=='\r' || *s=='\n') { michael@0: *s=0; michael@0: break; michael@0: } michael@0: ++s; michael@0: } michael@0: michael@0: /* check for comment */ michael@0: michael@0: if (*line == '#') { michael@0: continue; michael@0: } michael@0: michael@0: /* add the file */ michael@0: #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) michael@0: { michael@0: char *t; michael@0: while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { michael@0: *t = U_FILE_SEP_CHAR; michael@0: } michael@0: } michael@0: #endif michael@0: addFile(getLongPathname(line), name, source, sourceTOC, verbose); michael@0: } michael@0: michael@0: uprv_free(linePtr); michael@0: michael@0: if(in!=T_FileStream_stdin()) { michael@0: T_FileStream_close(in); michael@0: } michael@0: michael@0: if(fileCount==0) { michael@0: fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "" : dataFile); michael@0: return; michael@0: } michael@0: michael@0: /* sort the files by basename */ michael@0: qsort(files, fileCount, sizeof(File), compareFiles); michael@0: michael@0: if(!sourceTOC) { michael@0: UNewDataMemory *out; michael@0: michael@0: /* determine the offsets of all basenames and files in this common one */ michael@0: basenameOffset=4+8*fileCount; michael@0: fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; michael@0: for(i=0; ifilename && *(s-1)!=U_FILE_SEP_CHAR) { michael@0: *s++=U_FILE_SEP_CHAR; michael@0: } michael@0: uprv_strcpy(s, name); michael@0: if(*(type)!=0) { michael@0: s+=uprv_strlen(s); michael@0: *s++='_'; michael@0: uprv_strcpy(s, type); michael@0: } michael@0: s+=uprv_strlen(s); michael@0: uprv_strcpy(s, ".c"); michael@0: michael@0: /* open the output file */ michael@0: out=T_FileStream_open(filename, "w"); michael@0: if (gencmnFileName != NULL) { michael@0: uprv_strcpy(gencmnFileName, filename); michael@0: } michael@0: if(out==NULL) { michael@0: fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename); michael@0: exit(U_FILE_ACCESS_ERROR); michael@0: } michael@0: michael@0: /* write the source file */ michael@0: sprintf(buffer, michael@0: "/*\n" michael@0: " * ICU common data table of contents for %s.%s\n" michael@0: " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" michael@0: " */\n\n" michael@0: "#include \"unicode/utypes.h\"\n" michael@0: "#include \"unicode/udata.h\"\n" michael@0: "\n" michael@0: "/* external symbol declarations for data (%d files) */\n", michael@0: name, type, fileCount); michael@0: T_FileStream_writeLine(out, buffer); michael@0: michael@0: sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname); michael@0: T_FileStream_writeLine(out, buffer); michael@0: for(i=1; imaxSize) { michael@0: if (verbose) { michael@0: printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize); michael@0: } michael@0: return; michael@0: } michael@0: files[fileCount].fileSize=length; michael@0: } else { michael@0: char *t; michael@0: /* get and store the basename */ michael@0: /* need to include the package name */ michael@0: length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); michael@0: s=allocString(length); michael@0: uprv_strcpy(s, name); michael@0: uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); michael@0: uprv_strcat(s, filename); michael@0: fixDirToTreePath(s); michael@0: files[fileCount].basename=s; michael@0: /* turn the basename into an entry point name and store in the pathname field */ michael@0: t=files[fileCount].pathname=allocString(length); michael@0: while(--length>0) { michael@0: if(*s=='.' || *s=='-' || *s=='/') { michael@0: *t='_'; michael@0: } else { michael@0: *t=*s; michael@0: } michael@0: ++s; michael@0: ++t; michael@0: } michael@0: *t=0; michael@0: } michael@0: ++fileCount; michael@0: } michael@0: michael@0: static char * michael@0: allocString(uint32_t length) { michael@0: uint32_t top=stringTop+length; michael@0: char *p; michael@0: michael@0: if(top>STRING_STORE_SIZE) { michael@0: fprintf(stderr, "gencmn: out of memory\n"); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: p=stringStore+stringTop; michael@0: stringTop=top; michael@0: return p; michael@0: } michael@0: michael@0: static char * michael@0: pathToFullPath(const char *path, const char *source) { michael@0: int32_t length; michael@0: int32_t newLength; michael@0: char *fullPath; michael@0: int32_t n; michael@0: michael@0: length = (uint32_t)(uprv_strlen(path) + 1); michael@0: newLength = (length + 1 + (int32_t)uprv_strlen(source)); michael@0: fullPath = uprv_malloc(newLength); michael@0: if(source != NULL) { michael@0: uprv_strcpy(fullPath, source); michael@0: uprv_strcat(fullPath, U_FILE_SEP_STRING); michael@0: } else { michael@0: fullPath[0] = 0; michael@0: } michael@0: n = (int32_t)uprv_strlen(fullPath); michael@0: fullPath[n] = 0; /* Suppress compiler warning for unused variable n */ michael@0: /* when conditional code below is not compiled. */ michael@0: uprv_strcat(fullPath, path); michael@0: michael@0: #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) michael@0: #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) michael@0: /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ michael@0: for(;fullPath[n];n++) { michael@0: if(fullPath[n] == U_FILE_ALT_SEP_CHAR) { michael@0: fullPath[n] = U_FILE_SEP_CHAR; michael@0: } michael@0: } michael@0: #endif michael@0: #endif michael@0: #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) michael@0: /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ michael@0: for(;fullPath[n];n++) { michael@0: if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) { michael@0: fullPath[n] = U_FILE_SEP_CHAR; michael@0: } michael@0: } michael@0: #endif michael@0: return fullPath; michael@0: } michael@0: michael@0: static int michael@0: compareFiles(const void *file1, const void *file2) { michael@0: /* sort by basename */ michael@0: return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename); michael@0: } michael@0: michael@0: static void michael@0: fixDirToTreePath(char *s) michael@0: { michael@0: #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)) michael@0: char *t; michael@0: #endif michael@0: #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) michael@0: for(t=s;t=uprv_strchr(t,U_FILE_SEP_CHAR);) { michael@0: *t = U_TREE_ENTRY_SEP_CHAR; michael@0: } michael@0: #endif michael@0: #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) michael@0: for(t=s;t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR);) { michael@0: *t = U_TREE_ENTRY_SEP_CHAR; michael@0: } michael@0: #endif michael@0: }