intl/icu/source/tools/toolutil/pkg_gencmn.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /******************************************************************************
     2  *   Copyright (C) 2008-2012, International Business Machines
     3  *   Corporation and others.  All Rights Reserved.
     4  *******************************************************************************
     5  */
     6 #include "unicode/utypes.h"
     8 #include <stdio.h>
     9 #include <stdlib.h>
    10 #include "unicode/utypes.h"
    11 #include "unicode/putil.h"
    12 #include "cmemory.h"
    13 #include "cstring.h"
    14 #include "filestrm.h"
    15 #include "toolutil.h"
    16 #include "unicode/uclean.h"
    17 #include "unewdata.h"
    18 #include "putilimp.h"
    19 #include "pkg_gencmn.h"
    21 #define STRING_STORE_SIZE 200000
    23 #define COMMON_DATA_NAME U_ICUDATA_NAME
    24 #define DATA_TYPE "dat"
    26 /* ICU package data file format (.dat files) ------------------------------- ***
    28 Description of the data format after the usual ICU data file header
    29 (UDataInfo etc.).
    31 Format version 1
    33 A .dat package file contains a simple Table of Contents of item names,
    34 followed by the items themselves:
    36 1. ToC table
    38 uint32_t count; - number of items
    39 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
    40     uint32_t nameOffset; - offset of the item name
    41     uint32_t dataOffset; - offset of the item data
    42 both are byte offsets from the beginning of the data
    44 2. item name strings
    46 All item names are stored as char * strings in one block between the ToC table
    47 and the data items.
    49 3. data items
    51 The data items are stored following the item names block.
    52 Each data item is 16-aligned.
    53 The data items are stored in the sorted order of their names.
    55 Therefore, the top of the name strings block is the offset of the first item,
    56 the length of the last item is the difference between its offset and
    57 the .dat file length, and the length of all previous items is the difference
    58 between its offset and the next one.
    60 ----------------------------------------------------------------------------- */
    62 /* UDataInfo cf. udata.h */
    63 static const UDataInfo dataInfo={
    64     sizeof(UDataInfo),
    65     0,
    67     U_IS_BIG_ENDIAN,
    68     U_CHARSET_FAMILY,
    69     sizeof(UChar),
    70     0,
    72     {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    73     {1, 0, 0, 0},                 /* formatVersion */
    74     {3, 0, 0, 0}                  /* dataVersion */
    75 };
    77 static uint32_t maxSize;
    79 static char stringStore[STRING_STORE_SIZE];
    80 static uint32_t stringTop=0, basenameTotal=0;
    82 typedef struct {
    83     char *pathname, *basename;
    84     uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
    85 } File;
    87 #define CHUNK_FILE_COUNT 256
    88 static File *files = NULL;
    89 static uint32_t fileCount=0;
    90 static uint32_t fileMax = 0;
    93 static char *symPrefix = NULL;
    95 #define LINE_BUFFER_SIZE 512
    96 /* prototypes --------------------------------------------------------------- */
    98 static void
    99 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
   101 static char *
   102 allocString(uint32_t length);
   104 static int
   105 compareFiles(const void *file1, const void *file2);
   107 static char *
   108 pathToFullPath(const char *path, const char *source);
   110 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
   111 static void
   112 fixDirToTreePath(char *s);
   113 /* -------------------------------------------------------------------------- */
   115 U_CAPI void U_EXPORT2
   116 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
   117                      const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
   118     static char buffer[4096];
   119     char *line;
   120     char *linePtr;
   121     char *s = NULL;
   122     UErrorCode errorCode=U_ZERO_ERROR;
   123     uint32_t i, fileOffset, basenameOffset, length, nread;
   124     FileStream *in, *file;
   126     line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
   127     if (line == NULL) {
   128         fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
   129         exit(U_MEMORY_ALLOCATION_ERROR);
   130     }
   132     linePtr = line;
   134     maxSize = max_size;
   136     if (destDir == NULL) {
   137         destDir = u_getDataDirectory();
   138     }
   139     if (name == NULL) {
   140         name = COMMON_DATA_NAME;
   141     }
   142     if (type == NULL) {
   143         type = DATA_TYPE;
   144     }
   145     if (source == NULL) {
   146         source = ".";
   147     }
   149     if (dataFile == NULL) {
   150         in = T_FileStream_stdin();
   151     } else {
   152         in = T_FileStream_open(dataFile, "r");
   153         if(in == NULL) {
   154             fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
   155             exit(U_FILE_ACCESS_ERROR);
   156         }
   157     }
   159     if (verbose) {
   160         if(sourceTOC) {
   161             printf("generating %s_%s.c (table of contents source file)\n", name, type);
   162         } else {
   163             printf("generating %s.%s (common data file with table of contents)\n", name, type);
   164         }
   165     }
   167     /* read the list of files and get their lengths */
   168     while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
   169                                                              LINE_BUFFER_SIZE))!=NULL) {
   170         /* remove trailing newline characters and parse space separated items */
   171         if (s != NULL && *s != 0) {
   172             line=s;
   173         } else {
   174             s=line;
   175         }
   176         while(*s!=0) {
   177             if(*s==' ') {
   178                 *s=0;
   179                 ++s;
   180                 break;
   181             } else if(*s=='\r' || *s=='\n') {
   182                 *s=0;
   183                 break;
   184             }
   185             ++s;
   186         }
   188         /* check for comment */
   190         if (*line == '#') {
   191             continue;
   192         }
   194         /* add the file */
   195 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
   196         {
   197           char *t;
   198           while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
   199             *t = U_FILE_SEP_CHAR;
   200           }
   201         }
   202 #endif
   203         addFile(getLongPathname(line), name, source, sourceTOC, verbose);
   204     }
   206     uprv_free(linePtr);
   208     if(in!=T_FileStream_stdin()) {
   209         T_FileStream_close(in);
   210     }
   212     if(fileCount==0) {
   213         fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
   214         return;
   215     }
   217     /* sort the files by basename */
   218     qsort(files, fileCount, sizeof(File), compareFiles);
   220     if(!sourceTOC) {
   221         UNewDataMemory *out;
   223         /* determine the offsets of all basenames and files in this common one */
   224         basenameOffset=4+8*fileCount;
   225         fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
   226         for(i=0; i<fileCount; ++i) {
   227             files[i].fileOffset=fileOffset;
   228             fileOffset+=(files[i].fileSize+15)&~0xf;
   229             files[i].basenameOffset=basenameOffset;
   230             basenameOffset+=files[i].basenameLength;
   231         }
   233         /* create the output file */
   234         out=udata_create(destDir, type, name,
   235                          &dataInfo,
   236                          copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
   237                          &errorCode);
   238         if(U_FAILURE(errorCode)) {
   239             fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
   240                 destDir, name, type,
   241                 u_errorName(errorCode));
   242             exit(errorCode);
   243         }
   245         /* write the table of contents */
   246         udata_write32(out, fileCount);
   247         for(i=0; i<fileCount; ++i) {
   248             udata_write32(out, files[i].basenameOffset);
   249             udata_write32(out, files[i].fileOffset);
   250         }
   252         /* write the basenames */
   253         for(i=0; i<fileCount; ++i) {
   254             udata_writeString(out, files[i].basename, files[i].basenameLength);
   255         }
   256         length=4+8*fileCount+basenameTotal;
   258         /* copy the files */
   259         for(i=0; i<fileCount; ++i) {
   260             /* pad to 16-align the next file */
   261             length&=0xf;
   262             if(length!=0) {
   263                 udata_writePadding(out, 16-length);
   264             }
   266             if (verbose) {
   267                 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
   268             }
   270             /* copy the next file */
   271             file=T_FileStream_open(files[i].pathname, "rb");
   272             if(file==NULL) {
   273                 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
   274                 exit(U_FILE_ACCESS_ERROR);
   275             }
   276             for(nread = 0;;) {
   277                 length=T_FileStream_read(file, buffer, sizeof(buffer));
   278                 if(length <= 0) {
   279                     break;
   280                 }
   281                 nread += length;
   282                 udata_writeBlock(out, buffer, length);
   283             }
   284             T_FileStream_close(file);
   285             length=files[i].fileSize;
   287             if (nread != files[i].fileSize) {
   288               fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
   289                 exit(U_FILE_ACCESS_ERROR);
   290             }
   291         }
   293         /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
   294         length&=0xf;
   295         if(length!=0) {
   296             udata_writePadding(out, 16-length);
   297         }
   299         /* finish */
   300         udata_finish(out, &errorCode);
   301         if(U_FAILURE(errorCode)) {
   302             fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
   303             exit(errorCode);
   304         }
   305     } else {
   306         /* write a .c source file with the table of contents */
   307         char *filename;
   308         FileStream *out;
   310         /* create the output filename */
   311         filename=s=buffer;
   312         uprv_strcpy(filename, destDir);
   313         s=filename+uprv_strlen(filename);
   314         if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
   315             *s++=U_FILE_SEP_CHAR;
   316         }
   317         uprv_strcpy(s, name);
   318         if(*(type)!=0) {
   319             s+=uprv_strlen(s);
   320             *s++='_';
   321             uprv_strcpy(s, type);
   322         }
   323         s+=uprv_strlen(s);
   324         uprv_strcpy(s, ".c");
   326         /* open the output file */
   327         out=T_FileStream_open(filename, "w");
   328         if (gencmnFileName != NULL) {
   329             uprv_strcpy(gencmnFileName, filename);
   330         }
   331         if(out==NULL) {
   332             fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
   333             exit(U_FILE_ACCESS_ERROR);
   334         }
   336         /* write the source file */
   337         sprintf(buffer,
   338             "/*\n"
   339             " * ICU common data table of contents for %s.%s\n"
   340             " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
   341             " */\n\n"
   342             "#include \"unicode/utypes.h\"\n"
   343             "#include \"unicode/udata.h\"\n"
   344             "\n"
   345             "/* external symbol declarations for data (%d files) */\n",
   346                 name, type, fileCount);
   347         T_FileStream_writeLine(out, buffer);
   349         sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
   350         T_FileStream_writeLine(out, buffer);
   351         for(i=1; i<fileCount; ++i) {
   352             sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
   353             T_FileStream_writeLine(out, buffer);
   354         }
   355         T_FileStream_writeLine(out, ";\n\n");
   357         sprintf(
   358             buffer,
   359             "U_EXPORT struct {\n"
   360             "    uint16_t headerSize;\n"
   361             "    uint8_t magic1, magic2;\n"
   362             "    UDataInfo info;\n"
   363             "    char padding[%lu];\n"
   364             "    uint32_t count, reserved;\n"
   365             "    struct {\n"
   366             "        const char *name;\n"
   367             "        const void *data;\n"
   368             "    } toc[%lu];\n"
   369             "} U_EXPORT2 %s_dat = {\n"
   370             "    32, 0xda, 0x27, {\n"
   371             "        %lu, 0,\n"
   372             "        %u, %u, %u, 0,\n"
   373             "        {0x54, 0x6f, 0x43, 0x50},\n"
   374             "        {1, 0, 0, 0},\n"
   375             "        {0, 0, 0, 0}\n"
   376             "    },\n"
   377             "    \"\", %lu, 0, {\n",
   378             (unsigned long)32-4-sizeof(UDataInfo),
   379             (unsigned long)fileCount,
   380             entrypointName,
   381             (unsigned long)sizeof(UDataInfo),
   382             U_IS_BIG_ENDIAN,
   383             U_CHARSET_FAMILY,
   384             U_SIZEOF_UCHAR,
   385             (unsigned long)fileCount
   386         );
   387         T_FileStream_writeLine(out, buffer);
   389         sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
   390         T_FileStream_writeLine(out, buffer);
   391         for(i=1; i<fileCount; ++i) {
   392             sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
   393             T_FileStream_writeLine(out, buffer);
   394         }
   396         T_FileStream_writeLine(out, "\n    }\n};\n");
   397         T_FileStream_close(out);
   399         uprv_free(symPrefix);
   400     }
   401 }
   403 static void
   404 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
   405     char *s;
   406     uint32_t length;
   407     char *fullPath = NULL;
   409     if(fileCount==fileMax) {
   410       fileMax += CHUNK_FILE_COUNT;
   411       files = uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
   412       if(files==NULL) {
   413         fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax*sizeof(files[0])), fileCount);
   414         exit(U_MEMORY_ALLOCATION_ERROR);
   415       }
   416     }
   418     if(!sourceTOC) {
   419         FileStream *file;
   421         if(uprv_pathIsAbsolute(filename)) {
   422             fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
   423             exit(U_ILLEGAL_ARGUMENT_ERROR);
   424         }
   425         fullPath = pathToFullPath(filename, source);
   426         /* store the pathname */
   427         length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
   428         s=allocString(length);
   429         uprv_strcpy(s, name);
   430         uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
   431         uprv_strcat(s, filename);
   433         /* get the basename */
   434         fixDirToTreePath(s);
   435         files[fileCount].basename=s;
   436         files[fileCount].basenameLength=length;
   438         files[fileCount].pathname=fullPath;
   440         basenameTotal+=length;
   442         /* try to open the file */
   443         file=T_FileStream_open(fullPath, "rb");
   444         if(file==NULL) {
   445             fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
   446             exit(U_FILE_ACCESS_ERROR);
   447         }
   449         /* get the file length */
   450         length=T_FileStream_size(file);
   451         if(T_FileStream_error(file) || length<=20) {
   452             fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
   453             exit(U_FILE_ACCESS_ERROR);
   454         }
   456         T_FileStream_close(file);
   458         /* do not add files that are longer than maxSize */
   459         if(maxSize && length>maxSize) {
   460             if (verbose) {
   461                 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
   462             }
   463             return;
   464         }
   465         files[fileCount].fileSize=length;
   466     } else {
   467         char *t;
   468         /* get and store the basename */
   469         /* need to include the package name */
   470         length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
   471         s=allocString(length);
   472         uprv_strcpy(s, name);
   473         uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
   474         uprv_strcat(s, filename);
   475         fixDirToTreePath(s);
   476         files[fileCount].basename=s;
   477         /* turn the basename into an entry point name and store in the pathname field */
   478         t=files[fileCount].pathname=allocString(length);
   479         while(--length>0) {
   480             if(*s=='.' || *s=='-' || *s=='/') {
   481                 *t='_';
   482             } else {
   483                 *t=*s;
   484             }
   485             ++s;
   486             ++t;
   487         }
   488         *t=0;
   489     }
   490     ++fileCount;
   491 }
   493 static char *
   494 allocString(uint32_t length) {
   495     uint32_t top=stringTop+length;
   496     char *p;
   498     if(top>STRING_STORE_SIZE) {
   499         fprintf(stderr, "gencmn: out of memory\n");
   500         exit(U_MEMORY_ALLOCATION_ERROR);
   501     }
   502     p=stringStore+stringTop;
   503     stringTop=top;
   504     return p;
   505 }
   507 static char *
   508 pathToFullPath(const char *path, const char *source) {
   509     int32_t length;
   510     int32_t newLength;
   511     char *fullPath;
   512     int32_t n;
   514     length = (uint32_t)(uprv_strlen(path) + 1);
   515     newLength = (length + 1 + (int32_t)uprv_strlen(source));
   516     fullPath = uprv_malloc(newLength);
   517     if(source != NULL) {
   518         uprv_strcpy(fullPath, source);
   519         uprv_strcat(fullPath, U_FILE_SEP_STRING);
   520     } else {
   521         fullPath[0] = 0;
   522     }
   523     n = (int32_t)uprv_strlen(fullPath);
   524     fullPath[n] = 0;       /* Suppress compiler warning for unused variable n    */
   525                            /*  when conditional code below is not compiled.      */
   526     uprv_strcat(fullPath, path);
   528 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
   529 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
   530     /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
   531     for(;fullPath[n];n++) {
   532         if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
   533             fullPath[n] = U_FILE_SEP_CHAR;
   534         }
   535     }
   536 #endif
   537 #endif
   538 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
   539     /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
   540     for(;fullPath[n];n++) {
   541         if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
   542             fullPath[n] = U_FILE_SEP_CHAR;
   543         }
   544     }
   545 #endif
   546     return fullPath;
   547 }
   549 static int
   550 compareFiles(const void *file1, const void *file2) {
   551     /* sort by basename */
   552     return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
   553 }
   555 static void
   556 fixDirToTreePath(char *s)
   557 {
   558 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
   559     char *t;
   560 #endif
   561 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
   562     for(t=s;t=uprv_strchr(t,U_FILE_SEP_CHAR);) {
   563         *t = U_TREE_ENTRY_SEP_CHAR;
   564     }
   565 #endif
   566 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
   567     for(t=s;t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR);) {
   568         *t = U_TREE_ENTRY_SEP_CHAR;
   569     }
   570 #endif
   571 }

mercurial