intl/icu/source/tools/makeconv/makeconv.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2  ********************************************************************************
     3  *
     4  *   Copyright (C) 1998-2012, International Business Machines
     5  *   Corporation and others.  All Rights Reserved.
     6  *
     7  ********************************************************************************
     8  *
     9  *
    10  *  makeconv.c:
    11  *  tool creating a binary (compressed) representation of the conversion mapping
    12  *  table (IBM NLTC ucmap format).
    13  *
    14  *  05/04/2000    helena     Added fallback mapping into the picture...
    15  *  06/29/2000  helena      Major rewrite of the callback APIs.
    16  */
    18 #include <stdio.h>
    19 #include "unicode/putil.h"
    20 #include "unicode/ucnv_err.h"
    21 #include "ucnv_bld.h"
    22 #include "ucnv_imp.h"
    23 #include "ucnv_cnv.h"
    24 #include "cstring.h"
    25 #include "cmemory.h"
    26 #include "uinvchar.h"
    27 #include "filestrm.h"
    28 #include "toolutil.h"
    29 #include "uoptions.h"
    30 #include "unicode/udata.h"
    31 #include "unewdata.h"
    32 #include "uparse.h"
    33 #include "ucm.h"
    34 #include "makeconv.h"
    35 #include "genmbcs.h"
    /*
     * Number of elements of a true array (not a pointer), as an int32_t.
     * The whole expansion is parenthesized so that the cast cannot bind to
     * neighbouring operators at the point of use.
     */
    #define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

    /* Set to 1 to trace the files being processed on stdout. */
    #define DEBUG 0
    41 typedef struct ConvData {
    42     UCMFile *ucm;
    43     NewConverter *cnvData, *extData;
    44     UConverterSharedData sharedData;
    45     UConverterStaticData staticData;
    46 } ConvData;
    48 static void
    49 initConvData(ConvData *data) {
    50     uprv_memset(data, 0, sizeof(ConvData));
    51     data->sharedData.structSize=sizeof(UConverterSharedData);
    52     data->staticData.structSize=sizeof(UConverterStaticData);
    53     data->sharedData.staticData=&data->staticData;
    54 }
    56 static void
    57 cleanupConvData(ConvData *data) {
    58     if(data!=NULL) {
    59         if(data->cnvData!=NULL) {
    60             data->cnvData->close(data->cnvData);
    61             data->cnvData=NULL;
    62         }
    63         if(data->extData!=NULL) {
    64             data->extData->close(data->extData);
    65             data->extData=NULL;
    66         }
    67         ucm_close(data->ucm);
    68         data->ucm=NULL;
    69     }
    70 }
    72 /*
    73  * from ucnvstat.c - static prototypes of data-based converters
    74  */
    75 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
    77 /*
    78  * Global - verbosity
    79  */
    80 UBool VERBOSE = FALSE;
    81 UBool SMALL = FALSE;
    82 UBool IGNORE_SISO_CHECK = FALSE;
    84 static void
    85 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
    87 /*
    88  * Set up the UNewData and write the converter..
    89  */
    90 static void
    91 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
    93 UBool haveCopyright=TRUE;
    95 static UDataInfo dataInfo={
    96     sizeof(UDataInfo),
    97     0,
    99     U_IS_BIG_ENDIAN,
   100     U_CHARSET_FAMILY,
   101     sizeof(UChar),
   102     0,
   104     {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
   105     {6, 2, 0, 0},                 /* formatVersion */
   106     {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
   107 };
   109 static void
   110 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
   111 {
   112     UNewDataMemory *mem = NULL;
   113     uint32_t sz2;
   114     uint32_t size = 0;
   115     int32_t tableType;
   117     if(U_FAILURE(*status))
   118       {
   119         return;
   120       }
   122     tableType=TABLE_NONE;
   123     if(data->cnvData!=NULL) {
   124         tableType|=TABLE_BASE;
   125     }
   126     if(data->extData!=NULL) {
   127         tableType|=TABLE_EXT;
   128     }
   130     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
   132     if(U_FAILURE(*status))
   133       {
   134         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
   135                 cnvName,
   136                 "cnv",
   137                 u_errorName(*status));
   138         return;
   139       }
   141     if(VERBOSE)
   142       {
   143         printf("- Opened udata %s.%s\n", cnvName, "cnv");
   144       }
   147     /* all read only, clean, platform independent data.  Mmmm. :)  */
   148     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
   149     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
   150     /* Now, write the table */
   151     if(tableType&TABLE_BASE) {
   152         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
   153     }
   154     if(tableType&TABLE_EXT) {
   155         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
   156     }
   158     sz2 = udata_finish(mem, status);
   159     if(size != sz2)
   160     {
   161         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
   162         *status=U_INTERNAL_PROGRAM_ERROR;
   163     }
   164     if(VERBOSE)
   165     {
   166       printf("- Wrote %u bytes to the udata.\n", (int)sz2);
   167     }
   168 }
   170 enum {
   171     OPT_HELP_H,
   172     OPT_HELP_QUESTION_MARK,
   173     OPT_COPYRIGHT,
   174     OPT_VERSION,
   175     OPT_DESTDIR,
   176     OPT_VERBOSE,
   177     OPT_SMALL,
   178     OPT_IGNORE_SISO_CHECK,
   179     OPT_COUNT
   180 };
   182 static UOption options[]={
   183     UOPTION_HELP_H,
   184     UOPTION_HELP_QUESTION_MARK,
   185     UOPTION_COPYRIGHT,
   186     UOPTION_VERSION,
   187     UOPTION_DESTDIR,
   188     UOPTION_VERBOSE,
   189     { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
   190     { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
   191 };
   193 int main(int argc, char* argv[])
   194 {
   195     ConvData data;
   196     UErrorCode err = U_ZERO_ERROR, localError;
   197     char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
   198     const char* destdir, *arg;
   199     size_t destdirlen;
   200     char* dot = NULL, *outBasename;
   201     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
   202     char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
   203     UVersionInfo icuVersion;
   204     UBool printFilename;
   206     err = U_ZERO_ERROR;
   208     U_MAIN_INIT_ARGS(argc, argv);
   210     /* Set up the ICU version number */
   211     u_getVersion(icuVersion);
   212     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
   214     /* preset then read command line options */
   215     options[OPT_DESTDIR].value=u_getDataDirectory();
   216     argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
   218     /* error handling, printing usage message */
   219     if(argc<0) {
   220         fprintf(stderr,
   221             "error in command line argument \"%s\"\n",
   222             argv[-argc]);
   223     } else if(argc<2) {
   224         argc=-1;
   225     }
   226     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
   227         FILE *stdfile=argc<0 ? stderr : stdout;
   228         fprintf(stdfile,
   229             "usage: %s [-options] files...\n"
   230             "\tread .ucm codepage mapping files and write .cnv files\n"
   231             "options:\n"
   232             "\t-h or -? or --help  this usage text\n"
   233             "\t-V or --version     show a version message\n"
   234             "\t-c or --copyright   include a copyright notice\n"
   235             "\t-d or --destdir     destination directory, followed by the path\n"
   236             "\t-v or --verbose     Turn on verbose output\n",
   237             argv[0]);
   238         fprintf(stdfile,
   239             "\t      --small       Generate smaller .cnv files. They will be\n"
   240             "\t                    significantly smaller but may not be compatible with\n"
   241             "\t                    older versions of ICU and will require heap memory\n"
   242             "\t                    allocation when loaded.\n"
   243             "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
   244         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
   245     }
   247     if(options[OPT_VERSION].doesOccur) {
   248         printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
   249                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
   250         printf("%s\n", U_COPYRIGHT_STRING);
   251         exit(0);
   252     }
   254     /* get the options values */
   255     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
   256     destdir = options[OPT_DESTDIR].value;
   257     VERBOSE = options[OPT_VERBOSE].doesOccur;
   258     SMALL = options[OPT_SMALL].doesOccur;
   260     if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
   261         IGNORE_SISO_CHECK = TRUE;
   262     }
   264     if (destdir != NULL && *destdir != 0) {
   265         uprv_strcpy(outFileName, destdir);
   266         destdirlen = uprv_strlen(destdir);
   267         outBasename = outFileName + destdirlen;
   268         if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
   269             *outBasename++ = U_FILE_SEP_CHAR;
   270             ++destdirlen;
   271         }
   272     } else {
   273         destdirlen = 0;
   274         outBasename = outFileName;
   275     }
   277 #if DEBUG
   278     {
   279       int i;
   280       printf("makeconv: processing %d files...\n", argc - 1);
   281       for(i=1; i<argc; ++i) {
   282         printf("%s ", argv[i]);
   283       }
   284       printf("\n");
   285       fflush(stdout);
   286     }
   287 #endif
   289     err = U_ZERO_ERROR;
   290     printFilename = (UBool) (argc > 2 || VERBOSE);
   291     for (++argv; --argc; ++argv)
   292     {
   293         arg = getLongPathname(*argv);
   295         /* Check for potential buffer overflow */
   296         if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
   297         {
   298             fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
   299             return U_BUFFER_OVERFLOW_ERROR;
   300         }
   302         /*produces the right destination path for display*/
   303         if (destdirlen != 0)
   304         {
   305             const char *basename;
   307             /* find the last file sepator */
   308             basename = findBasename(arg);
   309             uprv_strcpy(outBasename, basename);
   310         }
   311         else
   312         {
   313             uprv_strcpy(outFileName, arg);
   314         }
   316         /*removes the extension if any is found*/
   317         dot = uprv_strrchr(outBasename, '.');
   318         if (dot)
   319         {
   320             *dot = '\0';
   321         }
   323         /* the basename without extension is the converter name */
   324         uprv_strcpy(cnvName, outBasename);
   326         /*Adds the target extension*/
   327         uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
   329 #if DEBUG
   330         printf("makeconv: processing %s  ...\n", arg);
   331         fflush(stdout);
   332 #endif
   333         localError = U_ZERO_ERROR;
   334         initConvData(&data);
   335         createConverter(&data, arg, &localError);
   337         if (U_FAILURE(localError))
   338         {
   339             /* if an error is found, print out an error msg and keep going */
   340             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
   341                 u_errorName(localError));
   342             if(U_SUCCESS(err)) {
   343                 err = localError;
   344             }
   345         }
   346         else
   347         {
   348             /* Insure the static data name matches the  file name */
   349             /* Changed to ignore directory and only compare base name
   350              LDH 1/2/08*/
   351             char *p;
   352             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
   354             if(p == NULL)            /* OK, try alternate */
   355             {
   356                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
   357                 if(p == NULL)
   358                 {
   359                     p=cnvName; /* If no separators, no problem */
   360                 }
   361             }
   362             else
   363             {
   364                 p++;   /* If found separtor, don't include it in compare */
   365             }
   366             if(uprv_stricmp(p,data.staticData.name))
   367             {
   368                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
   369                     cnvName,  CONVERTER_FILE_EXTENSION,
   370                     data.staticData.name);
   371             }
   373             uprv_strcpy((char*)data.staticData.name, cnvName);
   375             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
   376                 fprintf(stderr,
   377                     "Error: A converter name must contain only invariant characters.\n"
   378                     "%s is not a valid converter name.\n",
   379                     data.staticData.name);
   380                 if(U_SUCCESS(err)) {
   381                     err = U_INVALID_TABLE_FORMAT;
   382                 }
   383             }
   385             uprv_strcpy(cnvNameWithPkg, cnvName);
   387             localError = U_ZERO_ERROR;
   388             writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
   390             if(U_FAILURE(localError))
   391             {
   392                 /* if an error is found, print out an error msg and keep going*/
   393                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
   394                     u_errorName(localError));
   395                 if(U_SUCCESS(err)) {
   396                     err = localError;
   397                 }
   398             }
   399             else if (printFilename)
   400             {
   401                 puts(outBasename);
   402             }
   403         }
   404         fflush(stdout);
   405         fflush(stderr);
   407         cleanupConvData(&data);
   408     }
   410     return err;
   411 }
   413 static void
   414 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
   415     if( (name[0]=='i' || name[0]=='I') &&
   416         (name[1]=='b' || name[1]=='B') &&
   417         (name[2]=='m' || name[2]=='M')
   418     ) {
   419         name+=3;
   420         if(*name=='-') {
   421             ++name;
   422         }
   423         *pPlatform=UCNV_IBM;
   424         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
   425     } else {
   426         *pPlatform=UCNV_UNKNOWN;
   427         *pCCSID=0;
   428     }
   429 }
   431 static void
   432 readHeader(ConvData *data,
   433            FileStream* convFile,
   434            const char* converterName,
   435            UErrorCode *pErrorCode) {
   436     char line[1024];
   437     char *s, *key, *value;
   438     const UConverterStaticData *prototype;
   439     UConverterStaticData *staticData;
   441     if(U_FAILURE(*pErrorCode)) {
   442         return;
   443     }
   445     staticData=&data->staticData;
   446     staticData->platform=UCNV_IBM;
   447     staticData->subCharLen=0;
   449     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
   450         /* basic parsing and handling of state-related items */
   451         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
   452             continue;
   453         }
   455         /* stop at the beginning of the mapping section */
   456         if(uprv_strcmp(line, "CHARMAP")==0) {
   457             break;
   458         }
   460         /* collect the information from the header field, ignore unknown keys */
   461         if(uprv_strcmp(key, "code_set_name")==0) {
   462             if(*value!=0) {
   463                 uprv_strcpy((char *)staticData->name, value);
   464                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
   465             }
   466         } else if(uprv_strcmp(key, "subchar")==0) {
   467             uint8_t bytes[UCNV_EXT_MAX_BYTES];
   468             int8_t length;
   470             s=value;
   471             length=ucm_parseBytes(bytes, line, (const char **)&s);
   472             if(1<=length && length<=4 && *s==0) {
   473                 staticData->subCharLen=length;
   474                 uprv_memcpy(staticData->subChar, bytes, length);
   475             } else {
   476                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
   477                 *pErrorCode=U_INVALID_TABLE_FORMAT;
   478                 return;
   479             }
   480         } else if(uprv_strcmp(key, "subchar1")==0) {
   481             uint8_t bytes[UCNV_EXT_MAX_BYTES];
   483             s=value;
   484             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
   485                 staticData->subChar1=bytes[0];
   486             } else {
   487                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
   488                 *pErrorCode=U_INVALID_TABLE_FORMAT;
   489                 return;
   490             }
   491         }
   492     }
   494     /* copy values from the UCMFile to the static data */
   495     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
   496     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
   497     staticData->conversionType=data->ucm->states.conversionType;
   499     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
   500         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
   501         *pErrorCode=U_INVALID_TABLE_FORMAT;
   502         return;
   503     }
   505     /*
   506      * Now that we know the type, copy any 'default' values from the table.
   507      * We need not check the type any further because the parser only
   508      * recognizes what we have prototypes for.
   509      *
   510      * For delta (extension-only) tables, copy values from the base file
   511      * instead, see createConverter().
   512      */
   513     if(data->ucm->baseName[0]==0) {
   514         prototype=ucnv_converterStaticData[staticData->conversionType];
   515         if(prototype!=NULL) {
   516             if(staticData->name[0]==0) {
   517                 uprv_strcpy((char *)staticData->name, prototype->name);
   518             }
   520             if(staticData->codepage==0) {
   521                 staticData->codepage=prototype->codepage;
   522             }
   524             if(staticData->platform==0) {
   525                 staticData->platform=prototype->platform;
   526             }
   528             if(staticData->minBytesPerChar==0) {
   529                 staticData->minBytesPerChar=prototype->minBytesPerChar;
   530             }
   532             if(staticData->maxBytesPerChar==0) {
   533                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
   534             }
   536             if(staticData->subCharLen==0) {
   537                 staticData->subCharLen=prototype->subCharLen;
   538                 if(prototype->subCharLen>0) {
   539                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
   540                 }
   541             }
   542         }
   543     }
   545     if(data->ucm->states.outputType<0) {
   546         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
   547     }
   549     if( staticData->subChar1!=0 &&
   550             (staticData->minBytesPerChar>1 ||
   551                 (staticData->conversionType!=UCNV_MBCS &&
   552                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
   553     ) {
   554         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
   555         *pErrorCode=U_INVALID_TABLE_FORMAT;
   556     }
   557 }
   559 /* return TRUE if a base table was read, FALSE for an extension table */
   560 static UBool
   561 readFile(ConvData *data, const char* converterName,
   562          UErrorCode *pErrorCode) {
   563     char line[1024];
   564     char *end;
   565     FileStream *convFile;
   567     UCMStates *baseStates;
   568     UBool dataIsBase;
   570     if(U_FAILURE(*pErrorCode)) {
   571         return FALSE;
   572     }
   574     data->ucm=ucm_open();
   576     convFile=T_FileStream_open(converterName, "r");
   577     if(convFile==NULL) {
   578         *pErrorCode=U_FILE_ACCESS_ERROR;
   579         return FALSE;
   580     }
   582     readHeader(data, convFile, converterName, pErrorCode);
   583     if(U_FAILURE(*pErrorCode)) {
   584         return FALSE;
   585     }
   587     if(data->ucm->baseName[0]==0) {
   588         dataIsBase=TRUE;
   589         baseStates=&data->ucm->states;
   590         ucm_processStates(baseStates, IGNORE_SISO_CHECK);
   591     } else {
   592         dataIsBase=FALSE;
   593         baseStates=NULL;
   594     }
   596     /* read the base table */
   597     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
   598     if(U_FAILURE(*pErrorCode)) {
   599         return FALSE;
   600     }
   602     /* read an extension table if there is one */
   603     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
   604         end=uprv_strchr(line, 0);
   605         while(line<end &&
   606               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
   607             --end;
   608         }
   609         *end=0;
   611         if(line[0]=='#' || u_skipWhitespace(line)==end) {
   612             continue; /* ignore empty and comment lines */
   613         }
   615         if(0==uprv_strcmp(line, "CHARMAP")) {
   616             /* read the extension table */
   617             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
   618         } else {
   619             fprintf(stderr, "unexpected text after the base mapping table\n");
   620         }
   621         break;
   622     }
   624     T_FileStream_close(convFile);
   626     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
   627         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
   628         *pErrorCode=U_INVALID_TABLE_FORMAT;
   629     }
   631     return dataIsBase;
   632 }
   634 static void
   635 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
   636     ConvData baseData;
   637     UBool dataIsBase;
   639     UConverterStaticData *staticData;
   640     UCMStates *states, *baseStates;
   642     if(U_FAILURE(*pErrorCode)) {
   643         return;
   644     }
   646     initConvData(data);
   648     dataIsBase=readFile(data, converterName, pErrorCode);
   649     if(U_FAILURE(*pErrorCode)) {
   650         return;
   651     }
   653     staticData=&data->staticData;
   654     states=&data->ucm->states;
   656     if(dataIsBase) {
   657         /*
   658          * Build a normal .cnv file with a base table
   659          * and an optional extension table.
   660          */
   661         data->cnvData=MBCSOpen(data->ucm);
   662         if(data->cnvData==NULL) {
   663             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   665         } else if(!data->cnvData->isValid(data->cnvData,
   666                             staticData->subChar, staticData->subCharLen)
   667         ) {
   668             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
   669             *pErrorCode=U_INVALID_TABLE_FORMAT;
   671         } else if(staticData->subChar1!=0 &&
   672                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
   673         ) {
   674             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
   675             *pErrorCode=U_INVALID_TABLE_FORMAT;
   677         } else if(
   678             data->ucm->ext->mappingsLength>0 &&
   679             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
   680         ) {
   681             *pErrorCode=U_INVALID_TABLE_FORMAT;
   682         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
   683             /* sort the table so that it can be turned into UTF-8-friendly data */
   684             ucm_sortTable(data->ucm->base);
   685         }
   687         if(U_SUCCESS(*pErrorCode)) {
   688             if(
   689                 /* add the base table after ucm_checkBaseExt()! */
   690                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
   691             ) {
   692                 *pErrorCode=U_INVALID_TABLE_FORMAT;
   693             } else {
   694                 /*
   695                  * addTable() may have requested moving more mappings to the extension table
   696                  * if they fit into the base toUnicode table but not into the
   697                  * base fromUnicode table.
   698                  * (Especially for UTF-8-friendly fromUnicode tables.)
   699                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
   700                  * to be excluded from the extension toUnicode data.
   701                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
   702                  * the base fromUnicode table.
   703                  */
   704                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
   705                 ucm_sortTable(data->ucm->ext);
   706                 if(data->ucm->ext->mappingsLength>0) {
   707                     /* prepare the extension table, if there is one */
   708                     data->extData=CnvExtOpen(data->ucm);
   709                     if(data->extData==NULL) {
   710                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   711                     } else if(
   712                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
   713                     ) {
   714                         *pErrorCode=U_INVALID_TABLE_FORMAT;
   715                     }
   716                 }
   717             }
   718         }
   719     } else {
   720         /* Build an extension-only .cnv file. */
   721         char baseFilename[500];
   722         char *basename;
   724         initConvData(&baseData);
   726         /* assemble a path/filename for data->ucm->baseName */
   727         uprv_strcpy(baseFilename, converterName);
   728         basename=(char *)findBasename(baseFilename);
   729         uprv_strcpy(basename, data->ucm->baseName);
   730         uprv_strcat(basename, ".ucm");
   732         /* read the base table */
   733         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
   734         if(U_FAILURE(*pErrorCode)) {
   735             return;
   736         } else if(!dataIsBase) {
   737             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
   738             *pErrorCode=U_INVALID_TABLE_FORMAT;
   739         } else {
   740             /* prepare the extension table */
   741             data->extData=CnvExtOpen(data->ucm);
   742             if(data->extData==NULL) {
   743                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   744             } else {
   745                 /* fill in gaps in extension file header fields */
   746                 UCMapping *m, *mLimit;
   747                 uint8_t fallbackFlags;
   749                 baseStates=&baseData.ucm->states;
   750                 if(states->conversionType==UCNV_DBCS) {
   751                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
   752                 } else if(states->minCharLength==0) {
   753                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
   754                 }
   755                 if(states->maxCharLength<states->minCharLength) {
   756                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
   757                 }
   759                 if(staticData->subCharLen==0) {
   760                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
   761                     staticData->subCharLen=baseData.staticData.subCharLen;
   762                 }
   763                 /*
   764                  * do not copy subChar1 -
   765                  * only use what is explicitly specified
   766                  * because it cannot be unset in the extension file header
   767                  */
   769                 /* get the fallback flags */
   770                 fallbackFlags=0;
   771                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
   772                     m<mLimit && fallbackFlags!=3;
   773                     ++m
   774                 ) {
   775                     if(m->f==1) {
   776                         fallbackFlags|=1;
   777                     } else if(m->f==3) {
   778                         fallbackFlags|=2;
   779                     }
   780                 }
   782                 if(fallbackFlags&1) {
   783                     staticData->hasFromUnicodeFallback=TRUE;
   784                 }
   785                 if(fallbackFlags&2) {
   786                     staticData->hasToUnicodeFallback=TRUE;
   787                 }
   789                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
   790                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
   791                     *pErrorCode=U_INVALID_TABLE_FORMAT;
   793                 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
   794                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
   795                     *pErrorCode=U_INVALID_TABLE_FORMAT;
   797                 } else if(
   798                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
   799                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
   800                 ) {
   801                     *pErrorCode=U_INVALID_TABLE_FORMAT;
   802                 } else {
   803                     if(states->maxCharLength>1) {
   804                         /*
   805                          * When building a normal .cnv file with a base table
   806                          * for an MBCS (not SBCS) table with explicit precision flags,
   807                          * the MBCSAddTable() function marks some mappings for moving
   808                          * to the extension table.
   809                          * They fit into the base toUnicode table but not into the
   810                          * base fromUnicode table.
   811                          * (Note: We do have explicit precision flags because they are
   812                          * required for extension table generation, and
   813                          * ucm_checkBaseExt() verified it.)
   814                          *
   815                          * We do not call MBCSAddTable() here (we probably could)
   816                          * so we need to do the analysis before building the extension table.
   817                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
   818                          * Redundant mappings in the extension table are ok except they cost some size.
   819                          *
   820                          * Do this after ucm_checkBaseExt().
   821                          */
   822                         const MBCSData *mbcsData=MBCSGetDummy();
   823                         int32_t needsMove=0;
   824                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
   825                             m<mLimit;
   826                             ++m
   827                         ) {
   828                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
   829                                 m->f|=MBCS_FROM_U_EXT_FLAG;
   830                                 m->moveFlag=UCM_MOVE_TO_EXT;
   831                                 ++needsMove;
   832                             }
   833                         }
   835                         if(needsMove!=0) {
   836                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
   837                             ucm_sortTable(data->ucm->ext);
   838                         }
   839                     }
   840                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
   841                         *pErrorCode=U_INVALID_TABLE_FORMAT;
   842                     }
   843                 }
   844             }
   845         }
   847         cleanupConvData(&baseData);
   848     }
   849 }
   851 /*
   852  * Hey, Emacs, please set the following:
   853  *
   854  * Local Variables:
   855  * indent-tabs-mode: nil
   856  * End:
   857  *
   858  */

mercurial