intl/icu/source/tools/makeconv/makeconv.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/makeconv/makeconv.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,858 @@
     1.4 +/*
     1.5 + ********************************************************************************
     1.6 + *
     1.7 + *   Copyright (C) 1998-2012, International Business Machines
     1.8 + *   Corporation and others.  All Rights Reserved.
     1.9 + *
    1.10 + ********************************************************************************
    1.11 + *
    1.12 + *
    1.13 + *  makeconv.c:
    1.14 + *  tool creating a binary (compressed) representation of the conversion mapping
    1.15 + *  table (IBM NLTC ucmap format).
    1.16 + *
    1.17 + *  05/04/2000    helena     Added fallback mapping into the picture...
    1.18 + *  06/29/2000  helena      Major rewrite of the callback APIs.
    1.19 + */
    1.20 +
    1.21 +#include <stdio.h>
    1.22 +#include "unicode/putil.h"
    1.23 +#include "unicode/ucnv_err.h"
    1.24 +#include "ucnv_bld.h"
    1.25 +#include "ucnv_imp.h"
    1.26 +#include "ucnv_cnv.h"
    1.27 +#include "cstring.h"
    1.28 +#include "cmemory.h"
    1.29 +#include "uinvchar.h"
    1.30 +#include "filestrm.h"
    1.31 +#include "toolutil.h"
    1.32 +#include "uoptions.h"
    1.33 +#include "unicode/udata.h"
    1.34 +#include "unewdata.h"
    1.35 +#include "uparse.h"
    1.36 +#include "ucm.h"
    1.37 +#include "makeconv.h"
    1.38 +#include "genmbcs.h"
    1.39 +
    1.40 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    1.41 +
    1.42 +#define DEBUG 0
    1.43 +
    1.44 +typedef struct ConvData {
    1.45 +    UCMFile *ucm;
    1.46 +    NewConverter *cnvData, *extData;
    1.47 +    UConverterSharedData sharedData;
    1.48 +    UConverterStaticData staticData;
    1.49 +} ConvData;
    1.50 +
    1.51 +static void
    1.52 +initConvData(ConvData *data) {
    1.53 +    uprv_memset(data, 0, sizeof(ConvData));
    1.54 +    data->sharedData.structSize=sizeof(UConverterSharedData);
    1.55 +    data->staticData.structSize=sizeof(UConverterStaticData);
    1.56 +    data->sharedData.staticData=&data->staticData;
    1.57 +}
    1.58 +
    1.59 +static void
    1.60 +cleanupConvData(ConvData *data) {
    1.61 +    if(data!=NULL) {
    1.62 +        if(data->cnvData!=NULL) {
    1.63 +            data->cnvData->close(data->cnvData);
    1.64 +            data->cnvData=NULL;
    1.65 +        }
    1.66 +        if(data->extData!=NULL) {
    1.67 +            data->extData->close(data->extData);
    1.68 +            data->extData=NULL;
    1.69 +        }
    1.70 +        ucm_close(data->ucm);
    1.71 +        data->ucm=NULL;
    1.72 +    }
    1.73 +}
    1.74 +
    1.75 +/*
    1.76 + * from ucnvstat.c - static prototypes of data-based converters
    1.77 + */
    1.78 +extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
    1.79 +
/*
 * Global option flags. All three default to FALSE and are set in main()
 * from the parsed command line.
 */
/* --verbose: print progress messages and the name of each file written */
UBool VERBOSE = FALSE;
/*
 * --small: request smaller .cnv files (see the usage text in main()).
 * NOTE(review): not read anywhere in the visible part of this file;
 * presumably consumed by the table builders - confirm.
 */
UBool SMALL = FALSE;
/* --ignore-siso-check: passed to ucm_processStates() in readFile() */
UBool IGNORE_SISO_CHECK = FALSE;
    1.86 +
    1.87 +static void
    1.88 +createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
    1.89 +
    1.90 +/*
    1.91 + * Set up the UNewData and write the converter..
    1.92 + */
    1.93 +static void
    1.94 +writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
    1.95 +
/*
 * Whether writeConverterData() passes U_COPYRIGHT_STRING (TRUE) or NULL
 * (FALSE) to udata_create(). main() overwrites this initial value with
 * the presence of the copyright command-line option.
 */
UBool haveCopyright=TRUE;

/*
 * Data header description handed to udata_create() for every .cnv file.
 * The dataVersion field is filled in by main() with the ICU version
 * obtained from u_getVersion().
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    {6, 2, 0, 0},                 /* formatVersion */
    {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
};
   1.111 +
   1.112 +static void
   1.113 +writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
   1.114 +{
   1.115 +    UNewDataMemory *mem = NULL;
   1.116 +    uint32_t sz2;
   1.117 +    uint32_t size = 0;
   1.118 +    int32_t tableType;
   1.119 +
   1.120 +    if(U_FAILURE(*status))
   1.121 +      {
   1.122 +        return;
   1.123 +      }
   1.124 +
   1.125 +    tableType=TABLE_NONE;
   1.126 +    if(data->cnvData!=NULL) {
   1.127 +        tableType|=TABLE_BASE;
   1.128 +    }
   1.129 +    if(data->extData!=NULL) {
   1.130 +        tableType|=TABLE_EXT;
   1.131 +    }
   1.132 +
   1.133 +    mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
   1.134 +
   1.135 +    if(U_FAILURE(*status))
   1.136 +      {
   1.137 +        fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
   1.138 +                cnvName,
   1.139 +                "cnv",
   1.140 +                u_errorName(*status));
   1.141 +        return;
   1.142 +      }
   1.143 +
   1.144 +    if(VERBOSE)
   1.145 +      {
   1.146 +        printf("- Opened udata %s.%s\n", cnvName, "cnv");
   1.147 +      }
   1.148 +
   1.149 +
   1.150 +    /* all read only, clean, platform independent data.  Mmmm. :)  */
   1.151 +    udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
   1.152 +    size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
   1.153 +    /* Now, write the table */
   1.154 +    if(tableType&TABLE_BASE) {
   1.155 +        size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
   1.156 +    }
   1.157 +    if(tableType&TABLE_EXT) {
   1.158 +        size += data->extData->write(data->extData, &data->staticData, mem, tableType);
   1.159 +    }
   1.160 +
   1.161 +    sz2 = udata_finish(mem, status);
   1.162 +    if(size != sz2)
   1.163 +    {
   1.164 +        fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
   1.165 +        *status=U_INTERNAL_PROGRAM_ERROR;
   1.166 +    }
   1.167 +    if(VERBOSE)
   1.168 +    {
   1.169 +      printf("- Wrote %u bytes to the udata.\n", (int)sz2);
   1.170 +    }
   1.171 +}
   1.172 +
/*
 * Indexes into options[] below. The enumerators must stay in the same
 * order as the array entries because main() looks options up by index.
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_COUNT
};

/*
 * Command-line options understood by main(), parsed with u_parseArgs().
 * The last two entries are long-only options that take no argument.
 */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
};
   1.195 +
   1.196 +int main(int argc, char* argv[])
   1.197 +{
   1.198 +    ConvData data;
   1.199 +    UErrorCode err = U_ZERO_ERROR, localError;
   1.200 +    char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
   1.201 +    const char* destdir, *arg;
   1.202 +    size_t destdirlen;
   1.203 +    char* dot = NULL, *outBasename;
   1.204 +    char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
   1.205 +    char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
   1.206 +    UVersionInfo icuVersion;
   1.207 +    UBool printFilename;
   1.208 +
   1.209 +    err = U_ZERO_ERROR;
   1.210 +
   1.211 +    U_MAIN_INIT_ARGS(argc, argv);
   1.212 +
   1.213 +    /* Set up the ICU version number */
   1.214 +    u_getVersion(icuVersion);
   1.215 +    uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
   1.216 +
   1.217 +    /* preset then read command line options */
   1.218 +    options[OPT_DESTDIR].value=u_getDataDirectory();
   1.219 +    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
   1.220 +
   1.221 +    /* error handling, printing usage message */
   1.222 +    if(argc<0) {
   1.223 +        fprintf(stderr,
   1.224 +            "error in command line argument \"%s\"\n",
   1.225 +            argv[-argc]);
   1.226 +    } else if(argc<2) {
   1.227 +        argc=-1;
   1.228 +    }
   1.229 +    if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
   1.230 +        FILE *stdfile=argc<0 ? stderr : stdout;
   1.231 +        fprintf(stdfile,
   1.232 +            "usage: %s [-options] files...\n"
   1.233 +            "\tread .ucm codepage mapping files and write .cnv files\n"
   1.234 +            "options:\n"
   1.235 +            "\t-h or -? or --help  this usage text\n"
   1.236 +            "\t-V or --version     show a version message\n"
   1.237 +            "\t-c or --copyright   include a copyright notice\n"
   1.238 +            "\t-d or --destdir     destination directory, followed by the path\n"
   1.239 +            "\t-v or --verbose     Turn on verbose output\n",
   1.240 +            argv[0]);
   1.241 +        fprintf(stdfile,
   1.242 +            "\t      --small       Generate smaller .cnv files. They will be\n"
   1.243 +            "\t                    significantly smaller but may not be compatible with\n"
   1.244 +            "\t                    older versions of ICU and will require heap memory\n"
   1.245 +            "\t                    allocation when loaded.\n"
   1.246 +            "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
   1.247 +        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
   1.248 +    }
   1.249 +
   1.250 +    if(options[OPT_VERSION].doesOccur) {
   1.251 +        printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
   1.252 +               dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
   1.253 +        printf("%s\n", U_COPYRIGHT_STRING);
   1.254 +        exit(0);
   1.255 +    }
   1.256 +
   1.257 +    /* get the options values */
   1.258 +    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
   1.259 +    destdir = options[OPT_DESTDIR].value;
   1.260 +    VERBOSE = options[OPT_VERBOSE].doesOccur;
   1.261 +    SMALL = options[OPT_SMALL].doesOccur;
   1.262 +
   1.263 +    if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
   1.264 +        IGNORE_SISO_CHECK = TRUE;
   1.265 +    }
   1.266 +
   1.267 +    if (destdir != NULL && *destdir != 0) {
   1.268 +        uprv_strcpy(outFileName, destdir);
   1.269 +        destdirlen = uprv_strlen(destdir);
   1.270 +        outBasename = outFileName + destdirlen;
   1.271 +        if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
   1.272 +            *outBasename++ = U_FILE_SEP_CHAR;
   1.273 +            ++destdirlen;
   1.274 +        }
   1.275 +    } else {
   1.276 +        destdirlen = 0;
   1.277 +        outBasename = outFileName;
   1.278 +    }
   1.279 +
   1.280 +#if DEBUG
   1.281 +    {
   1.282 +      int i;
   1.283 +      printf("makeconv: processing %d files...\n", argc - 1);
   1.284 +      for(i=1; i<argc; ++i) {
   1.285 +        printf("%s ", argv[i]);
   1.286 +      }
   1.287 +      printf("\n");
   1.288 +      fflush(stdout);
   1.289 +    }
   1.290 +#endif
   1.291 +
   1.292 +    err = U_ZERO_ERROR;
   1.293 +    printFilename = (UBool) (argc > 2 || VERBOSE);
   1.294 +    for (++argv; --argc; ++argv)
   1.295 +    {
   1.296 +        arg = getLongPathname(*argv);
   1.297 +
   1.298 +        /* Check for potential buffer overflow */
   1.299 +        if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
   1.300 +        {
   1.301 +            fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
   1.302 +            return U_BUFFER_OVERFLOW_ERROR;
   1.303 +        }
   1.304 +
   1.305 +        /*produces the right destination path for display*/
   1.306 +        if (destdirlen != 0)
   1.307 +        {
   1.308 +            const char *basename;
   1.309 +
   1.310 +            /* find the last file sepator */
   1.311 +            basename = findBasename(arg);
   1.312 +            uprv_strcpy(outBasename, basename);
   1.313 +        }
   1.314 +        else
   1.315 +        {
   1.316 +            uprv_strcpy(outFileName, arg);
   1.317 +        }
   1.318 +
   1.319 +        /*removes the extension if any is found*/
   1.320 +        dot = uprv_strrchr(outBasename, '.');
   1.321 +        if (dot)
   1.322 +        {
   1.323 +            *dot = '\0';
   1.324 +        }
   1.325 +
   1.326 +        /* the basename without extension is the converter name */
   1.327 +        uprv_strcpy(cnvName, outBasename);
   1.328 +
   1.329 +        /*Adds the target extension*/
   1.330 +        uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
   1.331 +
   1.332 +#if DEBUG
   1.333 +        printf("makeconv: processing %s  ...\n", arg);
   1.334 +        fflush(stdout);
   1.335 +#endif
   1.336 +        localError = U_ZERO_ERROR;
   1.337 +        initConvData(&data);
   1.338 +        createConverter(&data, arg, &localError);
   1.339 +
   1.340 +        if (U_FAILURE(localError))
   1.341 +        {
   1.342 +            /* if an error is found, print out an error msg and keep going */
   1.343 +            fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
   1.344 +                u_errorName(localError));
   1.345 +            if(U_SUCCESS(err)) {
   1.346 +                err = localError;
   1.347 +            }
   1.348 +        }
   1.349 +        else
   1.350 +        {
   1.351 +            /* Insure the static data name matches the  file name */
   1.352 +            /* Changed to ignore directory and only compare base name
   1.353 +             LDH 1/2/08*/
   1.354 +            char *p;
   1.355 +            p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
   1.356 +
   1.357 +            if(p == NULL)            /* OK, try alternate */
   1.358 +            {
   1.359 +                p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
   1.360 +                if(p == NULL)
   1.361 +                {
   1.362 +                    p=cnvName; /* If no separators, no problem */
   1.363 +                }
   1.364 +            }
   1.365 +            else
   1.366 +            {
   1.367 +                p++;   /* If found separtor, don't include it in compare */
   1.368 +            }
   1.369 +            if(uprv_stricmp(p,data.staticData.name))
   1.370 +            {
   1.371 +                fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
   1.372 +                    cnvName,  CONVERTER_FILE_EXTENSION,
   1.373 +                    data.staticData.name);
   1.374 +            }
   1.375 +
   1.376 +            uprv_strcpy((char*)data.staticData.name, cnvName);
   1.377 +
   1.378 +            if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
   1.379 +                fprintf(stderr,
   1.380 +                    "Error: A converter name must contain only invariant characters.\n"
   1.381 +                    "%s is not a valid converter name.\n",
   1.382 +                    data.staticData.name);
   1.383 +                if(U_SUCCESS(err)) {
   1.384 +                    err = U_INVALID_TABLE_FORMAT;
   1.385 +                }
   1.386 +            }
   1.387 +
   1.388 +            uprv_strcpy(cnvNameWithPkg, cnvName);
   1.389 +
   1.390 +            localError = U_ZERO_ERROR;
   1.391 +            writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
   1.392 +
   1.393 +            if(U_FAILURE(localError))
   1.394 +            {
   1.395 +                /* if an error is found, print out an error msg and keep going*/
   1.396 +                fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
   1.397 +                    u_errorName(localError));
   1.398 +                if(U_SUCCESS(err)) {
   1.399 +                    err = localError;
   1.400 +                }
   1.401 +            }
   1.402 +            else if (printFilename)
   1.403 +            {
   1.404 +                puts(outBasename);
   1.405 +            }
   1.406 +        }
   1.407 +        fflush(stdout);
   1.408 +        fflush(stderr);
   1.409 +
   1.410 +        cleanupConvData(&data);
   1.411 +    }
   1.412 +
   1.413 +    return err;
   1.414 +}
   1.415 +
   1.416 +static void
   1.417 +getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
   1.418 +    if( (name[0]=='i' || name[0]=='I') &&
   1.419 +        (name[1]=='b' || name[1]=='B') &&
   1.420 +        (name[2]=='m' || name[2]=='M')
   1.421 +    ) {
   1.422 +        name+=3;
   1.423 +        if(*name=='-') {
   1.424 +            ++name;
   1.425 +        }
   1.426 +        *pPlatform=UCNV_IBM;
   1.427 +        *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
   1.428 +    } else {
   1.429 +        *pPlatform=UCNV_UNKNOWN;
   1.430 +        *pCCSID=0;
   1.431 +    }
   1.432 +}
   1.433 +
   1.434 +static void
   1.435 +readHeader(ConvData *data,
   1.436 +           FileStream* convFile,
   1.437 +           const char* converterName,
   1.438 +           UErrorCode *pErrorCode) {
   1.439 +    char line[1024];
   1.440 +    char *s, *key, *value;
   1.441 +    const UConverterStaticData *prototype;
   1.442 +    UConverterStaticData *staticData;
   1.443 +
   1.444 +    if(U_FAILURE(*pErrorCode)) {
   1.445 +        return;
   1.446 +    }
   1.447 +
   1.448 +    staticData=&data->staticData;
   1.449 +    staticData->platform=UCNV_IBM;
   1.450 +    staticData->subCharLen=0;
   1.451 +
   1.452 +    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
   1.453 +        /* basic parsing and handling of state-related items */
   1.454 +        if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
   1.455 +            continue;
   1.456 +        }
   1.457 +
   1.458 +        /* stop at the beginning of the mapping section */
   1.459 +        if(uprv_strcmp(line, "CHARMAP")==0) {
   1.460 +            break;
   1.461 +        }
   1.462 +
   1.463 +        /* collect the information from the header field, ignore unknown keys */
   1.464 +        if(uprv_strcmp(key, "code_set_name")==0) {
   1.465 +            if(*value!=0) {
   1.466 +                uprv_strcpy((char *)staticData->name, value);
   1.467 +                getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
   1.468 +            }
   1.469 +        } else if(uprv_strcmp(key, "subchar")==0) {
   1.470 +            uint8_t bytes[UCNV_EXT_MAX_BYTES];
   1.471 +            int8_t length;
   1.472 +
   1.473 +            s=value;
   1.474 +            length=ucm_parseBytes(bytes, line, (const char **)&s);
   1.475 +            if(1<=length && length<=4 && *s==0) {
   1.476 +                staticData->subCharLen=length;
   1.477 +                uprv_memcpy(staticData->subChar, bytes, length);
   1.478 +            } else {
   1.479 +                fprintf(stderr, "error: illegal <subchar> %s\n", value);
   1.480 +                *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.481 +                return;
   1.482 +            }
   1.483 +        } else if(uprv_strcmp(key, "subchar1")==0) {
   1.484 +            uint8_t bytes[UCNV_EXT_MAX_BYTES];
   1.485 +
   1.486 +            s=value;
   1.487 +            if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
   1.488 +                staticData->subChar1=bytes[0];
   1.489 +            } else {
   1.490 +                fprintf(stderr, "error: illegal <subchar1> %s\n", value);
   1.491 +                *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.492 +                return;
   1.493 +            }
   1.494 +        }
   1.495 +    }
   1.496 +
   1.497 +    /* copy values from the UCMFile to the static data */
   1.498 +    staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
   1.499 +    staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
   1.500 +    staticData->conversionType=data->ucm->states.conversionType;
   1.501 +
   1.502 +    if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
   1.503 +        fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
   1.504 +        *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.505 +        return;
   1.506 +    }
   1.507 +
   1.508 +    /*
   1.509 +     * Now that we know the type, copy any 'default' values from the table.
   1.510 +     * We need not check the type any further because the parser only
   1.511 +     * recognizes what we have prototypes for.
   1.512 +     *
   1.513 +     * For delta (extension-only) tables, copy values from the base file
   1.514 +     * instead, see createConverter().
   1.515 +     */
   1.516 +    if(data->ucm->baseName[0]==0) {
   1.517 +        prototype=ucnv_converterStaticData[staticData->conversionType];
   1.518 +        if(prototype!=NULL) {
   1.519 +            if(staticData->name[0]==0) {
   1.520 +                uprv_strcpy((char *)staticData->name, prototype->name);
   1.521 +            }
   1.522 +
   1.523 +            if(staticData->codepage==0) {
   1.524 +                staticData->codepage=prototype->codepage;
   1.525 +            }
   1.526 +
   1.527 +            if(staticData->platform==0) {
   1.528 +                staticData->platform=prototype->platform;
   1.529 +            }
   1.530 +
   1.531 +            if(staticData->minBytesPerChar==0) {
   1.532 +                staticData->minBytesPerChar=prototype->minBytesPerChar;
   1.533 +            }
   1.534 +
   1.535 +            if(staticData->maxBytesPerChar==0) {
   1.536 +                staticData->maxBytesPerChar=prototype->maxBytesPerChar;
   1.537 +            }
   1.538 +
   1.539 +            if(staticData->subCharLen==0) {
   1.540 +                staticData->subCharLen=prototype->subCharLen;
   1.541 +                if(prototype->subCharLen>0) {
   1.542 +                    uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
   1.543 +                }
   1.544 +            }
   1.545 +        }
   1.546 +    }
   1.547 +
   1.548 +    if(data->ucm->states.outputType<0) {
   1.549 +        data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
   1.550 +    }
   1.551 +
   1.552 +    if( staticData->subChar1!=0 &&
   1.553 +            (staticData->minBytesPerChar>1 ||
   1.554 +                (staticData->conversionType!=UCNV_MBCS &&
   1.555 +                 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
   1.556 +    ) {
   1.557 +        fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
   1.558 +        *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.559 +    }
   1.560 +}
   1.561 +
   1.562 +/* return TRUE if a base table was read, FALSE for an extension table */
   1.563 +static UBool
   1.564 +readFile(ConvData *data, const char* converterName,
   1.565 +         UErrorCode *pErrorCode) {
   1.566 +    char line[1024];
   1.567 +    char *end;
   1.568 +    FileStream *convFile;
   1.569 +
   1.570 +    UCMStates *baseStates;
   1.571 +    UBool dataIsBase;
   1.572 +
   1.573 +    if(U_FAILURE(*pErrorCode)) {
   1.574 +        return FALSE;
   1.575 +    }
   1.576 +
   1.577 +    data->ucm=ucm_open();
   1.578 +
   1.579 +    convFile=T_FileStream_open(converterName, "r");
   1.580 +    if(convFile==NULL) {
   1.581 +        *pErrorCode=U_FILE_ACCESS_ERROR;
   1.582 +        return FALSE;
   1.583 +    }
   1.584 +
   1.585 +    readHeader(data, convFile, converterName, pErrorCode);
   1.586 +    if(U_FAILURE(*pErrorCode)) {
   1.587 +        return FALSE;
   1.588 +    }
   1.589 +
   1.590 +    if(data->ucm->baseName[0]==0) {
   1.591 +        dataIsBase=TRUE;
   1.592 +        baseStates=&data->ucm->states;
   1.593 +        ucm_processStates(baseStates, IGNORE_SISO_CHECK);
   1.594 +    } else {
   1.595 +        dataIsBase=FALSE;
   1.596 +        baseStates=NULL;
   1.597 +    }
   1.598 +
   1.599 +    /* read the base table */
   1.600 +    ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
   1.601 +    if(U_FAILURE(*pErrorCode)) {
   1.602 +        return FALSE;
   1.603 +    }
   1.604 +
   1.605 +    /* read an extension table if there is one */
   1.606 +    while(T_FileStream_readLine(convFile, line, sizeof(line))) {
   1.607 +        end=uprv_strchr(line, 0);
   1.608 +        while(line<end &&
   1.609 +              (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
   1.610 +            --end;
   1.611 +        }
   1.612 +        *end=0;
   1.613 +
   1.614 +        if(line[0]=='#' || u_skipWhitespace(line)==end) {
   1.615 +            continue; /* ignore empty and comment lines */
   1.616 +        }
   1.617 +
   1.618 +        if(0==uprv_strcmp(line, "CHARMAP")) {
   1.619 +            /* read the extension table */
   1.620 +            ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
   1.621 +        } else {
   1.622 +            fprintf(stderr, "unexpected text after the base mapping table\n");
   1.623 +        }
   1.624 +        break;
   1.625 +    }
   1.626 +
   1.627 +    T_FileStream_close(convFile);
   1.628 +
   1.629 +    if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
   1.630 +        fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
   1.631 +        *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.632 +    }
   1.633 +
   1.634 +    return dataIsBase;
   1.635 +}
   1.636 +
   1.637 +static void
   1.638 +createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
   1.639 +    ConvData baseData;
   1.640 +    UBool dataIsBase;
   1.641 +
   1.642 +    UConverterStaticData *staticData;
   1.643 +    UCMStates *states, *baseStates;
   1.644 +
   1.645 +    if(U_FAILURE(*pErrorCode)) {
   1.646 +        return;
   1.647 +    }
   1.648 +
   1.649 +    initConvData(data);
   1.650 +
   1.651 +    dataIsBase=readFile(data, converterName, pErrorCode);
   1.652 +    if(U_FAILURE(*pErrorCode)) {
   1.653 +        return;
   1.654 +    }
   1.655 +
   1.656 +    staticData=&data->staticData;
   1.657 +    states=&data->ucm->states;
   1.658 +
   1.659 +    if(dataIsBase) {
   1.660 +        /*
   1.661 +         * Build a normal .cnv file with a base table
   1.662 +         * and an optional extension table.
   1.663 +         */
   1.664 +        data->cnvData=MBCSOpen(data->ucm);
   1.665 +        if(data->cnvData==NULL) {
   1.666 +            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1.667 +
   1.668 +        } else if(!data->cnvData->isValid(data->cnvData,
   1.669 +                            staticData->subChar, staticData->subCharLen)
   1.670 +        ) {
   1.671 +            fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
   1.672 +            *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.673 +
   1.674 +        } else if(staticData->subChar1!=0 &&
   1.675 +                    !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
   1.676 +        ) {
   1.677 +            fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
   1.678 +            *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.679 +
   1.680 +        } else if(
   1.681 +            data->ucm->ext->mappingsLength>0 &&
   1.682 +            !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
   1.683 +        ) {
   1.684 +            *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.685 +        } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
   1.686 +            /* sort the table so that it can be turned into UTF-8-friendly data */
   1.687 +            ucm_sortTable(data->ucm->base);
   1.688 +        }
   1.689 +
   1.690 +        if(U_SUCCESS(*pErrorCode)) {
   1.691 +            if(
   1.692 +                /* add the base table after ucm_checkBaseExt()! */
   1.693 +                !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
   1.694 +            ) {
   1.695 +                *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.696 +            } else {
   1.697 +                /*
   1.698 +                 * addTable() may have requested moving more mappings to the extension table
   1.699 +                 * if they fit into the base toUnicode table but not into the
   1.700 +                 * base fromUnicode table.
   1.701 +                 * (Especially for UTF-8-friendly fromUnicode tables.)
   1.702 +                 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
   1.703 +                 * to be excluded from the extension toUnicode data.
   1.704 +                 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
   1.705 +                 * the base fromUnicode table.
   1.706 +                 */
   1.707 +                ucm_moveMappings(data->ucm->base, data->ucm->ext);
   1.708 +                ucm_sortTable(data->ucm->ext);
   1.709 +                if(data->ucm->ext->mappingsLength>0) {
   1.710 +                    /* prepare the extension table, if there is one */
   1.711 +                    data->extData=CnvExtOpen(data->ucm);
   1.712 +                    if(data->extData==NULL) {
   1.713 +                        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1.714 +                    } else if(
   1.715 +                        !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
   1.716 +                    ) {
   1.717 +                        *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.718 +                    }
   1.719 +                }
   1.720 +            }
   1.721 +        }
   1.722 +    } else {
   1.723 +        /* Build an extension-only .cnv file. */
   1.724 +        char baseFilename[500];
   1.725 +        char *basename;
   1.726 +
   1.727 +        initConvData(&baseData);
   1.728 +
   1.729 +        /* assemble a path/filename for data->ucm->baseName */
   1.730 +        uprv_strcpy(baseFilename, converterName);
   1.731 +        basename=(char *)findBasename(baseFilename);
   1.732 +        uprv_strcpy(basename, data->ucm->baseName);
   1.733 +        uprv_strcat(basename, ".ucm");
   1.734 +
   1.735 +        /* read the base table */
   1.736 +        dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
   1.737 +        if(U_FAILURE(*pErrorCode)) {
   1.738 +            return;
   1.739 +        } else if(!dataIsBase) {
   1.740 +            fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
   1.741 +            *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.742 +        } else {
   1.743 +            /* prepare the extension table */
   1.744 +            data->extData=CnvExtOpen(data->ucm);
   1.745 +            if(data->extData==NULL) {
   1.746 +                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1.747 +            } else {
   1.748 +                /* fill in gaps in extension file header fields */
   1.749 +                UCMapping *m, *mLimit;
   1.750 +                uint8_t fallbackFlags;
   1.751 +
   1.752 +                baseStates=&baseData.ucm->states;
   1.753 +                if(states->conversionType==UCNV_DBCS) {
   1.754 +                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
   1.755 +                } else if(states->minCharLength==0) {
   1.756 +                    staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
   1.757 +                }
   1.758 +                if(states->maxCharLength<states->minCharLength) {
   1.759 +                    staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
   1.760 +                }
   1.761 +
   1.762 +                if(staticData->subCharLen==0) {
   1.763 +                    uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
   1.764 +                    staticData->subCharLen=baseData.staticData.subCharLen;
   1.765 +                }
   1.766 +                /*
   1.767 +                 * do not copy subChar1 -
   1.768 +                 * only use what is explicitly specified
   1.769 +                 * because it cannot be unset in the extension file header
   1.770 +                 */
   1.771 +
   1.772 +                /* get the fallback flags */
   1.773 +                fallbackFlags=0;
   1.774 +                for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
   1.775 +                    m<mLimit && fallbackFlags!=3;
   1.776 +                    ++m
   1.777 +                ) {
   1.778 +                    if(m->f==1) {
   1.779 +                        fallbackFlags|=1;
   1.780 +                    } else if(m->f==3) {
   1.781 +                        fallbackFlags|=2;
   1.782 +                    }
   1.783 +                }
   1.784 +
   1.785 +                if(fallbackFlags&1) {
   1.786 +                    staticData->hasFromUnicodeFallback=TRUE;
   1.787 +                }
   1.788 +                if(fallbackFlags&2) {
   1.789 +                    staticData->hasToUnicodeFallback=TRUE;
   1.790 +                }
   1.791 +
   1.792 +                if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
   1.793 +                    fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
   1.794 +                    *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.795 +
   1.796 +                } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
   1.797 +                    fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
   1.798 +                    *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.799 +
   1.800 +                } else if(
   1.801 +                    !ucm_checkValidity(data->ucm->ext, baseStates) ||
   1.802 +                    !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
   1.803 +                ) {
   1.804 +                    *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.805 +                } else {
   1.806 +                    if(states->maxCharLength>1) {
   1.807 +                        /*
   1.808 +                         * When building a normal .cnv file with a base table
   1.809 +                         * for an MBCS (not SBCS) table with explicit precision flags,
   1.810 +                         * the MBCSAddTable() function marks some mappings for moving
   1.811 +                         * to the extension table.
   1.812 +                         * They fit into the base toUnicode table but not into the
   1.813 +                         * base fromUnicode table.
   1.814 +                         * (Note: We do have explicit precision flags because they are
   1.815 +                         * required for extension table generation, and
   1.816 +                         * ucm_checkBaseExt() verified it.)
   1.817 +                         *
   1.818 +                         * We do not call MBCSAddTable() here (we probably could)
   1.819 +                         * so we need to do the analysis before building the extension table.
   1.820 +                         * We assume that MBCSAddTable() will build a UTF-8-friendly table.
   1.821 +                         * Redundant mappings in the extension table are ok except they cost some size.
   1.822 +                         *
   1.823 +                         * Do this after ucm_checkBaseExt().
   1.824 +                         */
   1.825 +                        const MBCSData *mbcsData=MBCSGetDummy();
   1.826 +                        int32_t needsMove=0;
   1.827 +                        for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
   1.828 +                            m<mLimit;
   1.829 +                            ++m
   1.830 +                        ) {
   1.831 +                            if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
   1.832 +                                m->f|=MBCS_FROM_U_EXT_FLAG;
   1.833 +                                m->moveFlag=UCM_MOVE_TO_EXT;
   1.834 +                                ++needsMove;
   1.835 +                            }
   1.836 +                        }
   1.837 +
   1.838 +                        if(needsMove!=0) {
   1.839 +                            ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
   1.840 +                            ucm_sortTable(data->ucm->ext);
   1.841 +                        }
   1.842 +                    }
   1.843 +                    if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
   1.844 +                        *pErrorCode=U_INVALID_TABLE_FORMAT;
   1.845 +                    }
   1.846 +                }
   1.847 +            }
   1.848 +        }
   1.849 +
   1.850 +        cleanupConvData(&baseData);
   1.851 +    }
   1.852 +}
   1.853 +
   1.854 +/*
   1.855 + * Hey, Emacs, please set the following:
   1.856 + *
   1.857 + * Local Variables:
   1.858 + * indent-tabs-mode: nil
   1.859 + * End:
   1.860 + *
   1.861 + */

mercurial