intl/icu/source/tools/makeconv/makeconv.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 ********************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1998-2012, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ********************************************************************************
michael@0 8 *
michael@0 9 *
michael@0 10 * makeconv.c:
michael@0 11 * tool creating a binary (compressed) representation of the conversion mapping
michael@0 12 * table (IBM NLTC ucmap format).
michael@0 13 *
michael@0 14 * 05/04/2000 helena Added fallback mapping into the picture...
michael@0 15 * 06/29/2000 helena Major rewrite of the callback APIs.
michael@0 16 */
michael@0 17
michael@0 18 #include <stdio.h>
michael@0 19 #include "unicode/putil.h"
michael@0 20 #include "unicode/ucnv_err.h"
michael@0 21 #include "ucnv_bld.h"
michael@0 22 #include "ucnv_imp.h"
michael@0 23 #include "ucnv_cnv.h"
michael@0 24 #include "cstring.h"
michael@0 25 #include "cmemory.h"
michael@0 26 #include "uinvchar.h"
michael@0 27 #include "filestrm.h"
michael@0 28 #include "toolutil.h"
michael@0 29 #include "uoptions.h"
michael@0 30 #include "unicode/udata.h"
michael@0 31 #include "unewdata.h"
michael@0 32 #include "uparse.h"
michael@0 33 #include "ucm.h"
michael@0 34 #include "makeconv.h"
michael@0 35 #include "genmbcs.h"
michael@0 36
/* Number of elements of a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Set to 1 to compile in the progress printouts guarded by "#if DEBUG". */
#define DEBUG 0
michael@0 40
michael@0 41 typedef struct ConvData {
michael@0 42 UCMFile *ucm;
michael@0 43 NewConverter *cnvData, *extData;
michael@0 44 UConverterSharedData sharedData;
michael@0 45 UConverterStaticData staticData;
michael@0 46 } ConvData;
michael@0 47
michael@0 48 static void
michael@0 49 initConvData(ConvData *data) {
michael@0 50 uprv_memset(data, 0, sizeof(ConvData));
michael@0 51 data->sharedData.structSize=sizeof(UConverterSharedData);
michael@0 52 data->staticData.structSize=sizeof(UConverterStaticData);
michael@0 53 data->sharedData.staticData=&data->staticData;
michael@0 54 }
michael@0 55
michael@0 56 static void
michael@0 57 cleanupConvData(ConvData *data) {
michael@0 58 if(data!=NULL) {
michael@0 59 if(data->cnvData!=NULL) {
michael@0 60 data->cnvData->close(data->cnvData);
michael@0 61 data->cnvData=NULL;
michael@0 62 }
michael@0 63 if(data->extData!=NULL) {
michael@0 64 data->extData->close(data->extData);
michael@0 65 data->extData=NULL;
michael@0 66 }
michael@0 67 ucm_close(data->ucm);
michael@0 68 data->ucm=NULL;
michael@0 69 }
michael@0 70 }
michael@0 71
michael@0 72 /*
michael@0 73 * from ucnvstat.c - static prototypes of data-based converters
michael@0 74 */
michael@0 75 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
michael@0 76
michael@0 77 /*
michael@0 78 * Global - verbosity
michael@0 79 */
michael@0 80 UBool VERBOSE = FALSE;
michael@0 81 UBool SMALL = FALSE;
michael@0 82 UBool IGNORE_SISO_CHECK = FALSE;
michael@0 83
michael@0 84 static void
michael@0 85 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
michael@0 86
michael@0 87 /*
michael@0 88 * Set up the UNewData and write the converter..
michael@0 89 */
michael@0 90 static void
michael@0 91 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
michael@0 92
michael@0 93 UBool haveCopyright=TRUE;
michael@0 94
michael@0 95 static UDataInfo dataInfo={
michael@0 96 sizeof(UDataInfo),
michael@0 97 0,
michael@0 98
michael@0 99 U_IS_BIG_ENDIAN,
michael@0 100 U_CHARSET_FAMILY,
michael@0 101 sizeof(UChar),
michael@0 102 0,
michael@0 103
michael@0 104 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
michael@0 105 {6, 2, 0, 0}, /* formatVersion */
michael@0 106 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
michael@0 107 };
michael@0 108
michael@0 109 static void
michael@0 110 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
michael@0 111 {
michael@0 112 UNewDataMemory *mem = NULL;
michael@0 113 uint32_t sz2;
michael@0 114 uint32_t size = 0;
michael@0 115 int32_t tableType;
michael@0 116
michael@0 117 if(U_FAILURE(*status))
michael@0 118 {
michael@0 119 return;
michael@0 120 }
michael@0 121
michael@0 122 tableType=TABLE_NONE;
michael@0 123 if(data->cnvData!=NULL) {
michael@0 124 tableType|=TABLE_BASE;
michael@0 125 }
michael@0 126 if(data->extData!=NULL) {
michael@0 127 tableType|=TABLE_EXT;
michael@0 128 }
michael@0 129
michael@0 130 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
michael@0 131
michael@0 132 if(U_FAILURE(*status))
michael@0 133 {
michael@0 134 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
michael@0 135 cnvName,
michael@0 136 "cnv",
michael@0 137 u_errorName(*status));
michael@0 138 return;
michael@0 139 }
michael@0 140
michael@0 141 if(VERBOSE)
michael@0 142 {
michael@0 143 printf("- Opened udata %s.%s\n", cnvName, "cnv");
michael@0 144 }
michael@0 145
michael@0 146
michael@0 147 /* all read only, clean, platform independent data. Mmmm. :) */
michael@0 148 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
michael@0 149 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
michael@0 150 /* Now, write the table */
michael@0 151 if(tableType&TABLE_BASE) {
michael@0 152 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
michael@0 153 }
michael@0 154 if(tableType&TABLE_EXT) {
michael@0 155 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
michael@0 156 }
michael@0 157
michael@0 158 sz2 = udata_finish(mem, status);
michael@0 159 if(size != sz2)
michael@0 160 {
michael@0 161 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
michael@0 162 *status=U_INTERNAL_PROGRAM_ERROR;
michael@0 163 }
michael@0 164 if(VERBOSE)
michael@0 165 {
michael@0 166 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
michael@0 167 }
michael@0 168 }
michael@0 169
michael@0 170 enum {
michael@0 171 OPT_HELP_H,
michael@0 172 OPT_HELP_QUESTION_MARK,
michael@0 173 OPT_COPYRIGHT,
michael@0 174 OPT_VERSION,
michael@0 175 OPT_DESTDIR,
michael@0 176 OPT_VERBOSE,
michael@0 177 OPT_SMALL,
michael@0 178 OPT_IGNORE_SISO_CHECK,
michael@0 179 OPT_COUNT
michael@0 180 };
michael@0 181
michael@0 182 static UOption options[]={
michael@0 183 UOPTION_HELP_H,
michael@0 184 UOPTION_HELP_QUESTION_MARK,
michael@0 185 UOPTION_COPYRIGHT,
michael@0 186 UOPTION_VERSION,
michael@0 187 UOPTION_DESTDIR,
michael@0 188 UOPTION_VERBOSE,
michael@0 189 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
michael@0 190 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
michael@0 191 };
michael@0 192
michael@0 193 int main(int argc, char* argv[])
michael@0 194 {
michael@0 195 ConvData data;
michael@0 196 UErrorCode err = U_ZERO_ERROR, localError;
michael@0 197 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
michael@0 198 const char* destdir, *arg;
michael@0 199 size_t destdirlen;
michael@0 200 char* dot = NULL, *outBasename;
michael@0 201 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
michael@0 202 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
michael@0 203 UVersionInfo icuVersion;
michael@0 204 UBool printFilename;
michael@0 205
michael@0 206 err = U_ZERO_ERROR;
michael@0 207
michael@0 208 U_MAIN_INIT_ARGS(argc, argv);
michael@0 209
michael@0 210 /* Set up the ICU version number */
michael@0 211 u_getVersion(icuVersion);
michael@0 212 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
michael@0 213
michael@0 214 /* preset then read command line options */
michael@0 215 options[OPT_DESTDIR].value=u_getDataDirectory();
michael@0 216 argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
michael@0 217
michael@0 218 /* error handling, printing usage message */
michael@0 219 if(argc<0) {
michael@0 220 fprintf(stderr,
michael@0 221 "error in command line argument \"%s\"\n",
michael@0 222 argv[-argc]);
michael@0 223 } else if(argc<2) {
michael@0 224 argc=-1;
michael@0 225 }
michael@0 226 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
michael@0 227 FILE *stdfile=argc<0 ? stderr : stdout;
michael@0 228 fprintf(stdfile,
michael@0 229 "usage: %s [-options] files...\n"
michael@0 230 "\tread .ucm codepage mapping files and write .cnv files\n"
michael@0 231 "options:\n"
michael@0 232 "\t-h or -? or --help this usage text\n"
michael@0 233 "\t-V or --version show a version message\n"
michael@0 234 "\t-c or --copyright include a copyright notice\n"
michael@0 235 "\t-d or --destdir destination directory, followed by the path\n"
michael@0 236 "\t-v or --verbose Turn on verbose output\n",
michael@0 237 argv[0]);
michael@0 238 fprintf(stdfile,
michael@0 239 "\t --small Generate smaller .cnv files. They will be\n"
michael@0 240 "\t significantly smaller but may not be compatible with\n"
michael@0 241 "\t older versions of ICU and will require heap memory\n"
michael@0 242 "\t allocation when loaded.\n"
michael@0 243 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
michael@0 244 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
michael@0 245 }
michael@0 246
michael@0 247 if(options[OPT_VERSION].doesOccur) {
michael@0 248 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
michael@0 249 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
michael@0 250 printf("%s\n", U_COPYRIGHT_STRING);
michael@0 251 exit(0);
michael@0 252 }
michael@0 253
michael@0 254 /* get the options values */
michael@0 255 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
michael@0 256 destdir = options[OPT_DESTDIR].value;
michael@0 257 VERBOSE = options[OPT_VERBOSE].doesOccur;
michael@0 258 SMALL = options[OPT_SMALL].doesOccur;
michael@0 259
michael@0 260 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
michael@0 261 IGNORE_SISO_CHECK = TRUE;
michael@0 262 }
michael@0 263
michael@0 264 if (destdir != NULL && *destdir != 0) {
michael@0 265 uprv_strcpy(outFileName, destdir);
michael@0 266 destdirlen = uprv_strlen(destdir);
michael@0 267 outBasename = outFileName + destdirlen;
michael@0 268 if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
michael@0 269 *outBasename++ = U_FILE_SEP_CHAR;
michael@0 270 ++destdirlen;
michael@0 271 }
michael@0 272 } else {
michael@0 273 destdirlen = 0;
michael@0 274 outBasename = outFileName;
michael@0 275 }
michael@0 276
michael@0 277 #if DEBUG
michael@0 278 {
michael@0 279 int i;
michael@0 280 printf("makeconv: processing %d files...\n", argc - 1);
michael@0 281 for(i=1; i<argc; ++i) {
michael@0 282 printf("%s ", argv[i]);
michael@0 283 }
michael@0 284 printf("\n");
michael@0 285 fflush(stdout);
michael@0 286 }
michael@0 287 #endif
michael@0 288
michael@0 289 err = U_ZERO_ERROR;
michael@0 290 printFilename = (UBool) (argc > 2 || VERBOSE);
michael@0 291 for (++argv; --argc; ++argv)
michael@0 292 {
michael@0 293 arg = getLongPathname(*argv);
michael@0 294
michael@0 295 /* Check for potential buffer overflow */
michael@0 296 if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
michael@0 297 {
michael@0 298 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
michael@0 299 return U_BUFFER_OVERFLOW_ERROR;
michael@0 300 }
michael@0 301
michael@0 302 /*produces the right destination path for display*/
michael@0 303 if (destdirlen != 0)
michael@0 304 {
michael@0 305 const char *basename;
michael@0 306
michael@0 307 /* find the last file sepator */
michael@0 308 basename = findBasename(arg);
michael@0 309 uprv_strcpy(outBasename, basename);
michael@0 310 }
michael@0 311 else
michael@0 312 {
michael@0 313 uprv_strcpy(outFileName, arg);
michael@0 314 }
michael@0 315
michael@0 316 /*removes the extension if any is found*/
michael@0 317 dot = uprv_strrchr(outBasename, '.');
michael@0 318 if (dot)
michael@0 319 {
michael@0 320 *dot = '\0';
michael@0 321 }
michael@0 322
michael@0 323 /* the basename without extension is the converter name */
michael@0 324 uprv_strcpy(cnvName, outBasename);
michael@0 325
michael@0 326 /*Adds the target extension*/
michael@0 327 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
michael@0 328
michael@0 329 #if DEBUG
michael@0 330 printf("makeconv: processing %s ...\n", arg);
michael@0 331 fflush(stdout);
michael@0 332 #endif
michael@0 333 localError = U_ZERO_ERROR;
michael@0 334 initConvData(&data);
michael@0 335 createConverter(&data, arg, &localError);
michael@0 336
michael@0 337 if (U_FAILURE(localError))
michael@0 338 {
michael@0 339 /* if an error is found, print out an error msg and keep going */
michael@0 340 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
michael@0 341 u_errorName(localError));
michael@0 342 if(U_SUCCESS(err)) {
michael@0 343 err = localError;
michael@0 344 }
michael@0 345 }
michael@0 346 else
michael@0 347 {
michael@0 348 /* Insure the static data name matches the file name */
michael@0 349 /* Changed to ignore directory and only compare base name
michael@0 350 LDH 1/2/08*/
michael@0 351 char *p;
michael@0 352 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
michael@0 353
michael@0 354 if(p == NULL) /* OK, try alternate */
michael@0 355 {
michael@0 356 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
michael@0 357 if(p == NULL)
michael@0 358 {
michael@0 359 p=cnvName; /* If no separators, no problem */
michael@0 360 }
michael@0 361 }
michael@0 362 else
michael@0 363 {
michael@0 364 p++; /* If found separtor, don't include it in compare */
michael@0 365 }
michael@0 366 if(uprv_stricmp(p,data.staticData.name))
michael@0 367 {
michael@0 368 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
michael@0 369 cnvName, CONVERTER_FILE_EXTENSION,
michael@0 370 data.staticData.name);
michael@0 371 }
michael@0 372
michael@0 373 uprv_strcpy((char*)data.staticData.name, cnvName);
michael@0 374
michael@0 375 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
michael@0 376 fprintf(stderr,
michael@0 377 "Error: A converter name must contain only invariant characters.\n"
michael@0 378 "%s is not a valid converter name.\n",
michael@0 379 data.staticData.name);
michael@0 380 if(U_SUCCESS(err)) {
michael@0 381 err = U_INVALID_TABLE_FORMAT;
michael@0 382 }
michael@0 383 }
michael@0 384
michael@0 385 uprv_strcpy(cnvNameWithPkg, cnvName);
michael@0 386
michael@0 387 localError = U_ZERO_ERROR;
michael@0 388 writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
michael@0 389
michael@0 390 if(U_FAILURE(localError))
michael@0 391 {
michael@0 392 /* if an error is found, print out an error msg and keep going*/
michael@0 393 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
michael@0 394 u_errorName(localError));
michael@0 395 if(U_SUCCESS(err)) {
michael@0 396 err = localError;
michael@0 397 }
michael@0 398 }
michael@0 399 else if (printFilename)
michael@0 400 {
michael@0 401 puts(outBasename);
michael@0 402 }
michael@0 403 }
michael@0 404 fflush(stdout);
michael@0 405 fflush(stderr);
michael@0 406
michael@0 407 cleanupConvData(&data);
michael@0 408 }
michael@0 409
michael@0 410 return err;
michael@0 411 }
michael@0 412
michael@0 413 static void
michael@0 414 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
michael@0 415 if( (name[0]=='i' || name[0]=='I') &&
michael@0 416 (name[1]=='b' || name[1]=='B') &&
michael@0 417 (name[2]=='m' || name[2]=='M')
michael@0 418 ) {
michael@0 419 name+=3;
michael@0 420 if(*name=='-') {
michael@0 421 ++name;
michael@0 422 }
michael@0 423 *pPlatform=UCNV_IBM;
michael@0 424 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
michael@0 425 } else {
michael@0 426 *pPlatform=UCNV_UNKNOWN;
michael@0 427 *pCCSID=0;
michael@0 428 }
michael@0 429 }
michael@0 430
michael@0 431 static void
michael@0 432 readHeader(ConvData *data,
michael@0 433 FileStream* convFile,
michael@0 434 const char* converterName,
michael@0 435 UErrorCode *pErrorCode) {
michael@0 436 char line[1024];
michael@0 437 char *s, *key, *value;
michael@0 438 const UConverterStaticData *prototype;
michael@0 439 UConverterStaticData *staticData;
michael@0 440
michael@0 441 if(U_FAILURE(*pErrorCode)) {
michael@0 442 return;
michael@0 443 }
michael@0 444
michael@0 445 staticData=&data->staticData;
michael@0 446 staticData->platform=UCNV_IBM;
michael@0 447 staticData->subCharLen=0;
michael@0 448
michael@0 449 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
michael@0 450 /* basic parsing and handling of state-related items */
michael@0 451 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
michael@0 452 continue;
michael@0 453 }
michael@0 454
michael@0 455 /* stop at the beginning of the mapping section */
michael@0 456 if(uprv_strcmp(line, "CHARMAP")==0) {
michael@0 457 break;
michael@0 458 }
michael@0 459
michael@0 460 /* collect the information from the header field, ignore unknown keys */
michael@0 461 if(uprv_strcmp(key, "code_set_name")==0) {
michael@0 462 if(*value!=0) {
michael@0 463 uprv_strcpy((char *)staticData->name, value);
michael@0 464 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
michael@0 465 }
michael@0 466 } else if(uprv_strcmp(key, "subchar")==0) {
michael@0 467 uint8_t bytes[UCNV_EXT_MAX_BYTES];
michael@0 468 int8_t length;
michael@0 469
michael@0 470 s=value;
michael@0 471 length=ucm_parseBytes(bytes, line, (const char **)&s);
michael@0 472 if(1<=length && length<=4 && *s==0) {
michael@0 473 staticData->subCharLen=length;
michael@0 474 uprv_memcpy(staticData->subChar, bytes, length);
michael@0 475 } else {
michael@0 476 fprintf(stderr, "error: illegal <subchar> %s\n", value);
michael@0 477 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 478 return;
michael@0 479 }
michael@0 480 } else if(uprv_strcmp(key, "subchar1")==0) {
michael@0 481 uint8_t bytes[UCNV_EXT_MAX_BYTES];
michael@0 482
michael@0 483 s=value;
michael@0 484 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
michael@0 485 staticData->subChar1=bytes[0];
michael@0 486 } else {
michael@0 487 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
michael@0 488 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 489 return;
michael@0 490 }
michael@0 491 }
michael@0 492 }
michael@0 493
michael@0 494 /* copy values from the UCMFile to the static data */
michael@0 495 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
michael@0 496 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
michael@0 497 staticData->conversionType=data->ucm->states.conversionType;
michael@0 498
michael@0 499 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
michael@0 500 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
michael@0 501 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 502 return;
michael@0 503 }
michael@0 504
michael@0 505 /*
michael@0 506 * Now that we know the type, copy any 'default' values from the table.
michael@0 507 * We need not check the type any further because the parser only
michael@0 508 * recognizes what we have prototypes for.
michael@0 509 *
michael@0 510 * For delta (extension-only) tables, copy values from the base file
michael@0 511 * instead, see createConverter().
michael@0 512 */
michael@0 513 if(data->ucm->baseName[0]==0) {
michael@0 514 prototype=ucnv_converterStaticData[staticData->conversionType];
michael@0 515 if(prototype!=NULL) {
michael@0 516 if(staticData->name[0]==0) {
michael@0 517 uprv_strcpy((char *)staticData->name, prototype->name);
michael@0 518 }
michael@0 519
michael@0 520 if(staticData->codepage==0) {
michael@0 521 staticData->codepage=prototype->codepage;
michael@0 522 }
michael@0 523
michael@0 524 if(staticData->platform==0) {
michael@0 525 staticData->platform=prototype->platform;
michael@0 526 }
michael@0 527
michael@0 528 if(staticData->minBytesPerChar==0) {
michael@0 529 staticData->minBytesPerChar=prototype->minBytesPerChar;
michael@0 530 }
michael@0 531
michael@0 532 if(staticData->maxBytesPerChar==0) {
michael@0 533 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
michael@0 534 }
michael@0 535
michael@0 536 if(staticData->subCharLen==0) {
michael@0 537 staticData->subCharLen=prototype->subCharLen;
michael@0 538 if(prototype->subCharLen>0) {
michael@0 539 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
michael@0 540 }
michael@0 541 }
michael@0 542 }
michael@0 543 }
michael@0 544
michael@0 545 if(data->ucm->states.outputType<0) {
michael@0 546 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
michael@0 547 }
michael@0 548
michael@0 549 if( staticData->subChar1!=0 &&
michael@0 550 (staticData->minBytesPerChar>1 ||
michael@0 551 (staticData->conversionType!=UCNV_MBCS &&
michael@0 552 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
michael@0 553 ) {
michael@0 554 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
michael@0 555 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 556 }
michael@0 557 }
michael@0 558
michael@0 559 /* return TRUE if a base table was read, FALSE for an extension table */
michael@0 560 static UBool
michael@0 561 readFile(ConvData *data, const char* converterName,
michael@0 562 UErrorCode *pErrorCode) {
michael@0 563 char line[1024];
michael@0 564 char *end;
michael@0 565 FileStream *convFile;
michael@0 566
michael@0 567 UCMStates *baseStates;
michael@0 568 UBool dataIsBase;
michael@0 569
michael@0 570 if(U_FAILURE(*pErrorCode)) {
michael@0 571 return FALSE;
michael@0 572 }
michael@0 573
michael@0 574 data->ucm=ucm_open();
michael@0 575
michael@0 576 convFile=T_FileStream_open(converterName, "r");
michael@0 577 if(convFile==NULL) {
michael@0 578 *pErrorCode=U_FILE_ACCESS_ERROR;
michael@0 579 return FALSE;
michael@0 580 }
michael@0 581
michael@0 582 readHeader(data, convFile, converterName, pErrorCode);
michael@0 583 if(U_FAILURE(*pErrorCode)) {
michael@0 584 return FALSE;
michael@0 585 }
michael@0 586
michael@0 587 if(data->ucm->baseName[0]==0) {
michael@0 588 dataIsBase=TRUE;
michael@0 589 baseStates=&data->ucm->states;
michael@0 590 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
michael@0 591 } else {
michael@0 592 dataIsBase=FALSE;
michael@0 593 baseStates=NULL;
michael@0 594 }
michael@0 595
michael@0 596 /* read the base table */
michael@0 597 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
michael@0 598 if(U_FAILURE(*pErrorCode)) {
michael@0 599 return FALSE;
michael@0 600 }
michael@0 601
michael@0 602 /* read an extension table if there is one */
michael@0 603 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
michael@0 604 end=uprv_strchr(line, 0);
michael@0 605 while(line<end &&
michael@0 606 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
michael@0 607 --end;
michael@0 608 }
michael@0 609 *end=0;
michael@0 610
michael@0 611 if(line[0]=='#' || u_skipWhitespace(line)==end) {
michael@0 612 continue; /* ignore empty and comment lines */
michael@0 613 }
michael@0 614
michael@0 615 if(0==uprv_strcmp(line, "CHARMAP")) {
michael@0 616 /* read the extension table */
michael@0 617 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
michael@0 618 } else {
michael@0 619 fprintf(stderr, "unexpected text after the base mapping table\n");
michael@0 620 }
michael@0 621 break;
michael@0 622 }
michael@0 623
michael@0 624 T_FileStream_close(convFile);
michael@0 625
michael@0 626 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
michael@0 627 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
michael@0 628 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 629 }
michael@0 630
michael@0 631 return dataIsBase;
michael@0 632 }
michael@0 633
michael@0 634 static void
michael@0 635 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
michael@0 636 ConvData baseData;
michael@0 637 UBool dataIsBase;
michael@0 638
michael@0 639 UConverterStaticData *staticData;
michael@0 640 UCMStates *states, *baseStates;
michael@0 641
michael@0 642 if(U_FAILURE(*pErrorCode)) {
michael@0 643 return;
michael@0 644 }
michael@0 645
michael@0 646 initConvData(data);
michael@0 647
michael@0 648 dataIsBase=readFile(data, converterName, pErrorCode);
michael@0 649 if(U_FAILURE(*pErrorCode)) {
michael@0 650 return;
michael@0 651 }
michael@0 652
michael@0 653 staticData=&data->staticData;
michael@0 654 states=&data->ucm->states;
michael@0 655
michael@0 656 if(dataIsBase) {
michael@0 657 /*
michael@0 658 * Build a normal .cnv file with a base table
michael@0 659 * and an optional extension table.
michael@0 660 */
michael@0 661 data->cnvData=MBCSOpen(data->ucm);
michael@0 662 if(data->cnvData==NULL) {
michael@0 663 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 664
michael@0 665 } else if(!data->cnvData->isValid(data->cnvData,
michael@0 666 staticData->subChar, staticData->subCharLen)
michael@0 667 ) {
michael@0 668 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
michael@0 669 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 670
michael@0 671 } else if(staticData->subChar1!=0 &&
michael@0 672 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
michael@0 673 ) {
michael@0 674 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
michael@0 675 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 676
michael@0 677 } else if(
michael@0 678 data->ucm->ext->mappingsLength>0 &&
michael@0 679 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
michael@0 680 ) {
michael@0 681 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 682 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
michael@0 683 /* sort the table so that it can be turned into UTF-8-friendly data */
michael@0 684 ucm_sortTable(data->ucm->base);
michael@0 685 }
michael@0 686
michael@0 687 if(U_SUCCESS(*pErrorCode)) {
michael@0 688 if(
michael@0 689 /* add the base table after ucm_checkBaseExt()! */
michael@0 690 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
michael@0 691 ) {
michael@0 692 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 693 } else {
michael@0 694 /*
michael@0 695 * addTable() may have requested moving more mappings to the extension table
michael@0 696 * if they fit into the base toUnicode table but not into the
michael@0 697 * base fromUnicode table.
michael@0 698 * (Especially for UTF-8-friendly fromUnicode tables.)
michael@0 699 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
michael@0 700 * to be excluded from the extension toUnicode data.
michael@0 701 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
michael@0 702 * the base fromUnicode table.
michael@0 703 */
michael@0 704 ucm_moveMappings(data->ucm->base, data->ucm->ext);
michael@0 705 ucm_sortTable(data->ucm->ext);
michael@0 706 if(data->ucm->ext->mappingsLength>0) {
michael@0 707 /* prepare the extension table, if there is one */
michael@0 708 data->extData=CnvExtOpen(data->ucm);
michael@0 709 if(data->extData==NULL) {
michael@0 710 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 711 } else if(
michael@0 712 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
michael@0 713 ) {
michael@0 714 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 715 }
michael@0 716 }
michael@0 717 }
michael@0 718 }
michael@0 719 } else {
michael@0 720 /* Build an extension-only .cnv file. */
michael@0 721 char baseFilename[500];
michael@0 722 char *basename;
michael@0 723
michael@0 724 initConvData(&baseData);
michael@0 725
michael@0 726 /* assemble a path/filename for data->ucm->baseName */
michael@0 727 uprv_strcpy(baseFilename, converterName);
michael@0 728 basename=(char *)findBasename(baseFilename);
michael@0 729 uprv_strcpy(basename, data->ucm->baseName);
michael@0 730 uprv_strcat(basename, ".ucm");
michael@0 731
michael@0 732 /* read the base table */
michael@0 733 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
michael@0 734 if(U_FAILURE(*pErrorCode)) {
michael@0 735 return;
michael@0 736 } else if(!dataIsBase) {
michael@0 737 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
michael@0 738 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 739 } else {
michael@0 740 /* prepare the extension table */
michael@0 741 data->extData=CnvExtOpen(data->ucm);
michael@0 742 if(data->extData==NULL) {
michael@0 743 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 744 } else {
michael@0 745 /* fill in gaps in extension file header fields */
michael@0 746 UCMapping *m, *mLimit;
michael@0 747 uint8_t fallbackFlags;
michael@0 748
michael@0 749 baseStates=&baseData.ucm->states;
michael@0 750 if(states->conversionType==UCNV_DBCS) {
michael@0 751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
michael@0 752 } else if(states->minCharLength==0) {
michael@0 753 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
michael@0 754 }
michael@0 755 if(states->maxCharLength<states->minCharLength) {
michael@0 756 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
michael@0 757 }
michael@0 758
michael@0 759 if(staticData->subCharLen==0) {
michael@0 760 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
michael@0 761 staticData->subCharLen=baseData.staticData.subCharLen;
michael@0 762 }
michael@0 763 /*
michael@0 764 * do not copy subChar1 -
michael@0 765 * only use what is explicitly specified
michael@0 766 * because it cannot be unset in the extension file header
michael@0 767 */
michael@0 768
michael@0 769 /* get the fallback flags */
michael@0 770 fallbackFlags=0;
michael@0 771 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
michael@0 772 m<mLimit && fallbackFlags!=3;
michael@0 773 ++m
michael@0 774 ) {
michael@0 775 if(m->f==1) {
michael@0 776 fallbackFlags|=1;
michael@0 777 } else if(m->f==3) {
michael@0 778 fallbackFlags|=2;
michael@0 779 }
michael@0 780 }
michael@0 781
michael@0 782 if(fallbackFlags&1) {
michael@0 783 staticData->hasFromUnicodeFallback=TRUE;
michael@0 784 }
michael@0 785 if(fallbackFlags&2) {
michael@0 786 staticData->hasToUnicodeFallback=TRUE;
michael@0 787 }
michael@0 788
michael@0 789 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
michael@0 790 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
michael@0 791 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 792
michael@0 793 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
michael@0 794 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
michael@0 795 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 796
michael@0 797 } else if(
michael@0 798 !ucm_checkValidity(data->ucm->ext, baseStates) ||
michael@0 799 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
michael@0 800 ) {
michael@0 801 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 802 } else {
michael@0 803 if(states->maxCharLength>1) {
michael@0 804 /*
michael@0 805 * When building a normal .cnv file with a base table
michael@0 806 * for an MBCS (not SBCS) table with explicit precision flags,
michael@0 807 * the MBCSAddTable() function marks some mappings for moving
michael@0 808 * to the extension table.
michael@0 809 * They fit into the base toUnicode table but not into the
michael@0 810 * base fromUnicode table.
michael@0 811 * (Note: We do have explicit precision flags because they are
michael@0 812 * required for extension table generation, and
michael@0 813 * ucm_checkBaseExt() verified it.)
michael@0 814 *
michael@0 815 * We do not call MBCSAddTable() here (we probably could)
michael@0 816 * so we need to do the analysis before building the extension table.
michael@0 817 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
michael@0 818 * Redundant mappings in the extension table are ok except they cost some size.
michael@0 819 *
michael@0 820 * Do this after ucm_checkBaseExt().
michael@0 821 */
michael@0 822 const MBCSData *mbcsData=MBCSGetDummy();
michael@0 823 int32_t needsMove=0;
michael@0 824 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
michael@0 825 m<mLimit;
michael@0 826 ++m
michael@0 827 ) {
michael@0 828 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
michael@0 829 m->f|=MBCS_FROM_U_EXT_FLAG;
michael@0 830 m->moveFlag=UCM_MOVE_TO_EXT;
michael@0 831 ++needsMove;
michael@0 832 }
michael@0 833 }
michael@0 834
michael@0 835 if(needsMove!=0) {
michael@0 836 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
michael@0 837 ucm_sortTable(data->ucm->ext);
michael@0 838 }
michael@0 839 }
michael@0 840 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
michael@0 841 *pErrorCode=U_INVALID_TABLE_FORMAT;
michael@0 842 }
michael@0 843 }
michael@0 844 }
michael@0 845 }
michael@0 846
michael@0 847 cleanupConvData(&baseData);
michael@0 848 }
michael@0 849 }
michael@0 850
michael@0 851 /*
michael@0 852 * Hey, Emacs, please set the following:
michael@0 853 *
michael@0 854 * Local Variables:
michael@0 855 * indent-tabs-mode: nil
michael@0 856 * End:
michael@0 857 *
michael@0 858 */

mercurial