1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/tools/makeconv/makeconv.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,858 @@ 1.4 +/* 1.5 + ******************************************************************************** 1.6 + * 1.7 + * Copyright (C) 1998-2012, International Business Machines 1.8 + * Corporation and others. All Rights Reserved. 1.9 + * 1.10 + ******************************************************************************** 1.11 + * 1.12 + * 1.13 + * makeconv.c: 1.14 + * tool creating a binary (compressed) representation of the conversion mapping 1.15 + * table (IBM NLTC ucmap format). 1.16 + * 1.17 + * 05/04/2000 helena Added fallback mapping into the picture... 1.18 + * 06/29/2000 helena Major rewrite of the callback APIs. 1.19 + */ 1.20 + 1.21 +#include <stdio.h> 1.22 +#include "unicode/putil.h" 1.23 +#include "unicode/ucnv_err.h" 1.24 +#include "ucnv_bld.h" 1.25 +#include "ucnv_imp.h" 1.26 +#include "ucnv_cnv.h" 1.27 +#include "cstring.h" 1.28 +#include "cmemory.h" 1.29 +#include "uinvchar.h" 1.30 +#include "filestrm.h" 1.31 +#include "toolutil.h" 1.32 +#include "uoptions.h" 1.33 +#include "unicode/udata.h" 1.34 +#include "unewdata.h" 1.35 +#include "uparse.h" 1.36 +#include "ucm.h" 1.37 +#include "makeconv.h" 1.38 +#include "genmbcs.h" 1.39 + 1.40 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 1.41 + 1.42 +#define DEBUG 0 1.43 + 1.44 +typedef struct ConvData { 1.45 + UCMFile *ucm; 1.46 + NewConverter *cnvData, *extData; 1.47 + UConverterSharedData sharedData; 1.48 + UConverterStaticData staticData; 1.49 +} ConvData; 1.50 + 1.51 +static void 1.52 +initConvData(ConvData *data) { 1.53 + uprv_memset(data, 0, sizeof(ConvData)); 1.54 + data->sharedData.structSize=sizeof(UConverterSharedData); 1.55 + data->staticData.structSize=sizeof(UConverterStaticData); 1.56 + data->sharedData.staticData=&data->staticData; 1.57 +} 1.58 + 1.59 +static void 1.60 +cleanupConvData(ConvData *data) { 1.61 + if(data!=NULL) { 1.62 + if(data->cnvData!=NULL) { 1.63 + data->cnvData->close(data->cnvData); 1.64 + data->cnvData=NULL; 1.65 + } 1.66 + if(data->extData!=NULL) { 1.67 + data->extData->close(data->extData); 1.68 + data->extData=NULL; 1.69 + } 1.70 + ucm_close(data->ucm); 1.71 + data->ucm=NULL; 1.72 + } 1.73 +} 1.74 + 1.75 +/* 1.76 + * from ucnvstat.c - static prototypes of data-based converters 1.77 + */ 1.78 +extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]; 1.79 + 1.80 +/* 1.81 + * Global - verbosity 1.82 + */ 1.83 +UBool VERBOSE = FALSE; 1.84 +UBool SMALL = FALSE; 1.85 +UBool IGNORE_SISO_CHECK = FALSE; 1.86 + 1.87 +static void 1.88 +createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode); 1.89 + 1.90 +/* 1.91 + * Set up the UNewData and write the converter.. 1.92 + */ 1.93 +static void 1.94 +writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status); 1.95 + 1.96 +UBool haveCopyright=TRUE; 1.97 + 1.98 +static UDataInfo dataInfo={ 1.99 + sizeof(UDataInfo), 1.100 + 0, 1.101 + 1.102 + U_IS_BIG_ENDIAN, 1.103 + U_CHARSET_FAMILY, 1.104 + sizeof(UChar), 1.105 + 0, 1.106 + 1.107 + {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ 1.108 + {6, 2, 0, 0}, /* formatVersion */ 1.109 + {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ 1.110 +}; 1.111 + 1.112 +static void 1.113 +writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status) 1.114 +{ 1.115 + UNewDataMemory *mem = NULL; 1.116 + uint32_t sz2; 1.117 + uint32_t size = 0; 1.118 + int32_t tableType; 1.119 + 1.120 + if(U_FAILURE(*status)) 1.121 + { 1.122 + return; 1.123 + } 1.124 + 1.125 + tableType=TABLE_NONE; 1.126 + if(data->cnvData!=NULL) { 1.127 + tableType|=TABLE_BASE; 1.128 + } 1.129 + if(data->extData!=NULL) { 1.130 + tableType|=TABLE_EXT; 1.131 + } 1.132 + 1.133 + mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); 1.134 + 1.135 + if(U_FAILURE(*status)) 1.136 + { 1.137 + fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", 1.138 + cnvName, 1.139 + "cnv", 1.140 + u_errorName(*status)); 1.141 + return; 1.142 + } 1.143 + 1.144 + if(VERBOSE) 1.145 + { 1.146 + printf("- Opened udata %s.%s\n", cnvName, "cnv"); 1.147 + } 1.148 + 1.149 + 1.150 + /* all read only, clean, platform independent data. Mmmm. :) */ 1.151 + udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); 1.152 + size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ 1.153 + /* Now, write the table */ 1.154 + if(tableType&TABLE_BASE) { 1.155 + size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType); 1.156 + } 1.157 + if(tableType&TABLE_EXT) { 1.158 + size += data->extData->write(data->extData, &data->staticData, mem, tableType); 1.159 + } 1.160 + 1.161 + sz2 = udata_finish(mem, status); 1.162 + if(size != sz2) 1.163 + { 1.164 + fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size); 1.165 + *status=U_INTERNAL_PROGRAM_ERROR; 1.166 + } 1.167 + if(VERBOSE) 1.168 + { 1.169 + printf("- Wrote %u bytes to the udata.\n", (int)sz2); 1.170 + } 1.171 +} 1.172 + 1.173 +enum { 1.174 + OPT_HELP_H, 1.175 + OPT_HELP_QUESTION_MARK, 1.176 + OPT_COPYRIGHT, 1.177 + OPT_VERSION, 1.178 + OPT_DESTDIR, 1.179 + OPT_VERBOSE, 1.180 + OPT_SMALL, 1.181 + OPT_IGNORE_SISO_CHECK, 1.182 + OPT_COUNT 1.183 +}; 1.184 + 1.185 +static UOption options[]={ 1.186 + UOPTION_HELP_H, 1.187 + UOPTION_HELP_QUESTION_MARK, 1.188 + UOPTION_COPYRIGHT, 1.189 + UOPTION_VERSION, 1.190 + UOPTION_DESTDIR, 1.191 + UOPTION_VERBOSE, 1.192 + { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, 1.193 + { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } 1.194 +}; 1.195 + 1.196 +int main(int argc, char* argv[]) 1.197 +{ 1.198 + ConvData data; 1.199 + UErrorCode err = U_ZERO_ERROR, localError; 1.200 + char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; 1.201 + const char* destdir, *arg; 1.202 + size_t destdirlen; 1.203 + char* dot = NULL, *outBasename; 1.204 + char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; 1.205 + char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; 1.206 + UVersionInfo icuVersion; 1.207 + UBool printFilename; 1.208 + 1.209 + err = U_ZERO_ERROR; 1.210 + 1.211 + U_MAIN_INIT_ARGS(argc, argv); 1.212 + 1.213 + /* Set up the ICU version number */ 1.214 + u_getVersion(icuVersion); 1.215 + uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); 1.216 + 1.217 + /* preset then read command line options */ 1.218 + options[OPT_DESTDIR].value=u_getDataDirectory(); 1.219 + argc=u_parseArgs(argc, argv, LENGTHOF(options), options); 1.220 + 1.221 + /* error handling, printing usage message */ 1.222 + if(argc<0) { 1.223 + fprintf(stderr, 1.224 + "error in command line argument \"%s\"\n", 1.225 + argv[-argc]); 1.226 + } else if(argc<2) { 1.227 + argc=-1; 1.228 + } 1.229 + if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { 1.230 + FILE *stdfile=argc<0 ? stderr : stdout; 1.231 + fprintf(stdfile, 1.232 + "usage: %s [-options] files...\n" 1.233 + "\tread .ucm codepage mapping files and write .cnv files\n" 1.234 + "options:\n" 1.235 + "\t-h or -? or --help this usage text\n" 1.236 + "\t-V or --version show a version message\n" 1.237 + "\t-c or --copyright include a copyright notice\n" 1.238 + "\t-d or --destdir destination directory, followed by the path\n" 1.239 + "\t-v or --verbose Turn on verbose output\n", 1.240 + argv[0]); 1.241 + fprintf(stdfile, 1.242 + "\t --small Generate smaller .cnv files. They will be\n" 1.243 + "\t significantly smaller but may not be compatible with\n" 1.244 + "\t older versions of ICU and will require heap memory\n" 1.245 + "\t allocation when loaded.\n" 1.246 + "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n"); 1.247 + return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; 1.248 + } 1.249 + 1.250 + if(options[OPT_VERSION].doesOccur) { 1.251 + printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n", 1.252 + dataInfo.formatVersion[0], dataInfo.formatVersion[1]); 1.253 + printf("%s\n", U_COPYRIGHT_STRING); 1.254 + exit(0); 1.255 + } 1.256 + 1.257 + /* get the options values */ 1.258 + haveCopyright = options[OPT_COPYRIGHT].doesOccur; 1.259 + destdir = options[OPT_DESTDIR].value; 1.260 + VERBOSE = options[OPT_VERBOSE].doesOccur; 1.261 + SMALL = options[OPT_SMALL].doesOccur; 1.262 + 1.263 + if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { 1.264 + IGNORE_SISO_CHECK = TRUE; 1.265 + } 1.266 + 1.267 + if (destdir != NULL && *destdir != 0) { 1.268 + uprv_strcpy(outFileName, destdir); 1.269 + destdirlen = uprv_strlen(destdir); 1.270 + outBasename = outFileName + destdirlen; 1.271 + if (*(outBasename - 1) != U_FILE_SEP_CHAR) { 1.272 + *outBasename++ = U_FILE_SEP_CHAR; 1.273 + ++destdirlen; 1.274 + } 1.275 + } else { 1.276 + destdirlen = 0; 1.277 + outBasename = outFileName; 1.278 + } 1.279 + 1.280 +#if DEBUG 1.281 + { 1.282 + int i; 1.283 + printf("makeconv: processing %d files...\n", argc - 1); 1.284 + for(i=1; i<argc; ++i) { 1.285 + printf("%s ", argv[i]); 1.286 + } 1.287 + printf("\n"); 1.288 + fflush(stdout); 1.289 + } 1.290 +#endif 1.291 + 1.292 + err = U_ZERO_ERROR; 1.293 + printFilename = (UBool) (argc > 2 || VERBOSE); 1.294 + for (++argv; --argc; ++argv) 1.295 + { 1.296 + arg = getLongPathname(*argv); 1.297 + 1.298 + /* Check for potential buffer overflow */ 1.299 + if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH) 1.300 + { 1.301 + fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); 1.302 + return U_BUFFER_OVERFLOW_ERROR; 1.303 + } 1.304 + 1.305 + /*produces the right destination path for display*/ 1.306 + if (destdirlen != 0) 1.307 + { 1.308 + const char *basename; 1.309 + 1.310 + /* find the last file sepator */ 1.311 + basename = findBasename(arg); 1.312 + uprv_strcpy(outBasename, basename); 1.313 + } 1.314 + else 1.315 + { 1.316 + uprv_strcpy(outFileName, arg); 1.317 + } 1.318 + 1.319 + /*removes the extension if any is found*/ 1.320 + dot = uprv_strrchr(outBasename, '.'); 1.321 + if (dot) 1.322 + { 1.323 + *dot = '\0'; 1.324 + } 1.325 + 1.326 + /* the basename without extension is the converter name */ 1.327 + uprv_strcpy(cnvName, outBasename); 1.328 + 1.329 + /*Adds the target extension*/ 1.330 + uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); 1.331 + 1.332 +#if DEBUG 1.333 + printf("makeconv: processing %s ...\n", arg); 1.334 + fflush(stdout); 1.335 +#endif 1.336 + localError = U_ZERO_ERROR; 1.337 + initConvData(&data); 1.338 + createConverter(&data, arg, &localError); 1.339 + 1.340 + if (U_FAILURE(localError)) 1.341 + { 1.342 + /* if an error is found, print out an error msg and keep going */ 1.343 + fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg, 1.344 + u_errorName(localError)); 1.345 + if(U_SUCCESS(err)) { 1.346 + err = localError; 1.347 + } 1.348 + } 1.349 + else 1.350 + { 1.351 + /* Insure the static data name matches the file name */ 1.352 + /* Changed to ignore directory and only compare base name 1.353 + LDH 1/2/08*/ 1.354 + char *p; 1.355 + p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */ 1.356 + 1.357 + if(p == NULL) /* OK, try alternate */ 1.358 + { 1.359 + p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); 1.360 + if(p == NULL) 1.361 + { 1.362 + p=cnvName; /* If no separators, no problem */ 1.363 + } 1.364 + } 1.365 + else 1.366 + { 1.367 + p++; /* If found separtor, don't include it in compare */ 1.368 + } 1.369 + if(uprv_stricmp(p,data.staticData.name)) 1.370 + { 1.371 + fprintf(stderr, "Warning: %s%s claims to be '%s'\n", 1.372 + cnvName, CONVERTER_FILE_EXTENSION, 1.373 + data.staticData.name); 1.374 + } 1.375 + 1.376 + uprv_strcpy((char*)data.staticData.name, cnvName); 1.377 + 1.378 + if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { 1.379 + fprintf(stderr, 1.380 + "Error: A converter name must contain only invariant characters.\n" 1.381 + "%s is not a valid converter name.\n", 1.382 + data.staticData.name); 1.383 + if(U_SUCCESS(err)) { 1.384 + err = U_INVALID_TABLE_FORMAT; 1.385 + } 1.386 + } 1.387 + 1.388 + uprv_strcpy(cnvNameWithPkg, cnvName); 1.389 + 1.390 + localError = U_ZERO_ERROR; 1.391 + writeConverterData(&data, cnvNameWithPkg, destdir, &localError); 1.392 + 1.393 + if(U_FAILURE(localError)) 1.394 + { 1.395 + /* if an error is found, print out an error msg and keep going*/ 1.396 + fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg, 1.397 + u_errorName(localError)); 1.398 + if(U_SUCCESS(err)) { 1.399 + err = localError; 1.400 + } 1.401 + } 1.402 + else if (printFilename) 1.403 + { 1.404 + puts(outBasename); 1.405 + } 1.406 + } 1.407 + fflush(stdout); 1.408 + fflush(stderr); 1.409 + 1.410 + cleanupConvData(&data); 1.411 + } 1.412 + 1.413 + return err; 1.414 +} 1.415 + 1.416 +static void 1.417 +getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) { 1.418 + if( (name[0]=='i' || name[0]=='I') && 1.419 + (name[1]=='b' || name[1]=='B') && 1.420 + (name[2]=='m' || name[2]=='M') 1.421 + ) { 1.422 + name+=3; 1.423 + if(*name=='-') { 1.424 + ++name; 1.425 + } 1.426 + *pPlatform=UCNV_IBM; 1.427 + *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); 1.428 + } else { 1.429 + *pPlatform=UCNV_UNKNOWN; 1.430 + *pCCSID=0; 1.431 + } 1.432 +} 1.433 + 1.434 +static void 1.435 +readHeader(ConvData *data, 1.436 + FileStream* convFile, 1.437 + const char* converterName, 1.438 + UErrorCode *pErrorCode) { 1.439 + char line[1024]; 1.440 + char *s, *key, *value; 1.441 + const UConverterStaticData *prototype; 1.442 + UConverterStaticData *staticData; 1.443 + 1.444 + if(U_FAILURE(*pErrorCode)) { 1.445 + return; 1.446 + } 1.447 + 1.448 + staticData=&data->staticData; 1.449 + staticData->platform=UCNV_IBM; 1.450 + staticData->subCharLen=0; 1.451 + 1.452 + while(T_FileStream_readLine(convFile, line, sizeof(line))) { 1.453 + /* basic parsing and handling of state-related items */ 1.454 + if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { 1.455 + continue; 1.456 + } 1.457 + 1.458 + /* stop at the beginning of the mapping section */ 1.459 + if(uprv_strcmp(line, "CHARMAP")==0) { 1.460 + break; 1.461 + } 1.462 + 1.463 + /* collect the information from the header field, ignore unknown keys */ 1.464 + if(uprv_strcmp(key, "code_set_name")==0) { 1.465 + if(*value!=0) { 1.466 + uprv_strcpy((char *)staticData->name, value); 1.467 + getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage); 1.468 + } 1.469 + } else if(uprv_strcmp(key, "subchar")==0) { 1.470 + uint8_t bytes[UCNV_EXT_MAX_BYTES]; 1.471 + int8_t length; 1.472 + 1.473 + s=value; 1.474 + length=ucm_parseBytes(bytes, line, (const char **)&s); 1.475 + if(1<=length && length<=4 && *s==0) { 1.476 + staticData->subCharLen=length; 1.477 + uprv_memcpy(staticData->subChar, bytes, length); 1.478 + } else { 1.479 + fprintf(stderr, "error: illegal <subchar> %s\n", value); 1.480 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.481 + return; 1.482 + } 1.483 + } else if(uprv_strcmp(key, "subchar1")==0) { 1.484 + uint8_t bytes[UCNV_EXT_MAX_BYTES]; 1.485 + 1.486 + s=value; 1.487 + if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { 1.488 + staticData->subChar1=bytes[0]; 1.489 + } else { 1.490 + fprintf(stderr, "error: illegal <subchar1> %s\n", value); 1.491 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.492 + return; 1.493 + } 1.494 + } 1.495 + } 1.496 + 1.497 + /* copy values from the UCMFile to the static data */ 1.498 + staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; 1.499 + staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; 1.500 + staticData->conversionType=data->ucm->states.conversionType; 1.501 + 1.502 + if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { 1.503 + fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); 1.504 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.505 + return; 1.506 + } 1.507 + 1.508 + /* 1.509 + * Now that we know the type, copy any 'default' values from the table. 1.510 + * We need not check the type any further because the parser only 1.511 + * recognizes what we have prototypes for. 1.512 + * 1.513 + * For delta (extension-only) tables, copy values from the base file 1.514 + * instead, see createConverter(). 1.515 + */ 1.516 + if(data->ucm->baseName[0]==0) { 1.517 + prototype=ucnv_converterStaticData[staticData->conversionType]; 1.518 + if(prototype!=NULL) { 1.519 + if(staticData->name[0]==0) { 1.520 + uprv_strcpy((char *)staticData->name, prototype->name); 1.521 + } 1.522 + 1.523 + if(staticData->codepage==0) { 1.524 + staticData->codepage=prototype->codepage; 1.525 + } 1.526 + 1.527 + if(staticData->platform==0) { 1.528 + staticData->platform=prototype->platform; 1.529 + } 1.530 + 1.531 + if(staticData->minBytesPerChar==0) { 1.532 + staticData->minBytesPerChar=prototype->minBytesPerChar; 1.533 + } 1.534 + 1.535 + if(staticData->maxBytesPerChar==0) { 1.536 + staticData->maxBytesPerChar=prototype->maxBytesPerChar; 1.537 + } 1.538 + 1.539 + if(staticData->subCharLen==0) { 1.540 + staticData->subCharLen=prototype->subCharLen; 1.541 + if(prototype->subCharLen>0) { 1.542 + uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen); 1.543 + } 1.544 + } 1.545 + } 1.546 + } 1.547 + 1.548 + if(data->ucm->states.outputType<0) { 1.549 + data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; 1.550 + } 1.551 + 1.552 + if( staticData->subChar1!=0 && 1.553 + (staticData->minBytesPerChar>1 || 1.554 + (staticData->conversionType!=UCNV_MBCS && 1.555 + staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) 1.556 + ) { 1.557 + fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n"); 1.558 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.559 + } 1.560 +} 1.561 + 1.562 +/* return TRUE if a base table was read, FALSE for an extension table */ 1.563 +static UBool 1.564 +readFile(ConvData *data, const char* converterName, 1.565 + UErrorCode *pErrorCode) { 1.566 + char line[1024]; 1.567 + char *end; 1.568 + FileStream *convFile; 1.569 + 1.570 + UCMStates *baseStates; 1.571 + UBool dataIsBase; 1.572 + 1.573 + if(U_FAILURE(*pErrorCode)) { 1.574 + return FALSE; 1.575 + } 1.576 + 1.577 + data->ucm=ucm_open(); 1.578 + 1.579 + convFile=T_FileStream_open(converterName, "r"); 1.580 + if(convFile==NULL) { 1.581 + *pErrorCode=U_FILE_ACCESS_ERROR; 1.582 + return FALSE; 1.583 + } 1.584 + 1.585 + readHeader(data, convFile, converterName, pErrorCode); 1.586 + if(U_FAILURE(*pErrorCode)) { 1.587 + return FALSE; 1.588 + } 1.589 + 1.590 + if(data->ucm->baseName[0]==0) { 1.591 + dataIsBase=TRUE; 1.592 + baseStates=&data->ucm->states; 1.593 + ucm_processStates(baseStates, IGNORE_SISO_CHECK); 1.594 + } else { 1.595 + dataIsBase=FALSE; 1.596 + baseStates=NULL; 1.597 + } 1.598 + 1.599 + /* read the base table */ 1.600 + ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); 1.601 + if(U_FAILURE(*pErrorCode)) { 1.602 + return FALSE; 1.603 + } 1.604 + 1.605 + /* read an extension table if there is one */ 1.606 + while(T_FileStream_readLine(convFile, line, sizeof(line))) { 1.607 + end=uprv_strchr(line, 0); 1.608 + while(line<end && 1.609 + (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) { 1.610 + --end; 1.611 + } 1.612 + *end=0; 1.613 + 1.614 + if(line[0]=='#' || u_skipWhitespace(line)==end) { 1.615 + continue; /* ignore empty and comment lines */ 1.616 + } 1.617 + 1.618 + if(0==uprv_strcmp(line, "CHARMAP")) { 1.619 + /* read the extension table */ 1.620 + ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode); 1.621 + } else { 1.622 + fprintf(stderr, "unexpected text after the base mapping table\n"); 1.623 + } 1.624 + break; 1.625 + } 1.626 + 1.627 + T_FileStream_close(convFile); 1.628 + 1.629 + if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) { 1.630 + fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n"); 1.631 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.632 + } 1.633 + 1.634 + return dataIsBase; 1.635 +} 1.636 + 1.637 +static void 1.638 +createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) { 1.639 + ConvData baseData; 1.640 + UBool dataIsBase; 1.641 + 1.642 + UConverterStaticData *staticData; 1.643 + UCMStates *states, *baseStates; 1.644 + 1.645 + if(U_FAILURE(*pErrorCode)) { 1.646 + return; 1.647 + } 1.648 + 1.649 + initConvData(data); 1.650 + 1.651 + dataIsBase=readFile(data, converterName, pErrorCode); 1.652 + if(U_FAILURE(*pErrorCode)) { 1.653 + return; 1.654 + } 1.655 + 1.656 + staticData=&data->staticData; 1.657 + states=&data->ucm->states; 1.658 + 1.659 + if(dataIsBase) { 1.660 + /* 1.661 + * Build a normal .cnv file with a base table 1.662 + * and an optional extension table. 1.663 + */ 1.664 + data->cnvData=MBCSOpen(data->ucm); 1.665 + if(data->cnvData==NULL) { 1.666 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.667 + 1.668 + } else if(!data->cnvData->isValid(data->cnvData, 1.669 + staticData->subChar, staticData->subCharLen) 1.670 + ) { 1.671 + fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); 1.672 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.673 + 1.674 + } else if(staticData->subChar1!=0 && 1.675 + !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1) 1.676 + ) { 1.677 + fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); 1.678 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.679 + 1.680 + } else if( 1.681 + data->ucm->ext->mappingsLength>0 && 1.682 + !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) 1.683 + ) { 1.684 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.685 + } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { 1.686 + /* sort the table so that it can be turned into UTF-8-friendly data */ 1.687 + ucm_sortTable(data->ucm->base); 1.688 + } 1.689 + 1.690 + if(U_SUCCESS(*pErrorCode)) { 1.691 + if( 1.692 + /* add the base table after ucm_checkBaseExt()! */ 1.693 + !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) 1.694 + ) { 1.695 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.696 + } else { 1.697 + /* 1.698 + * addTable() may have requested moving more mappings to the extension table 1.699 + * if they fit into the base toUnicode table but not into the 1.700 + * base fromUnicode table. 1.701 + * (Especially for UTF-8-friendly fromUnicode tables.) 1.702 + * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them 1.703 + * to be excluded from the extension toUnicode data. 1.704 + * See MBCSOkForBaseFromUnicode() for which mappings do not fit into 1.705 + * the base fromUnicode table. 1.706 + */ 1.707 + ucm_moveMappings(data->ucm->base, data->ucm->ext); 1.708 + ucm_sortTable(data->ucm->ext); 1.709 + if(data->ucm->ext->mappingsLength>0) { 1.710 + /* prepare the extension table, if there is one */ 1.711 + data->extData=CnvExtOpen(data->ucm); 1.712 + if(data->extData==NULL) { 1.713 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.714 + } else if( 1.715 + !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) 1.716 + ) { 1.717 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.718 + } 1.719 + } 1.720 + } 1.721 + } 1.722 + } else { 1.723 + /* Build an extension-only .cnv file. */ 1.724 + char baseFilename[500]; 1.725 + char *basename; 1.726 + 1.727 + initConvData(&baseData); 1.728 + 1.729 + /* assemble a path/filename for data->ucm->baseName */ 1.730 + uprv_strcpy(baseFilename, converterName); 1.731 + basename=(char *)findBasename(baseFilename); 1.732 + uprv_strcpy(basename, data->ucm->baseName); 1.733 + uprv_strcat(basename, ".ucm"); 1.734 + 1.735 + /* read the base table */ 1.736 + dataIsBase=readFile(&baseData, baseFilename, pErrorCode); 1.737 + if(U_FAILURE(*pErrorCode)) { 1.738 + return; 1.739 + } else if(!dataIsBase) { 1.740 + fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename); 1.741 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.742 + } else { 1.743 + /* prepare the extension table */ 1.744 + data->extData=CnvExtOpen(data->ucm); 1.745 + if(data->extData==NULL) { 1.746 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.747 + } else { 1.748 + /* fill in gaps in extension file header fields */ 1.749 + UCMapping *m, *mLimit; 1.750 + uint8_t fallbackFlags; 1.751 + 1.752 + baseStates=&baseData.ucm->states; 1.753 + if(states->conversionType==UCNV_DBCS) { 1.754 + staticData->minBytesPerChar=(int8_t)(states->minCharLength=2); 1.755 + } else if(states->minCharLength==0) { 1.756 + staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength); 1.757 + } 1.758 + if(states->maxCharLength<states->minCharLength) { 1.759 + staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength); 1.760 + } 1.761 + 1.762 + if(staticData->subCharLen==0) { 1.763 + uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4); 1.764 + staticData->subCharLen=baseData.staticData.subCharLen; 1.765 + } 1.766 + /* 1.767 + * do not copy subChar1 - 1.768 + * only use what is explicitly specified 1.769 + * because it cannot be unset in the extension file header 1.770 + */ 1.771 + 1.772 + /* get the fallback flags */ 1.773 + fallbackFlags=0; 1.774 + for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; 1.775 + m<mLimit && fallbackFlags!=3; 1.776 + ++m 1.777 + ) { 1.778 + if(m->f==1) { 1.779 + fallbackFlags|=1; 1.780 + } else if(m->f==3) { 1.781 + fallbackFlags|=2; 1.782 + } 1.783 + } 1.784 + 1.785 + if(fallbackFlags&1) { 1.786 + staticData->hasFromUnicodeFallback=TRUE; 1.787 + } 1.788 + if(fallbackFlags&2) { 1.789 + staticData->hasToUnicodeFallback=TRUE; 1.790 + } 1.791 + 1.792 + if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) { 1.793 + fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); 1.794 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.795 + 1.796 + } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { 1.797 + fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); 1.798 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.799 + 1.800 + } else if( 1.801 + !ucm_checkValidity(data->ucm->ext, baseStates) || 1.802 + !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) 1.803 + ) { 1.804 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.805 + } else { 1.806 + if(states->maxCharLength>1) { 1.807 + /* 1.808 + * When building a normal .cnv file with a base table 1.809 + * for an MBCS (not SBCS) table with explicit precision flags, 1.810 + * the MBCSAddTable() function marks some mappings for moving 1.811 + * to the extension table. 1.812 + * They fit into the base toUnicode table but not into the 1.813 + * base fromUnicode table. 1.814 + * (Note: We do have explicit precision flags because they are 1.815 + * required for extension table generation, and 1.816 + * ucm_checkBaseExt() verified it.) 1.817 + * 1.818 + * We do not call MBCSAddTable() here (we probably could) 1.819 + * so we need to do the analysis before building the extension table. 1.820 + * We assume that MBCSAddTable() will build a UTF-8-friendly table. 1.821 + * Redundant mappings in the extension table are ok except they cost some size. 1.822 + * 1.823 + * Do this after ucm_checkBaseExt(). 1.824 + */ 1.825 + const MBCSData *mbcsData=MBCSGetDummy(); 1.826 + int32_t needsMove=0; 1.827 + for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; 1.828 + m<mLimit; 1.829 + ++m 1.830 + ) { 1.831 + if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) { 1.832 + m->f|=MBCS_FROM_U_EXT_FLAG; 1.833 + m->moveFlag=UCM_MOVE_TO_EXT; 1.834 + ++needsMove; 1.835 + } 1.836 + } 1.837 + 1.838 + if(needsMove!=0) { 1.839 + ucm_moveMappings(baseData.ucm->base, data->ucm->ext); 1.840 + ucm_sortTable(data->ucm->ext); 1.841 + } 1.842 + } 1.843 + if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) { 1.844 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.845 + } 1.846 + } 1.847 + } 1.848 + } 1.849 + 1.850 + cleanupConvData(&baseData); 1.851 + } 1.852 +} 1.853 + 1.854 +/* 1.855 + * Hey, Emacs, please set the following: 1.856 + * 1.857 + * Local Variables: 1.858 + * indent-tabs-mode: nil 1.859 + * End: 1.860 + * 1.861 + */