michael@0: /* michael@0: ******************************************************************************** michael@0: * michael@0: * Copyright (C) 1998-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************** michael@0: * michael@0: * michael@0: * makeconv.c: michael@0: * tool creating a binary (compressed) representation of the conversion mapping michael@0: * table (IBM NLTC ucmap format). michael@0: * michael@0: * 05/04/2000 helena Added fallback mapping into the picture... michael@0: * 06/29/2000 helena Major rewrite of the callback APIs. michael@0: */ michael@0: michael@0: #include michael@0: #include "unicode/putil.h" michael@0: #include "unicode/ucnv_err.h" michael@0: #include "ucnv_bld.h" michael@0: #include "ucnv_imp.h" michael@0: #include "ucnv_cnv.h" michael@0: #include "cstring.h" michael@0: #include "cmemory.h" michael@0: #include "uinvchar.h" michael@0: #include "filestrm.h" michael@0: #include "toolutil.h" michael@0: #include "uoptions.h" michael@0: #include "unicode/udata.h" michael@0: #include "unewdata.h" michael@0: #include "uparse.h" michael@0: #include "ucm.h" michael@0: #include "makeconv.h" michael@0: #include "genmbcs.h" michael@0: michael@0: #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) michael@0: michael@0: #define DEBUG 0 michael@0: michael@0: typedef struct ConvData { michael@0: UCMFile *ucm; michael@0: NewConverter *cnvData, *extData; michael@0: UConverterSharedData sharedData; michael@0: UConverterStaticData staticData; michael@0: } ConvData; michael@0: michael@0: static void michael@0: initConvData(ConvData *data) { michael@0: uprv_memset(data, 0, sizeof(ConvData)); michael@0: data->sharedData.structSize=sizeof(UConverterSharedData); michael@0: data->staticData.structSize=sizeof(UConverterStaticData); michael@0: data->sharedData.staticData=&data->staticData; michael@0: } michael@0: michael@0: static void michael@0: cleanupConvData(ConvData *data) { michael@0: if(data!=NULL) { michael@0: if(data->cnvData!=NULL) { michael@0: data->cnvData->close(data->cnvData); michael@0: data->cnvData=NULL; michael@0: } michael@0: if(data->extData!=NULL) { michael@0: data->extData->close(data->extData); michael@0: data->extData=NULL; michael@0: } michael@0: ucm_close(data->ucm); michael@0: data->ucm=NULL; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * from ucnvstat.c - static prototypes of data-based converters michael@0: */ michael@0: extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]; michael@0: michael@0: /* michael@0: * Global - verbosity michael@0: */ michael@0: UBool VERBOSE = FALSE; michael@0: UBool SMALL = FALSE; michael@0: UBool IGNORE_SISO_CHECK = FALSE; michael@0: michael@0: static void michael@0: createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode); michael@0: michael@0: /* michael@0: * Set up the UNewData and write the converter.. michael@0: */ michael@0: static void michael@0: writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status); michael@0: michael@0: UBool haveCopyright=TRUE; michael@0: michael@0: static UDataInfo dataInfo={ michael@0: sizeof(UDataInfo), michael@0: 0, michael@0: michael@0: U_IS_BIG_ENDIAN, michael@0: U_CHARSET_FAMILY, michael@0: sizeof(UChar), michael@0: 0, michael@0: michael@0: {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */ michael@0: {6, 2, 0, 0}, /* formatVersion */ michael@0: {0, 0, 0, 0} /* dataVersion (calculated at runtime) */ michael@0: }; michael@0: michael@0: static void michael@0: writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status) michael@0: { michael@0: UNewDataMemory *mem = NULL; michael@0: uint32_t sz2; michael@0: uint32_t size = 0; michael@0: int32_t tableType; michael@0: michael@0: if(U_FAILURE(*status)) michael@0: { michael@0: return; michael@0: } michael@0: michael@0: tableType=TABLE_NONE; michael@0: if(data->cnvData!=NULL) { michael@0: tableType|=TABLE_BASE; michael@0: } michael@0: if(data->extData!=NULL) { michael@0: tableType|=TABLE_EXT; michael@0: } michael@0: michael@0: mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); michael@0: michael@0: if(U_FAILURE(*status)) michael@0: { michael@0: fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", michael@0: cnvName, michael@0: "cnv", michael@0: u_errorName(*status)); michael@0: return; michael@0: } michael@0: michael@0: if(VERBOSE) michael@0: { michael@0: printf("- Opened udata %s.%s\n", cnvName, "cnv"); michael@0: } michael@0: michael@0: michael@0: /* all read only, clean, platform independent data. Mmmm. :) */ michael@0: udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); michael@0: size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ michael@0: /* Now, write the table */ michael@0: if(tableType&TABLE_BASE) { michael@0: size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType); michael@0: } michael@0: if(tableType&TABLE_EXT) { michael@0: size += data->extData->write(data->extData, &data->staticData, mem, tableType); michael@0: } michael@0: michael@0: sz2 = udata_finish(mem, status); michael@0: if(size != sz2) michael@0: { michael@0: fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size); michael@0: *status=U_INTERNAL_PROGRAM_ERROR; michael@0: } michael@0: if(VERBOSE) michael@0: { michael@0: printf("- Wrote %u bytes to the udata.\n", (int)sz2); michael@0: } michael@0: } michael@0: michael@0: enum { michael@0: OPT_HELP_H, michael@0: OPT_HELP_QUESTION_MARK, michael@0: OPT_COPYRIGHT, michael@0: OPT_VERSION, michael@0: OPT_DESTDIR, michael@0: OPT_VERBOSE, michael@0: OPT_SMALL, michael@0: OPT_IGNORE_SISO_CHECK, michael@0: OPT_COUNT michael@0: }; michael@0: michael@0: static UOption options[]={ michael@0: UOPTION_HELP_H, michael@0: UOPTION_HELP_QUESTION_MARK, michael@0: UOPTION_COPYRIGHT, michael@0: UOPTION_VERSION, michael@0: UOPTION_DESTDIR, michael@0: UOPTION_VERBOSE, michael@0: { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, michael@0: { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } michael@0: }; michael@0: michael@0: int main(int argc, char* argv[]) michael@0: { michael@0: ConvData data; michael@0: UErrorCode err = U_ZERO_ERROR, localError; michael@0: char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; michael@0: const char* destdir, *arg; michael@0: size_t destdirlen; michael@0: char* dot = NULL, *outBasename; michael@0: char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; michael@0: char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; michael@0: UVersionInfo icuVersion; michael@0: UBool printFilename; michael@0: michael@0: err = U_ZERO_ERROR; michael@0: michael@0: U_MAIN_INIT_ARGS(argc, argv); michael@0: michael@0: /* Set up the ICU version number */ michael@0: u_getVersion(icuVersion); michael@0: uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); michael@0: michael@0: /* preset then read command line options */ michael@0: options[OPT_DESTDIR].value=u_getDataDirectory(); michael@0: argc=u_parseArgs(argc, argv, LENGTHOF(options), options); michael@0: michael@0: /* error handling, printing usage message */ michael@0: if(argc<0) { michael@0: fprintf(stderr, michael@0: "error in command line argument \"%s\"\n", michael@0: argv[-argc]); michael@0: } else if(argc<2) { michael@0: argc=-1; michael@0: } michael@0: if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { michael@0: FILE *stdfile=argc<0 ? stderr : stdout; michael@0: fprintf(stdfile, michael@0: "usage: %s [-options] files...\n" michael@0: "\tread .ucm codepage mapping files and write .cnv files\n" michael@0: "options:\n" michael@0: "\t-h or -? or --help this usage text\n" michael@0: "\t-V or --version show a version message\n" michael@0: "\t-c or --copyright include a copyright notice\n" michael@0: "\t-d or --destdir destination directory, followed by the path\n" michael@0: "\t-v or --verbose Turn on verbose output\n", michael@0: argv[0]); michael@0: fprintf(stdfile, michael@0: "\t --small Generate smaller .cnv files. They will be\n" michael@0: "\t significantly smaller but may not be compatible with\n" michael@0: "\t older versions of ICU and will require heap memory\n" michael@0: "\t allocation when loaded.\n" michael@0: "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n"); michael@0: return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; michael@0: } michael@0: michael@0: if(options[OPT_VERSION].doesOccur) { michael@0: printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n", michael@0: dataInfo.formatVersion[0], dataInfo.formatVersion[1]); michael@0: printf("%s\n", U_COPYRIGHT_STRING); michael@0: exit(0); michael@0: } michael@0: michael@0: /* get the options values */ michael@0: haveCopyright = options[OPT_COPYRIGHT].doesOccur; michael@0: destdir = options[OPT_DESTDIR].value; michael@0: VERBOSE = options[OPT_VERBOSE].doesOccur; michael@0: SMALL = options[OPT_SMALL].doesOccur; michael@0: michael@0: if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { michael@0: IGNORE_SISO_CHECK = TRUE; michael@0: } michael@0: michael@0: if (destdir != NULL && *destdir != 0) { michael@0: uprv_strcpy(outFileName, destdir); michael@0: destdirlen = uprv_strlen(destdir); michael@0: outBasename = outFileName + destdirlen; michael@0: if (*(outBasename - 1) != U_FILE_SEP_CHAR) { michael@0: *outBasename++ = U_FILE_SEP_CHAR; michael@0: ++destdirlen; michael@0: } michael@0: } else { michael@0: destdirlen = 0; michael@0: outBasename = outFileName; michael@0: } michael@0: michael@0: #if DEBUG michael@0: { michael@0: int i; michael@0: printf("makeconv: processing %d files...\n", argc - 1); michael@0: for(i=1; i 2 || VERBOSE); michael@0: for (++argv; --argc; ++argv) michael@0: { michael@0: arg = getLongPathname(*argv); michael@0: michael@0: /* Check for potential buffer overflow */ michael@0: if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH) michael@0: { michael@0: fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); michael@0: return U_BUFFER_OVERFLOW_ERROR; michael@0: } michael@0: michael@0: /*produces the right destination path for display*/ michael@0: if (destdirlen != 0) michael@0: { michael@0: const char *basename; michael@0: michael@0: /* find the last file sepator */ michael@0: basename = findBasename(arg); michael@0: uprv_strcpy(outBasename, basename); michael@0: } michael@0: else michael@0: { michael@0: uprv_strcpy(outFileName, arg); michael@0: } michael@0: michael@0: /*removes the extension if any is found*/ michael@0: dot = uprv_strrchr(outBasename, '.'); michael@0: if (dot) michael@0: { michael@0: *dot = '\0'; michael@0: } michael@0: michael@0: /* the basename without extension is the converter name */ michael@0: uprv_strcpy(cnvName, outBasename); michael@0: michael@0: /*Adds the target extension*/ michael@0: uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); michael@0: michael@0: #if DEBUG michael@0: printf("makeconv: processing %s ...\n", arg); michael@0: fflush(stdout); michael@0: #endif michael@0: localError = U_ZERO_ERROR; michael@0: initConvData(&data); michael@0: createConverter(&data, arg, &localError); michael@0: michael@0: if (U_FAILURE(localError)) michael@0: { michael@0: /* if an error is found, print out an error msg and keep going */ michael@0: fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg, michael@0: u_errorName(localError)); michael@0: if(U_SUCCESS(err)) { michael@0: err = localError; michael@0: } michael@0: } michael@0: else michael@0: { michael@0: /* Insure the static data name matches the file name */ michael@0: /* Changed to ignore directory and only compare base name michael@0: LDH 1/2/08*/ michael@0: char *p; michael@0: p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */ michael@0: michael@0: if(p == NULL) /* OK, try alternate */ michael@0: { michael@0: p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); michael@0: if(p == NULL) michael@0: { michael@0: p=cnvName; /* If no separators, no problem */ michael@0: } michael@0: } michael@0: else michael@0: { michael@0: p++; /* If found separtor, don't include it in compare */ michael@0: } michael@0: if(uprv_stricmp(p,data.staticData.name)) michael@0: { michael@0: fprintf(stderr, "Warning: %s%s claims to be '%s'\n", michael@0: cnvName, CONVERTER_FILE_EXTENSION, michael@0: data.staticData.name); michael@0: } michael@0: michael@0: uprv_strcpy((char*)data.staticData.name, cnvName); michael@0: michael@0: if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { michael@0: fprintf(stderr, michael@0: "Error: A converter name must contain only invariant characters.\n" michael@0: "%s is not a valid converter name.\n", michael@0: data.staticData.name); michael@0: if(U_SUCCESS(err)) { michael@0: err = U_INVALID_TABLE_FORMAT; michael@0: } michael@0: } michael@0: michael@0: uprv_strcpy(cnvNameWithPkg, cnvName); michael@0: michael@0: localError = U_ZERO_ERROR; michael@0: writeConverterData(&data, cnvNameWithPkg, destdir, &localError); michael@0: michael@0: if(U_FAILURE(localError)) michael@0: { michael@0: /* if an error is found, print out an error msg and keep going*/ michael@0: fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg, michael@0: u_errorName(localError)); michael@0: if(U_SUCCESS(err)) { michael@0: err = localError; michael@0: } michael@0: } michael@0: else if (printFilename) michael@0: { michael@0: puts(outBasename); michael@0: } michael@0: } michael@0: fflush(stdout); michael@0: fflush(stderr); michael@0: michael@0: cleanupConvData(&data); michael@0: } michael@0: michael@0: return err; michael@0: } michael@0: michael@0: static void michael@0: getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) { michael@0: if( (name[0]=='i' || name[0]=='I') && michael@0: (name[1]=='b' || name[1]=='B') && michael@0: (name[2]=='m' || name[2]=='M') michael@0: ) { michael@0: name+=3; michael@0: if(*name=='-') { michael@0: ++name; michael@0: } michael@0: *pPlatform=UCNV_IBM; michael@0: *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); michael@0: } else { michael@0: *pPlatform=UCNV_UNKNOWN; michael@0: *pCCSID=0; michael@0: } michael@0: } michael@0: michael@0: static void michael@0: readHeader(ConvData *data, michael@0: FileStream* convFile, michael@0: const char* converterName, michael@0: UErrorCode *pErrorCode) { michael@0: char line[1024]; michael@0: char *s, *key, *value; michael@0: const UConverterStaticData *prototype; michael@0: UConverterStaticData *staticData; michael@0: michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return; michael@0: } michael@0: michael@0: staticData=&data->staticData; michael@0: staticData->platform=UCNV_IBM; michael@0: staticData->subCharLen=0; michael@0: michael@0: while(T_FileStream_readLine(convFile, line, sizeof(line))) { michael@0: /* basic parsing and handling of state-related items */ michael@0: if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { michael@0: continue; michael@0: } michael@0: michael@0: /* stop at the beginning of the mapping section */ michael@0: if(uprv_strcmp(line, "CHARMAP")==0) { michael@0: break; michael@0: } michael@0: michael@0: /* collect the information from the header field, ignore unknown keys */ michael@0: if(uprv_strcmp(key, "code_set_name")==0) { michael@0: if(*value!=0) { michael@0: uprv_strcpy((char *)staticData->name, value); michael@0: getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage); michael@0: } michael@0: } else if(uprv_strcmp(key, "subchar")==0) { michael@0: uint8_t bytes[UCNV_EXT_MAX_BYTES]; michael@0: int8_t length; michael@0: michael@0: s=value; michael@0: length=ucm_parseBytes(bytes, line, (const char **)&s); michael@0: if(1<=length && length<=4 && *s==0) { michael@0: staticData->subCharLen=length; michael@0: uprv_memcpy(staticData->subChar, bytes, length); michael@0: } else { michael@0: fprintf(stderr, "error: illegal %s\n", value); michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: return; michael@0: } michael@0: } else if(uprv_strcmp(key, "subchar1")==0) { michael@0: uint8_t bytes[UCNV_EXT_MAX_BYTES]; michael@0: michael@0: s=value; michael@0: if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { michael@0: staticData->subChar1=bytes[0]; michael@0: } else { michael@0: fprintf(stderr, "error: illegal %s\n", value); michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: return; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* copy values from the UCMFile to the static data */ michael@0: staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; michael@0: staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; michael@0: staticData->conversionType=data->ucm->states.conversionType; michael@0: michael@0: if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { michael@0: fprintf(stderr, "ucm error: missing conversion type ()\n"); michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: return; michael@0: } michael@0: michael@0: /* michael@0: * Now that we know the type, copy any 'default' values from the table. michael@0: * We need not check the type any further because the parser only michael@0: * recognizes what we have prototypes for. michael@0: * michael@0: * For delta (extension-only) tables, copy values from the base file michael@0: * instead, see createConverter(). michael@0: */ michael@0: if(data->ucm->baseName[0]==0) { michael@0: prototype=ucnv_converterStaticData[staticData->conversionType]; michael@0: if(prototype!=NULL) { michael@0: if(staticData->name[0]==0) { michael@0: uprv_strcpy((char *)staticData->name, prototype->name); michael@0: } michael@0: michael@0: if(staticData->codepage==0) { michael@0: staticData->codepage=prototype->codepage; michael@0: } michael@0: michael@0: if(staticData->platform==0) { michael@0: staticData->platform=prototype->platform; michael@0: } michael@0: michael@0: if(staticData->minBytesPerChar==0) { michael@0: staticData->minBytesPerChar=prototype->minBytesPerChar; michael@0: } michael@0: michael@0: if(staticData->maxBytesPerChar==0) { michael@0: staticData->maxBytesPerChar=prototype->maxBytesPerChar; michael@0: } michael@0: michael@0: if(staticData->subCharLen==0) { michael@0: staticData->subCharLen=prototype->subCharLen; michael@0: if(prototype->subCharLen>0) { michael@0: uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: if(data->ucm->states.outputType<0) { michael@0: data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; michael@0: } michael@0: michael@0: if( staticData->subChar1!=0 && michael@0: (staticData->minBytesPerChar>1 || michael@0: (staticData->conversionType!=UCNV_MBCS && michael@0: staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) michael@0: ) { michael@0: fprintf(stderr, "error: defined for a type other than MBCS or EBCDIC_STATEFUL\n"); michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: } michael@0: } michael@0: michael@0: /* return TRUE if a base table was read, FALSE for an extension table */ michael@0: static UBool michael@0: readFile(ConvData *data, const char* converterName, michael@0: UErrorCode *pErrorCode) { michael@0: char line[1024]; michael@0: char *end; michael@0: FileStream *convFile; michael@0: michael@0: UCMStates *baseStates; michael@0: UBool dataIsBase; michael@0: michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return FALSE; michael@0: } michael@0: michael@0: data->ucm=ucm_open(); michael@0: michael@0: convFile=T_FileStream_open(converterName, "r"); michael@0: if(convFile==NULL) { michael@0: *pErrorCode=U_FILE_ACCESS_ERROR; michael@0: return FALSE; michael@0: } michael@0: michael@0: readHeader(data, convFile, converterName, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return FALSE; michael@0: } michael@0: michael@0: if(data->ucm->baseName[0]==0) { michael@0: dataIsBase=TRUE; michael@0: baseStates=&data->ucm->states; michael@0: ucm_processStates(baseStates, IGNORE_SISO_CHECK); michael@0: } else { michael@0: dataIsBase=FALSE; michael@0: baseStates=NULL; michael@0: } michael@0: michael@0: /* read the base table */ michael@0: ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return FALSE; michael@0: } michael@0: michael@0: /* read an extension table if there is one */ michael@0: while(T_FileStream_readLine(convFile, line, sizeof(line))) { michael@0: end=uprv_strchr(line, 0); michael@0: while(lineucm, convFile, FALSE, baseStates, pErrorCode); michael@0: } else { michael@0: fprintf(stderr, "unexpected text after the base mapping table\n"); michael@0: } michael@0: break; michael@0: } michael@0: michael@0: T_FileStream_close(convFile); michael@0: michael@0: if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) { michael@0: fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n"); michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: } michael@0: michael@0: return dataIsBase; michael@0: } michael@0: michael@0: static void michael@0: createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) { michael@0: ConvData baseData; michael@0: UBool dataIsBase; michael@0: michael@0: UConverterStaticData *staticData; michael@0: UCMStates *states, *baseStates; michael@0: michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return; michael@0: } michael@0: michael@0: initConvData(data); michael@0: michael@0: dataIsBase=readFile(data, converterName, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return; michael@0: } michael@0: michael@0: staticData=&data->staticData; michael@0: states=&data->ucm->states; michael@0: michael@0: if(dataIsBase) { michael@0: /* michael@0: * Build a normal .cnv file with a base table michael@0: * and an optional extension table. michael@0: */ michael@0: data->cnvData=MBCSOpen(data->ucm); michael@0: if(data->cnvData==NULL) { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: michael@0: } else if(!data->cnvData->isValid(data->cnvData, michael@0: staticData->subChar, staticData->subCharLen) michael@0: ) { michael@0: fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: michael@0: } else if(staticData->subChar1!=0 && michael@0: !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1) michael@0: ) { michael@0: fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: michael@0: } else if( michael@0: data->ucm->ext->mappingsLength>0 && michael@0: !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) michael@0: ) { michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { michael@0: /* sort the table so that it can be turned into UTF-8-friendly data */ michael@0: ucm_sortTable(data->ucm->base); michael@0: } michael@0: michael@0: if(U_SUCCESS(*pErrorCode)) { michael@0: if( michael@0: /* add the base table after ucm_checkBaseExt()! */ michael@0: !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) michael@0: ) { michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: } else { michael@0: /* michael@0: * addTable() may have requested moving more mappings to the extension table michael@0: * if they fit into the base toUnicode table but not into the michael@0: * base fromUnicode table. michael@0: * (Especially for UTF-8-friendly fromUnicode tables.) michael@0: * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them michael@0: * to be excluded from the extension toUnicode data. michael@0: * See MBCSOkForBaseFromUnicode() for which mappings do not fit into michael@0: * the base fromUnicode table. michael@0: */ michael@0: ucm_moveMappings(data->ucm->base, data->ucm->ext); michael@0: ucm_sortTable(data->ucm->ext); michael@0: if(data->ucm->ext->mappingsLength>0) { michael@0: /* prepare the extension table, if there is one */ michael@0: data->extData=CnvExtOpen(data->ucm); michael@0: if(data->extData==NULL) { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: } else if( michael@0: !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) michael@0: ) { michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } else { michael@0: /* Build an extension-only .cnv file. */ michael@0: char baseFilename[500]; michael@0: char *basename; michael@0: michael@0: initConvData(&baseData); michael@0: michael@0: /* assemble a path/filename for data->ucm->baseName */ michael@0: uprv_strcpy(baseFilename, converterName); michael@0: basename=(char *)findBasename(baseFilename); michael@0: uprv_strcpy(basename, data->ucm->baseName); michael@0: uprv_strcat(basename, ".ucm"); michael@0: michael@0: /* read the base table */ michael@0: dataIsBase=readFile(&baseData, baseFilename, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return; michael@0: } else if(!dataIsBase) { michael@0: fprintf(stderr, "error: the file \"%s\" is not a base table file\n", baseFilename); michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: } else { michael@0: /* prepare the extension table */ michael@0: data->extData=CnvExtOpen(data->ucm); michael@0: if(data->extData==NULL) { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: } else { michael@0: /* fill in gaps in extension file header fields */ michael@0: UCMapping *m, *mLimit; michael@0: uint8_t fallbackFlags; michael@0: michael@0: baseStates=&baseData.ucm->states; michael@0: if(states->conversionType==UCNV_DBCS) { michael@0: staticData->minBytesPerChar=(int8_t)(states->minCharLength=2); michael@0: } else if(states->minCharLength==0) { michael@0: staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength); michael@0: } michael@0: if(states->maxCharLengthminCharLength) { michael@0: staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength); michael@0: } michael@0: michael@0: if(staticData->subCharLen==0) { michael@0: uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4); michael@0: staticData->subCharLen=baseData.staticData.subCharLen; michael@0: } michael@0: /* michael@0: * do not copy subChar1 - michael@0: * only use what is explicitly specified michael@0: * because it cannot be unset in the extension file header michael@0: */ michael@0: michael@0: /* get the fallback flags */ michael@0: fallbackFlags=0; michael@0: for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; michael@0: mf==1) { michael@0: fallbackFlags|=1; michael@0: } else if(m->f==3) { michael@0: fallbackFlags|=2; michael@0: } michael@0: } michael@0: michael@0: if(fallbackFlags&1) { michael@0: staticData->hasFromUnicodeFallback=TRUE; michael@0: } michael@0: if(fallbackFlags&2) { michael@0: staticData->hasToUnicodeFallback=TRUE; michael@0: } michael@0: michael@0: if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) { michael@0: fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: michael@0: } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { michael@0: fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: michael@0: } else if( michael@0: !ucm_checkValidity(data->ucm->ext, baseStates) || michael@0: !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) michael@0: ) { michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: } else { michael@0: if(states->maxCharLength>1) { michael@0: /* michael@0: * When building a normal .cnv file with a base table michael@0: * for an MBCS (not SBCS) table with explicit precision flags, michael@0: * the MBCSAddTable() function marks some mappings for moving michael@0: * to the extension table. michael@0: * They fit into the base toUnicode table but not into the michael@0: * base fromUnicode table. michael@0: * (Note: We do have explicit precision flags because they are michael@0: * required for extension table generation, and michael@0: * ucm_checkBaseExt() verified it.) michael@0: * michael@0: * We do not call MBCSAddTable() here (we probably could) michael@0: * so we need to do the analysis before building the extension table. michael@0: * We assume that MBCSAddTable() will build a UTF-8-friendly table. michael@0: * Redundant mappings in the extension table are ok except they cost some size. michael@0: * michael@0: * Do this after ucm_checkBaseExt(). michael@0: */ michael@0: const MBCSData *mbcsData=MBCSGetDummy(); michael@0: int32_t needsMove=0; michael@0: for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; michael@0: mb.bytes, m->bLen, m->u, m->f)) { michael@0: m->f|=MBCS_FROM_U_EXT_FLAG; michael@0: m->moveFlag=UCM_MOVE_TO_EXT; michael@0: ++needsMove; michael@0: } michael@0: } michael@0: michael@0: if(needsMove!=0) { michael@0: ucm_moveMappings(baseData.ucm->base, data->ucm->ext); michael@0: ucm_sortTable(data->ucm->ext); michael@0: } michael@0: } michael@0: if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) { michael@0: *pErrorCode=U_INVALID_TABLE_FORMAT; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: cleanupConvData(&baseData); michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * Hey, Emacs, please set the following: michael@0: * michael@0: * Local Variables: michael@0: * indent-tabs-mode: nil michael@0: * End: michael@0: * michael@0: */