Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 /*
2 ********************************************************************************
3 *
4 * Copyright (C) 1998-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ********************************************************************************
8 *
9 *
10 * makeconv.c:
11 * tool creating a binary (compressed) representation of the conversion mapping
12 * table (IBM NLTC ucmap format).
13 *
14 * 05/04/2000 helena Added fallback mapping into the picture...
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
18 #include <stdio.h>
19 #include "unicode/putil.h"
20 #include "unicode/ucnv_err.h"
21 #include "ucnv_bld.h"
22 #include "ucnv_imp.h"
23 #include "ucnv_cnv.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "uinvchar.h"
27 #include "filestrm.h"
28 #include "toolutil.h"
29 #include "uoptions.h"
30 #include "unicode/udata.h"
31 #include "unewdata.h"
32 #include "uparse.h"
33 #include "ucm.h"
34 #include "makeconv.h"
35 #include "genmbcs.h"
/*
 * Number of elements of a true array (not a pointer), as a signed 32-bit count.
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Set to 1 to compile in the extra progress tracing in main(). */
#define DEBUG 0
41 typedef struct ConvData {
42 UCMFile *ucm;
43 NewConverter *cnvData, *extData;
44 UConverterSharedData sharedData;
45 UConverterStaticData staticData;
46 } ConvData;
48 static void
49 initConvData(ConvData *data) {
50 uprv_memset(data, 0, sizeof(ConvData));
51 data->sharedData.structSize=sizeof(UConverterSharedData);
52 data->staticData.structSize=sizeof(UConverterStaticData);
53 data->sharedData.staticData=&data->staticData;
54 }
56 static void
57 cleanupConvData(ConvData *data) {
58 if(data!=NULL) {
59 if(data->cnvData!=NULL) {
60 data->cnvData->close(data->cnvData);
61 data->cnvData=NULL;
62 }
63 if(data->extData!=NULL) {
64 data->extData->close(data->extData);
65 data->extData=NULL;
66 }
67 ucm_close(data->ucm);
68 data->ucm=NULL;
69 }
70 }
72 /*
73 * from ucnvstat.c - static prototypes of data-based converters
74 */
75 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
77 /*
78 * Global - verbosity
79 */
80 UBool VERBOSE = FALSE;
81 UBool SMALL = FALSE;
82 UBool IGNORE_SISO_CHECK = FALSE;
84 static void
85 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
87 /*
88 * Set up the UNewData and write the converter..
89 */
90 static void
91 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
93 UBool haveCopyright=TRUE;
95 static UDataInfo dataInfo={
96 sizeof(UDataInfo),
97 0,
99 U_IS_BIG_ENDIAN,
100 U_CHARSET_FAMILY,
101 sizeof(UChar),
102 0,
104 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
105 {6, 2, 0, 0}, /* formatVersion */
106 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
107 };
109 static void
110 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
111 {
112 UNewDataMemory *mem = NULL;
113 uint32_t sz2;
114 uint32_t size = 0;
115 int32_t tableType;
117 if(U_FAILURE(*status))
118 {
119 return;
120 }
122 tableType=TABLE_NONE;
123 if(data->cnvData!=NULL) {
124 tableType|=TABLE_BASE;
125 }
126 if(data->extData!=NULL) {
127 tableType|=TABLE_EXT;
128 }
130 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
132 if(U_FAILURE(*status))
133 {
134 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
135 cnvName,
136 "cnv",
137 u_errorName(*status));
138 return;
139 }
141 if(VERBOSE)
142 {
143 printf("- Opened udata %s.%s\n", cnvName, "cnv");
144 }
147 /* all read only, clean, platform independent data. Mmmm. :) */
148 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
149 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
150 /* Now, write the table */
151 if(tableType&TABLE_BASE) {
152 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
153 }
154 if(tableType&TABLE_EXT) {
155 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
156 }
158 sz2 = udata_finish(mem, status);
159 if(size != sz2)
160 {
161 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
162 *status=U_INTERNAL_PROGRAM_ERROR;
163 }
164 if(VERBOSE)
165 {
166 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
167 }
168 }
/*
 * Indexes into options[] below.
 * The order of these constants must match the order of the array entries.
 */
enum {
    OPT_HELP_H,                 /* -h */
    OPT_HELP_QUESTION_MARK,     /* -? */
    OPT_COPYRIGHT,              /* -c */
    OPT_VERSION,                /* -V */
    OPT_DESTDIR,                /* -d */
    OPT_VERBOSE,                /* -v */
    OPT_SMALL,                  /* --small */
    OPT_IGNORE_SISO_CHECK,      /* --ignore-siso-check */
    OPT_COUNT                   /* number of options */
};
182 static UOption options[]={
183 UOPTION_HELP_H,
184 UOPTION_HELP_QUESTION_MARK,
185 UOPTION_COPYRIGHT,
186 UOPTION_VERSION,
187 UOPTION_DESTDIR,
188 UOPTION_VERBOSE,
189 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
190 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
191 };
193 int main(int argc, char* argv[])
194 {
195 ConvData data;
196 UErrorCode err = U_ZERO_ERROR, localError;
197 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
198 const char* destdir, *arg;
199 size_t destdirlen;
200 char* dot = NULL, *outBasename;
201 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
202 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
203 UVersionInfo icuVersion;
204 UBool printFilename;
206 err = U_ZERO_ERROR;
208 U_MAIN_INIT_ARGS(argc, argv);
210 /* Set up the ICU version number */
211 u_getVersion(icuVersion);
212 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
214 /* preset then read command line options */
215 options[OPT_DESTDIR].value=u_getDataDirectory();
216 argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
218 /* error handling, printing usage message */
219 if(argc<0) {
220 fprintf(stderr,
221 "error in command line argument \"%s\"\n",
222 argv[-argc]);
223 } else if(argc<2) {
224 argc=-1;
225 }
226 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
227 FILE *stdfile=argc<0 ? stderr : stdout;
228 fprintf(stdfile,
229 "usage: %s [-options] files...\n"
230 "\tread .ucm codepage mapping files and write .cnv files\n"
231 "options:\n"
232 "\t-h or -? or --help this usage text\n"
233 "\t-V or --version show a version message\n"
234 "\t-c or --copyright include a copyright notice\n"
235 "\t-d or --destdir destination directory, followed by the path\n"
236 "\t-v or --verbose Turn on verbose output\n",
237 argv[0]);
238 fprintf(stdfile,
239 "\t --small Generate smaller .cnv files. They will be\n"
240 "\t significantly smaller but may not be compatible with\n"
241 "\t older versions of ICU and will require heap memory\n"
242 "\t allocation when loaded.\n"
243 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
244 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
245 }
247 if(options[OPT_VERSION].doesOccur) {
248 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
249 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
250 printf("%s\n", U_COPYRIGHT_STRING);
251 exit(0);
252 }
254 /* get the options values */
255 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
256 destdir = options[OPT_DESTDIR].value;
257 VERBOSE = options[OPT_VERBOSE].doesOccur;
258 SMALL = options[OPT_SMALL].doesOccur;
260 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
261 IGNORE_SISO_CHECK = TRUE;
262 }
264 if (destdir != NULL && *destdir != 0) {
265 uprv_strcpy(outFileName, destdir);
266 destdirlen = uprv_strlen(destdir);
267 outBasename = outFileName + destdirlen;
268 if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
269 *outBasename++ = U_FILE_SEP_CHAR;
270 ++destdirlen;
271 }
272 } else {
273 destdirlen = 0;
274 outBasename = outFileName;
275 }
277 #if DEBUG
278 {
279 int i;
280 printf("makeconv: processing %d files...\n", argc - 1);
281 for(i=1; i<argc; ++i) {
282 printf("%s ", argv[i]);
283 }
284 printf("\n");
285 fflush(stdout);
286 }
287 #endif
289 err = U_ZERO_ERROR;
290 printFilename = (UBool) (argc > 2 || VERBOSE);
291 for (++argv; --argc; ++argv)
292 {
293 arg = getLongPathname(*argv);
295 /* Check for potential buffer overflow */
296 if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
297 {
298 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
299 return U_BUFFER_OVERFLOW_ERROR;
300 }
302 /*produces the right destination path for display*/
303 if (destdirlen != 0)
304 {
305 const char *basename;
307 /* find the last file sepator */
308 basename = findBasename(arg);
309 uprv_strcpy(outBasename, basename);
310 }
311 else
312 {
313 uprv_strcpy(outFileName, arg);
314 }
316 /*removes the extension if any is found*/
317 dot = uprv_strrchr(outBasename, '.');
318 if (dot)
319 {
320 *dot = '\0';
321 }
323 /* the basename without extension is the converter name */
324 uprv_strcpy(cnvName, outBasename);
326 /*Adds the target extension*/
327 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
329 #if DEBUG
330 printf("makeconv: processing %s ...\n", arg);
331 fflush(stdout);
332 #endif
333 localError = U_ZERO_ERROR;
334 initConvData(&data);
335 createConverter(&data, arg, &localError);
337 if (U_FAILURE(localError))
338 {
339 /* if an error is found, print out an error msg and keep going */
340 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
341 u_errorName(localError));
342 if(U_SUCCESS(err)) {
343 err = localError;
344 }
345 }
346 else
347 {
348 /* Insure the static data name matches the file name */
349 /* Changed to ignore directory and only compare base name
350 LDH 1/2/08*/
351 char *p;
352 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
354 if(p == NULL) /* OK, try alternate */
355 {
356 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
357 if(p == NULL)
358 {
359 p=cnvName; /* If no separators, no problem */
360 }
361 }
362 else
363 {
364 p++; /* If found separtor, don't include it in compare */
365 }
366 if(uprv_stricmp(p,data.staticData.name))
367 {
368 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
369 cnvName, CONVERTER_FILE_EXTENSION,
370 data.staticData.name);
371 }
373 uprv_strcpy((char*)data.staticData.name, cnvName);
375 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
376 fprintf(stderr,
377 "Error: A converter name must contain only invariant characters.\n"
378 "%s is not a valid converter name.\n",
379 data.staticData.name);
380 if(U_SUCCESS(err)) {
381 err = U_INVALID_TABLE_FORMAT;
382 }
383 }
385 uprv_strcpy(cnvNameWithPkg, cnvName);
387 localError = U_ZERO_ERROR;
388 writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
390 if(U_FAILURE(localError))
391 {
392 /* if an error is found, print out an error msg and keep going*/
393 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
394 u_errorName(localError));
395 if(U_SUCCESS(err)) {
396 err = localError;
397 }
398 }
399 else if (printFilename)
400 {
401 puts(outBasename);
402 }
403 }
404 fflush(stdout);
405 fflush(stderr);
407 cleanupConvData(&data);
408 }
410 return err;
411 }
413 static void
414 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
415 if( (name[0]=='i' || name[0]=='I') &&
416 (name[1]=='b' || name[1]=='B') &&
417 (name[2]=='m' || name[2]=='M')
418 ) {
419 name+=3;
420 if(*name=='-') {
421 ++name;
422 }
423 *pPlatform=UCNV_IBM;
424 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
425 } else {
426 *pPlatform=UCNV_UNKNOWN;
427 *pCCSID=0;
428 }
429 }
431 static void
432 readHeader(ConvData *data,
433 FileStream* convFile,
434 const char* converterName,
435 UErrorCode *pErrorCode) {
436 char line[1024];
437 char *s, *key, *value;
438 const UConverterStaticData *prototype;
439 UConverterStaticData *staticData;
441 if(U_FAILURE(*pErrorCode)) {
442 return;
443 }
445 staticData=&data->staticData;
446 staticData->platform=UCNV_IBM;
447 staticData->subCharLen=0;
449 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
450 /* basic parsing and handling of state-related items */
451 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
452 continue;
453 }
455 /* stop at the beginning of the mapping section */
456 if(uprv_strcmp(line, "CHARMAP")==0) {
457 break;
458 }
460 /* collect the information from the header field, ignore unknown keys */
461 if(uprv_strcmp(key, "code_set_name")==0) {
462 if(*value!=0) {
463 uprv_strcpy((char *)staticData->name, value);
464 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
465 }
466 } else if(uprv_strcmp(key, "subchar")==0) {
467 uint8_t bytes[UCNV_EXT_MAX_BYTES];
468 int8_t length;
470 s=value;
471 length=ucm_parseBytes(bytes, line, (const char **)&s);
472 if(1<=length && length<=4 && *s==0) {
473 staticData->subCharLen=length;
474 uprv_memcpy(staticData->subChar, bytes, length);
475 } else {
476 fprintf(stderr, "error: illegal <subchar> %s\n", value);
477 *pErrorCode=U_INVALID_TABLE_FORMAT;
478 return;
479 }
480 } else if(uprv_strcmp(key, "subchar1")==0) {
481 uint8_t bytes[UCNV_EXT_MAX_BYTES];
483 s=value;
484 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
485 staticData->subChar1=bytes[0];
486 } else {
487 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
488 *pErrorCode=U_INVALID_TABLE_FORMAT;
489 return;
490 }
491 }
492 }
494 /* copy values from the UCMFile to the static data */
495 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
496 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
497 staticData->conversionType=data->ucm->states.conversionType;
499 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
500 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
501 *pErrorCode=U_INVALID_TABLE_FORMAT;
502 return;
503 }
505 /*
506 * Now that we know the type, copy any 'default' values from the table.
507 * We need not check the type any further because the parser only
508 * recognizes what we have prototypes for.
509 *
510 * For delta (extension-only) tables, copy values from the base file
511 * instead, see createConverter().
512 */
513 if(data->ucm->baseName[0]==0) {
514 prototype=ucnv_converterStaticData[staticData->conversionType];
515 if(prototype!=NULL) {
516 if(staticData->name[0]==0) {
517 uprv_strcpy((char *)staticData->name, prototype->name);
518 }
520 if(staticData->codepage==0) {
521 staticData->codepage=prototype->codepage;
522 }
524 if(staticData->platform==0) {
525 staticData->platform=prototype->platform;
526 }
528 if(staticData->minBytesPerChar==0) {
529 staticData->minBytesPerChar=prototype->minBytesPerChar;
530 }
532 if(staticData->maxBytesPerChar==0) {
533 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
534 }
536 if(staticData->subCharLen==0) {
537 staticData->subCharLen=prototype->subCharLen;
538 if(prototype->subCharLen>0) {
539 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
540 }
541 }
542 }
543 }
545 if(data->ucm->states.outputType<0) {
546 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
547 }
549 if( staticData->subChar1!=0 &&
550 (staticData->minBytesPerChar>1 ||
551 (staticData->conversionType!=UCNV_MBCS &&
552 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
553 ) {
554 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
555 *pErrorCode=U_INVALID_TABLE_FORMAT;
556 }
557 }
559 /* return TRUE if a base table was read, FALSE for an extension table */
560 static UBool
561 readFile(ConvData *data, const char* converterName,
562 UErrorCode *pErrorCode) {
563 char line[1024];
564 char *end;
565 FileStream *convFile;
567 UCMStates *baseStates;
568 UBool dataIsBase;
570 if(U_FAILURE(*pErrorCode)) {
571 return FALSE;
572 }
574 data->ucm=ucm_open();
576 convFile=T_FileStream_open(converterName, "r");
577 if(convFile==NULL) {
578 *pErrorCode=U_FILE_ACCESS_ERROR;
579 return FALSE;
580 }
582 readHeader(data, convFile, converterName, pErrorCode);
583 if(U_FAILURE(*pErrorCode)) {
584 return FALSE;
585 }
587 if(data->ucm->baseName[0]==0) {
588 dataIsBase=TRUE;
589 baseStates=&data->ucm->states;
590 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
591 } else {
592 dataIsBase=FALSE;
593 baseStates=NULL;
594 }
596 /* read the base table */
597 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
598 if(U_FAILURE(*pErrorCode)) {
599 return FALSE;
600 }
602 /* read an extension table if there is one */
603 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
604 end=uprv_strchr(line, 0);
605 while(line<end &&
606 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
607 --end;
608 }
609 *end=0;
611 if(line[0]=='#' || u_skipWhitespace(line)==end) {
612 continue; /* ignore empty and comment lines */
613 }
615 if(0==uprv_strcmp(line, "CHARMAP")) {
616 /* read the extension table */
617 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
618 } else {
619 fprintf(stderr, "unexpected text after the base mapping table\n");
620 }
621 break;
622 }
624 T_FileStream_close(convFile);
626 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
627 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
628 *pErrorCode=U_INVALID_TABLE_FORMAT;
629 }
631 return dataIsBase;
632 }
634 static void
635 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
636 ConvData baseData;
637 UBool dataIsBase;
639 UConverterStaticData *staticData;
640 UCMStates *states, *baseStates;
642 if(U_FAILURE(*pErrorCode)) {
643 return;
644 }
646 initConvData(data);
648 dataIsBase=readFile(data, converterName, pErrorCode);
649 if(U_FAILURE(*pErrorCode)) {
650 return;
651 }
653 staticData=&data->staticData;
654 states=&data->ucm->states;
656 if(dataIsBase) {
657 /*
658 * Build a normal .cnv file with a base table
659 * and an optional extension table.
660 */
661 data->cnvData=MBCSOpen(data->ucm);
662 if(data->cnvData==NULL) {
663 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
665 } else if(!data->cnvData->isValid(data->cnvData,
666 staticData->subChar, staticData->subCharLen)
667 ) {
668 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
669 *pErrorCode=U_INVALID_TABLE_FORMAT;
671 } else if(staticData->subChar1!=0 &&
672 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
673 ) {
674 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
675 *pErrorCode=U_INVALID_TABLE_FORMAT;
677 } else if(
678 data->ucm->ext->mappingsLength>0 &&
679 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
680 ) {
681 *pErrorCode=U_INVALID_TABLE_FORMAT;
682 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
683 /* sort the table so that it can be turned into UTF-8-friendly data */
684 ucm_sortTable(data->ucm->base);
685 }
687 if(U_SUCCESS(*pErrorCode)) {
688 if(
689 /* add the base table after ucm_checkBaseExt()! */
690 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
691 ) {
692 *pErrorCode=U_INVALID_TABLE_FORMAT;
693 } else {
694 /*
695 * addTable() may have requested moving more mappings to the extension table
696 * if they fit into the base toUnicode table but not into the
697 * base fromUnicode table.
698 * (Especially for UTF-8-friendly fromUnicode tables.)
699 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
700 * to be excluded from the extension toUnicode data.
701 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
702 * the base fromUnicode table.
703 */
704 ucm_moveMappings(data->ucm->base, data->ucm->ext);
705 ucm_sortTable(data->ucm->ext);
706 if(data->ucm->ext->mappingsLength>0) {
707 /* prepare the extension table, if there is one */
708 data->extData=CnvExtOpen(data->ucm);
709 if(data->extData==NULL) {
710 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
711 } else if(
712 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
713 ) {
714 *pErrorCode=U_INVALID_TABLE_FORMAT;
715 }
716 }
717 }
718 }
719 } else {
720 /* Build an extension-only .cnv file. */
721 char baseFilename[500];
722 char *basename;
724 initConvData(&baseData);
726 /* assemble a path/filename for data->ucm->baseName */
727 uprv_strcpy(baseFilename, converterName);
728 basename=(char *)findBasename(baseFilename);
729 uprv_strcpy(basename, data->ucm->baseName);
730 uprv_strcat(basename, ".ucm");
732 /* read the base table */
733 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
734 if(U_FAILURE(*pErrorCode)) {
735 return;
736 } else if(!dataIsBase) {
737 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
738 *pErrorCode=U_INVALID_TABLE_FORMAT;
739 } else {
740 /* prepare the extension table */
741 data->extData=CnvExtOpen(data->ucm);
742 if(data->extData==NULL) {
743 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
744 } else {
745 /* fill in gaps in extension file header fields */
746 UCMapping *m, *mLimit;
747 uint8_t fallbackFlags;
749 baseStates=&baseData.ucm->states;
750 if(states->conversionType==UCNV_DBCS) {
751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
752 } else if(states->minCharLength==0) {
753 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
754 }
755 if(states->maxCharLength<states->minCharLength) {
756 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
757 }
759 if(staticData->subCharLen==0) {
760 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
761 staticData->subCharLen=baseData.staticData.subCharLen;
762 }
763 /*
764 * do not copy subChar1 -
765 * only use what is explicitly specified
766 * because it cannot be unset in the extension file header
767 */
769 /* get the fallback flags */
770 fallbackFlags=0;
771 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
772 m<mLimit && fallbackFlags!=3;
773 ++m
774 ) {
775 if(m->f==1) {
776 fallbackFlags|=1;
777 } else if(m->f==3) {
778 fallbackFlags|=2;
779 }
780 }
782 if(fallbackFlags&1) {
783 staticData->hasFromUnicodeFallback=TRUE;
784 }
785 if(fallbackFlags&2) {
786 staticData->hasToUnicodeFallback=TRUE;
787 }
789 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
790 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
791 *pErrorCode=U_INVALID_TABLE_FORMAT;
793 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
794 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
795 *pErrorCode=U_INVALID_TABLE_FORMAT;
797 } else if(
798 !ucm_checkValidity(data->ucm->ext, baseStates) ||
799 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
800 ) {
801 *pErrorCode=U_INVALID_TABLE_FORMAT;
802 } else {
803 if(states->maxCharLength>1) {
804 /*
805 * When building a normal .cnv file with a base table
806 * for an MBCS (not SBCS) table with explicit precision flags,
807 * the MBCSAddTable() function marks some mappings for moving
808 * to the extension table.
809 * They fit into the base toUnicode table but not into the
810 * base fromUnicode table.
811 * (Note: We do have explicit precision flags because they are
812 * required for extension table generation, and
813 * ucm_checkBaseExt() verified it.)
814 *
815 * We do not call MBCSAddTable() here (we probably could)
816 * so we need to do the analysis before building the extension table.
817 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
818 * Redundant mappings in the extension table are ok except they cost some size.
819 *
820 * Do this after ucm_checkBaseExt().
821 */
822 const MBCSData *mbcsData=MBCSGetDummy();
823 int32_t needsMove=0;
824 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
825 m<mLimit;
826 ++m
827 ) {
828 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
829 m->f|=MBCS_FROM_U_EXT_FLAG;
830 m->moveFlag=UCM_MOVE_TO_EXT;
831 ++needsMove;
832 }
833 }
835 if(needsMove!=0) {
836 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
837 ucm_sortTable(data->ucm->ext);
838 }
839 }
840 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
841 *pErrorCode=U_INVALID_TABLE_FORMAT;
842 }
843 }
844 }
845 }
847 cleanupConvData(&baseData);
848 }
849 }
851 /*
852 * Hey, Emacs, please set the following:
853 *
854 * Local Variables:
855 * indent-tabs-mode: nil
856 * End:
857 *
858 */