|
1 /* |
|
2 ******************************************************************************** |
|
3 * |
|
4 * Copyright (C) 1998-2012, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************** |
|
8 * |
|
9 * |
|
10 * makeconv.c: |
|
11 * tool creating a binary (compressed) representation of the conversion mapping |
|
12 * table (IBM NLTC ucmap format). |
|
13 * |
|
14 * 05/04/2000 helena Added fallback mapping into the picture... |
|
15 * 06/29/2000 helena Major rewrite of the callback APIs. |
|
16 */ |
|
17 |
|
18 #include <stdio.h> |
|
19 #include "unicode/putil.h" |
|
20 #include "unicode/ucnv_err.h" |
|
21 #include "ucnv_bld.h" |
|
22 #include "ucnv_imp.h" |
|
23 #include "ucnv_cnv.h" |
|
24 #include "cstring.h" |
|
25 #include "cmemory.h" |
|
26 #include "uinvchar.h" |
|
27 #include "filestrm.h" |
|
28 #include "toolutil.h" |
|
29 #include "uoptions.h" |
|
30 #include "unicode/udata.h" |
|
31 #include "unewdata.h" |
|
32 #include "uparse.h" |
|
33 #include "ucm.h" |
|
34 #include "makeconv.h" |
|
35 #include "genmbcs.h" |
|
36 |
|
37 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) |
|
38 |
|
39 #define DEBUG 0 |
|
40 |
|
41 typedef struct ConvData { |
|
42 UCMFile *ucm; |
|
43 NewConverter *cnvData, *extData; |
|
44 UConverterSharedData sharedData; |
|
45 UConverterStaticData staticData; |
|
46 } ConvData; |
|
47 |
|
48 static void |
|
49 initConvData(ConvData *data) { |
|
50 uprv_memset(data, 0, sizeof(ConvData)); |
|
51 data->sharedData.structSize=sizeof(UConverterSharedData); |
|
52 data->staticData.structSize=sizeof(UConverterStaticData); |
|
53 data->sharedData.staticData=&data->staticData; |
|
54 } |
|
55 |
|
56 static void |
|
57 cleanupConvData(ConvData *data) { |
|
58 if(data!=NULL) { |
|
59 if(data->cnvData!=NULL) { |
|
60 data->cnvData->close(data->cnvData); |
|
61 data->cnvData=NULL; |
|
62 } |
|
63 if(data->extData!=NULL) { |
|
64 data->extData->close(data->extData); |
|
65 data->extData=NULL; |
|
66 } |
|
67 ucm_close(data->ucm); |
|
68 data->ucm=NULL; |
|
69 } |
|
70 } |
|
71 |
|
/*
 * from ucnvstat.c - static prototypes of data-based converters;
 * readHeader() indexes this table with the conversion type
 * to fill in header fields that the .ucm file did not set
 */
extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];

/*
 * Globals set from the command line options in main()
 */
UBool VERBOSE = FALSE;              /* --verbose: print progress messages */
UBool SMALL = FALSE;                /* --small */
UBool IGNORE_SISO_CHECK = FALSE;    /* --ignore-siso-check: passed to ucm_processStates() */

/*
 * Read the .ucm file converterName and build the converter tables in *data.
 */
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);

/*
 * Set up the UNewData and write the converter..
 */
static void
writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);

/* set from the -c/--copyright option; if TRUE, U_COPYRIGHT_STRING is passed to udata_create() */
UBool haveCopyright=TRUE;
|
94 |
|
/*
 * Data header description passed to udata_create() for every .cnv file.
 * main() copies the ICU version into dataVersion before any file is written.
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    {6, 2, 0, 0},                 /* formatVersion */
    {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
};
|
108 |
|
109 static void |
|
110 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status) |
|
111 { |
|
112 UNewDataMemory *mem = NULL; |
|
113 uint32_t sz2; |
|
114 uint32_t size = 0; |
|
115 int32_t tableType; |
|
116 |
|
117 if(U_FAILURE(*status)) |
|
118 { |
|
119 return; |
|
120 } |
|
121 |
|
122 tableType=TABLE_NONE; |
|
123 if(data->cnvData!=NULL) { |
|
124 tableType|=TABLE_BASE; |
|
125 } |
|
126 if(data->extData!=NULL) { |
|
127 tableType|=TABLE_EXT; |
|
128 } |
|
129 |
|
130 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); |
|
131 |
|
132 if(U_FAILURE(*status)) |
|
133 { |
|
134 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", |
|
135 cnvName, |
|
136 "cnv", |
|
137 u_errorName(*status)); |
|
138 return; |
|
139 } |
|
140 |
|
141 if(VERBOSE) |
|
142 { |
|
143 printf("- Opened udata %s.%s\n", cnvName, "cnv"); |
|
144 } |
|
145 |
|
146 |
|
147 /* all read only, clean, platform independent data. Mmmm. :) */ |
|
148 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); |
|
149 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */ |
|
150 /* Now, write the table */ |
|
151 if(tableType&TABLE_BASE) { |
|
152 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType); |
|
153 } |
|
154 if(tableType&TABLE_EXT) { |
|
155 size += data->extData->write(data->extData, &data->staticData, mem, tableType); |
|
156 } |
|
157 |
|
158 sz2 = udata_finish(mem, status); |
|
159 if(size != sz2) |
|
160 { |
|
161 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size); |
|
162 *status=U_INTERNAL_PROGRAM_ERROR; |
|
163 } |
|
164 if(VERBOSE) |
|
165 { |
|
166 printf("- Wrote %u bytes to the udata.\n", (int)sz2); |
|
167 } |
|
168 } |
|
169 |
|
/*
 * Indexes into options[]; main() uses them to look up each option,
 * so the order here must match the order of the options[] initializer.
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_COUNT
};
|
181 |
|
/*
 * Command line options, parsed by u_parseArgs() in main().
 * --small and --ignore-siso-check are declared here with a long name only;
 * see the usage text in main().
 */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
};
|
192 |
|
193 int main(int argc, char* argv[]) |
|
194 { |
|
195 ConvData data; |
|
196 UErrorCode err = U_ZERO_ERROR, localError; |
|
197 char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
|
198 const char* destdir, *arg; |
|
199 size_t destdirlen; |
|
200 char* dot = NULL, *outBasename; |
|
201 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
|
202 char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; |
|
203 UVersionInfo icuVersion; |
|
204 UBool printFilename; |
|
205 |
|
206 err = U_ZERO_ERROR; |
|
207 |
|
208 U_MAIN_INIT_ARGS(argc, argv); |
|
209 |
|
210 /* Set up the ICU version number */ |
|
211 u_getVersion(icuVersion); |
|
212 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); |
|
213 |
|
214 /* preset then read command line options */ |
|
215 options[OPT_DESTDIR].value=u_getDataDirectory(); |
|
216 argc=u_parseArgs(argc, argv, LENGTHOF(options), options); |
|
217 |
|
218 /* error handling, printing usage message */ |
|
219 if(argc<0) { |
|
220 fprintf(stderr, |
|
221 "error in command line argument \"%s\"\n", |
|
222 argv[-argc]); |
|
223 } else if(argc<2) { |
|
224 argc=-1; |
|
225 } |
|
226 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { |
|
227 FILE *stdfile=argc<0 ? stderr : stdout; |
|
228 fprintf(stdfile, |
|
229 "usage: %s [-options] files...\n" |
|
230 "\tread .ucm codepage mapping files and write .cnv files\n" |
|
231 "options:\n" |
|
232 "\t-h or -? or --help this usage text\n" |
|
233 "\t-V or --version show a version message\n" |
|
234 "\t-c or --copyright include a copyright notice\n" |
|
235 "\t-d or --destdir destination directory, followed by the path\n" |
|
236 "\t-v or --verbose Turn on verbose output\n", |
|
237 argv[0]); |
|
238 fprintf(stdfile, |
|
239 "\t --small Generate smaller .cnv files. They will be\n" |
|
240 "\t significantly smaller but may not be compatible with\n" |
|
241 "\t older versions of ICU and will require heap memory\n" |
|
242 "\t allocation when loaded.\n" |
|
243 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n"); |
|
244 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; |
|
245 } |
|
246 |
|
247 if(options[OPT_VERSION].doesOccur) { |
|
248 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n", |
|
249 dataInfo.formatVersion[0], dataInfo.formatVersion[1]); |
|
250 printf("%s\n", U_COPYRIGHT_STRING); |
|
251 exit(0); |
|
252 } |
|
253 |
|
254 /* get the options values */ |
|
255 haveCopyright = options[OPT_COPYRIGHT].doesOccur; |
|
256 destdir = options[OPT_DESTDIR].value; |
|
257 VERBOSE = options[OPT_VERBOSE].doesOccur; |
|
258 SMALL = options[OPT_SMALL].doesOccur; |
|
259 |
|
260 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { |
|
261 IGNORE_SISO_CHECK = TRUE; |
|
262 } |
|
263 |
|
264 if (destdir != NULL && *destdir != 0) { |
|
265 uprv_strcpy(outFileName, destdir); |
|
266 destdirlen = uprv_strlen(destdir); |
|
267 outBasename = outFileName + destdirlen; |
|
268 if (*(outBasename - 1) != U_FILE_SEP_CHAR) { |
|
269 *outBasename++ = U_FILE_SEP_CHAR; |
|
270 ++destdirlen; |
|
271 } |
|
272 } else { |
|
273 destdirlen = 0; |
|
274 outBasename = outFileName; |
|
275 } |
|
276 |
|
277 #if DEBUG |
|
278 { |
|
279 int i; |
|
280 printf("makeconv: processing %d files...\n", argc - 1); |
|
281 for(i=1; i<argc; ++i) { |
|
282 printf("%s ", argv[i]); |
|
283 } |
|
284 printf("\n"); |
|
285 fflush(stdout); |
|
286 } |
|
287 #endif |
|
288 |
|
289 err = U_ZERO_ERROR; |
|
290 printFilename = (UBool) (argc > 2 || VERBOSE); |
|
291 for (++argv; --argc; ++argv) |
|
292 { |
|
293 arg = getLongPathname(*argv); |
|
294 |
|
295 /* Check for potential buffer overflow */ |
|
296 if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH) |
|
297 { |
|
298 fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); |
|
299 return U_BUFFER_OVERFLOW_ERROR; |
|
300 } |
|
301 |
|
302 /*produces the right destination path for display*/ |
|
303 if (destdirlen != 0) |
|
304 { |
|
305 const char *basename; |
|
306 |
|
307 /* find the last file sepator */ |
|
308 basename = findBasename(arg); |
|
309 uprv_strcpy(outBasename, basename); |
|
310 } |
|
311 else |
|
312 { |
|
313 uprv_strcpy(outFileName, arg); |
|
314 } |
|
315 |
|
316 /*removes the extension if any is found*/ |
|
317 dot = uprv_strrchr(outBasename, '.'); |
|
318 if (dot) |
|
319 { |
|
320 *dot = '\0'; |
|
321 } |
|
322 |
|
323 /* the basename without extension is the converter name */ |
|
324 uprv_strcpy(cnvName, outBasename); |
|
325 |
|
326 /*Adds the target extension*/ |
|
327 uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); |
|
328 |
|
329 #if DEBUG |
|
330 printf("makeconv: processing %s ...\n", arg); |
|
331 fflush(stdout); |
|
332 #endif |
|
333 localError = U_ZERO_ERROR; |
|
334 initConvData(&data); |
|
335 createConverter(&data, arg, &localError); |
|
336 |
|
337 if (U_FAILURE(localError)) |
|
338 { |
|
339 /* if an error is found, print out an error msg and keep going */ |
|
340 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg, |
|
341 u_errorName(localError)); |
|
342 if(U_SUCCESS(err)) { |
|
343 err = localError; |
|
344 } |
|
345 } |
|
346 else |
|
347 { |
|
348 /* Insure the static data name matches the file name */ |
|
349 /* Changed to ignore directory and only compare base name |
|
350 LDH 1/2/08*/ |
|
351 char *p; |
|
352 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */ |
|
353 |
|
354 if(p == NULL) /* OK, try alternate */ |
|
355 { |
|
356 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); |
|
357 if(p == NULL) |
|
358 { |
|
359 p=cnvName; /* If no separators, no problem */ |
|
360 } |
|
361 } |
|
362 else |
|
363 { |
|
364 p++; /* If found separtor, don't include it in compare */ |
|
365 } |
|
366 if(uprv_stricmp(p,data.staticData.name)) |
|
367 { |
|
368 fprintf(stderr, "Warning: %s%s claims to be '%s'\n", |
|
369 cnvName, CONVERTER_FILE_EXTENSION, |
|
370 data.staticData.name); |
|
371 } |
|
372 |
|
373 uprv_strcpy((char*)data.staticData.name, cnvName); |
|
374 |
|
375 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { |
|
376 fprintf(stderr, |
|
377 "Error: A converter name must contain only invariant characters.\n" |
|
378 "%s is not a valid converter name.\n", |
|
379 data.staticData.name); |
|
380 if(U_SUCCESS(err)) { |
|
381 err = U_INVALID_TABLE_FORMAT; |
|
382 } |
|
383 } |
|
384 |
|
385 uprv_strcpy(cnvNameWithPkg, cnvName); |
|
386 |
|
387 localError = U_ZERO_ERROR; |
|
388 writeConverterData(&data, cnvNameWithPkg, destdir, &localError); |
|
389 |
|
390 if(U_FAILURE(localError)) |
|
391 { |
|
392 /* if an error is found, print out an error msg and keep going*/ |
|
393 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg, |
|
394 u_errorName(localError)); |
|
395 if(U_SUCCESS(err)) { |
|
396 err = localError; |
|
397 } |
|
398 } |
|
399 else if (printFilename) |
|
400 { |
|
401 puts(outBasename); |
|
402 } |
|
403 } |
|
404 fflush(stdout); |
|
405 fflush(stderr); |
|
406 |
|
407 cleanupConvData(&data); |
|
408 } |
|
409 |
|
410 return err; |
|
411 } |
|
412 |
|
413 static void |
|
414 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) { |
|
415 if( (name[0]=='i' || name[0]=='I') && |
|
416 (name[1]=='b' || name[1]=='B') && |
|
417 (name[2]=='m' || name[2]=='M') |
|
418 ) { |
|
419 name+=3; |
|
420 if(*name=='-') { |
|
421 ++name; |
|
422 } |
|
423 *pPlatform=UCNV_IBM; |
|
424 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); |
|
425 } else { |
|
426 *pPlatform=UCNV_UNKNOWN; |
|
427 *pCCSID=0; |
|
428 } |
|
429 } |
|
430 |
|
431 static void |
|
432 readHeader(ConvData *data, |
|
433 FileStream* convFile, |
|
434 const char* converterName, |
|
435 UErrorCode *pErrorCode) { |
|
436 char line[1024]; |
|
437 char *s, *key, *value; |
|
438 const UConverterStaticData *prototype; |
|
439 UConverterStaticData *staticData; |
|
440 |
|
441 if(U_FAILURE(*pErrorCode)) { |
|
442 return; |
|
443 } |
|
444 |
|
445 staticData=&data->staticData; |
|
446 staticData->platform=UCNV_IBM; |
|
447 staticData->subCharLen=0; |
|
448 |
|
449 while(T_FileStream_readLine(convFile, line, sizeof(line))) { |
|
450 /* basic parsing and handling of state-related items */ |
|
451 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { |
|
452 continue; |
|
453 } |
|
454 |
|
455 /* stop at the beginning of the mapping section */ |
|
456 if(uprv_strcmp(line, "CHARMAP")==0) { |
|
457 break; |
|
458 } |
|
459 |
|
460 /* collect the information from the header field, ignore unknown keys */ |
|
461 if(uprv_strcmp(key, "code_set_name")==0) { |
|
462 if(*value!=0) { |
|
463 uprv_strcpy((char *)staticData->name, value); |
|
464 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage); |
|
465 } |
|
466 } else if(uprv_strcmp(key, "subchar")==0) { |
|
467 uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
|
468 int8_t length; |
|
469 |
|
470 s=value; |
|
471 length=ucm_parseBytes(bytes, line, (const char **)&s); |
|
472 if(1<=length && length<=4 && *s==0) { |
|
473 staticData->subCharLen=length; |
|
474 uprv_memcpy(staticData->subChar, bytes, length); |
|
475 } else { |
|
476 fprintf(stderr, "error: illegal <subchar> %s\n", value); |
|
477 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
478 return; |
|
479 } |
|
480 } else if(uprv_strcmp(key, "subchar1")==0) { |
|
481 uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
|
482 |
|
483 s=value; |
|
484 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { |
|
485 staticData->subChar1=bytes[0]; |
|
486 } else { |
|
487 fprintf(stderr, "error: illegal <subchar1> %s\n", value); |
|
488 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
489 return; |
|
490 } |
|
491 } |
|
492 } |
|
493 |
|
494 /* copy values from the UCMFile to the static data */ |
|
495 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; |
|
496 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; |
|
497 staticData->conversionType=data->ucm->states.conversionType; |
|
498 |
|
499 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { |
|
500 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); |
|
501 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
502 return; |
|
503 } |
|
504 |
|
505 /* |
|
506 * Now that we know the type, copy any 'default' values from the table. |
|
507 * We need not check the type any further because the parser only |
|
508 * recognizes what we have prototypes for. |
|
509 * |
|
510 * For delta (extension-only) tables, copy values from the base file |
|
511 * instead, see createConverter(). |
|
512 */ |
|
513 if(data->ucm->baseName[0]==0) { |
|
514 prototype=ucnv_converterStaticData[staticData->conversionType]; |
|
515 if(prototype!=NULL) { |
|
516 if(staticData->name[0]==0) { |
|
517 uprv_strcpy((char *)staticData->name, prototype->name); |
|
518 } |
|
519 |
|
520 if(staticData->codepage==0) { |
|
521 staticData->codepage=prototype->codepage; |
|
522 } |
|
523 |
|
524 if(staticData->platform==0) { |
|
525 staticData->platform=prototype->platform; |
|
526 } |
|
527 |
|
528 if(staticData->minBytesPerChar==0) { |
|
529 staticData->minBytesPerChar=prototype->minBytesPerChar; |
|
530 } |
|
531 |
|
532 if(staticData->maxBytesPerChar==0) { |
|
533 staticData->maxBytesPerChar=prototype->maxBytesPerChar; |
|
534 } |
|
535 |
|
536 if(staticData->subCharLen==0) { |
|
537 staticData->subCharLen=prototype->subCharLen; |
|
538 if(prototype->subCharLen>0) { |
|
539 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen); |
|
540 } |
|
541 } |
|
542 } |
|
543 } |
|
544 |
|
545 if(data->ucm->states.outputType<0) { |
|
546 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; |
|
547 } |
|
548 |
|
549 if( staticData->subChar1!=0 && |
|
550 (staticData->minBytesPerChar>1 || |
|
551 (staticData->conversionType!=UCNV_MBCS && |
|
552 staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) |
|
553 ) { |
|
554 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n"); |
|
555 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
556 } |
|
557 } |
|
558 |
|
559 /* return TRUE if a base table was read, FALSE for an extension table */ |
|
560 static UBool |
|
561 readFile(ConvData *data, const char* converterName, |
|
562 UErrorCode *pErrorCode) { |
|
563 char line[1024]; |
|
564 char *end; |
|
565 FileStream *convFile; |
|
566 |
|
567 UCMStates *baseStates; |
|
568 UBool dataIsBase; |
|
569 |
|
570 if(U_FAILURE(*pErrorCode)) { |
|
571 return FALSE; |
|
572 } |
|
573 |
|
574 data->ucm=ucm_open(); |
|
575 |
|
576 convFile=T_FileStream_open(converterName, "r"); |
|
577 if(convFile==NULL) { |
|
578 *pErrorCode=U_FILE_ACCESS_ERROR; |
|
579 return FALSE; |
|
580 } |
|
581 |
|
582 readHeader(data, convFile, converterName, pErrorCode); |
|
583 if(U_FAILURE(*pErrorCode)) { |
|
584 return FALSE; |
|
585 } |
|
586 |
|
587 if(data->ucm->baseName[0]==0) { |
|
588 dataIsBase=TRUE; |
|
589 baseStates=&data->ucm->states; |
|
590 ucm_processStates(baseStates, IGNORE_SISO_CHECK); |
|
591 } else { |
|
592 dataIsBase=FALSE; |
|
593 baseStates=NULL; |
|
594 } |
|
595 |
|
596 /* read the base table */ |
|
597 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); |
|
598 if(U_FAILURE(*pErrorCode)) { |
|
599 return FALSE; |
|
600 } |
|
601 |
|
602 /* read an extension table if there is one */ |
|
603 while(T_FileStream_readLine(convFile, line, sizeof(line))) { |
|
604 end=uprv_strchr(line, 0); |
|
605 while(line<end && |
|
606 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) { |
|
607 --end; |
|
608 } |
|
609 *end=0; |
|
610 |
|
611 if(line[0]=='#' || u_skipWhitespace(line)==end) { |
|
612 continue; /* ignore empty and comment lines */ |
|
613 } |
|
614 |
|
615 if(0==uprv_strcmp(line, "CHARMAP")) { |
|
616 /* read the extension table */ |
|
617 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode); |
|
618 } else { |
|
619 fprintf(stderr, "unexpected text after the base mapping table\n"); |
|
620 } |
|
621 break; |
|
622 } |
|
623 |
|
624 T_FileStream_close(convFile); |
|
625 |
|
626 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) { |
|
627 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n"); |
|
628 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
629 } |
|
630 |
|
631 return dataIsBase; |
|
632 } |
|
633 |
|
634 static void |
|
635 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) { |
|
636 ConvData baseData; |
|
637 UBool dataIsBase; |
|
638 |
|
639 UConverterStaticData *staticData; |
|
640 UCMStates *states, *baseStates; |
|
641 |
|
642 if(U_FAILURE(*pErrorCode)) { |
|
643 return; |
|
644 } |
|
645 |
|
646 initConvData(data); |
|
647 |
|
648 dataIsBase=readFile(data, converterName, pErrorCode); |
|
649 if(U_FAILURE(*pErrorCode)) { |
|
650 return; |
|
651 } |
|
652 |
|
653 staticData=&data->staticData; |
|
654 states=&data->ucm->states; |
|
655 |
|
656 if(dataIsBase) { |
|
657 /* |
|
658 * Build a normal .cnv file with a base table |
|
659 * and an optional extension table. |
|
660 */ |
|
661 data->cnvData=MBCSOpen(data->ucm); |
|
662 if(data->cnvData==NULL) { |
|
663 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
664 |
|
665 } else if(!data->cnvData->isValid(data->cnvData, |
|
666 staticData->subChar, staticData->subCharLen) |
|
667 ) { |
|
668 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); |
|
669 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
670 |
|
671 } else if(staticData->subChar1!=0 && |
|
672 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1) |
|
673 ) { |
|
674 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); |
|
675 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
676 |
|
677 } else if( |
|
678 data->ucm->ext->mappingsLength>0 && |
|
679 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) |
|
680 ) { |
|
681 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
682 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { |
|
683 /* sort the table so that it can be turned into UTF-8-friendly data */ |
|
684 ucm_sortTable(data->ucm->base); |
|
685 } |
|
686 |
|
687 if(U_SUCCESS(*pErrorCode)) { |
|
688 if( |
|
689 /* add the base table after ucm_checkBaseExt()! */ |
|
690 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) |
|
691 ) { |
|
692 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
693 } else { |
|
694 /* |
|
695 * addTable() may have requested moving more mappings to the extension table |
|
696 * if they fit into the base toUnicode table but not into the |
|
697 * base fromUnicode table. |
|
698 * (Especially for UTF-8-friendly fromUnicode tables.) |
|
699 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them |
|
700 * to be excluded from the extension toUnicode data. |
|
701 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into |
|
702 * the base fromUnicode table. |
|
703 */ |
|
704 ucm_moveMappings(data->ucm->base, data->ucm->ext); |
|
705 ucm_sortTable(data->ucm->ext); |
|
706 if(data->ucm->ext->mappingsLength>0) { |
|
707 /* prepare the extension table, if there is one */ |
|
708 data->extData=CnvExtOpen(data->ucm); |
|
709 if(data->extData==NULL) { |
|
710 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
711 } else if( |
|
712 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) |
|
713 ) { |
|
714 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
715 } |
|
716 } |
|
717 } |
|
718 } |
|
719 } else { |
|
720 /* Build an extension-only .cnv file. */ |
|
721 char baseFilename[500]; |
|
722 char *basename; |
|
723 |
|
724 initConvData(&baseData); |
|
725 |
|
726 /* assemble a path/filename for data->ucm->baseName */ |
|
727 uprv_strcpy(baseFilename, converterName); |
|
728 basename=(char *)findBasename(baseFilename); |
|
729 uprv_strcpy(basename, data->ucm->baseName); |
|
730 uprv_strcat(basename, ".ucm"); |
|
731 |
|
732 /* read the base table */ |
|
733 dataIsBase=readFile(&baseData, baseFilename, pErrorCode); |
|
734 if(U_FAILURE(*pErrorCode)) { |
|
735 return; |
|
736 } else if(!dataIsBase) { |
|
737 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename); |
|
738 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
739 } else { |
|
740 /* prepare the extension table */ |
|
741 data->extData=CnvExtOpen(data->ucm); |
|
742 if(data->extData==NULL) { |
|
743 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
744 } else { |
|
745 /* fill in gaps in extension file header fields */ |
|
746 UCMapping *m, *mLimit; |
|
747 uint8_t fallbackFlags; |
|
748 |
|
749 baseStates=&baseData.ucm->states; |
|
750 if(states->conversionType==UCNV_DBCS) { |
|
751 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2); |
|
752 } else if(states->minCharLength==0) { |
|
753 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength); |
|
754 } |
|
755 if(states->maxCharLength<states->minCharLength) { |
|
756 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength); |
|
757 } |
|
758 |
|
759 if(staticData->subCharLen==0) { |
|
760 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4); |
|
761 staticData->subCharLen=baseData.staticData.subCharLen; |
|
762 } |
|
763 /* |
|
764 * do not copy subChar1 - |
|
765 * only use what is explicitly specified |
|
766 * because it cannot be unset in the extension file header |
|
767 */ |
|
768 |
|
769 /* get the fallback flags */ |
|
770 fallbackFlags=0; |
|
771 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; |
|
772 m<mLimit && fallbackFlags!=3; |
|
773 ++m |
|
774 ) { |
|
775 if(m->f==1) { |
|
776 fallbackFlags|=1; |
|
777 } else if(m->f==3) { |
|
778 fallbackFlags|=2; |
|
779 } |
|
780 } |
|
781 |
|
782 if(fallbackFlags&1) { |
|
783 staticData->hasFromUnicodeFallback=TRUE; |
|
784 } |
|
785 if(fallbackFlags&2) { |
|
786 staticData->hasToUnicodeFallback=TRUE; |
|
787 } |
|
788 |
|
789 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) { |
|
790 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n"); |
|
791 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
792 |
|
793 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { |
|
794 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n"); |
|
795 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
796 |
|
797 } else if( |
|
798 !ucm_checkValidity(data->ucm->ext, baseStates) || |
|
799 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) |
|
800 ) { |
|
801 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
802 } else { |
|
803 if(states->maxCharLength>1) { |
|
804 /* |
|
805 * When building a normal .cnv file with a base table |
|
806 * for an MBCS (not SBCS) table with explicit precision flags, |
|
807 * the MBCSAddTable() function marks some mappings for moving |
|
808 * to the extension table. |
|
809 * They fit into the base toUnicode table but not into the |
|
810 * base fromUnicode table. |
|
811 * (Note: We do have explicit precision flags because they are |
|
812 * required for extension table generation, and |
|
813 * ucm_checkBaseExt() verified it.) |
|
814 * |
|
815 * We do not call MBCSAddTable() here (we probably could) |
|
816 * so we need to do the analysis before building the extension table. |
|
817 * We assume that MBCSAddTable() will build a UTF-8-friendly table. |
|
818 * Redundant mappings in the extension table are ok except they cost some size. |
|
819 * |
|
820 * Do this after ucm_checkBaseExt(). |
|
821 */ |
|
822 const MBCSData *mbcsData=MBCSGetDummy(); |
|
823 int32_t needsMove=0; |
|
824 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; |
|
825 m<mLimit; |
|
826 ++m |
|
827 ) { |
|
828 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) { |
|
829 m->f|=MBCS_FROM_U_EXT_FLAG; |
|
830 m->moveFlag=UCM_MOVE_TO_EXT; |
|
831 ++needsMove; |
|
832 } |
|
833 } |
|
834 |
|
835 if(needsMove!=0) { |
|
836 ucm_moveMappings(baseData.ucm->base, data->ucm->ext); |
|
837 ucm_sortTable(data->ucm->ext); |
|
838 } |
|
839 } |
|
840 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) { |
|
841 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
842 } |
|
843 } |
|
844 } |
|
845 } |
|
846 |
|
847 cleanupConvData(&baseData); |
|
848 } |
|
849 } |
|
850 |
|
851 /* |
|
852 * Hey, Emacs, please set the following: |
|
853 * |
|
854 * Local Variables: |
|
855 * indent-tabs-mode: nil |
|
856 * End: |
|
857 * |
|
858 */ |