|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 2003-2013, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * file name: ucm.c |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2003jun20 |
|
14 * created by: Markus W. Scherer |
|
15 * |
|
16 * This file reads a .ucm file, stores its mappings and sorts them. |
|
17 * It implements handling of Unicode conversion mappings from .ucm files |
|
18 * for makeconv, canonucm, rptp2ucm, etc. |
|
19 * |
|
20 * Unicode code point sequences with a length of more than 1, |
|
21 * as well as byte sequences with more than 4 bytes or more than one complete |
|
22 * character sequence are handled to support m:n mappings. |
|
23 */ |
|
24 |
|
25 #include "unicode/utypes.h" |
|
26 #include "unicode/ustring.h" |
|
27 #include "cstring.h" |
|
28 #include "cmemory.h" |
|
29 #include "filestrm.h" |
|
30 #include "uarrsort.h" |
|
31 #include "ucnvmbcs.h" |
|
32 #include "ucnv_bld.h" |
|
33 #include "ucnv_ext.h" |
|
34 #include "uparse.h" |
|
35 #include "ucm.h" |
|
36 #include <stdio.h> |
|
37 |
|
38 #if !UCONFIG_NO_CONVERSION |
|
39 |
|
40 /* -------------------------------------------------------------------------- */ |
|
41 |
|
42 static void |
|
43 printMapping(UCMapping *m, UChar32 *codePoints, uint8_t *bytes, FILE *f) { |
|
44 int32_t j; |
|
45 |
|
46 for(j=0; j<m->uLen; ++j) { |
|
47 fprintf(f, "<U%04lX>", (long)codePoints[j]); |
|
48 } |
|
49 |
|
50 fputc(' ', f); |
|
51 |
|
52 for(j=0; j<m->bLen; ++j) { |
|
53 fprintf(f, "\\x%02X", bytes[j]); |
|
54 } |
|
55 |
|
56 if(m->f>=0) { |
|
57 fprintf(f, " |%u\n", m->f); |
|
58 } else { |
|
59 fputs("\n", f); |
|
60 } |
|
61 } |
|
62 |
|
63 U_CAPI void U_EXPORT2 |
|
64 ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f) { |
|
65 printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), f); |
|
66 } |
|
67 |
|
68 U_CAPI void U_EXPORT2 |
|
69 ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) { |
|
70 UCMapping *m; |
|
71 int32_t i, length; |
|
72 |
|
73 m=table->mappings; |
|
74 length=table->mappingsLength; |
|
75 if(byUnicode) { |
|
76 for(i=0; i<length; ++m, ++i) { |
|
77 ucm_printMapping(table, m, f); |
|
78 } |
|
79 } else { |
|
80 const int32_t *map=table->reverseMap; |
|
81 for(i=0; i<length; ++i) { |
|
82 ucm_printMapping(table, m+map[i], f); |
|
83 } |
|
84 } |
|
85 } |
|
86 |
|
87 /* mapping comparisons ------------------------------------------------------ */ |
|
88 |
|
89 static int32_t |
|
90 compareUnicode(UCMTable *lTable, const UCMapping *l, |
|
91 UCMTable *rTable, const UCMapping *r) { |
|
92 const UChar32 *lu, *ru; |
|
93 int32_t result, i, length; |
|
94 |
|
95 if(l->uLen==1 && r->uLen==1) { |
|
96 /* compare two single code points */ |
|
97 return l->u-r->u; |
|
98 } |
|
99 |
|
100 /* get pointers to the code point sequences */ |
|
101 lu=UCM_GET_CODE_POINTS(lTable, l); |
|
102 ru=UCM_GET_CODE_POINTS(rTable, r); |
|
103 |
|
104 /* get the minimum length */ |
|
105 if(l->uLen<=r->uLen) { |
|
106 length=l->uLen; |
|
107 } else { |
|
108 length=r->uLen; |
|
109 } |
|
110 |
|
111 /* compare the code points */ |
|
112 for(i=0; i<length; ++i) { |
|
113 result=lu[i]-ru[i]; |
|
114 if(result!=0) { |
|
115 return result; |
|
116 } |
|
117 } |
|
118 |
|
119 /* compare the lengths */ |
|
120 return l->uLen-r->uLen; |
|
121 } |
|
122 |
|
123 static int32_t |
|
124 compareBytes(UCMTable *lTable, const UCMapping *l, |
|
125 UCMTable *rTable, const UCMapping *r, |
|
126 UBool lexical) { |
|
127 const uint8_t *lb, *rb; |
|
128 int32_t result, i, length; |
|
129 |
|
130 /* |
|
131 * A lexical comparison is used for sorting in the builder, to allow |
|
132 * an efficient search for a byte sequence that could be a prefix |
|
133 * of a previously entered byte sequence. |
|
134 * |
|
135 * Comparing by lengths first is for compatibility with old .ucm tools |
|
136 * like canonucm and rptp2ucm. |
|
137 */ |
|
138 if(lexical) { |
|
139 /* get the minimum length and continue */ |
|
140 if(l->bLen<=r->bLen) { |
|
141 length=l->bLen; |
|
142 } else { |
|
143 length=r->bLen; |
|
144 } |
|
145 } else { |
|
146 /* compare lengths first */ |
|
147 result=l->bLen-r->bLen; |
|
148 if(result!=0) { |
|
149 return result; |
|
150 } else { |
|
151 length=l->bLen; |
|
152 } |
|
153 } |
|
154 |
|
155 /* get pointers to the byte sequences */ |
|
156 lb=UCM_GET_BYTES(lTable, l); |
|
157 rb=UCM_GET_BYTES(rTable, r); |
|
158 |
|
159 /* compare the bytes */ |
|
160 for(i=0; i<length; ++i) { |
|
161 result=lb[i]-rb[i]; |
|
162 if(result!=0) { |
|
163 return result; |
|
164 } |
|
165 } |
|
166 |
|
167 /* compare the lengths */ |
|
168 return l->bLen-r->bLen; |
|
169 } |
|
170 |
|
171 /* compare UCMappings for sorting */ |
|
172 static int32_t |
|
173 compareMappings(UCMTable *lTable, const UCMapping *l, |
|
174 UCMTable *rTable, const UCMapping *r, |
|
175 UBool uFirst) { |
|
176 int32_t result; |
|
177 |
|
178 /* choose which side to compare first */ |
|
179 if(uFirst) { |
|
180 /* Unicode then bytes */ |
|
181 result=compareUnicode(lTable, l, rTable, r); |
|
182 if(result==0) { |
|
183 result=compareBytes(lTable, l, rTable, r, FALSE); /* not lexically, like canonucm */ |
|
184 } |
|
185 } else { |
|
186 /* bytes then Unicode */ |
|
187 result=compareBytes(lTable, l, rTable, r, TRUE); /* lexically, for builder */ |
|
188 if(result==0) { |
|
189 result=compareUnicode(lTable, l, rTable, r); |
|
190 } |
|
191 } |
|
192 |
|
193 if(result!=0) { |
|
194 return result; |
|
195 } |
|
196 |
|
197 /* compare the flags */ |
|
198 return l->f-r->f; |
|
199 } |
|
200 |
|
201 /* sorting by Unicode first sorts mappings directly */ |
|
202 static int32_t |
|
203 compareMappingsUnicodeFirst(const void *context, const void *left, const void *right) { |
|
204 return compareMappings( |
|
205 (UCMTable *)context, (const UCMapping *)left, |
|
206 (UCMTable *)context, (const UCMapping *)right, TRUE); |
|
207 } |
|
208 |
|
209 /* sorting by bytes first sorts the reverseMap; use indirection to mappings */ |
|
210 static int32_t |
|
211 compareMappingsBytesFirst(const void *context, const void *left, const void *right) { |
|
212 UCMTable *table=(UCMTable *)context; |
|
213 int32_t l=*(const int32_t *)left, r=*(const int32_t *)right; |
|
214 return compareMappings( |
|
215 table, table->mappings+l, |
|
216 table, table->mappings+r, FALSE); |
|
217 } |
|
218 |
|
219 U_CAPI void U_EXPORT2 |
|
220 ucm_sortTable(UCMTable *t) { |
|
221 UErrorCode errorCode; |
|
222 int32_t i; |
|
223 |
|
224 if(t->isSorted) { |
|
225 return; |
|
226 } |
|
227 |
|
228 errorCode=U_ZERO_ERROR; |
|
229 |
|
230 /* 1. sort by Unicode first */ |
|
231 uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping), |
|
232 compareMappingsUnicodeFirst, t, |
|
233 FALSE, &errorCode); |
|
234 |
|
235 /* build the reverseMap */ |
|
236 if(t->reverseMap==NULL) { |
|
237 /* |
|
238 * allocate mappingsCapacity instead of mappingsLength so that |
|
239 * if mappings are added, the reverseMap need not be |
|
240 * reallocated each time |
|
241 * (see ucm_moveMappings() and ucm_addMapping()) |
|
242 */ |
|
243 t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t)); |
|
244 if(t->reverseMap==NULL) { |
|
245 fprintf(stderr, "ucm error: unable to allocate reverseMap\n"); |
|
246 exit(U_MEMORY_ALLOCATION_ERROR); |
|
247 } |
|
248 } |
|
249 for(i=0; i<t->mappingsLength; ++i) { |
|
250 t->reverseMap[i]=i; |
|
251 } |
|
252 |
|
253 /* 2. sort reverseMap by mappings bytes first */ |
|
254 uprv_sortArray(t->reverseMap, t->mappingsLength, sizeof(int32_t), |
|
255 compareMappingsBytesFirst, t, |
|
256 FALSE, &errorCode); |
|
257 |
|
258 if(U_FAILURE(errorCode)) { |
|
259 fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n", |
|
260 u_errorName(errorCode)); |
|
261 exit(errorCode); |
|
262 } |
|
263 |
|
264 t->isSorted=TRUE; |
|
265 } |
|
266 |
|
267 /* |
|
268 * remove mappings with their move flag set from the base table |
|
269 * and move some of them (with UCM_MOVE_TO_EXT) to the extension table |
|
270 */ |
|
271 U_CAPI void U_EXPORT2 |
|
272 ucm_moveMappings(UCMTable *base, UCMTable *ext) { |
|
273 UCMapping *mb, *mbLimit; |
|
274 int8_t flag; |
|
275 |
|
276 mb=base->mappings; |
|
277 mbLimit=mb+base->mappingsLength; |
|
278 |
|
279 while(mb<mbLimit) { |
|
280 flag=mb->moveFlag; |
|
281 if(flag!=0) { |
|
282 /* reset the move flag */ |
|
283 mb->moveFlag=0; |
|
284 |
|
285 if(ext!=NULL && (flag&UCM_MOVE_TO_EXT)) { |
|
286 /* add the mapping to the extension table */ |
|
287 ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb), UCM_GET_BYTES(base, mb)); |
|
288 } |
|
289 |
|
290 /* remove this mapping: move the last base mapping down and overwrite the current one */ |
|
291 if(mb<(mbLimit-1)) { |
|
292 uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping)); |
|
293 } |
|
294 --mbLimit; |
|
295 --base->mappingsLength; |
|
296 base->isSorted=FALSE; |
|
297 } else { |
|
298 ++mb; |
|
299 } |
|
300 } |
|
301 } |
|
302 |
|
/* result bits of checkBaseExtUnicode() and checkBaseExtBytes() */
enum {
    /* at least one mapping had a move flag set */
    NEEDS_MOVE=1,
    /* at least one error was reported */
    HAS_ERRORS=2
};
|
307 |
|
308 static uint8_t |
|
309 checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext, |
|
310 UBool moveToExt, UBool intersectBase) { |
|
311 UCMapping *mb, *me, *mbLimit, *meLimit; |
|
312 int32_t cmp; |
|
313 uint8_t result; |
|
314 |
|
315 mb=base->mappings; |
|
316 mbLimit=mb+base->mappingsLength; |
|
317 |
|
318 me=ext->mappings; |
|
319 meLimit=me+ext->mappingsLength; |
|
320 |
|
321 result=0; |
|
322 |
|
323 for(;;) { |
|
324 /* skip irrelevant mappings on both sides */ |
|
325 for(;;) { |
|
326 if(mb==mbLimit) { |
|
327 return result; |
|
328 } |
|
329 |
|
330 if((0<=mb->f && mb->f<=2) || mb->f==4) { |
|
331 break; |
|
332 } |
|
333 |
|
334 ++mb; |
|
335 } |
|
336 |
|
337 for(;;) { |
|
338 if(me==meLimit) { |
|
339 return result; |
|
340 } |
|
341 |
|
342 if((0<=me->f && me->f<=2) || me->f==4) { |
|
343 break; |
|
344 } |
|
345 |
|
346 ++me; |
|
347 } |
|
348 |
|
349 /* compare the base and extension mappings */ |
|
350 cmp=compareUnicode(base, mb, ext, me); |
|
351 if(cmp<0) { |
|
352 if(intersectBase && (intersectBase!=2 || mb->bLen>1)) { |
|
353 /* |
|
354 * mapping in base but not in ext, move it |
|
355 * |
|
356 * if ext is DBCS, move DBCS mappings here |
|
357 * and check SBCS ones for Unicode prefix below |
|
358 */ |
|
359 mb->moveFlag|=UCM_MOVE_TO_EXT; |
|
360 result|=NEEDS_MOVE; |
|
361 |
|
362 /* does mb map from an input sequence that is a prefix of me's? */ |
|
363 } else if( mb->uLen<me->uLen && |
|
364 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) |
|
365 ) { |
|
366 if(moveToExt) { |
|
367 /* mark this mapping to be moved to the extension table */ |
|
368 mb->moveFlag|=UCM_MOVE_TO_EXT; |
|
369 result|=NEEDS_MOVE; |
|
370 } else { |
|
371 fprintf(stderr, |
|
372 "ucm error: the base table contains a mapping whose input sequence\n" |
|
373 " is a prefix of the input sequence of an extension mapping\n"); |
|
374 ucm_printMapping(base, mb, stderr); |
|
375 ucm_printMapping(ext, me, stderr); |
|
376 result|=HAS_ERRORS; |
|
377 } |
|
378 } |
|
379 |
|
380 ++mb; |
|
381 } else if(cmp==0) { |
|
382 /* |
|
383 * same output: remove the extension mapping, |
|
384 * otherwise treat as an error |
|
385 */ |
|
386 if( mb->f==me->f && mb->bLen==me->bLen && |
|
387 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) |
|
388 ) { |
|
389 me->moveFlag|=UCM_REMOVE_MAPPING; |
|
390 result|=NEEDS_MOVE; |
|
391 } else if(intersectBase) { |
|
392 /* mapping in base but not in ext, move it */ |
|
393 mb->moveFlag|=UCM_MOVE_TO_EXT; |
|
394 result|=NEEDS_MOVE; |
|
395 } else { |
|
396 fprintf(stderr, |
|
397 "ucm error: the base table contains a mapping whose input sequence\n" |
|
398 " is the same as the input sequence of an extension mapping\n" |
|
399 " but it maps differently\n"); |
|
400 ucm_printMapping(base, mb, stderr); |
|
401 ucm_printMapping(ext, me, stderr); |
|
402 result|=HAS_ERRORS; |
|
403 } |
|
404 |
|
405 ++mb; |
|
406 } else /* cmp>0 */ { |
|
407 ++me; |
|
408 } |
|
409 } |
|
410 } |
|
411 |
|
412 static uint8_t |
|
413 checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, |
|
414 UBool moveToExt, UBool intersectBase) { |
|
415 UCMapping *mb, *me; |
|
416 int32_t *baseMap, *extMap; |
|
417 int32_t b, e, bLimit, eLimit, cmp; |
|
418 uint8_t result; |
|
419 UBool isSISO; |
|
420 |
|
421 baseMap=base->reverseMap; |
|
422 extMap=ext->reverseMap; |
|
423 |
|
424 b=e=0; |
|
425 bLimit=base->mappingsLength; |
|
426 eLimit=ext->mappingsLength; |
|
427 |
|
428 result=0; |
|
429 |
|
430 isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO); |
|
431 |
|
432 for(;;) { |
|
433 /* skip irrelevant mappings on both sides */ |
|
434 for(;; ++b) { |
|
435 if(b==bLimit) { |
|
436 return result; |
|
437 } |
|
438 mb=base->mappings+baseMap[b]; |
|
439 |
|
440 if(intersectBase==2 && mb->bLen==1) { |
|
441 /* |
|
442 * comparing a base against a DBCS extension: |
|
443 * leave SBCS base mappings alone |
|
444 */ |
|
445 continue; |
|
446 } |
|
447 |
|
448 if(mb->f==0 || mb->f==3) { |
|
449 break; |
|
450 } |
|
451 } |
|
452 |
|
453 for(;;) { |
|
454 if(e==eLimit) { |
|
455 return result; |
|
456 } |
|
457 me=ext->mappings+extMap[e]; |
|
458 |
|
459 if(me->f==0 || me->f==3) { |
|
460 break; |
|
461 } |
|
462 |
|
463 ++e; |
|
464 } |
|
465 |
|
466 /* compare the base and extension mappings */ |
|
467 cmp=compareBytes(base, mb, ext, me, TRUE); |
|
468 if(cmp<0) { |
|
469 if(intersectBase) { |
|
470 /* mapping in base but not in ext, move it */ |
|
471 mb->moveFlag|=UCM_MOVE_TO_EXT; |
|
472 result|=NEEDS_MOVE; |
|
473 |
|
474 /* |
|
475 * does mb map from an input sequence that is a prefix of me's? |
|
476 * for SI/SO tables, a single byte is never a prefix because it |
|
477 * occurs in a separate single-byte state |
|
478 */ |
|
479 } else if( mb->bLen<me->bLen && |
|
480 (!isSISO || mb->bLen>1) && |
|
481 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) |
|
482 ) { |
|
483 if(moveToExt) { |
|
484 /* mark this mapping to be moved to the extension table */ |
|
485 mb->moveFlag|=UCM_MOVE_TO_EXT; |
|
486 result|=NEEDS_MOVE; |
|
487 } else { |
|
488 fprintf(stderr, |
|
489 "ucm error: the base table contains a mapping whose input sequence\n" |
|
490 " is a prefix of the input sequence of an extension mapping\n"); |
|
491 ucm_printMapping(base, mb, stderr); |
|
492 ucm_printMapping(ext, me, stderr); |
|
493 result|=HAS_ERRORS; |
|
494 } |
|
495 } |
|
496 |
|
497 ++b; |
|
498 } else if(cmp==0) { |
|
499 /* |
|
500 * same output: remove the extension mapping, |
|
501 * otherwise treat as an error |
|
502 */ |
|
503 if( mb->f==me->f && mb->uLen==me->uLen && |
|
504 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) |
|
505 ) { |
|
506 me->moveFlag|=UCM_REMOVE_MAPPING; |
|
507 result|=NEEDS_MOVE; |
|
508 } else if(intersectBase) { |
|
509 /* mapping in base but not in ext, move it */ |
|
510 mb->moveFlag|=UCM_MOVE_TO_EXT; |
|
511 result|=NEEDS_MOVE; |
|
512 } else { |
|
513 fprintf(stderr, |
|
514 "ucm error: the base table contains a mapping whose input sequence\n" |
|
515 " is the same as the input sequence of an extension mapping\n" |
|
516 " but it maps differently\n"); |
|
517 ucm_printMapping(base, mb, stderr); |
|
518 ucm_printMapping(ext, me, stderr); |
|
519 result|=HAS_ERRORS; |
|
520 } |
|
521 |
|
522 ++b; |
|
523 } else /* cmp>0 */ { |
|
524 ++e; |
|
525 } |
|
526 } |
|
527 } |
|
528 |
|
529 U_CAPI UBool U_EXPORT2 |
|
530 ucm_checkValidity(UCMTable *table, UCMStates *baseStates) { |
|
531 UCMapping *m, *mLimit; |
|
532 int32_t count; |
|
533 UBool isOK; |
|
534 |
|
535 m=table->mappings; |
|
536 mLimit=m+table->mappingsLength; |
|
537 isOK=TRUE; |
|
538 |
|
539 while(m<mLimit) { |
|
540 count=ucm_countChars(baseStates, UCM_GET_BYTES(table, m), m->bLen); |
|
541 if(count<1) { |
|
542 ucm_printMapping(table, m, stderr); |
|
543 isOK=FALSE; |
|
544 } |
|
545 ++m; |
|
546 } |
|
547 |
|
548 return isOK; |
|
549 } |
|
550 |
|
551 U_CAPI UBool U_EXPORT2 |
|
552 ucm_checkBaseExt(UCMStates *baseStates, |
|
553 UCMTable *base, UCMTable *ext, UCMTable *moveTarget, |
|
554 UBool intersectBase) { |
|
555 uint8_t result; |
|
556 |
|
557 /* if we have an extension table, we must always use precision flags */ |
|
558 if(base->flagsType&UCM_FLAGS_IMPLICIT) { |
|
559 fprintf(stderr, "ucm error: the base table contains mappings without precision flags\n"); |
|
560 return FALSE; |
|
561 } |
|
562 if(ext->flagsType&UCM_FLAGS_IMPLICIT) { |
|
563 fprintf(stderr, "ucm error: extension table contains mappings without precision flags\n"); |
|
564 return FALSE; |
|
565 } |
|
566 |
|
567 /* checking requires both tables to be sorted */ |
|
568 ucm_sortTable(base); |
|
569 ucm_sortTable(ext); |
|
570 |
|
571 /* check */ |
|
572 result= |
|
573 checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase)| |
|
574 checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase); |
|
575 |
|
576 if(result&HAS_ERRORS) { |
|
577 return FALSE; |
|
578 } |
|
579 |
|
580 if(result&NEEDS_MOVE) { |
|
581 ucm_moveMappings(ext, NULL); |
|
582 ucm_moveMappings(base, moveTarget); |
|
583 ucm_sortTable(base); |
|
584 ucm_sortTable(ext); |
|
585 if(moveTarget!=NULL) { |
|
586 ucm_sortTable(moveTarget); |
|
587 } |
|
588 } |
|
589 |
|
590 return TRUE; |
|
591 } |
|
592 |
|
593 /* merge tables for rptp2ucm ------------------------------------------------ */ |
|
594 |
|
595 U_CAPI void U_EXPORT2 |
|
596 ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable, |
|
597 const uint8_t *subchar, int32_t subcharLength, |
|
598 uint8_t subchar1) { |
|
599 UCMapping *fromUMapping, *toUMapping; |
|
600 int32_t fromUIndex, toUIndex, fromUTop, toUTop, cmp; |
|
601 |
|
602 ucm_sortTable(fromUTable); |
|
603 ucm_sortTable(toUTable); |
|
604 |
|
605 fromUMapping=fromUTable->mappings; |
|
606 toUMapping=toUTable->mappings; |
|
607 |
|
608 fromUTop=fromUTable->mappingsLength; |
|
609 toUTop=toUTable->mappingsLength; |
|
610 |
|
611 fromUIndex=toUIndex=0; |
|
612 |
|
613 while(fromUIndex<fromUTop && toUIndex<toUTop) { |
|
614 cmp=compareMappings(fromUTable, fromUMapping, toUTable, toUMapping, TRUE); |
|
615 if(cmp==0) { |
|
616 /* equal: roundtrip, nothing to do (flags are initially 0) */ |
|
617 ++fromUMapping; |
|
618 ++toUMapping; |
|
619 |
|
620 ++fromUIndex; |
|
621 ++toUIndex; |
|
622 } else if(cmp<0) { |
|
623 /* |
|
624 * the fromU mapping does not have a toU counterpart: |
|
625 * fallback Unicode->codepage |
|
626 */ |
|
627 if( (fromUMapping->bLen==subcharLength && |
|
628 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || |
|
629 (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) |
|
630 ) { |
|
631 fromUMapping->f=2; /* SUB mapping */ |
|
632 } else { |
|
633 fromUMapping->f=1; /* normal fallback */ |
|
634 } |
|
635 |
|
636 ++fromUMapping; |
|
637 ++fromUIndex; |
|
638 } else { |
|
639 /* |
|
640 * the toU mapping does not have a fromU counterpart: |
|
641 * (reverse) fallback codepage->Unicode, copy it to the fromU table |
|
642 */ |
|
643 |
|
644 /* ignore reverse fallbacks to Unicode SUB */ |
|
645 if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { |
|
646 toUMapping->f=3; /* reverse fallback */ |
|
647 ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); |
|
648 |
|
649 /* the table may have been reallocated */ |
|
650 fromUMapping=fromUTable->mappings+fromUIndex; |
|
651 } |
|
652 |
|
653 ++toUMapping; |
|
654 ++toUIndex; |
|
655 } |
|
656 } |
|
657 |
|
658 /* either one or both tables are exhausted */ |
|
659 while(fromUIndex<fromUTop) { |
|
660 /* leftover fromU mappings are fallbacks */ |
|
661 if( (fromUMapping->bLen==subcharLength && |
|
662 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || |
|
663 (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) |
|
664 ) { |
|
665 fromUMapping->f=2; /* SUB mapping */ |
|
666 } else { |
|
667 fromUMapping->f=1; /* normal fallback */ |
|
668 } |
|
669 |
|
670 ++fromUMapping; |
|
671 ++fromUIndex; |
|
672 } |
|
673 |
|
674 while(toUIndex<toUTop) { |
|
675 /* leftover toU mappings are reverse fallbacks */ |
|
676 |
|
677 /* ignore reverse fallbacks to Unicode SUB */ |
|
678 if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { |
|
679 toUMapping->f=3; /* reverse fallback */ |
|
680 ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); |
|
681 } |
|
682 |
|
683 ++toUMapping; |
|
684 ++toUIndex; |
|
685 } |
|
686 |
|
687 fromUTable->isSorted=FALSE; |
|
688 } |
|
689 |
|
690 /* separate extension mappings out of base table for rptp2ucm --------------- */ |
|
691 |
|
692 U_CAPI UBool U_EXPORT2 |
|
693 ucm_separateMappings(UCMFile *ucm, UBool isSISO) { |
|
694 UCMTable *table; |
|
695 UCMapping *m, *mLimit; |
|
696 int32_t type; |
|
697 UBool needsMove, isOK; |
|
698 |
|
699 table=ucm->base; |
|
700 m=table->mappings; |
|
701 mLimit=m+table->mappingsLength; |
|
702 |
|
703 needsMove=FALSE; |
|
704 isOK=TRUE; |
|
705 |
|
706 for(; m<mLimit; ++m) { |
|
707 if(isSISO && m->bLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { |
|
708 fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n"); |
|
709 ucm_printMapping(table, m, stderr); |
|
710 m->moveFlag|=UCM_REMOVE_MAPPING; |
|
711 needsMove=TRUE; |
|
712 continue; |
|
713 } |
|
714 |
|
715 type=ucm_mappingType( |
|
716 &ucm->states, m, |
|
717 UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m)); |
|
718 if(type<0) { |
|
719 /* illegal byte sequence */ |
|
720 printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr); |
|
721 isOK=FALSE; |
|
722 } else if(type>0) { |
|
723 m->moveFlag|=UCM_MOVE_TO_EXT; |
|
724 needsMove=TRUE; |
|
725 } |
|
726 } |
|
727 |
|
728 if(!isOK) { |
|
729 return FALSE; |
|
730 } |
|
731 if(needsMove) { |
|
732 ucm_moveMappings(ucm->base, ucm->ext); |
|
733 return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE); |
|
734 } else { |
|
735 ucm_sortTable(ucm->base); |
|
736 return TRUE; |
|
737 } |
|
738 } |
|
739 |
|
740 /* ucm parser --------------------------------------------------------------- */ |
|
741 |
|
742 U_CAPI int8_t U_EXPORT2 |
|
743 ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps) { |
|
744 const char *s=*ps; |
|
745 char *end; |
|
746 uint8_t byte; |
|
747 int8_t bLen; |
|
748 |
|
749 bLen=0; |
|
750 for(;;) { |
|
751 /* skip an optional plus sign */ |
|
752 if(bLen>0 && *s=='+') { |
|
753 ++s; |
|
754 } |
|
755 if(*s!='\\') { |
|
756 break; |
|
757 } |
|
758 |
|
759 if( s[1]!='x' || |
|
760 (byte=(uint8_t)uprv_strtoul(s+2, &end, 16), end)!=s+4 |
|
761 ) { |
|
762 fprintf(stderr, "ucm error: byte must be formatted as \\xXX (2 hex digits) - \"%s\"\n", line); |
|
763 return -1; |
|
764 } |
|
765 |
|
766 if(bLen==UCNV_EXT_MAX_BYTES) { |
|
767 fprintf(stderr, "ucm error: too many bytes on \"%s\"\n", line); |
|
768 return -1; |
|
769 } |
|
770 bytes[bLen++]=byte; |
|
771 s=end; |
|
772 } |
|
773 |
|
774 *ps=s; |
|
775 return bLen; |
|
776 } |
|
777 |
|
778 /* parse a mapping line; must not be empty */ |
|
779 U_CAPI UBool U_EXPORT2 |
|
780 ucm_parseMappingLine(UCMapping *m, |
|
781 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], |
|
782 uint8_t bytes[UCNV_EXT_MAX_BYTES], |
|
783 const char *line) { |
|
784 const char *s; |
|
785 char *end; |
|
786 UChar32 cp; |
|
787 int32_t u16Length; |
|
788 int8_t uLen, bLen, f; |
|
789 |
|
790 s=line; |
|
791 uLen=bLen=0; |
|
792 |
|
793 /* parse code points */ |
|
794 for(;;) { |
|
795 /* skip an optional plus sign */ |
|
796 if(uLen>0 && *s=='+') { |
|
797 ++s; |
|
798 } |
|
799 if(*s!='<') { |
|
800 break; |
|
801 } |
|
802 |
|
803 if( s[1]!='U' || |
|
804 (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 || |
|
805 *end!='>' |
|
806 ) { |
|
807 fprintf(stderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line); |
|
808 return FALSE; |
|
809 } |
|
810 if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) { |
|
811 fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line); |
|
812 return FALSE; |
|
813 } |
|
814 |
|
815 if(uLen==UCNV_EXT_MAX_UCHARS) { |
|
816 fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line); |
|
817 return FALSE; |
|
818 } |
|
819 codePoints[uLen++]=cp; |
|
820 s=end+1; |
|
821 } |
|
822 |
|
823 if(uLen==0) { |
|
824 fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line); |
|
825 return FALSE; |
|
826 } else if(uLen==1) { |
|
827 m->u=codePoints[0]; |
|
828 } else { |
|
829 UErrorCode errorCode=U_ZERO_ERROR; |
|
830 u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode); |
|
831 if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) || |
|
832 u16Length>UCNV_EXT_MAX_UCHARS |
|
833 ) { |
|
834 fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line); |
|
835 return FALSE; |
|
836 } |
|
837 } |
|
838 |
|
839 s=u_skipWhitespace(s); |
|
840 |
|
841 /* parse bytes */ |
|
842 bLen=ucm_parseBytes(bytes, line, &s); |
|
843 |
|
844 if(bLen<0) { |
|
845 return FALSE; |
|
846 } else if(bLen==0) { |
|
847 fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line); |
|
848 return FALSE; |
|
849 } else if(bLen<=4) { |
|
850 uprv_memcpy(m->b.bytes, bytes, bLen); |
|
851 } |
|
852 |
|
853 /* skip everything until the fallback indicator, even the start of a comment */ |
|
854 for(;;) { |
|
855 if(*s==0) { |
|
856 f=-1; /* no fallback indicator */ |
|
857 break; |
|
858 } else if(*s=='|') { |
|
859 f=(int8_t)(s[1]-'0'); |
|
860 if((uint8_t)f>4) { |
|
861 fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line); |
|
862 return FALSE; |
|
863 } |
|
864 break; |
|
865 } |
|
866 ++s; |
|
867 } |
|
868 |
|
869 m->uLen=uLen; |
|
870 m->bLen=bLen; |
|
871 m->f=f; |
|
872 return TRUE; |
|
873 } |
|
874 |
|
875 /* general APIs ------------------------------------------------------------- */ |
|
876 |
|
877 U_CAPI UCMTable * U_EXPORT2 |
|
878 ucm_openTable() { |
|
879 UCMTable *table=(UCMTable *)uprv_malloc(sizeof(UCMTable)); |
|
880 if(table==NULL) { |
|
881 fprintf(stderr, "ucm error: unable to allocate a UCMTable\n"); |
|
882 exit(U_MEMORY_ALLOCATION_ERROR); |
|
883 } |
|
884 |
|
885 memset(table, 0, sizeof(UCMTable)); |
|
886 return table; |
|
887 } |
|
888 |
|
889 U_CAPI void U_EXPORT2 |
|
890 ucm_closeTable(UCMTable *table) { |
|
891 if(table!=NULL) { |
|
892 uprv_free(table->mappings); |
|
893 uprv_free(table->codePoints); |
|
894 uprv_free(table->bytes); |
|
895 uprv_free(table->reverseMap); |
|
896 uprv_free(table); |
|
897 } |
|
898 } |
|
899 |
|
900 U_CAPI void U_EXPORT2 |
|
901 ucm_resetTable(UCMTable *table) { |
|
902 if(table!=NULL) { |
|
903 table->mappingsLength=0; |
|
904 table->flagsType=0; |
|
905 table->unicodeMask=0; |
|
906 table->bytesLength=table->codePointsLength=0; |
|
907 table->isSorted=FALSE; |
|
908 } |
|
909 } |
|
910 |
|
911 U_CAPI void U_EXPORT2 |
|
912 ucm_addMapping(UCMTable *table, |
|
913 UCMapping *m, |
|
914 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], |
|
915 uint8_t bytes[UCNV_EXT_MAX_BYTES]) { |
|
916 UCMapping *tm; |
|
917 UChar32 c; |
|
918 int32_t idx; |
|
919 |
|
920 if(table->mappingsLength>=table->mappingsCapacity) { |
|
921 /* make the mappings array larger */ |
|
922 if(table->mappingsCapacity==0) { |
|
923 table->mappingsCapacity=1000; |
|
924 } else { |
|
925 table->mappingsCapacity*=10; |
|
926 } |
|
927 table->mappings=(UCMapping *)uprv_realloc(table->mappings, |
|
928 table->mappingsCapacity*sizeof(UCMapping)); |
|
929 if(table->mappings==NULL) { |
|
930 fprintf(stderr, "ucm error: unable to allocate %d UCMappings\n", |
|
931 (int)table->mappingsCapacity); |
|
932 exit(U_MEMORY_ALLOCATION_ERROR); |
|
933 } |
|
934 |
|
935 if(table->reverseMap!=NULL) { |
|
936 /* the reverseMap must be reallocated in a new sort */ |
|
937 uprv_free(table->reverseMap); |
|
938 table->reverseMap=NULL; |
|
939 } |
|
940 } |
|
941 |
|
942 if(m->uLen>1 && table->codePointsCapacity==0) { |
|
943 table->codePointsCapacity=10000; |
|
944 table->codePoints=(UChar32 *)uprv_malloc(table->codePointsCapacity*4); |
|
945 if(table->codePoints==NULL) { |
|
946 fprintf(stderr, "ucm error: unable to allocate %d UChar32s\n", |
|
947 (int)table->codePointsCapacity); |
|
948 exit(U_MEMORY_ALLOCATION_ERROR); |
|
949 } |
|
950 } |
|
951 |
|
952 if(m->bLen>4 && table->bytesCapacity==0) { |
|
953 table->bytesCapacity=10000; |
|
954 table->bytes=(uint8_t *)uprv_malloc(table->bytesCapacity); |
|
955 if(table->bytes==NULL) { |
|
956 fprintf(stderr, "ucm error: unable to allocate %d bytes\n", |
|
957 (int)table->bytesCapacity); |
|
958 exit(U_MEMORY_ALLOCATION_ERROR); |
|
959 } |
|
960 } |
|
961 |
|
962 if(m->uLen>1) { |
|
963 idx=table->codePointsLength; |
|
964 table->codePointsLength+=m->uLen; |
|
965 if(table->codePointsLength>table->codePointsCapacity) { |
|
966 fprintf(stderr, "ucm error: too many code points in multiple-code point mappings\n"); |
|
967 exit(U_MEMORY_ALLOCATION_ERROR); |
|
968 } |
|
969 |
|
970 uprv_memcpy(table->codePoints+idx, codePoints, m->uLen*4); |
|
971 m->u=idx; |
|
972 } |
|
973 |
|
974 if(m->bLen>4) { |
|
975 idx=table->bytesLength; |
|
976 table->bytesLength+=m->bLen; |
|
977 if(table->bytesLength>table->bytesCapacity) { |
|
978 fprintf(stderr, "ucm error: too many bytes in mappings with >4 charset bytes\n"); |
|
979 exit(U_MEMORY_ALLOCATION_ERROR); |
|
980 } |
|
981 |
|
982 uprv_memcpy(table->bytes+idx, bytes, m->bLen); |
|
983 m->b.idx=idx; |
|
984 } |
|
985 |
|
986 /* set unicodeMask */ |
|
987 for(idx=0; idx<m->uLen; ++idx) { |
|
988 c=codePoints[idx]; |
|
989 if(c>=0x10000) { |
|
990 table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */ |
|
991 } else if(U_IS_SURROGATE(c)) { |
|
992 table->unicodeMask|=UCNV_HAS_SURROGATES; /* there are surrogate code points */ |
|
993 } |
|
994 } |
|
995 |
|
996 /* set flagsType */ |
|
997 if(m->f<0) { |
|
998 table->flagsType|=UCM_FLAGS_IMPLICIT; |
|
999 } else { |
|
1000 table->flagsType|=UCM_FLAGS_EXPLICIT; |
|
1001 } |
|
1002 |
|
1003 tm=table->mappings+table->mappingsLength++; |
|
1004 uprv_memcpy(tm, m, sizeof(UCMapping)); |
|
1005 |
|
1006 table->isSorted=FALSE; |
|
1007 } |
|
1008 |
|
1009 U_CAPI UCMFile * U_EXPORT2 |
|
1010 ucm_open() { |
|
1011 UCMFile *ucm=(UCMFile *)uprv_malloc(sizeof(UCMFile)); |
|
1012 if(ucm==NULL) { |
|
1013 fprintf(stderr, "ucm error: unable to allocate a UCMFile\n"); |
|
1014 exit(U_MEMORY_ALLOCATION_ERROR); |
|
1015 } |
|
1016 |
|
1017 memset(ucm, 0, sizeof(UCMFile)); |
|
1018 |
|
1019 ucm->base=ucm_openTable(); |
|
1020 ucm->ext=ucm_openTable(); |
|
1021 |
|
1022 ucm->states.stateFlags[0]=MBCS_STATE_FLAG_DIRECT; |
|
1023 ucm->states.conversionType=UCNV_UNSUPPORTED_CONVERTER; |
|
1024 ucm->states.outputType=-1; |
|
1025 ucm->states.minCharLength=ucm->states.maxCharLength=1; |
|
1026 |
|
1027 return ucm; |
|
1028 } |
|
1029 |
|
1030 U_CAPI void U_EXPORT2 |
|
1031 ucm_close(UCMFile *ucm) { |
|
1032 if(ucm!=NULL) { |
|
1033 ucm_closeTable(ucm->base); |
|
1034 ucm_closeTable(ucm->ext); |
|
1035 uprv_free(ucm); |
|
1036 } |
|
1037 } |
|
1038 |
|
1039 U_CAPI int32_t U_EXPORT2 |
|
1040 ucm_mappingType(UCMStates *baseStates, |
|
1041 UCMapping *m, |
|
1042 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], |
|
1043 uint8_t bytes[UCNV_EXT_MAX_BYTES]) { |
|
1044 /* check validity of the bytes and count the characters in them */ |
|
1045 int32_t count=ucm_countChars(baseStates, bytes, m->bLen); |
|
1046 if(count<1) { |
|
1047 /* illegal byte sequence */ |
|
1048 return -1; |
|
1049 } |
|
1050 |
|
1051 /* |
|
1052 * Suitable for an ICU conversion base table means: |
|
1053 * - a 1:1 mapping (1 Unicode code point : 1 byte sequence) |
|
1054 * - precision flag 0..3 |
|
1055 * - SBCS: any 1:1 mapping |
|
1056 * (the table stores additional bits to distinguish mapping types) |
|
1057 * - MBCS: not a |2 SUB mapping for <subchar1> |
|
1058 * - MBCS: not a |1 fallback to 0x00 |
|
1059 * - MBCS: not a multi-byte mapping with leading 0x00 bytes |
|
1060 * |
|
1061 * Further restrictions for fromUnicode tables |
|
1062 * are enforced in makeconv (MBCSOkForBaseFromUnicode()). |
|
1063 * |
|
1064 * All of the MBCS fromUnicode specific tests could be removed from here, |
|
1065 * but the ones above are for unusual mappings, and removing the tests |
|
1066 * from here would change canonucm output which seems gratuitous. |
|
1067 * (Markus Scherer 2006-nov-28) |
|
1068 * |
|
1069 * Exception: All implicit mappings (f<0) that need to be moved |
|
1070 * because of fromUnicode restrictions _must_ be moved here because |
|
1071 * makeconv uses a hack for moving mappings only for the fromUnicode table |
|
1072 * that only works with non-negative values of f. |
|
1073 */ |
|
1074 if( m->uLen==1 && count==1 && m->f<=3 && |
|
1075 (baseStates->maxCharLength==1 || |
|
1076 !((m->f==2 && m->bLen==1) || |
|
1077 (m->f==1 && bytes[0]==0) || |
|
1078 (m->f<=1 && m->bLen>1 && bytes[0]==0))) |
|
1079 ) { |
|
1080 return 0; /* suitable for a base table */ |
|
1081 } else { |
|
1082 return 1; /* needs to go into an extension table */ |
|
1083 } |
|
1084 } |
|
1085 |
|
1086 U_CAPI UBool U_EXPORT2 |
|
1087 ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates, |
|
1088 UCMapping *m, |
|
1089 UChar32 codePoints[UCNV_EXT_MAX_UCHARS], |
|
1090 uint8_t bytes[UCNV_EXT_MAX_BYTES]) { |
|
1091 int32_t type; |
|
1092 |
|
1093 if(m->f==2 && m->uLen>1) { |
|
1094 fprintf(stderr, "ucm error: illegal <subchar1> |2 mapping from multiple code points\n"); |
|
1095 printMapping(m, codePoints, bytes, stderr); |
|
1096 return FALSE; |
|
1097 } |
|
1098 |
|
1099 if(baseStates!=NULL) { |
|
1100 /* check validity of the bytes and count the characters in them */ |
|
1101 type=ucm_mappingType(baseStates, m, codePoints, bytes); |
|
1102 if(type<0) { |
|
1103 /* illegal byte sequence */ |
|
1104 printMapping(m, codePoints, bytes, stderr); |
|
1105 return FALSE; |
|
1106 } |
|
1107 } else { |
|
1108 /* not used - adding a mapping for an extension-only table before its base table is read */ |
|
1109 type=1; |
|
1110 } |
|
1111 |
|
1112 /* |
|
1113 * Add the mapping to the base table if this is requested and suitable. |
|
1114 * Otherwise, add it to the extension table. |
|
1115 */ |
|
1116 if(forBase && type==0) { |
|
1117 ucm_addMapping(ucm->base, m, codePoints, bytes); |
|
1118 } else { |
|
1119 ucm_addMapping(ucm->ext, m, codePoints, bytes); |
|
1120 } |
|
1121 |
|
1122 return TRUE; |
|
1123 } |
|
1124 |
|
1125 U_CAPI UBool U_EXPORT2 |
|
1126 ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates) { |
|
1127 UCMapping m={ 0 }; |
|
1128 UChar32 codePoints[UCNV_EXT_MAX_UCHARS]; |
|
1129 uint8_t bytes[UCNV_EXT_MAX_BYTES]; |
|
1130 |
|
1131 const char *s; |
|
1132 |
|
1133 /* ignore empty and comment lines */ |
|
1134 if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') { |
|
1135 return TRUE; |
|
1136 } |
|
1137 |
|
1138 return |
|
1139 ucm_parseMappingLine(&m, codePoints, bytes, line) && |
|
1140 ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes); |
|
1141 } |
|
1142 |
|
1143 U_CAPI void U_EXPORT2 |
|
1144 ucm_readTable(UCMFile *ucm, FileStream* convFile, |
|
1145 UBool forBase, UCMStates *baseStates, |
|
1146 UErrorCode *pErrorCode) { |
|
1147 char line[500]; |
|
1148 char *end; |
|
1149 UBool isOK; |
|
1150 |
|
1151 if(U_FAILURE(*pErrorCode)) { |
|
1152 return; |
|
1153 } |
|
1154 |
|
1155 isOK=TRUE; |
|
1156 |
|
1157 for(;;) { |
|
1158 /* read the next line */ |
|
1159 if(!T_FileStream_readLine(convFile, line, sizeof(line))) { |
|
1160 fprintf(stderr, "incomplete charmap section\n"); |
|
1161 isOK=FALSE; |
|
1162 break; |
|
1163 } |
|
1164 |
|
1165 /* remove CR LF */ |
|
1166 end=uprv_strchr(line, 0); |
|
1167 while(line<end && (*(end-1)=='\r' || *(end-1)=='\n')) { |
|
1168 --end; |
|
1169 } |
|
1170 *end=0; |
|
1171 |
|
1172 /* ignore empty and comment lines */ |
|
1173 if(line[0]==0 || line[0]=='#') { |
|
1174 continue; |
|
1175 } |
|
1176 |
|
1177 /* stop at the end of the mapping table */ |
|
1178 if(0==uprv_strcmp(line, "END CHARMAP")) { |
|
1179 break; |
|
1180 } |
|
1181 |
|
1182 isOK&=ucm_addMappingFromLine(ucm, line, forBase, baseStates); |
|
1183 } |
|
1184 |
|
1185 if(!isOK) { |
|
1186 *pErrorCode=U_INVALID_TABLE_FORMAT; |
|
1187 } |
|
1188 } |
|
1189 #endif |