1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/tools/toolutil/ucm.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1189 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2003-2013, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: ucm.c 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2003jun20 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* This file reads a .ucm file, stores its mappings and sorts them. 1.20 +* It implements handling of Unicode conversion mappings from .ucm files 1.21 +* for makeconv, canonucm, rptp2ucm, etc. 1.22 +* 1.23 +* Unicode code point sequences with a length of more than 1, 1.24 +* as well as byte sequences with more than 4 bytes or more than one complete 1.25 +* character sequence are handled to support m:n mappings. 1.26 +*/ 1.27 + 1.28 +#include "unicode/utypes.h" 1.29 +#include "unicode/ustring.h" 1.30 +#include "cstring.h" 1.31 +#include "cmemory.h" 1.32 +#include "filestrm.h" 1.33 +#include "uarrsort.h" 1.34 +#include "ucnvmbcs.h" 1.35 +#include "ucnv_bld.h" 1.36 +#include "ucnv_ext.h" 1.37 +#include "uparse.h" 1.38 +#include "ucm.h" 1.39 +#include <stdio.h> 1.40 + 1.41 +#if !UCONFIG_NO_CONVERSION 1.42 + 1.43 +/* -------------------------------------------------------------------------- */ 1.44 + 1.45 +static void 1.46 +printMapping(UCMapping *m, UChar32 *codePoints, uint8_t *bytes, FILE *f) { 1.47 + int32_t j; 1.48 + 1.49 + for(j=0; j<m->uLen; ++j) { 1.50 + fprintf(f, "<U%04lX>", (long)codePoints[j]); 1.51 + } 1.52 + 1.53 + fputc(' ', f); 1.54 + 1.55 + for(j=0; j<m->bLen; ++j) { 1.56 + fprintf(f, "\\x%02X", bytes[j]); 1.57 + } 1.58 + 1.59 + if(m->f>=0) { 1.60 + fprintf(f, " |%u\n", m->f); 1.61 + } else { 1.62 + fputs("\n", f); 1.63 + } 1.64 +} 1.65 + 1.66 +U_CAPI void U_EXPORT2 1.67 +ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f) { 1.68 + printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), f); 1.69 +} 1.70 + 1.71 +U_CAPI void U_EXPORT2 1.72 +ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) { 1.73 + UCMapping *m; 1.74 + int32_t i, length; 1.75 + 1.76 + m=table->mappings; 1.77 + length=table->mappingsLength; 1.78 + if(byUnicode) { 1.79 + for(i=0; i<length; ++m, ++i) { 1.80 + ucm_printMapping(table, m, f); 1.81 + } 1.82 + } else { 1.83 + const int32_t *map=table->reverseMap; 1.84 + for(i=0; i<length; ++i) { 1.85 + ucm_printMapping(table, m+map[i], f); 1.86 + } 1.87 + } 1.88 +} 1.89 + 1.90 +/* mapping comparisons ------------------------------------------------------ */ 1.91 + 1.92 +static int32_t 1.93 +compareUnicode(UCMTable *lTable, const UCMapping *l, 1.94 + UCMTable *rTable, const UCMapping *r) { 1.95 + const UChar32 *lu, *ru; 1.96 + int32_t result, i, length; 1.97 + 1.98 + if(l->uLen==1 && r->uLen==1) { 1.99 + /* compare two single code points */ 1.100 + return l->u-r->u; 1.101 + } 1.102 + 1.103 + /* get pointers to the code point sequences */ 1.104 + lu=UCM_GET_CODE_POINTS(lTable, l); 1.105 + ru=UCM_GET_CODE_POINTS(rTable, r); 1.106 + 1.107 + /* get the minimum length */ 1.108 + if(l->uLen<=r->uLen) { 1.109 + length=l->uLen; 1.110 + } else { 1.111 + length=r->uLen; 1.112 + } 1.113 + 1.114 + /* compare the code points */ 1.115 + for(i=0; i<length; ++i) { 1.116 + result=lu[i]-ru[i]; 1.117 + if(result!=0) { 1.118 + return result; 1.119 + } 1.120 + } 1.121 + 1.122 + /* compare the lengths */ 1.123 + return l->uLen-r->uLen; 1.124 +} 1.125 + 1.126 +static int32_t 1.127 +compareBytes(UCMTable *lTable, const UCMapping *l, 1.128 + UCMTable *rTable, const UCMapping *r, 1.129 + UBool lexical) { 1.130 + const uint8_t *lb, *rb; 1.131 + int32_t result, i, length; 1.132 + 1.133 + /* 1.134 + * A lexical comparison is used for sorting in the builder, to allow 1.135 + * an efficient search for a byte sequence that could be a prefix 1.136 + * of a previously entered byte sequence. 1.137 + * 1.138 + * Comparing by lengths first is for compatibility with old .ucm tools 1.139 + * like canonucm and rptp2ucm. 1.140 + */ 1.141 + if(lexical) { 1.142 + /* get the minimum length and continue */ 1.143 + if(l->bLen<=r->bLen) { 1.144 + length=l->bLen; 1.145 + } else { 1.146 + length=r->bLen; 1.147 + } 1.148 + } else { 1.149 + /* compare lengths first */ 1.150 + result=l->bLen-r->bLen; 1.151 + if(result!=0) { 1.152 + return result; 1.153 + } else { 1.154 + length=l->bLen; 1.155 + } 1.156 + } 1.157 + 1.158 + /* get pointers to the byte sequences */ 1.159 + lb=UCM_GET_BYTES(lTable, l); 1.160 + rb=UCM_GET_BYTES(rTable, r); 1.161 + 1.162 + /* compare the bytes */ 1.163 + for(i=0; i<length; ++i) { 1.164 + result=lb[i]-rb[i]; 1.165 + if(result!=0) { 1.166 + return result; 1.167 + } 1.168 + } 1.169 + 1.170 + /* compare the lengths */ 1.171 + return l->bLen-r->bLen; 1.172 +} 1.173 + 1.174 +/* compare UCMappings for sorting */ 1.175 +static int32_t 1.176 +compareMappings(UCMTable *lTable, const UCMapping *l, 1.177 + UCMTable *rTable, const UCMapping *r, 1.178 + UBool uFirst) { 1.179 + int32_t result; 1.180 + 1.181 + /* choose which side to compare first */ 1.182 + if(uFirst) { 1.183 + /* Unicode then bytes */ 1.184 + result=compareUnicode(lTable, l, rTable, r); 1.185 + if(result==0) { 1.186 + result=compareBytes(lTable, l, rTable, r, FALSE); /* not lexically, like canonucm */ 1.187 + } 1.188 + } else { 1.189 + /* bytes then Unicode */ 1.190 + result=compareBytes(lTable, l, rTable, r, TRUE); /* lexically, for builder */ 1.191 + if(result==0) { 1.192 + result=compareUnicode(lTable, l, rTable, r); 1.193 + } 1.194 + } 1.195 + 1.196 + if(result!=0) { 1.197 + return result; 1.198 + } 1.199 + 1.200 + /* compare the flags */ 1.201 + return l->f-r->f; 1.202 +} 1.203 + 1.204 +/* sorting by Unicode first sorts mappings directly */ 1.205 +static int32_t 1.206 +compareMappingsUnicodeFirst(const void *context, const void *left, const void *right) { 1.207 + return compareMappings( 1.208 + (UCMTable *)context, (const UCMapping *)left, 1.209 + (UCMTable *)context, (const UCMapping *)right, TRUE); 1.210 +} 1.211 + 1.212 +/* sorting by bytes first sorts the reverseMap; use indirection to mappings */ 1.213 +static int32_t 1.214 +compareMappingsBytesFirst(const void *context, const void *left, const void *right) { 1.215 + UCMTable *table=(UCMTable *)context; 1.216 + int32_t l=*(const int32_t *)left, r=*(const int32_t *)right; 1.217 + return compareMappings( 1.218 + table, table->mappings+l, 1.219 + table, table->mappings+r, FALSE); 1.220 +} 1.221 + 1.222 +U_CAPI void U_EXPORT2 1.223 +ucm_sortTable(UCMTable *t) { 1.224 + UErrorCode errorCode; 1.225 + int32_t i; 1.226 + 1.227 + if(t->isSorted) { 1.228 + return; 1.229 + } 1.230 + 1.231 + errorCode=U_ZERO_ERROR; 1.232 + 1.233 + /* 1. sort by Unicode first */ 1.234 + uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping), 1.235 + compareMappingsUnicodeFirst, t, 1.236 + FALSE, &errorCode); 1.237 + 1.238 + /* build the reverseMap */ 1.239 + if(t->reverseMap==NULL) { 1.240 + /* 1.241 + * allocate mappingsCapacity instead of mappingsLength so that 1.242 + * if mappings are added, the reverseMap need not be 1.243 + * reallocated each time 1.244 + * (see ucm_moveMappings() and ucm_addMapping()) 1.245 + */ 1.246 + t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t)); 1.247 + if(t->reverseMap==NULL) { 1.248 + fprintf(stderr, "ucm error: unable to allocate reverseMap\n"); 1.249 + exit(U_MEMORY_ALLOCATION_ERROR); 1.250 + } 1.251 + } 1.252 + for(i=0; i<t->mappingsLength; ++i) { 1.253 + t->reverseMap[i]=i; 1.254 + } 1.255 + 1.256 + /* 2. sort reverseMap by mappings bytes first */ 1.257 + uprv_sortArray(t->reverseMap, t->mappingsLength, sizeof(int32_t), 1.258 + compareMappingsBytesFirst, t, 1.259 + FALSE, &errorCode); 1.260 + 1.261 + if(U_FAILURE(errorCode)) { 1.262 + fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n", 1.263 + u_errorName(errorCode)); 1.264 + exit(errorCode); 1.265 + } 1.266 + 1.267 + t->isSorted=TRUE; 1.268 +} 1.269 + 1.270 +/* 1.271 + * remove mappings with their move flag set from the base table 1.272 + * and move some of them (with UCM_MOVE_TO_EXT) to the extension table 1.273 + */ 1.274 +U_CAPI void U_EXPORT2 1.275 +ucm_moveMappings(UCMTable *base, UCMTable *ext) { 1.276 + UCMapping *mb, *mbLimit; 1.277 + int8_t flag; 1.278 + 1.279 + mb=base->mappings; 1.280 + mbLimit=mb+base->mappingsLength; 1.281 + 1.282 + while(mb<mbLimit) { 1.283 + flag=mb->moveFlag; 1.284 + if(flag!=0) { 1.285 + /* reset the move flag */ 1.286 + mb->moveFlag=0; 1.287 + 1.288 + if(ext!=NULL && (flag&UCM_MOVE_TO_EXT)) { 1.289 + /* add the mapping to the extension table */ 1.290 + ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb), UCM_GET_BYTES(base, mb)); 1.291 + } 1.292 + 1.293 + /* remove this mapping: move the last base mapping down and overwrite the current one */ 1.294 + if(mb<(mbLimit-1)) { 1.295 + uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping)); 1.296 + } 1.297 + --mbLimit; 1.298 + --base->mappingsLength; 1.299 + base->isSorted=FALSE; 1.300 + } else { 1.301 + ++mb; 1.302 + } 1.303 + } 1.304 +} 1.305 + 1.306 +enum { 1.307 + NEEDS_MOVE=1, 1.308 + HAS_ERRORS=2 1.309 +}; 1.310 + 1.311 +static uint8_t 1.312 +checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext, 1.313 + UBool moveToExt, UBool intersectBase) { 1.314 + UCMapping *mb, *me, *mbLimit, *meLimit; 1.315 + int32_t cmp; 1.316 + uint8_t result; 1.317 + 1.318 + mb=base->mappings; 1.319 + mbLimit=mb+base->mappingsLength; 1.320 + 1.321 + me=ext->mappings; 1.322 + meLimit=me+ext->mappingsLength; 1.323 + 1.324 + result=0; 1.325 + 1.326 + for(;;) { 1.327 + /* skip irrelevant mappings on both sides */ 1.328 + for(;;) { 1.329 + if(mb==mbLimit) { 1.330 + return result; 1.331 + } 1.332 + 1.333 + if((0<=mb->f && mb->f<=2) || mb->f==4) { 1.334 + break; 1.335 + } 1.336 + 1.337 + ++mb; 1.338 + } 1.339 + 1.340 + for(;;) { 1.341 + if(me==meLimit) { 1.342 + return result; 1.343 + } 1.344 + 1.345 + if((0<=me->f && me->f<=2) || me->f==4) { 1.346 + break; 1.347 + } 1.348 + 1.349 + ++me; 1.350 + } 1.351 + 1.352 + /* compare the base and extension mappings */ 1.353 + cmp=compareUnicode(base, mb, ext, me); 1.354 + if(cmp<0) { 1.355 + if(intersectBase && (intersectBase!=2 || mb->bLen>1)) { 1.356 + /* 1.357 + * mapping in base but not in ext, move it 1.358 + * 1.359 + * if ext is DBCS, move DBCS mappings here 1.360 + * and check SBCS ones for Unicode prefix below 1.361 + */ 1.362 + mb->moveFlag|=UCM_MOVE_TO_EXT; 1.363 + result|=NEEDS_MOVE; 1.364 + 1.365 + /* does mb map from an input sequence that is a prefix of me's? */ 1.366 + } else if( mb->uLen<me->uLen && 1.367 + 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) 1.368 + ) { 1.369 + if(moveToExt) { 1.370 + /* mark this mapping to be moved to the extension table */ 1.371 + mb->moveFlag|=UCM_MOVE_TO_EXT; 1.372 + result|=NEEDS_MOVE; 1.373 + } else { 1.374 + fprintf(stderr, 1.375 + "ucm error: the base table contains a mapping whose input sequence\n" 1.376 + " is a prefix of the input sequence of an extension mapping\n"); 1.377 + ucm_printMapping(base, mb, stderr); 1.378 + ucm_printMapping(ext, me, stderr); 1.379 + result|=HAS_ERRORS; 1.380 + } 1.381 + } 1.382 + 1.383 + ++mb; 1.384 + } else if(cmp==0) { 1.385 + /* 1.386 + * same output: remove the extension mapping, 1.387 + * otherwise treat as an error 1.388 + */ 1.389 + if( mb->f==me->f && mb->bLen==me->bLen && 1.390 + 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) 1.391 + ) { 1.392 + me->moveFlag|=UCM_REMOVE_MAPPING; 1.393 + result|=NEEDS_MOVE; 1.394 + } else if(intersectBase) { 1.395 + /* mapping in base but not in ext, move it */ 1.396 + mb->moveFlag|=UCM_MOVE_TO_EXT; 1.397 + result|=NEEDS_MOVE; 1.398 + } else { 1.399 + fprintf(stderr, 1.400 + "ucm error: the base table contains a mapping whose input sequence\n" 1.401 + " is the same as the input sequence of an extension mapping\n" 1.402 + " but it maps differently\n"); 1.403 + ucm_printMapping(base, mb, stderr); 1.404 + ucm_printMapping(ext, me, stderr); 1.405 + result|=HAS_ERRORS; 1.406 + } 1.407 + 1.408 + ++mb; 1.409 + } else /* cmp>0 */ { 1.410 + ++me; 1.411 + } 1.412 + } 1.413 +} 1.414 + 1.415 +static uint8_t 1.416 +checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, 1.417 + UBool moveToExt, UBool intersectBase) { 1.418 + UCMapping *mb, *me; 1.419 + int32_t *baseMap, *extMap; 1.420 + int32_t b, e, bLimit, eLimit, cmp; 1.421 + uint8_t result; 1.422 + UBool isSISO; 1.423 + 1.424 + baseMap=base->reverseMap; 1.425 + extMap=ext->reverseMap; 1.426 + 1.427 + b=e=0; 1.428 + bLimit=base->mappingsLength; 1.429 + eLimit=ext->mappingsLength; 1.430 + 1.431 + result=0; 1.432 + 1.433 + isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO); 1.434 + 1.435 + for(;;) { 1.436 + /* skip irrelevant mappings on both sides */ 1.437 + for(;; ++b) { 1.438 + if(b==bLimit) { 1.439 + return result; 1.440 + } 1.441 + mb=base->mappings+baseMap[b]; 1.442 + 1.443 + if(intersectBase==2 && mb->bLen==1) { 1.444 + /* 1.445 + * comparing a base against a DBCS extension: 1.446 + * leave SBCS base mappings alone 1.447 + */ 1.448 + continue; 1.449 + } 1.450 + 1.451 + if(mb->f==0 || mb->f==3) { 1.452 + break; 1.453 + } 1.454 + } 1.455 + 1.456 + for(;;) { 1.457 + if(e==eLimit) { 1.458 + return result; 1.459 + } 1.460 + me=ext->mappings+extMap[e]; 1.461 + 1.462 + if(me->f==0 || me->f==3) { 1.463 + break; 1.464 + } 1.465 + 1.466 + ++e; 1.467 + } 1.468 + 1.469 + /* compare the base and extension mappings */ 1.470 + cmp=compareBytes(base, mb, ext, me, TRUE); 1.471 + if(cmp<0) { 1.472 + if(intersectBase) { 1.473 + /* mapping in base but not in ext, move it */ 1.474 + mb->moveFlag|=UCM_MOVE_TO_EXT; 1.475 + result|=NEEDS_MOVE; 1.476 + 1.477 + /* 1.478 + * does mb map from an input sequence that is a prefix of me's? 1.479 + * for SI/SO tables, a single byte is never a prefix because it 1.480 + * occurs in a separate single-byte state 1.481 + */ 1.482 + } else if( mb->bLen<me->bLen && 1.483 + (!isSISO || mb->bLen>1) && 1.484 + 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) 1.485 + ) { 1.486 + if(moveToExt) { 1.487 + /* mark this mapping to be moved to the extension table */ 1.488 + mb->moveFlag|=UCM_MOVE_TO_EXT; 1.489 + result|=NEEDS_MOVE; 1.490 + } else { 1.491 + fprintf(stderr, 1.492 + "ucm error: the base table contains a mapping whose input sequence\n" 1.493 + " is a prefix of the input sequence of an extension mapping\n"); 1.494 + ucm_printMapping(base, mb, stderr); 1.495 + ucm_printMapping(ext, me, stderr); 1.496 + result|=HAS_ERRORS; 1.497 + } 1.498 + } 1.499 + 1.500 + ++b; 1.501 + } else if(cmp==0) { 1.502 + /* 1.503 + * same output: remove the extension mapping, 1.504 + * otherwise treat as an error 1.505 + */ 1.506 + if( mb->f==me->f && mb->uLen==me->uLen && 1.507 + 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) 1.508 + ) { 1.509 + me->moveFlag|=UCM_REMOVE_MAPPING; 1.510 + result|=NEEDS_MOVE; 1.511 + } else if(intersectBase) { 1.512 + /* mapping in base but not in ext, move it */ 1.513 + mb->moveFlag|=UCM_MOVE_TO_EXT; 1.514 + result|=NEEDS_MOVE; 1.515 + } else { 1.516 + fprintf(stderr, 1.517 + "ucm error: the base table contains a mapping whose input sequence\n" 1.518 + " is the same as the input sequence of an extension mapping\n" 1.519 + " but it maps differently\n"); 1.520 + ucm_printMapping(base, mb, stderr); 1.521 + ucm_printMapping(ext, me, stderr); 1.522 + result|=HAS_ERRORS; 1.523 + } 1.524 + 1.525 + ++b; 1.526 + } else /* cmp>0 */ { 1.527 + ++e; 1.528 + } 1.529 + } 1.530 +} 1.531 + 1.532 +U_CAPI UBool U_EXPORT2 1.533 +ucm_checkValidity(UCMTable *table, UCMStates *baseStates) { 1.534 + UCMapping *m, *mLimit; 1.535 + int32_t count; 1.536 + UBool isOK; 1.537 + 1.538 + m=table->mappings; 1.539 + mLimit=m+table->mappingsLength; 1.540 + isOK=TRUE; 1.541 + 1.542 + while(m<mLimit) { 1.543 + count=ucm_countChars(baseStates, UCM_GET_BYTES(table, m), m->bLen); 1.544 + if(count<1) { 1.545 + ucm_printMapping(table, m, stderr); 1.546 + isOK=FALSE; 1.547 + } 1.548 + ++m; 1.549 + } 1.550 + 1.551 + return isOK; 1.552 +} 1.553 + 1.554 +U_CAPI UBool U_EXPORT2 1.555 +ucm_checkBaseExt(UCMStates *baseStates, 1.556 + UCMTable *base, UCMTable *ext, UCMTable *moveTarget, 1.557 + UBool intersectBase) { 1.558 + uint8_t result; 1.559 + 1.560 + /* if we have an extension table, we must always use precision flags */ 1.561 + if(base->flagsType&UCM_FLAGS_IMPLICIT) { 1.562 + fprintf(stderr, "ucm error: the base table contains mappings without precision flags\n"); 1.563 + return FALSE; 1.564 + } 1.565 + if(ext->flagsType&UCM_FLAGS_IMPLICIT) { 1.566 + fprintf(stderr, "ucm error: extension table contains mappings without precision flags\n"); 1.567 + return FALSE; 1.568 + } 1.569 + 1.570 + /* checking requires both tables to be sorted */ 1.571 + ucm_sortTable(base); 1.572 + ucm_sortTable(ext); 1.573 + 1.574 + /* check */ 1.575 + result= 1.576 + checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase)| 1.577 + checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase); 1.578 + 1.579 + if(result&HAS_ERRORS) { 1.580 + return FALSE; 1.581 + } 1.582 + 1.583 + if(result&NEEDS_MOVE) { 1.584 + ucm_moveMappings(ext, NULL); 1.585 + ucm_moveMappings(base, moveTarget); 1.586 + ucm_sortTable(base); 1.587 + ucm_sortTable(ext); 1.588 + if(moveTarget!=NULL) { 1.589 + ucm_sortTable(moveTarget); 1.590 + } 1.591 + } 1.592 + 1.593 + return TRUE; 1.594 +} 1.595 + 1.596 +/* merge tables for rptp2ucm ------------------------------------------------ */ 1.597 + 1.598 +U_CAPI void U_EXPORT2 1.599 +ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable, 1.600 + const uint8_t *subchar, int32_t subcharLength, 1.601 + uint8_t subchar1) { 1.602 + UCMapping *fromUMapping, *toUMapping; 1.603 + int32_t fromUIndex, toUIndex, fromUTop, toUTop, cmp; 1.604 + 1.605 + ucm_sortTable(fromUTable); 1.606 + ucm_sortTable(toUTable); 1.607 + 1.608 + fromUMapping=fromUTable->mappings; 1.609 + toUMapping=toUTable->mappings; 1.610 + 1.611 + fromUTop=fromUTable->mappingsLength; 1.612 + toUTop=toUTable->mappingsLength; 1.613 + 1.614 + fromUIndex=toUIndex=0; 1.615 + 1.616 + while(fromUIndex<fromUTop && toUIndex<toUTop) { 1.617 + cmp=compareMappings(fromUTable, fromUMapping, toUTable, toUMapping, TRUE); 1.618 + if(cmp==0) { 1.619 + /* equal: roundtrip, nothing to do (flags are initially 0) */ 1.620 + ++fromUMapping; 1.621 + ++toUMapping; 1.622 + 1.623 + ++fromUIndex; 1.624 + ++toUIndex; 1.625 + } else if(cmp<0) { 1.626 + /* 1.627 + * the fromU mapping does not have a toU counterpart: 1.628 + * fallback Unicode->codepage 1.629 + */ 1.630 + if( (fromUMapping->bLen==subcharLength && 1.631 + 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || 1.632 + (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) 1.633 + ) { 1.634 + fromUMapping->f=2; /* SUB mapping */ 1.635 + } else { 1.636 + fromUMapping->f=1; /* normal fallback */ 1.637 + } 1.638 + 1.639 + ++fromUMapping; 1.640 + ++fromUIndex; 1.641 + } else { 1.642 + /* 1.643 + * the toU mapping does not have a fromU counterpart: 1.644 + * (reverse) fallback codepage->Unicode, copy it to the fromU table 1.645 + */ 1.646 + 1.647 + /* ignore reverse fallbacks to Unicode SUB */ 1.648 + if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { 1.649 + toUMapping->f=3; /* reverse fallback */ 1.650 + ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); 1.651 + 1.652 + /* the table may have been reallocated */ 1.653 + fromUMapping=fromUTable->mappings+fromUIndex; 1.654 + } 1.655 + 1.656 + ++toUMapping; 1.657 + ++toUIndex; 1.658 + } 1.659 + } 1.660 + 1.661 + /* either one or both tables are exhausted */ 1.662 + while(fromUIndex<fromUTop) { 1.663 + /* leftover fromU mappings are fallbacks */ 1.664 + if( (fromUMapping->bLen==subcharLength && 1.665 + 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || 1.666 + (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) 1.667 + ) { 1.668 + fromUMapping->f=2; /* SUB mapping */ 1.669 + } else { 1.670 + fromUMapping->f=1; /* normal fallback */ 1.671 + } 1.672 + 1.673 + ++fromUMapping; 1.674 + ++fromUIndex; 1.675 + } 1.676 + 1.677 + while(toUIndex<toUTop) { 1.678 + /* leftover toU mappings are reverse fallbacks */ 1.679 + 1.680 + /* ignore reverse fallbacks to Unicode SUB */ 1.681 + if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { 1.682 + toUMapping->f=3; /* reverse fallback */ 1.683 + ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); 1.684 + } 1.685 + 1.686 + ++toUMapping; 1.687 + ++toUIndex; 1.688 + } 1.689 + 1.690 + fromUTable->isSorted=FALSE; 1.691 +} 1.692 + 1.693 +/* separate extension mappings out of base table for rptp2ucm --------------- */ 1.694 + 1.695 +U_CAPI UBool U_EXPORT2 1.696 +ucm_separateMappings(UCMFile *ucm, UBool isSISO) { 1.697 + UCMTable *table; 1.698 + UCMapping *m, *mLimit; 1.699 + int32_t type; 1.700 + UBool needsMove, isOK; 1.701 + 1.702 + table=ucm->base; 1.703 + m=table->mappings; 1.704 + mLimit=m+table->mappingsLength; 1.705 + 1.706 + needsMove=FALSE; 1.707 + isOK=TRUE; 1.708 + 1.709 + for(; m<mLimit; ++m) { 1.710 + if(isSISO && m->bLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { 1.711 + fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n"); 1.712 + ucm_printMapping(table, m, stderr); 1.713 + m->moveFlag|=UCM_REMOVE_MAPPING; 1.714 + needsMove=TRUE; 1.715 + continue; 1.716 + } 1.717 + 1.718 + type=ucm_mappingType( 1.719 + &ucm->states, m, 1.720 + UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m)); 1.721 + if(type<0) { 1.722 + /* illegal byte sequence */ 1.723 + printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr); 1.724 + isOK=FALSE; 1.725 + } else if(type>0) { 1.726 + m->moveFlag|=UCM_MOVE_TO_EXT; 1.727 + needsMove=TRUE; 1.728 + } 1.729 + } 1.730 + 1.731 + if(!isOK) { 1.732 + return FALSE; 1.733 + } 1.734 + if(needsMove) { 1.735 + ucm_moveMappings(ucm->base, ucm->ext); 1.736 + return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE); 1.737 + } else { 1.738 + ucm_sortTable(ucm->base); 1.739 + return TRUE; 1.740 + } 1.741 +} 1.742 + 1.743 +/* ucm parser --------------------------------------------------------------- */ 1.744 + 1.745 +U_CAPI int8_t U_EXPORT2 1.746 +ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps) { 1.747 + const char *s=*ps; 1.748 + char *end; 1.749 + uint8_t byte; 1.750 + int8_t bLen; 1.751 + 1.752 + bLen=0; 1.753 + for(;;) { 1.754 + /* skip an optional plus sign */ 1.755 + if(bLen>0 && *s=='+') { 1.756 + ++s; 1.757 + } 1.758 + if(*s!='\\') { 1.759 + break; 1.760 + } 1.761 + 1.762 + if( s[1]!='x' || 1.763 + (byte=(uint8_t)uprv_strtoul(s+2, &end, 16), end)!=s+4 1.764 + ) { 1.765 + fprintf(stderr, "ucm error: byte must be formatted as \\xXX (2 hex digits) - \"%s\"\n", line); 1.766 + return -1; 1.767 + } 1.768 + 1.769 + if(bLen==UCNV_EXT_MAX_BYTES) { 1.770 + fprintf(stderr, "ucm error: too many bytes on \"%s\"\n", line); 1.771 + return -1; 1.772 + } 1.773 + bytes[bLen++]=byte; 1.774 + s=end; 1.775 + } 1.776 + 1.777 + *ps=s; 1.778 + return bLen; 1.779 +} 1.780 + 1.781 +/* parse a mapping line; must not be empty */ 1.782 +U_CAPI UBool U_EXPORT2 1.783 +ucm_parseMappingLine(UCMapping *m, 1.784 + UChar32 codePoints[UCNV_EXT_MAX_UCHARS], 1.785 + uint8_t bytes[UCNV_EXT_MAX_BYTES], 1.786 + const char *line) { 1.787 + const char *s; 1.788 + char *end; 1.789 + UChar32 cp; 1.790 + int32_t u16Length; 1.791 + int8_t uLen, bLen, f; 1.792 + 1.793 + s=line; 1.794 + uLen=bLen=0; 1.795 + 1.796 + /* parse code points */ 1.797 + for(;;) { 1.798 + /* skip an optional plus sign */ 1.799 + if(uLen>0 && *s=='+') { 1.800 + ++s; 1.801 + } 1.802 + if(*s!='<') { 1.803 + break; 1.804 + } 1.805 + 1.806 + if( s[1]!='U' || 1.807 + (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 || 1.808 + *end!='>' 1.809 + ) { 1.810 + fprintf(stderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line); 1.811 + return FALSE; 1.812 + } 1.813 + if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) { 1.814 + fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line); 1.815 + return FALSE; 1.816 + } 1.817 + 1.818 + if(uLen==UCNV_EXT_MAX_UCHARS) { 1.819 + fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line); 1.820 + return FALSE; 1.821 + } 1.822 + codePoints[uLen++]=cp; 1.823 + s=end+1; 1.824 + } 1.825 + 1.826 + if(uLen==0) { 1.827 + fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line); 1.828 + return FALSE; 1.829 + } else if(uLen==1) { 1.830 + m->u=codePoints[0]; 1.831 + } else { 1.832 + UErrorCode errorCode=U_ZERO_ERROR; 1.833 + u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode); 1.834 + if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) || 1.835 + u16Length>UCNV_EXT_MAX_UCHARS 1.836 + ) { 1.837 + fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line); 1.838 + return FALSE; 1.839 + } 1.840 + } 1.841 + 1.842 + s=u_skipWhitespace(s); 1.843 + 1.844 + /* parse bytes */ 1.845 + bLen=ucm_parseBytes(bytes, line, &s); 1.846 + 1.847 + if(bLen<0) { 1.848 + return FALSE; 1.849 + } else if(bLen==0) { 1.850 + fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line); 1.851 + return FALSE; 1.852 + } else if(bLen<=4) { 1.853 + uprv_memcpy(m->b.bytes, bytes, bLen); 1.854 + } 1.855 + 1.856 + /* skip everything until the fallback indicator, even the start of a comment */ 1.857 + for(;;) { 1.858 + if(*s==0) { 1.859 + f=-1; /* no fallback indicator */ 1.860 + break; 1.861 + } else if(*s=='|') { 1.862 + f=(int8_t)(s[1]-'0'); 1.863 + if((uint8_t)f>4) { 1.864 + fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line); 1.865 + return FALSE; 1.866 + } 1.867 + break; 1.868 + } 1.869 + ++s; 1.870 + } 1.871 + 1.872 + m->uLen=uLen; 1.873 + m->bLen=bLen; 1.874 + m->f=f; 1.875 + return TRUE; 1.876 +} 1.877 + 1.878 +/* general APIs ------------------------------------------------------------- */ 1.879 + 1.880 +U_CAPI UCMTable * U_EXPORT2 1.881 +ucm_openTable() { 1.882 + UCMTable *table=(UCMTable *)uprv_malloc(sizeof(UCMTable)); 1.883 + if(table==NULL) { 1.884 + fprintf(stderr, "ucm error: unable to allocate a UCMTable\n"); 1.885 + exit(U_MEMORY_ALLOCATION_ERROR); 1.886 + } 1.887 + 1.888 + memset(table, 0, sizeof(UCMTable)); 1.889 + return table; 1.890 +} 1.891 + 1.892 +U_CAPI void U_EXPORT2 1.893 +ucm_closeTable(UCMTable *table) { 1.894 + if(table!=NULL) { 1.895 + uprv_free(table->mappings); 1.896 + uprv_free(table->codePoints); 1.897 + uprv_free(table->bytes); 1.898 + uprv_free(table->reverseMap); 1.899 + uprv_free(table); 1.900 + } 1.901 +} 1.902 + 1.903 +U_CAPI void U_EXPORT2 1.904 +ucm_resetTable(UCMTable *table) { 1.905 + if(table!=NULL) { 1.906 + table->mappingsLength=0; 1.907 + table->flagsType=0; 1.908 + table->unicodeMask=0; 1.909 + table->bytesLength=table->codePointsLength=0; 1.910 + table->isSorted=FALSE; 1.911 + } 1.912 +} 1.913 + 1.914 +U_CAPI void U_EXPORT2 1.915 +ucm_addMapping(UCMTable *table, 1.916 + UCMapping *m, 1.917 + UChar32 codePoints[UCNV_EXT_MAX_UCHARS], 1.918 + uint8_t bytes[UCNV_EXT_MAX_BYTES]) { 1.919 + UCMapping *tm; 1.920 + UChar32 c; 1.921 + int32_t idx; 1.922 + 1.923 + if(table->mappingsLength>=table->mappingsCapacity) { 1.924 + /* make the mappings array larger */ 1.925 + if(table->mappingsCapacity==0) { 1.926 + table->mappingsCapacity=1000; 1.927 + } else { 1.928 + table->mappingsCapacity*=10; 1.929 + } 1.930 + table->mappings=(UCMapping *)uprv_realloc(table->mappings, 1.931 + table->mappingsCapacity*sizeof(UCMapping)); 1.932 + if(table->mappings==NULL) { 1.933 + fprintf(stderr, "ucm error: unable to allocate %d UCMappings\n", 1.934 + (int)table->mappingsCapacity); 1.935 + exit(U_MEMORY_ALLOCATION_ERROR); 1.936 + } 1.937 + 1.938 + if(table->reverseMap!=NULL) { 1.939 + /* the reverseMap must be reallocated in a new sort */ 1.940 + uprv_free(table->reverseMap); 1.941 + table->reverseMap=NULL; 1.942 + } 1.943 + } 1.944 + 1.945 + if(m->uLen>1 && table->codePointsCapacity==0) { 1.946 + table->codePointsCapacity=10000; 1.947 + table->codePoints=(UChar32 *)uprv_malloc(table->codePointsCapacity*4); 1.948 + if(table->codePoints==NULL) { 1.949 + fprintf(stderr, "ucm error: unable to allocate %d UChar32s\n", 1.950 + (int)table->codePointsCapacity); 1.951 + exit(U_MEMORY_ALLOCATION_ERROR); 1.952 + } 1.953 + } 1.954 + 1.955 + if(m->bLen>4 && table->bytesCapacity==0) { 1.956 + table->bytesCapacity=10000; 1.957 + table->bytes=(uint8_t *)uprv_malloc(table->bytesCapacity); 1.958 + if(table->bytes==NULL) { 1.959 + fprintf(stderr, "ucm error: unable to allocate %d bytes\n", 1.960 + (int)table->bytesCapacity); 1.961 + exit(U_MEMORY_ALLOCATION_ERROR); 1.962 + } 1.963 + } 1.964 + 1.965 + if(m->uLen>1) { 1.966 + idx=table->codePointsLength; 1.967 + table->codePointsLength+=m->uLen; 1.968 + if(table->codePointsLength>table->codePointsCapacity) { 1.969 + fprintf(stderr, "ucm error: too many code points in multiple-code point mappings\n"); 1.970 + exit(U_MEMORY_ALLOCATION_ERROR); 1.971 + } 1.972 + 1.973 + uprv_memcpy(table->codePoints+idx, codePoints, m->uLen*4); 1.974 + m->u=idx; 1.975 + } 1.976 + 1.977 + if(m->bLen>4) { 1.978 + idx=table->bytesLength; 1.979 + table->bytesLength+=m->bLen; 1.980 + if(table->bytesLength>table->bytesCapacity) { 1.981 + fprintf(stderr, "ucm error: too many bytes in mappings with >4 charset bytes\n"); 1.982 + exit(U_MEMORY_ALLOCATION_ERROR); 1.983 + } 1.984 + 1.985 + uprv_memcpy(table->bytes+idx, bytes, m->bLen); 1.986 + m->b.idx=idx; 1.987 + } 1.988 + 1.989 + /* set unicodeMask */ 1.990 + for(idx=0; idx<m->uLen; ++idx) { 1.991 + c=codePoints[idx]; 1.992 + if(c>=0x10000) { 1.993 + table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */ 1.994 + } else if(U_IS_SURROGATE(c)) { 1.995 + table->unicodeMask|=UCNV_HAS_SURROGATES; /* there are surrogate code points */ 1.996 + } 1.997 + } 1.998 + 1.999 + /* set flagsType */ 1.1000 + if(m->f<0) { 1.1001 + table->flagsType|=UCM_FLAGS_IMPLICIT; 1.1002 + } else { 1.1003 + table->flagsType|=UCM_FLAGS_EXPLICIT; 1.1004 + } 1.1005 + 1.1006 + tm=table->mappings+table->mappingsLength++; 1.1007 + uprv_memcpy(tm, m, sizeof(UCMapping)); 1.1008 + 1.1009 + table->isSorted=FALSE; 1.1010 +} 1.1011 + 1.1012 +U_CAPI UCMFile * U_EXPORT2 1.1013 +ucm_open() { 1.1014 + UCMFile *ucm=(UCMFile *)uprv_malloc(sizeof(UCMFile)); 1.1015 + if(ucm==NULL) { 1.1016 + fprintf(stderr, "ucm error: unable to allocate a UCMFile\n"); 1.1017 + exit(U_MEMORY_ALLOCATION_ERROR); 1.1018 + } 1.1019 + 1.1020 + memset(ucm, 0, sizeof(UCMFile)); 1.1021 + 1.1022 + ucm->base=ucm_openTable(); 1.1023 + ucm->ext=ucm_openTable(); 1.1024 + 1.1025 + ucm->states.stateFlags[0]=MBCS_STATE_FLAG_DIRECT; 1.1026 + ucm->states.conversionType=UCNV_UNSUPPORTED_CONVERTER; 1.1027 + ucm->states.outputType=-1; 1.1028 + ucm->states.minCharLength=ucm->states.maxCharLength=1; 1.1029 + 1.1030 + return ucm; 1.1031 +} 1.1032 + 1.1033 +U_CAPI void U_EXPORT2 1.1034 +ucm_close(UCMFile *ucm) { 1.1035 + if(ucm!=NULL) { 1.1036 + ucm_closeTable(ucm->base); 1.1037 + ucm_closeTable(ucm->ext); 1.1038 + uprv_free(ucm); 1.1039 + } 1.1040 +} 1.1041 + 1.1042 +U_CAPI int32_t U_EXPORT2 1.1043 +ucm_mappingType(UCMStates *baseStates, 1.1044 + UCMapping *m, 1.1045 + UChar32 codePoints[UCNV_EXT_MAX_UCHARS], 1.1046 + uint8_t bytes[UCNV_EXT_MAX_BYTES]) { 1.1047 + /* check validity of the bytes and count the characters in them */ 1.1048 + int32_t count=ucm_countChars(baseStates, bytes, m->bLen); 1.1049 + if(count<1) { 1.1050 + /* illegal byte sequence */ 1.1051 + return -1; 1.1052 + } 1.1053 + 1.1054 + /* 1.1055 + * Suitable for an ICU conversion base table means: 1.1056 + * - a 1:1 mapping (1 Unicode code point : 1 byte sequence) 1.1057 + * - precision flag 0..3 1.1058 + * - SBCS: any 1:1 mapping 1.1059 + * (the table stores additional bits to distinguish mapping types) 1.1060 + * - MBCS: not a |2 SUB mapping for <subchar1> 1.1061 + * - MBCS: not a |1 fallback to 0x00 1.1062 + * - MBCS: not a multi-byte mapping with leading 0x00 bytes 1.1063 + * 1.1064 + * Further restrictions for fromUnicode tables 1.1065 + * are enforced in makeconv (MBCSOkForBaseFromUnicode()). 1.1066 + * 1.1067 + * All of the MBCS fromUnicode specific tests could be removed from here, 1.1068 + * but the ones above are for unusual mappings, and removing the tests 1.1069 + * from here would change canonucm output which seems gratuitous. 1.1070 + * (Markus Scherer 2006-nov-28) 1.1071 + * 1.1072 + * Exception: All implicit mappings (f<0) that need to be moved 1.1073 + * because of fromUnicode restrictions _must_ be moved here because 1.1074 + * makeconv uses a hack for moving mappings only for the fromUnicode table 1.1075 + * that only works with non-negative values of f. 1.1076 + */ 1.1077 + if( m->uLen==1 && count==1 && m->f<=3 && 1.1078 + (baseStates->maxCharLength==1 || 1.1079 + !((m->f==2 && m->bLen==1) || 1.1080 + (m->f==1 && bytes[0]==0) || 1.1081 + (m->f<=1 && m->bLen>1 && bytes[0]==0))) 1.1082 + ) { 1.1083 + return 0; /* suitable for a base table */ 1.1084 + } else { 1.1085 + return 1; /* needs to go into an extension table */ 1.1086 + } 1.1087 +} 1.1088 + 1.1089 +U_CAPI UBool U_EXPORT2 1.1090 +ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates, 1.1091 + UCMapping *m, 1.1092 + UChar32 codePoints[UCNV_EXT_MAX_UCHARS], 1.1093 + uint8_t bytes[UCNV_EXT_MAX_BYTES]) { 1.1094 + int32_t type; 1.1095 + 1.1096 + if(m->f==2 && m->uLen>1) { 1.1097 + fprintf(stderr, "ucm error: illegal <subchar1> |2 mapping from multiple code points\n"); 1.1098 + printMapping(m, codePoints, bytes, stderr); 1.1099 + return FALSE; 1.1100 + } 1.1101 + 1.1102 + if(baseStates!=NULL) { 1.1103 + /* check validity of the bytes and count the characters in them */ 1.1104 + type=ucm_mappingType(baseStates, m, codePoints, bytes); 1.1105 + if(type<0) { 1.1106 + /* illegal byte sequence */ 1.1107 + printMapping(m, codePoints, bytes, stderr); 1.1108 + return FALSE; 1.1109 + } 1.1110 + } else { 1.1111 + /* not used - adding a mapping for an extension-only table before its base table is read */ 1.1112 + type=1; 1.1113 + } 1.1114 + 1.1115 + /* 1.1116 + * Add the mapping to the base table if this is requested and suitable. 1.1117 + * Otherwise, add it to the extension table. 1.1118 + */ 1.1119 + if(forBase && type==0) { 1.1120 + ucm_addMapping(ucm->base, m, codePoints, bytes); 1.1121 + } else { 1.1122 + ucm_addMapping(ucm->ext, m, codePoints, bytes); 1.1123 + } 1.1124 + 1.1125 + return TRUE; 1.1126 +} 1.1127 + 1.1128 +U_CAPI UBool U_EXPORT2 1.1129 +ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates) { 1.1130 + UCMapping m={ 0 }; 1.1131 + UChar32 codePoints[UCNV_EXT_MAX_UCHARS]; 1.1132 + uint8_t bytes[UCNV_EXT_MAX_BYTES]; 1.1133 + 1.1134 + const char *s; 1.1135 + 1.1136 + /* ignore empty and comment lines */ 1.1137 + if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') { 1.1138 + return TRUE; 1.1139 + } 1.1140 + 1.1141 + return 1.1142 + ucm_parseMappingLine(&m, codePoints, bytes, line) && 1.1143 + ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes); 1.1144 +} 1.1145 + 1.1146 +U_CAPI void U_EXPORT2 1.1147 +ucm_readTable(UCMFile *ucm, FileStream* convFile, 1.1148 + UBool forBase, UCMStates *baseStates, 1.1149 + UErrorCode *pErrorCode) { 1.1150 + char line[500]; 1.1151 + char *end; 1.1152 + UBool isOK; 1.1153 + 1.1154 + if(U_FAILURE(*pErrorCode)) { 1.1155 + return; 1.1156 + } 1.1157 + 1.1158 + isOK=TRUE; 1.1159 + 1.1160 + for(;;) { 1.1161 + /* read the next line */ 1.1162 + if(!T_FileStream_readLine(convFile, line, sizeof(line))) { 1.1163 + fprintf(stderr, "incomplete charmap section\n"); 1.1164 + isOK=FALSE; 1.1165 + break; 1.1166 + } 1.1167 + 1.1168 + /* remove CR LF */ 1.1169 + end=uprv_strchr(line, 0); 1.1170 + while(line<end && (*(end-1)=='\r' || *(end-1)=='\n')) { 1.1171 + --end; 1.1172 + } 1.1173 + *end=0; 1.1174 + 1.1175 + /* ignore empty and comment lines */ 1.1176 + if(line[0]==0 || line[0]=='#') { 1.1177 + continue; 1.1178 + } 1.1179 + 1.1180 + /* stop at the end of the mapping table */ 1.1181 + if(0==uprv_strcmp(line, "END CHARMAP")) { 1.1182 + break; 1.1183 + } 1.1184 + 1.1185 + isOK&=ucm_addMappingFromLine(ucm, line, forBase, baseStates); 1.1186 + } 1.1187 + 1.1188 + if(!isOK) { 1.1189 + *pErrorCode=U_INVALID_TABLE_FORMAT; 1.1190 + } 1.1191 +} 1.1192 +#endif