intl/uconv/tools/umaptable.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/tools/umaptable.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,452 @@
     1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#include <stdio.h>
    1.11 +#include <string.h>
    1.12 +#include <stdlib.h>
    1.13 +#include <stdint.h>
    1.14 +
    /* Sentinel stored in umap[][] for a source code point that has no mapping. */
    #define NOMAPPING 0xFFFD
    1.16 +
    /*
     * Format 0 cell: a source range together with the mapped value of its
     * first code point (see AddFormat0).
     */
    typedef struct {
      uint16_t srcBegin;       /* first source code point of the range */
      uint16_t srcEnd;         /* last source code point of the range  */
      uint16_t destBegin;      /* mapped value of srcBegin             */
    } uFormat0;

    /*
     * Format 1 cell: a source range whose mapped values are stored one by one
     * in mapping[], starting at mappingOffset (see AddFormat1).
     */
    typedef struct {
      uint16_t srcBegin;       /* first source code point of the range */
      uint16_t srcEnd;         /* last source code point of the range  */
      uint16_t mappingOffset;  /* index of the first value in mapping[] */
    } uFormat1;

    /*
     * Format 2 cell: a single source code point and its mapped value
     * (see AddFormat2).
     */
    typedef struct {
      uint16_t srcBegin;       /* the source code point                 */
      uint16_t srcEnd;         /* unused; AddFormat2 stores 0 here      */
      uint16_t destBegin;      /* mapped value of srcBegin              */
    } uFormat2;

    /* One map cell: three 16-bit words viewed through any of the formats. */
    typedef struct {
      union {
        uFormat0 format0;
        uFormat1 format1;
        uFormat2 format2;
      } fmt;
    } uMapCell;
    1.42 +
    /* =================================================
                        uTable
       The first four fields mirror, in order, the four header words that
       writetable() prints.  The type itself is not referenced elsewhere in
       this file.
    ================================================= */
    typedef struct {
      uint16_t itemOfList;            /* number of map cells                   */
      uint16_t offsetToFormatArray;   /* offset of the packed format array     */
      uint16_t offsetToMapCellArray;  /* offset of the map cell array          */
      uint16_t offsetToMappingTable;  /* offset of the mapping table           */
      uint16_t data[1];               /* presumably the start of the payload
                                         that follows the header -- confirm    */
    } uTable;
    1.53 +
    1.54 +uint16_t umap[256][256];
    1.55 +int bInitFromOrTo = 0;
    1.56 +int bGenerateFromUnicodeTable = 0;
    1.57 +
    1.58 +#define MAXCELLNUM 1000
    1.59 +
    1.60 +static int numOfItem = 0;
    1.61 +uMapCell cell[MAXCELLNUM];
    1.62 +uint16_t    format[MAXCELLNUM / 4];
    1.63 +uint16_t   mapping[256*256];
    1.64 +static int mappinglen  = 0;
    1.65 +static int formatcount[4] = {0,0,0,0}; 
    1.66 +
    /*
     * Packed format array helpers.  Each 16-bit word of format[] holds the
     * format ids of four consecutive cells, 4 bits per cell, lowest nibble
     * first.
     *
     * SetFormat(n,f)  ORs format id f into the nibble of cell n and bumps
     *                 formatcount[f].  Wrapped in do/while(0) so it behaves as
     *                 a single statement (safe in front of `else`).
     * GetFormat(n)    yields the 4-bit format id stored for cell n.
     * MAPVALUE(i)     lvalue for the umap[][] entry of 16-bit code point i.
     *
     * Every macro argument is parenthesized so that expression arguments
     * such as `gp - 1` or `base + k` index the intended element.
     */
    #define SetFormat(n,f)	do { format[(n) >> 2] |= ((f) << (((n) & 0x0003) << 2)); formatcount[f]++; } while (0)
    #define GetFormat(n)	((format[(n) >> 2] >> (((n) & 0x0003) << 2)) & 0x000F)
    #define MAPVALUE(i)	(umap[((i) >> 8) & 0xFF][(i) & 0xFF])
    1.70 +
    /* Thresholds used by gentable(); parsearg() overrides them via -1 / -0. */
    int FORMAT1CNST = 10;  /* run length of unmapped entries compared against
                              when deciding to close the current cell        */
    int FORMAT0CNST = 5;   /* run length of equal (value - source) offsets
                              compared against before a format 0 cell is used */
    1.73 +void initmaps()
    1.74 +{
    1.75 +	int i,j;
    1.76 +	for(i=0;i<256;i++)
    1.77 +		for(j=0;j<256;j++) 
    1.78 +		{
    1.79 +			umap[i][j]=   NOMAPPING;
    1.80 +		}
    1.81 +	for(i=0;i<MAXCELLNUM / 4;i++)
    1.82 +		format[i]=0;
    1.83 +}
    1.84 +void SetMapValue(short u,short c)
    1.85 +{
    1.86 +        if(NOMAPPING == MAPVALUE(u))
    1.87 +	   MAPVALUE(u) = c & 0x0000FFFF;
    1.88 +        else {
    1.89 +           fprintf(stderr, "warning- duplicate mapping %x map to both %x and %x\n", u, MAPVALUE(u), c);
    1.90 +        }
    1.91 +}
    1.92 +void AddFormat2(uint16_t srcBegin)
    1.93 +{
    1.94 +	uint16_t destBegin = MAPVALUE(srcBegin);
    1.95 +	printf("Begin of Item %04X\n",numOfItem);
    1.96 +	printf(" Format 2\n");
    1.97 +	printf("  srcBegin = %04X\n", srcBegin);
    1.98 +	printf("  destBegin = %04X\n", destBegin );
    1.99 +	SetFormat(numOfItem,2);
   1.100 +	cell[numOfItem].fmt.format2.srcBegin = srcBegin;
   1.101 +	cell[numOfItem].fmt.format2.srcEnd = 0;
   1.102 +	cell[numOfItem].fmt.format2.destBegin = destBegin;
   1.103 +	printf("End of Item %04X \n\n",numOfItem);
   1.104 +	numOfItem++;
   1.105 +	/*	Unmark the umap */
   1.106 +	MAPVALUE(srcBegin) = NOMAPPING;
   1.107 +}
   1.108 +void AddFormat1(uint16_t srcBegin, uint16_t srcEnd)
   1.109 +{
   1.110 +	uint16_t i;
   1.111 +	printf("Begin of Item %04X\n",numOfItem);
   1.112 +	printf(" Format 1\n");
   1.113 +	printf("  srcBegin = %04X\n", srcBegin);
   1.114 +	printf("  srcEnd = %04X\n", srcEnd );
   1.115 +	printf("  mappingOffset = %04X\n", mappinglen);
   1.116 +	printf(" Mapping  = " );  
   1.117 +	SetFormat(numOfItem,1);
   1.118 +	cell[numOfItem].fmt.format1.srcBegin = srcBegin;
   1.119 +	cell[numOfItem].fmt.format1.srcEnd = srcEnd;
   1.120 +	cell[numOfItem].fmt.format1.mappingOffset = mappinglen;
   1.121 +	for(i=srcBegin ; i <= srcEnd ; i++,mappinglen++)
   1.122 +	{
   1.123 +		if( ((i-srcBegin) % 8) == 0)
   1.124 +			printf("\n  ");
   1.125 +		mapping[mappinglen]= MAPVALUE(i);
   1.126 +		printf("%04X ",(mapping[mappinglen]  ));
   1.127 +		/*	Unmark the umap */
   1.128 +		MAPVALUE(i) = NOMAPPING;
   1.129 +	}
   1.130 +	printf("\n");
   1.131 +	printf("End of Item %04X \n\n",numOfItem);
   1.132 +	numOfItem++;
   1.133 +}
   1.134 +void AddFormat0(uint16_t srcBegin, uint16_t srcEnd)
   1.135 +{
   1.136 +	uint16_t i;
   1.137 +	uint16_t destBegin = MAPVALUE(srcBegin);
   1.138 +	printf("Begin of Item %04X\n",numOfItem);
   1.139 +	printf(" Format 0\n");
   1.140 +	printf("  srcBegin = %04X\n", srcBegin);
   1.141 +	printf("  srcEnd = %04X\n", srcEnd );
   1.142 +	printf("  destBegin = %04X\n", destBegin );
   1.143 +	SetFormat(numOfItem,0);
   1.144 +	cell[numOfItem].fmt.format0.srcBegin = srcBegin;
   1.145 +	cell[numOfItem].fmt.format0.srcEnd = srcEnd;
   1.146 +	cell[numOfItem].fmt.format0.destBegin = destBegin;
   1.147 +	for(i=srcBegin ; i <= srcEnd ; i++)
   1.148 +	{
   1.149 +		/*	Unmark the umap */
   1.150 +		MAPVALUE(i) = NOMAPPING;
   1.151 +	}
   1.152 +	printf("End of Item %04X \n\n",numOfItem);
   1.153 +	numOfItem++;
   1.154 +}
   /* Print the mode line and MPL 2.0 license header to stdout. */
   void printnpl()
   {
     static const char header[] =
       "/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */\n"
       "/* This Source Code Form is subject to the terms of the Mozilla Public\n"
       " * License, v. 2.0. If a copy of the MPL was not distributed with this\n"
       " * file, You can obtain one at http://mozilla.org/MPL/2.0/. */\n";

     printf("%s", header);
   }
   1.163 +void gentable()
   1.164 +{
   1.165 +	/*	OK! For now, we just use format 1 for each row */
   1.166 +	/*	We need to chage this to use other format to save the space */
   1.167 +	uint16_t begin,end;
   1.168 +	uint16_t ss,gs,gp,state,gc;	
   1.169 +	uint16_t diff, lastdiff;
   1.170 +
   1.171 +        printnpl();
   1.172 +	printf("/*========================================================\n");
   1.173 +	printf("  This is a Generated file. Please don't edit it.\n");
   1.174 +	printf("\n");
   1.175 +	printf("  The tool which used to generate this file is called umaptable.\n");
   1.176 +	printf("  You can find this tool under mozilla/intl/uconv/tools/umaptable.c.\n");
   1.177 +
   1.178 +	printf("  If you have any problem of this file. Please contact \n"); 
   1.179 +	printf("  Netscape Client International Team or \n");
   1.180 +	printf("  ftang@netscape <Frank Tang> \n");
   1.181 +	printf("\n");
   1.182 +	printf("              Table in Debug form \n");
   1.183 +
   1.184 +	for(begin = 0; MAPVALUE(begin) ==NOMAPPING; begin++)
   1.185 +		;
   1.186 +	for(end = 0xFFFF; MAPVALUE(end) ==NOMAPPING; end--)
   1.187 +		;
   1.188 +	if(end != begin)
   1.189 +	{
   1.190 +	   lastdiff = MAPVALUE(begin) - begin; 
   1.191 +		for(gp=begin+1,state = 0 ; gp<=end; gp++)
   1.192 +		{
   1.193 +			int input ;
   1.194 +	   	diff = MAPVALUE(gp) - gp; 
   1.195 +		   input = (diff == lastdiff);
   1.196 +			switch(state)
   1.197 +			{
   1.198 +			 	case 0:	
   1.199 +					if(input)
   1.200 +					{
   1.201 +						state = 1;
   1.202 +					   ss =  gp -1;
   1.203 +					   gc = 2;
   1.204 +					}
   1.205 +					break;
   1.206 +			   case 1:
   1.207 +					if(input)
   1.208 +					{
   1.209 +						if(gc++ >= FORMAT0CNST)
   1.210 +						{
   1.211 +							state = 2;
   1.212 +						}
   1.213 +					}
   1.214 +					else
   1.215 +					{
   1.216 +						state = 0;
   1.217 +					}
   1.218 +					break;
   1.219 +			   case 2:
   1.220 +					if(input)
   1.221 +					{
   1.222 +					}
   1.223 +					else
   1.224 +					{
   1.225 +					   AddFormat0(ss,gp-1);
   1.226 +						state = 0;
   1.227 +					}
   1.228 +					break;
   1.229 +			}
   1.230 +			
   1.231 +		   lastdiff = diff;
   1.232 +		}	
   1.233 +	}
   1.234 +	if(state == 2)
   1.235 +		AddFormat0(ss,end);
   1.236 +
   1.237 +	for(;(MAPVALUE(begin) ==NOMAPPING) && (begin <= end); begin++)
   1.238 +		;
   1.239 + if(begin <= end)
   1.240 + {
   1.241 +		for(;(MAPVALUE(end)==NOMAPPING) && (end >= begin); end--)
   1.242 +			;
   1.243 +		for(ss=gp=begin,state = 0 ; gp<=end; gp++)
   1.244 +		{
   1.245 +			int input = (MAPVALUE(gp) == NOMAPPING);
   1.246 +			switch(state)
   1.247 +			{
   1.248 +			case 0:
   1.249 +				if(input)
   1.250 +				{
   1.251 +					gc = 1;
   1.252 +					gs = gp;
   1.253 +					state = 1;
   1.254 +			}
   1.255 +				break;
   1.256 +			case 1:
   1.257 +				if(input)
   1.258 +				{
   1.259 +					if(gc++ >= FORMAT1CNST)
   1.260 +						state = 2;
   1.261 +				}
   1.262 +				else		
   1.263 +					state = 0;
   1.264 +				break;
   1.265 +			case 2:
   1.266 +				if(input)
   1.267 +				{		
   1.268 +				}
   1.269 +				else
   1.270 +				{
   1.271 +			   	if(gs == (ss+1))
   1.272 +						AddFormat2(ss);	
   1.273 +					else
   1.274 +						AddFormat1(ss ,gs-1);	
   1.275 +					state = 0;
   1.276 +					ss = gp;
   1.277 +				}
   1.278 +						break;
   1.279 +					}
   1.280 +				}
   1.281 +				if(end == ss)
   1.282 +					AddFormat2(ss );	
   1.283 +				else
   1.284 +					AddFormat1(ss ,end );	
   1.285 +	}
   1.286 +	printf("========================================================*/\n");
   1.287 +}
   1.288 +void writetable()
   1.289 +{
   1.290 +	uint16_t i;
   1.291 +	uint16_t off1,off2,off3;
   1.292 +	uint16_t cur = 0; 
   1.293 +	uint16_t formatitem = (((numOfItem)>>2) + 1);
   1.294 +	off1 = 4;
   1.295 +	off2 = off1 + formatitem ;
   1.296 +	off3 = off2 + numOfItem * sizeof(uMapCell) / sizeof(uint16_t);
   1.297 +	/*	write itemOfList		*/
   1.298 +	printf("/* Offset=0x%04X  ItemOfList */\n  0x%04X,\n", cur++, numOfItem);
   1.299 +
   1.300 +	/*	write offsetToFormatArray	*/
   1.301 +	printf("/*-------------------------------------------------------*/\n");
   1.302 +	printf("/* Offset=0x%04X  offsetToFormatArray */\n  0x%04X,\n",  cur++,off1);
   1.303 +
   1.304 +	/*	write offsetToMapCellArray	*/
   1.305 +	printf("/*-------------------------------------------------------*/\n");
   1.306 +	printf("/* Offset=0x%04X  offsetToMapCellArray */ \n  0x%04X,\n",  cur++,off2);
   1.307 +
   1.308 +	/*	write offsetToMappingTable	*/
   1.309 +	printf("/*-------------------------------------------------------*/\n");
   1.310 +	printf("/* Offset=0x%04X  offsetToMappingTable */ \n  0x%04X,\n", cur++,off3);
   1.311 +
   1.312 +	/*	write FormatArray		*/
   1.313 +	printf("/*-------------------------------------------------------*/\n");
   1.314 +	printf("/*       Offset=0x%04X   Start of Format Array */ \n",cur);
   1.315 +	printf("/*	Total of Format 0 : 0x%04X			 */\n"
   1.316 +			, formatcount[0]);	
   1.317 +	printf("/*	Total of Format 1 : 0x%04X			 */\n"
   1.318 +			, formatcount[1]);	
   1.319 +	printf("/*	Total of Format 2 : 0x%04X			 */\n"
   1.320 +			, formatcount[2]);	
   1.321 +	printf("/*	Total of Format 3 : 0x%04X			 */\n"
   1.322 +			, formatcount[3]);	
   1.323 +	for(i=0;i<formatitem;i++,cur++)
   1.324 +	{
   1.325 +		if((i%8) == 0)	
   1.326 +			printf("\n");
   1.327 +		printf("0x%04X, ",format[i]);
   1.328 +	}
   1.329 +	printf("\n");
   1.330 +
   1.331 +	/*	write MapCellArray		*/
   1.332 +	printf("/*-------------------------------------------------------*/\n");
   1.333 +	printf("/*       Offset=0x%04X   Start of MapCell Array */ \n",cur);
   1.334 +	for(i=0;i<numOfItem;i++,cur+=3)
   1.335 +	{
   1.336 +		printf("/* %04X */    0x%04X, 0x%04X, 0x%04X, \n", 
   1.337 +			i,
   1.338 +			cell[i].fmt.format0.srcBegin,
   1.339 +			cell[i].fmt.format0.srcEnd,
   1.340 +			cell[i].fmt.format0.destBegin
   1.341 +	        );
   1.342 +	}
   1.343 +
   1.344 +	/*	write MappingTable		*/
   1.345 +	printf("/*-------------------------------------------------------*/\n");
   1.346 +	printf("/*       Offset=0x%04X   Start of MappingTable */ \n",cur);
   1.347 +	for(i=0;i<mappinglen;i++,cur++)
   1.348 +	{
   1.349 +		if((i%8) == 0)	
   1.350 +			printf("\n/* %04X */    ",i);
   1.351 +		printf("0x%04X, ",mapping[i] );
   1.352 +	}
   1.353 +	printf("\n");
   1.354 +	printf("/*	End of table Total Length = 0x%04X * 2 */\n",cur);
   1.355 +}
   1.356 +
   /*
    * Print the command-line help for the mandatory direction switch to
    * stderr.  (The first line used to read "mapping mapping table".)
    */
   void usage()
   {
     fprintf(stderr, "please indicate what kind of mapping table you want to generate:\n");
     fprintf(stderr, "\t-uf : generate *.uf (from unicode) table, or\n");
     fprintf(stderr, "\t-ut : generate *.ut (to unicode) table\n");
   }
   1.363 +void parsearg(int argc, char* argv[])
   1.364 +{
   1.365 +	int i;
   1.366 +	for(i=0;i<argc;i++)
   1.367 +	{
   1.368 +		if(strncmp("-uf", argv[i],3) == 0) {
   1.369 +                        if(! bInitFromOrTo) {
   1.370 +                           bGenerateFromUnicodeTable = 1;
   1.371 +                           bInitFromOrTo = 1;
   1.372 +                        } else {
   1.373 +                           usage();
   1.374 +                           exit(-1);
   1.375 +                        }
   1.376 +                } 
   1.377 +		if(strncmp("-ut", argv[i],3) == 0) {
   1.378 +                        if(! bInitFromOrTo) {
   1.379 +                           bGenerateFromUnicodeTable = 0;
   1.380 +                           bInitFromOrTo = 1;
   1.381 +                        } else {
   1.382 +                           usage();
   1.383 +                           exit(-1);
   1.384 +                        }
   1.385 +                } 
   1.386 +		if((strncmp("-0", argv[i],2) == 0) && ((i+1) < argc))
   1.387 +		{
   1.388 +			int cnst0;
   1.389 +			if(sscanf(argv[i+1], "%d", &cnst0) == 1)
   1.390 +			{
   1.391 +				if(cnst0 > 0)
   1.392 +				{
   1.393 +					FORMAT0CNST = cnst0;
   1.394 +				}
   1.395 +			}
   1.396 +			else
   1.397 +			{
   1.398 +				fprintf(stderr, "argc error !!!!\n");
   1.399 +				exit(-1);
   1.400 +			}
   1.401 +			i++;
   1.402 +		}
   1.403 +		if((strncmp("-1", argv[i],2) == 0) && ((i+1) < argc))
   1.404 +		{
   1.405 +			int cnst1;
   1.406 +			if(sscanf(argv[i+1], "%d", &cnst1) == 1)
   1.407 +			{
   1.408 +				if(cnst1 > 0)
   1.409 +				{
   1.410 +					FORMAT1CNST = cnst1;
   1.411 +				}
   1.412 +			}
   1.413 +			else
   1.414 +			{
   1.415 +				fprintf(stderr, "argc error !!!!\n");
   1.416 +				exit(-1);
   1.417 +			}
   1.418 +			i++;
   1.419 +		}
   1.420 +	}
   1.421 +        if(! bInitFromOrTo)
   1.422 +        {
   1.423 +                usage();
   1.424 +		exit(-1);
   1.425 +        }
   1.426 +	fprintf(stderr, "format 0 cnst = %d\n", FORMAT0CNST);
   1.427 +	fprintf(stderr, "format 1 cnst = %d\n", FORMAT1CNST);
   1.428 +        fprintf(stderr, "generate u%c table\n", 
   1.429 +                        bGenerateFromUnicodeTable ? 'f' : 't');
   1.430 +}
   1.431 +void getinput()
   1.432 +{
   1.433 +  char buf[256];
   1.434 +  short c,u;
   1.435 +  for (; fgets(buf,sizeof(buf),stdin);)
   1.436 +  {
   1.437 +     if(buf[0]=='0' && buf[1] == 'x')
   1.438 +        {
   1.439 +          sscanf(buf,"%hx %hx",&c,&u);
   1.440 +          if(bGenerateFromUnicodeTable)
   1.441 +            SetMapValue(u, c);
   1.442 +          else
   1.443 +            SetMapValue(c, u);
   1.444 +        }
   1.445 +  }
   1.446 +}
   /*
    * Entry point: read the options, reset the tables, load the mappings
    * from stdin, then print the debug listing followed by the table.
    */
   int main(int argc, char* argv[])
   {
     parsearg(argc, argv);   /* exits on a bad or missing direction switch */
     initmaps();             /* mark every code point as unmapped          */
     getinput();             /* fill umap[][] from stdin                   */
     gentable();             /* build the cells, print the debug listing   */
     writetable();           /* print the table words                      */

     return 0;
   }

mercurial