intl/uconv/tools/umaptable.c

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:bfa2590a27b9
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include <stdio.h>
8 #include <string.h>
9 #include <stdlib.h>
10 #include <stdint.h>
11
12 #define NOMAPPING 0xfffd
13
/* Format 0 cell: a contiguous source range [srcBegin, srcEnd] whose
 * destination codes keep a constant offset from the source, so only the
 * destination of srcBegin has to be stored. Three 16-bit words. */
typedef struct {
  uint16_t srcBegin;   /* first source code of the range */
  uint16_t srcEnd;     /* last source code of the range (inclusive) */
  uint16_t destBegin;  /* destination code mapped from srcBegin */
} uFormat0;
19
/* Format 1 cell: a contiguous source range [srcBegin, srcEnd] whose
 * destination codes are listed one by one in the mapping table, starting at
 * index mappingOffset. Three 16-bit words. */
typedef struct {
  uint16_t srcBegin;       /* first source code of the range */
  uint16_t srcEnd;         /* last source code of the range (inclusive) */
  uint16_t mappingOffset;  /* index of the first entry in the mapping table */
} uFormat1;
25
/* Format 2 cell: a single source code mapped to a single destination code.
 * srcEnd is present only so that every cell format has the same size. */
typedef struct {
  uint16_t srcBegin;   /* the source code */
  uint16_t srcEnd;     /* unused padding word */
  uint16_t destBegin;  /* the destination code */
} uFormat2;
31
32 typedef struct {
33 union {
34 uFormat0 format0;
35 uFormat1 format1;
36 uFormat2 format2;
37 } fmt;
38 } uMapCell;
39
40 /* =================================================
41 uTable
42 ================================================= */
/* Header of a generated table. The three offsets are counted in 16-bit
 * words from the start of the table; the format array, the map cell array
 * and the mapping table follow the four header words. */
typedef struct {
  uint16_t itemOfList;            /* number of map cells */
  uint16_t offsetToFormatArray;   /* word offset of the packed format array */
  uint16_t offsetToMapCellArray;  /* word offset of the map cell array */
  uint16_t offsetToMappingTable;  /* word offset of the mapping table */
  uint16_t data[1];               /* first word of the data that follows */
} uTable;
50
51 uint16_t umap[256][256];
52 int bInitFromOrTo = 0;
53 int bGenerateFromUnicodeTable = 0;
54
55 #define MAXCELLNUM 1000
56
57 static int numOfItem = 0;
58 uMapCell cell[MAXCELLNUM];
59 uint16_t format[MAXCELLNUM / 4];
60 uint16_t mapping[256*256];
61 static int mappinglen = 0;
62 static int formatcount[4] = {0,0,0,0};
63
/* Record format f (0..3) for cell n: formats are packed four to a 16-bit
 * word, 4 bits each, and the per-format counter is bumped.
 * Note: n and f are evaluated more than once. */
#define SetFormat(n,f) \
  do { \
    format[(n) >> 2] |= (uint16_t)((f) << (((n) & 0x0003) << 2)); \
    formatcount[(f)]++; \
  } while (0)
/* Read back the 4-bit format stored for cell n. */
#define GetFormat(n) ((format[(n) >> 2] >> (((n) & 0x0003) << 2)) & 0x000F)
/* The umap slot for 16-bit source code i; usable as an lvalue. */
#define MAPVALUE(i) (umap[((i) >> 8) & 0xFF][(i) & 0xFF])
67
/* Tuning thresholds used by gentable(); both can be overridden from the
 * command line (-1 <n> and -0 <n>). */
int FORMAT1CNST = 10;  /* unmapped-gap threshold for the format 1/2 pass */
int FORMAT0CNST = 5;   /* constant-offset run threshold for the format 0 pass */
70 void initmaps()
71 {
72 int i,j;
73 for(i=0;i<256;i++)
74 for(j=0;j<256;j++)
75 {
76 umap[i][j]= NOMAPPING;
77 }
78 for(i=0;i<MAXCELLNUM / 4;i++)
79 format[i]=0;
80 }
81 void SetMapValue(short u,short c)
82 {
83 if(NOMAPPING == MAPVALUE(u))
84 MAPVALUE(u) = c & 0x0000FFFF;
85 else {
86 fprintf(stderr, "warning- duplicate mapping %x map to both %x and %x\n", u, MAPVALUE(u), c);
87 }
88 }
89 void AddFormat2(uint16_t srcBegin)
90 {
91 uint16_t destBegin = MAPVALUE(srcBegin);
92 printf("Begin of Item %04X\n",numOfItem);
93 printf(" Format 2\n");
94 printf(" srcBegin = %04X\n", srcBegin);
95 printf(" destBegin = %04X\n", destBegin );
96 SetFormat(numOfItem,2);
97 cell[numOfItem].fmt.format2.srcBegin = srcBegin;
98 cell[numOfItem].fmt.format2.srcEnd = 0;
99 cell[numOfItem].fmt.format2.destBegin = destBegin;
100 printf("End of Item %04X \n\n",numOfItem);
101 numOfItem++;
102 /* Unmark the umap */
103 MAPVALUE(srcBegin) = NOMAPPING;
104 }
105 void AddFormat1(uint16_t srcBegin, uint16_t srcEnd)
106 {
107 uint16_t i;
108 printf("Begin of Item %04X\n",numOfItem);
109 printf(" Format 1\n");
110 printf(" srcBegin = %04X\n", srcBegin);
111 printf(" srcEnd = %04X\n", srcEnd );
112 printf(" mappingOffset = %04X\n", mappinglen);
113 printf(" Mapping = " );
114 SetFormat(numOfItem,1);
115 cell[numOfItem].fmt.format1.srcBegin = srcBegin;
116 cell[numOfItem].fmt.format1.srcEnd = srcEnd;
117 cell[numOfItem].fmt.format1.mappingOffset = mappinglen;
118 for(i=srcBegin ; i <= srcEnd ; i++,mappinglen++)
119 {
120 if( ((i-srcBegin) % 8) == 0)
121 printf("\n ");
122 mapping[mappinglen]= MAPVALUE(i);
123 printf("%04X ",(mapping[mappinglen] ));
124 /* Unmark the umap */
125 MAPVALUE(i) = NOMAPPING;
126 }
127 printf("\n");
128 printf("End of Item %04X \n\n",numOfItem);
129 numOfItem++;
130 }
131 void AddFormat0(uint16_t srcBegin, uint16_t srcEnd)
132 {
133 uint16_t i;
134 uint16_t destBegin = MAPVALUE(srcBegin);
135 printf("Begin of Item %04X\n",numOfItem);
136 printf(" Format 0\n");
137 printf(" srcBegin = %04X\n", srcBegin);
138 printf(" srcEnd = %04X\n", srcEnd );
139 printf(" destBegin = %04X\n", destBegin );
140 SetFormat(numOfItem,0);
141 cell[numOfItem].fmt.format0.srcBegin = srcBegin;
142 cell[numOfItem].fmt.format0.srcEnd = srcEnd;
143 cell[numOfItem].fmt.format0.destBegin = destBegin;
144 for(i=srcBegin ; i <= srcEnd ; i++)
145 {
146 /* Unmark the umap */
147 MAPVALUE(i) = NOMAPPING;
148 }
149 printf("End of Item %04X \n\n",numOfItem);
150 numOfItem++;
151 }
/* Write the editor mode line and the MPL 2.0 licence notice that open every
 * generated file to stdout. */
void printnpl()
{
  fputs("/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */\n",
        stdout);
  fputs("/* This Source Code Form is subject to the terms of the Mozilla Public\n",
        stdout);
  fputs(" * License, v. 2.0. If a copy of the MPL was not distributed with this\n",
        stdout);
  fputs(" * file, You can obtain one at http://mozilla.org/MPL/2.0/. */\n",
        stdout);
}
160 void gentable()
161 {
162 /* OK! For now, we just use format 1 for each row */
163 /* We need to chage this to use other format to save the space */
164 uint16_t begin,end;
165 uint16_t ss,gs,gp,state,gc;
166 uint16_t diff, lastdiff;
167
168 printnpl();
169 printf("/*========================================================\n");
170 printf(" This is a Generated file. Please don't edit it.\n");
171 printf("\n");
172 printf(" The tool which used to generate this file is called umaptable.\n");
173 printf(" You can find this tool under mozilla/intl/uconv/tools/umaptable.c.\n");
174
175 printf(" If you have any problem of this file. Please contact \n");
176 printf(" Netscape Client International Team or \n");
177 printf(" ftang@netscape <Frank Tang> \n");
178 printf("\n");
179 printf(" Table in Debug form \n");
180
181 for(begin = 0; MAPVALUE(begin) ==NOMAPPING; begin++)
182 ;
183 for(end = 0xFFFF; MAPVALUE(end) ==NOMAPPING; end--)
184 ;
185 if(end != begin)
186 {
187 lastdiff = MAPVALUE(begin) - begin;
188 for(gp=begin+1,state = 0 ; gp<=end; gp++)
189 {
190 int input ;
191 diff = MAPVALUE(gp) - gp;
192 input = (diff == lastdiff);
193 switch(state)
194 {
195 case 0:
196 if(input)
197 {
198 state = 1;
199 ss = gp -1;
200 gc = 2;
201 }
202 break;
203 case 1:
204 if(input)
205 {
206 if(gc++ >= FORMAT0CNST)
207 {
208 state = 2;
209 }
210 }
211 else
212 {
213 state = 0;
214 }
215 break;
216 case 2:
217 if(input)
218 {
219 }
220 else
221 {
222 AddFormat0(ss,gp-1);
223 state = 0;
224 }
225 break;
226 }
227
228 lastdiff = diff;
229 }
230 }
231 if(state == 2)
232 AddFormat0(ss,end);
233
234 for(;(MAPVALUE(begin) ==NOMAPPING) && (begin <= end); begin++)
235 ;
236 if(begin <= end)
237 {
238 for(;(MAPVALUE(end)==NOMAPPING) && (end >= begin); end--)
239 ;
240 for(ss=gp=begin,state = 0 ; gp<=end; gp++)
241 {
242 int input = (MAPVALUE(gp) == NOMAPPING);
243 switch(state)
244 {
245 case 0:
246 if(input)
247 {
248 gc = 1;
249 gs = gp;
250 state = 1;
251 }
252 break;
253 case 1:
254 if(input)
255 {
256 if(gc++ >= FORMAT1CNST)
257 state = 2;
258 }
259 else
260 state = 0;
261 break;
262 case 2:
263 if(input)
264 {
265 }
266 else
267 {
268 if(gs == (ss+1))
269 AddFormat2(ss);
270 else
271 AddFormat1(ss ,gs-1);
272 state = 0;
273 ss = gp;
274 }
275 break;
276 }
277 }
278 if(end == ss)
279 AddFormat2(ss );
280 else
281 AddFormat1(ss ,end );
282 }
283 printf("========================================================*/\n");
284 }
285 void writetable()
286 {
287 uint16_t i;
288 uint16_t off1,off2,off3;
289 uint16_t cur = 0;
290 uint16_t formatitem = (((numOfItem)>>2) + 1);
291 off1 = 4;
292 off2 = off1 + formatitem ;
293 off3 = off2 + numOfItem * sizeof(uMapCell) / sizeof(uint16_t);
294 /* write itemOfList */
295 printf("/* Offset=0x%04X ItemOfList */\n 0x%04X,\n", cur++, numOfItem);
296
297 /* write offsetToFormatArray */
298 printf("/*-------------------------------------------------------*/\n");
299 printf("/* Offset=0x%04X offsetToFormatArray */\n 0x%04X,\n", cur++,off1);
300
301 /* write offsetToMapCellArray */
302 printf("/*-------------------------------------------------------*/\n");
303 printf("/* Offset=0x%04X offsetToMapCellArray */ \n 0x%04X,\n", cur++,off2);
304
305 /* write offsetToMappingTable */
306 printf("/*-------------------------------------------------------*/\n");
307 printf("/* Offset=0x%04X offsetToMappingTable */ \n 0x%04X,\n", cur++,off3);
308
309 /* write FormatArray */
310 printf("/*-------------------------------------------------------*/\n");
311 printf("/* Offset=0x%04X Start of Format Array */ \n",cur);
312 printf("/* Total of Format 0 : 0x%04X */\n"
313 , formatcount[0]);
314 printf("/* Total of Format 1 : 0x%04X */\n"
315 , formatcount[1]);
316 printf("/* Total of Format 2 : 0x%04X */\n"
317 , formatcount[2]);
318 printf("/* Total of Format 3 : 0x%04X */\n"
319 , formatcount[3]);
320 for(i=0;i<formatitem;i++,cur++)
321 {
322 if((i%8) == 0)
323 printf("\n");
324 printf("0x%04X, ",format[i]);
325 }
326 printf("\n");
327
328 /* write MapCellArray */
329 printf("/*-------------------------------------------------------*/\n");
330 printf("/* Offset=0x%04X Start of MapCell Array */ \n",cur);
331 for(i=0;i<numOfItem;i++,cur+=3)
332 {
333 printf("/* %04X */ 0x%04X, 0x%04X, 0x%04X, \n",
334 i,
335 cell[i].fmt.format0.srcBegin,
336 cell[i].fmt.format0.srcEnd,
337 cell[i].fmt.format0.destBegin
338 );
339 }
340
341 /* write MappingTable */
342 printf("/*-------------------------------------------------------*/\n");
343 printf("/* Offset=0x%04X Start of MappingTable */ \n",cur);
344 for(i=0;i<mappinglen;i++,cur++)
345 {
346 if((i%8) == 0)
347 printf("\n/* %04X */ ",i);
348 printf("0x%04X, ",mapping[i] );
349 }
350 printf("\n");
351 printf("/* End of table Total Length = 0x%04X * 2 */\n",cur);
352 }
353
/* Print the command-line help to stderr: the mandatory direction option and
 * the two optional threshold options accepted by parsearg(). */
void usage()
{
  fprintf(stderr, "please indicate what kind of mapping table you want to generate:\n");
  fprintf(stderr, "\t-uf : generate *.uf (from unicode) table, or\n");
  fprintf(stderr, "\t-ut : generate *.ut (to unicode) table\n");
  fprintf(stderr, "optional tuning:\n");
  fprintf(stderr, "\t-0 <n> : set the format 0 threshold (positive integer)\n");
  fprintf(stderr, "\t-1 <n> : set the format 1 threshold (positive integer)\n");
}
360 void parsearg(int argc, char* argv[])
361 {
362 int i;
363 for(i=0;i<argc;i++)
364 {
365 if(strncmp("-uf", argv[i],3) == 0) {
366 if(! bInitFromOrTo) {
367 bGenerateFromUnicodeTable = 1;
368 bInitFromOrTo = 1;
369 } else {
370 usage();
371 exit(-1);
372 }
373 }
374 if(strncmp("-ut", argv[i],3) == 0) {
375 if(! bInitFromOrTo) {
376 bGenerateFromUnicodeTable = 0;
377 bInitFromOrTo = 1;
378 } else {
379 usage();
380 exit(-1);
381 }
382 }
383 if((strncmp("-0", argv[i],2) == 0) && ((i+1) < argc))
384 {
385 int cnst0;
386 if(sscanf(argv[i+1], "%d", &cnst0) == 1)
387 {
388 if(cnst0 > 0)
389 {
390 FORMAT0CNST = cnst0;
391 }
392 }
393 else
394 {
395 fprintf(stderr, "argc error !!!!\n");
396 exit(-1);
397 }
398 i++;
399 }
400 if((strncmp("-1", argv[i],2) == 0) && ((i+1) < argc))
401 {
402 int cnst1;
403 if(sscanf(argv[i+1], "%d", &cnst1) == 1)
404 {
405 if(cnst1 > 0)
406 {
407 FORMAT1CNST = cnst1;
408 }
409 }
410 else
411 {
412 fprintf(stderr, "argc error !!!!\n");
413 exit(-1);
414 }
415 i++;
416 }
417 }
418 if(! bInitFromOrTo)
419 {
420 usage();
421 exit(-1);
422 }
423 fprintf(stderr, "format 0 cnst = %d\n", FORMAT0CNST);
424 fprintf(stderr, "format 1 cnst = %d\n", FORMAT1CNST);
425 fprintf(stderr, "generate u%c table\n",
426 bGenerateFromUnicodeTable ? 'f' : 't');
427 }
428 void getinput()
429 {
430 char buf[256];
431 short c,u;
432 for (; fgets(buf,sizeof(buf),stdin);)
433 {
434 if(buf[0]=='0' && buf[1] == 'x')
435 {
436 sscanf(buf,"%hx %hx",&c,&u);
437 if(bGenerateFromUnicodeTable)
438 SetMapValue(u, c);
439 else
440 SetMapValue(c, u);
441 }
442 }
443 }
/* Entry point: read a mapping list from stdin and print the generated table
 * on stdout. */
int main(int argc, char* argv[])
{
  /* command line first: it decides the direction used while reading input */
  parsearg(argc, argv);

  /* load the mapping into freshly reset tables */
  initmaps();
  getinput();

  /* debug form of the table (as a comment), then the table itself */
  gentable();
  writetable();

  return 0;
}

mercurial