|
1 /* |
|
2 ****************************************************************************** |
|
3 * |
|
4 * Copyright (C) 1999-2013, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ****************************************************************************** |
|
8 * |
|
9 * |
|
10 * ucnv_io.cpp: |
|
11 * initializes global variables and defines functions pertaining to converter |
|
12 * name resolution aspect of the conversion code. |
|
13 * |
|
14 * new implementation: |
|
15 * |
|
16 * created on: 1999nov22 |
|
17 * created by: Markus W. Scherer |
|
18 * |
|
19 * Use the binary cnvalias.icu (created from convrtrs.txt) to work |
|
20 * with aliases for converter names. |
|
21 * |
|
22 * Date Name Description |
|
23 * 11/22/1999 markus Created |
|
24 * 06/28/2002 grhoten Major overhaul of the converter alias design. |
|
25 * Now an alias can map to different converters |
|
26 * depending on the specified standard. |
|
27 ******************************************************************************* |
|
28 */ |
|
29 |
|
30 #include "unicode/utypes.h" |
|
31 |
|
32 #if !UCONFIG_NO_CONVERSION |
|
33 |
|
34 #include "unicode/ucnv.h" |
|
35 #include "unicode/udata.h" |
|
36 |
|
37 #include "umutex.h" |
|
38 #include "uarrsort.h" |
|
39 #include "uassert.h" |
|
40 #include "udataswp.h" |
|
41 #include "cstring.h" |
|
42 #include "cmemory.h" |
|
43 #include "ucnv_io.h" |
|
44 #include "uenumimp.h" |
|
45 #include "ucln_cmn.h" |
|
46 |
|
47 /* Format of cnvalias.icu ----------------------------------------------------- |
|
48 * |
|
49 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt. |
|
50 * This binary form contains several tables. All indexes are to uint16_t |
|
51 * units, and not to the bytes (uint8_t units). Addressing everything on |
|
52 * 16-bit boundaries allows us to store more information with small index |
|
53 * numbers, which are also 16-bit in size. The majority of the table (except |
|
54 * the string table) are 16-bit numbers. |
|
55 * |
|
56 * First there is the size of the Table of Contents (TOC). The TOC |
|
57 * entries contain the size of each section. In order to find the offset |
|
58 * you just need to sum up the previous offsets. |
|
59 * The TOC length and entries are an array of uint32_t values. |
|
60 * The first section after the TOC starts immediately after the TOC. |
|
61 * |
|
62 * 1) This section contains a list of converters. This list contains indexes |
|
63 * into the string table for the converter name. The index of this list is |
|
64 * also used by other sections, which are mentioned later on. |
|
65 * This list is not sorted. |
|
66 * |
|
67 * 2) This section contains a list of tags. This list contains indexes |
|
68 * into the string table for the tag name. The index of this list is |
|
69 * also used by other sections, which are mentioned later on. |
|
70 * This list is in priority order of standards. |
|
71 * |
|
72 * 3) This section contains a list of sorted unique aliases. This |
|
73 * list contains indexes into the string table for the alias name. The |
|
74 * index of this list is also used by other sections, like the 4th section. |
|
75 * The index for the 3rd and 4th section is used to get the |
|
76 * alias -> converter name mapping. Section 3 and 4 form a two column table. |
|
77 * Some of the most significant bits of each index may contain other |
|
78 * information (see findConverter for details). |
|
79 * |
|
80 * 4) This section contains a list of mapped converter names. Consider this |
|
81 * as a table that maps the 3rd section to the 1st section. This list contains |
|
82 * indexes into the 1st section. The index of this list is the same index in |
|
83 * the 3rd section. There is also some extra information in the high bits of |
|
84 * each converter index in this table. Currently it's only used to say that |
|
85 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK |
|
86 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is |
|
87 * the predigested form of the 5th section so that an alias lookup can be fast. |
|
88 * |
|
89 * 5) This section contains a 2D array with indexes to the 6th section. This |
|
90 * section is the full form of all alias mappings. The column index is the |
|
91 * index into the converter list (column header). The row index is the index |
|
92 * to tag list (row header). This 2D array is the top part of a 3D array. The |
|
93 * third dimension is in the 6th section. |
|
94 * |
|
95 * 6) This is a blob of variable-length arrays. Each array starts with a size, |
|
96 * and is followed by indexes to alias names in the string table. This is |
|
97 * the third dimension to the section 5. No other section should be referencing |
|
98 * this section. |
|
99 * |
|
100 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its |
|
101 * presence indicates that a section 9 exists. UConverterAliasOptions specifies |
|
102 * what type of string normalization is used among other potential things in the |
|
103 * future. |
|
104 * |
|
105 * 8) This is the string table. All strings are indexed on an even address. |
|
106 * There are two reasons for this. First many chip architectures locate strings |
|
107 * faster on even address boundaries. Second, since all indexes are 16-bit |
|
108 * numbers, this string table can be 128KB in size instead of 64KB when we |
|
109 * only have strings starting on an even address. |
|
110 * |
|
111 * 9) When present this is a set of prenormalized strings from section 8. This |
|
112 * table contains normalized strings with the dashes and spaces stripped out, |
|
113 * and all strings lowercased. In the future, the options in section 7 may state |
|
114 * other types of normalization. |
|
115 * |
|
116 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag |
|
117 * has a unique alias among all converters. That same alias can |
|
118 * be mentioned in other standards on different converters, |
|
119 * but only one alias per tag can be unique. |
|
120 * |
|
121 * |
|
122 * Converter Names (Usually in TR22 form) |
|
123 * -------------------------------------------. |
|
124 * T / /| |
|
125 * a / / | |
|
126 * g / / | |
|
127 * s / / | |
|
128 * / / | |
|
129 * ------------------------------------------/ | |
|
130 * A | | | |
|
131 * l | | | |
|
132 * i | | / |
|
133 * a | | / |
|
134 * s | | / |
|
135 * e | | / |
|
136 * s | |/ |
|
137 * ------------------------------------------- |
|
138 * |
|
139 * |
|
140 * |
|
141 * Here is what it really looks like. It's like swiss cheese. |
|
142 * There are holes. Some converters aren't recognized by |
|
143 * a standard, or they are really old converters that the |
|
144 * standard doesn't recognize anymore. |
|
145 * |
|
146 * Converter Names (Usually in TR22 form) |
|
147 * -------------------------------------------. |
|
148 * T /##########################################/| |
|
149 * a / # # /# |
|
150 * g / # ## ## ### # ### ### ### #/ |
|
151 * s / # ##### #### ## ## #/# |
|
152 * / ### # # ## # # # ### # # #/## |
|
153 * ------------------------------------------/# # |
|
154 * A |### # # ## # # # ### # # #|# # |
|
155 * l |# # # # # ## # #|# # |
|
156 * i |# # # # # # #|# |
|
157 * a |# #|# |
|
158 * s | #|# |
|
159 * e |
|
160 * s |
|
161 * |
|
162 */ |
|
163 |
|
/**
 * Per-enumeration state used by the UEnumeration API implemented in this file.
 */
typedef struct UAliasContext {
    /* Index into gMainTable.taggedAliasLists of the list being enumerated;
       0 means there is nothing to enumerate. */
    uint32_t listOffset;
    /* Index of the next alias to hand out from that list. */
    uint32_t listIdx;
} UAliasContext;
|
171 |
|
172 static const char DATA_NAME[] = "cnvalias"; |
|
173 static const char DATA_TYPE[] = "icu"; |
|
174 |
|
175 static UDataMemory *gAliasData=NULL; |
|
176 static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER; |
|
177 |
|
/*
 * Positions of the uint32_t values at the start of cnvalias.icu.
 * Entry 0 holds the TOC length; the following entries hold section sizes.
 */
enum {
    tocLengthIndex             = 0,
    converterListIndex         = 1,
    tagListIndex               = 2,
    aliasListIndex             = 3,
    untaggedConvArrayIndex     = 4,
    taggedAliasArrayIndex      = 5,
    taggedAliasListsIndex      = 6,
    tableOptionsIndex          = 7,
    stringTableIndex           = 8,
    normalizedStringTableIndex = 9,
    offsetsCount,   /* length of the swapper's temporary offsets[] */
    minTocLength = 8 /* min. tocLength in the file, does not count the tocLengthIndex! */
};
|
192 |
|
193 static const UConverterAliasOptions defaultTableOptions = { |
|
194 UCNV_IO_UNNORMALIZED, |
|
195 0 /* containsCnvOptionInfo */ |
|
196 }; |
|
197 static UConverterAlias gMainTable; |
|
198 |
|
199 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx)) |
|
200 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx)) |
|
201 |
|
202 static UBool U_CALLCONV |
|
203 isAcceptable(void * /*context*/, |
|
204 const char * /*type*/, const char * /*name*/, |
|
205 const UDataInfo *pInfo) { |
|
206 return (UBool)( |
|
207 pInfo->size>=20 && |
|
208 pInfo->isBigEndian==U_IS_BIG_ENDIAN && |
|
209 pInfo->charsetFamily==U_CHARSET_FAMILY && |
|
210 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ |
|
211 pInfo->dataFormat[1]==0x76 && |
|
212 pInfo->dataFormat[2]==0x41 && |
|
213 pInfo->dataFormat[3]==0x6c && |
|
214 pInfo->formatVersion[0]==3); |
|
215 } |
|
216 |
|
217 static UBool U_CALLCONV ucnv_io_cleanup(void) |
|
218 { |
|
219 if (gAliasData) { |
|
220 udata_close(gAliasData); |
|
221 gAliasData = NULL; |
|
222 } |
|
223 gAliasDataInitOnce.reset(); |
|
224 |
|
225 uprv_memset(&gMainTable, 0, sizeof(gMainTable)); |
|
226 |
|
227 return TRUE; /* Everything was cleaned up */ |
|
228 } |
|
229 |
|
230 static void U_CALLCONV initAliasData(UErrorCode &errCode) { |
|
231 UDataMemory *data; |
|
232 const uint16_t *table; |
|
233 const uint32_t *sectionSizes; |
|
234 uint32_t tableStart; |
|
235 uint32_t currOffset; |
|
236 |
|
237 ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup); |
|
238 |
|
239 U_ASSERT(gAliasData == NULL); |
|
240 data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode); |
|
241 if(U_FAILURE(errCode)) { |
|
242 return; |
|
243 } |
|
244 |
|
245 sectionSizes = (const uint32_t *)udata_getMemory(data); |
|
246 table = (const uint16_t *)sectionSizes; |
|
247 |
|
248 tableStart = sectionSizes[0]; |
|
249 if (tableStart < minTocLength) { |
|
250 errCode = U_INVALID_FORMAT_ERROR; |
|
251 udata_close(data); |
|
252 return; |
|
253 } |
|
254 gAliasData = data; |
|
255 |
|
256 gMainTable.converterListSize = sectionSizes[1]; |
|
257 gMainTable.tagListSize = sectionSizes[2]; |
|
258 gMainTable.aliasListSize = sectionSizes[3]; |
|
259 gMainTable.untaggedConvArraySize = sectionSizes[4]; |
|
260 gMainTable.taggedAliasArraySize = sectionSizes[5]; |
|
261 gMainTable.taggedAliasListsSize = sectionSizes[6]; |
|
262 gMainTable.optionTableSize = sectionSizes[7]; |
|
263 gMainTable.stringTableSize = sectionSizes[8]; |
|
264 |
|
265 if (tableStart > 8) { |
|
266 gMainTable.normalizedStringTableSize = sectionSizes[9]; |
|
267 } |
|
268 |
|
269 currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t)); |
|
270 gMainTable.converterList = table + currOffset; |
|
271 |
|
272 currOffset += gMainTable.converterListSize; |
|
273 gMainTable.tagList = table + currOffset; |
|
274 |
|
275 currOffset += gMainTable.tagListSize; |
|
276 gMainTable.aliasList = table + currOffset; |
|
277 |
|
278 currOffset += gMainTable.aliasListSize; |
|
279 gMainTable.untaggedConvArray = table + currOffset; |
|
280 |
|
281 currOffset += gMainTable.untaggedConvArraySize; |
|
282 gMainTable.taggedAliasArray = table + currOffset; |
|
283 |
|
284 /* aliasLists is a 1's based array, but it has a padding character */ |
|
285 currOffset += gMainTable.taggedAliasArraySize; |
|
286 gMainTable.taggedAliasLists = table + currOffset; |
|
287 |
|
288 currOffset += gMainTable.taggedAliasListsSize; |
|
289 if (gMainTable.optionTableSize > 0 |
|
290 && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT) |
|
291 { |
|
292 /* Faster table */ |
|
293 gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset); |
|
294 } |
|
295 else { |
|
296 /* Smaller table, or I can't handle this normalization mode! |
|
297 Use the original slower table lookup. */ |
|
298 gMainTable.optionTable = &defaultTableOptions; |
|
299 } |
|
300 |
|
301 currOffset += gMainTable.optionTableSize; |
|
302 gMainTable.stringTable = table + currOffset; |
|
303 |
|
304 currOffset += gMainTable.stringTableSize; |
|
305 gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED) |
|
306 ? gMainTable.stringTable : (table + currOffset)); |
|
307 } |
|
308 |
|
309 |
|
310 static UBool |
|
311 haveAliasData(UErrorCode *pErrorCode) { |
|
312 umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode); |
|
313 return U_SUCCESS(*pErrorCode); |
|
314 } |
|
315 |
|
316 static inline UBool |
|
317 isAlias(const char *alias, UErrorCode *pErrorCode) { |
|
318 if(alias==NULL) { |
|
319 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
320 return FALSE; |
|
321 } |
|
322 return (UBool)(*alias!=0); |
|
323 } |
|
324 |
|
325 static uint32_t getTagNumber(const char *tagname) { |
|
326 if (gMainTable.tagList) { |
|
327 uint32_t tagNum; |
|
328 for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) { |
|
329 if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) { |
|
330 return tagNum; |
|
331 } |
|
332 } |
|
333 } |
|
334 |
|
335 return UINT32_MAX; |
|
336 } |
|
337 |
|
/*
 * Character classes used by ucnv_compareNames() and the strip functions.
 * Table values of MINLETTER or above are not classes: they are the
 * lowercase letter the input character maps to.
 */
enum {
    UIGNORE,    /* neither letter nor digit; skipped */
    ZERO,       /* the digit zero */
    NONZERO,    /* the digits one through nine */
    MINLETTER   /* any values from here on are lowercase letter mappings */
};

/* character types for ASCII 00..7F */
static const uint8_t asciiTypes[128] = {
    /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 30 */ ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    /* 40 */ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    /* 50 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    /* 60 */ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    /* 70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/* Bytes with the high bit set are never letters or digits in ASCII. */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)

/* character types for EBCDIC 80..FF */
static const uint8_t ebcdicTypes[128] = {
    /* 80 */ 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    /* 90 */ 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    /* A0 */ 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* C0 */ 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    /* D0 */ 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    /* E0 */ 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    /* F0 */ ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
};

/* Only bytes 80..FF are looked up; the table index is the low seven bits. */
#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)

/* Pick the classifier that matches the charset family of this build. */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
#else
#   error U_CHARSET_FAMILY is not valid
#endif
|
381 |
|
382 /* @see ucnv_compareNames */ |
|
383 U_CFUNC char * U_EXPORT2 |
|
384 ucnv_io_stripASCIIForCompare(char *dst, const char *name) { |
|
385 char *dstItr = dst; |
|
386 uint8_t type, nextType; |
|
387 char c1; |
|
388 UBool afterDigit = FALSE; |
|
389 |
|
390 while ((c1 = *name++) != 0) { |
|
391 type = GET_ASCII_TYPE(c1); |
|
392 switch (type) { |
|
393 case UIGNORE: |
|
394 afterDigit = FALSE; |
|
395 continue; /* ignore all but letters and digits */ |
|
396 case ZERO: |
|
397 if (!afterDigit) { |
|
398 nextType = GET_ASCII_TYPE(*name); |
|
399 if (nextType == ZERO || nextType == NONZERO) { |
|
400 continue; /* ignore leading zero before another digit */ |
|
401 } |
|
402 } |
|
403 break; |
|
404 case NONZERO: |
|
405 afterDigit = TRUE; |
|
406 break; |
|
407 default: |
|
408 c1 = (char)type; /* lowercased letter */ |
|
409 afterDigit = FALSE; |
|
410 break; |
|
411 } |
|
412 *dstItr++ = c1; |
|
413 } |
|
414 *dstItr = 0; |
|
415 return dst; |
|
416 } |
|
417 |
|
418 U_CFUNC char * U_EXPORT2 |
|
419 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) { |
|
420 char *dstItr = dst; |
|
421 uint8_t type, nextType; |
|
422 char c1; |
|
423 UBool afterDigit = FALSE; |
|
424 |
|
425 while ((c1 = *name++) != 0) { |
|
426 type = GET_EBCDIC_TYPE(c1); |
|
427 switch (type) { |
|
428 case UIGNORE: |
|
429 afterDigit = FALSE; |
|
430 continue; /* ignore all but letters and digits */ |
|
431 case ZERO: |
|
432 if (!afterDigit) { |
|
433 nextType = GET_EBCDIC_TYPE(*name); |
|
434 if (nextType == ZERO || nextType == NONZERO) { |
|
435 continue; /* ignore leading zero before another digit */ |
|
436 } |
|
437 } |
|
438 break; |
|
439 case NONZERO: |
|
440 afterDigit = TRUE; |
|
441 break; |
|
442 default: |
|
443 c1 = (char)type; /* lowercased letter */ |
|
444 afterDigit = FALSE; |
|
445 break; |
|
446 } |
|
447 *dstItr++ = c1; |
|
448 } |
|
449 *dstItr = 0; |
|
450 return dst; |
|
451 } |
|
452 |
|
453 /** |
|
454 * Do a fuzzy compare of two converter/alias names. |
|
455 * The comparison is case-insensitive, ignores leading zeroes if they are not |
|
456 * followed by further digits, and ignores all but letters and digits. |
|
457 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent. |
|
458 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22 |
|
459 * at http://www.unicode.org/reports/tr22/ |
|
460 * |
|
461 * This is a symmetrical (commutative) operation; order of arguments |
|
462 * is insignificant. This is an important property for sorting the |
|
463 * list (when the list is preprocessed into binary form) and for |
|
464 * performing binary searches on it at run time. |
|
465 * |
|
466 * @param name1 a converter name or alias, zero-terminated |
|
467 * @param name2 a converter name or alias, zero-terminated |
|
468 * @return 0 if the names match, or a negative value if the name1 |
|
469 * lexically precedes name2, or a positive value if the name1 |
|
470 * lexically follows name2. |
|
471 * |
|
472 * @see ucnv_io_stripForCompare |
|
473 */ |
|
474 U_CAPI int U_EXPORT2 |
|
475 ucnv_compareNames(const char *name1, const char *name2) { |
|
476 int rc; |
|
477 uint8_t type, nextType; |
|
478 char c1, c2; |
|
479 UBool afterDigit1 = FALSE, afterDigit2 = FALSE; |
|
480 |
|
481 for (;;) { |
|
482 while ((c1 = *name1++) != 0) { |
|
483 type = GET_CHAR_TYPE(c1); |
|
484 switch (type) { |
|
485 case UIGNORE: |
|
486 afterDigit1 = FALSE; |
|
487 continue; /* ignore all but letters and digits */ |
|
488 case ZERO: |
|
489 if (!afterDigit1) { |
|
490 nextType = GET_CHAR_TYPE(*name1); |
|
491 if (nextType == ZERO || nextType == NONZERO) { |
|
492 continue; /* ignore leading zero before another digit */ |
|
493 } |
|
494 } |
|
495 break; |
|
496 case NONZERO: |
|
497 afterDigit1 = TRUE; |
|
498 break; |
|
499 default: |
|
500 c1 = (char)type; /* lowercased letter */ |
|
501 afterDigit1 = FALSE; |
|
502 break; |
|
503 } |
|
504 break; /* deliver c1 */ |
|
505 } |
|
506 while ((c2 = *name2++) != 0) { |
|
507 type = GET_CHAR_TYPE(c2); |
|
508 switch (type) { |
|
509 case UIGNORE: |
|
510 afterDigit2 = FALSE; |
|
511 continue; /* ignore all but letters and digits */ |
|
512 case ZERO: |
|
513 if (!afterDigit2) { |
|
514 nextType = GET_CHAR_TYPE(*name2); |
|
515 if (nextType == ZERO || nextType == NONZERO) { |
|
516 continue; /* ignore leading zero before another digit */ |
|
517 } |
|
518 } |
|
519 break; |
|
520 case NONZERO: |
|
521 afterDigit2 = TRUE; |
|
522 break; |
|
523 default: |
|
524 c2 = (char)type; /* lowercased letter */ |
|
525 afterDigit2 = FALSE; |
|
526 break; |
|
527 } |
|
528 break; /* deliver c2 */ |
|
529 } |
|
530 |
|
531 /* If we reach the ends of both strings then they match */ |
|
532 if ((c1|c2)==0) { |
|
533 return 0; |
|
534 } |
|
535 |
|
536 /* Case-insensitive comparison */ |
|
537 rc = (int)(unsigned char)c1 - (int)(unsigned char)c2; |
|
538 if (rc != 0) { |
|
539 return rc; |
|
540 } |
|
541 } |
|
542 } |
|
543 |
|
544 /* |
|
545 * search for an alias |
|
546 * return the converter number index for gConverterList |
|
547 */ |
|
548 static inline uint32_t |
|
549 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { |
|
550 uint32_t mid, start, limit; |
|
551 uint32_t lastMid; |
|
552 int result; |
|
553 int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED); |
|
554 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
|
555 |
|
556 if (!isUnnormalized) { |
|
557 if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) { |
|
558 *pErrorCode = U_BUFFER_OVERFLOW_ERROR; |
|
559 return UINT32_MAX; |
|
560 } |
|
561 |
|
562 /* Lower case and remove ignoreable characters. */ |
|
563 ucnv_io_stripForCompare(strippedName, alias); |
|
564 alias = strippedName; |
|
565 } |
|
566 |
|
567 /* do a binary search for the alias */ |
|
568 start = 0; |
|
569 limit = gMainTable.untaggedConvArraySize; |
|
570 mid = limit; |
|
571 lastMid = UINT32_MAX; |
|
572 |
|
573 for (;;) { |
|
574 mid = (uint32_t)((start + limit) / 2); |
|
575 if (lastMid == mid) { /* Have we moved? */ |
|
576 break; /* We haven't moved, and it wasn't found. */ |
|
577 } |
|
578 lastMid = mid; |
|
579 if (isUnnormalized) { |
|
580 result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid])); |
|
581 } |
|
582 else { |
|
583 result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid])); |
|
584 } |
|
585 |
|
586 if (result < 0) { |
|
587 limit = mid; |
|
588 } else if (result > 0) { |
|
589 start = mid; |
|
590 } else { |
|
591 /* Since the gencnval tool folds duplicates into one entry, |
|
592 * this alias in gAliasList is unique, but different standards |
|
593 * may map an alias to different converters. |
|
594 */ |
|
595 if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) { |
|
596 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING; |
|
597 } |
|
598 /* State whether the canonical converter name contains an option. |
|
599 This information is contained in this list in order to maintain backward & forward compatibility. */ |
|
600 if (containsOption) { |
|
601 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo; |
|
602 *containsOption = (UBool)((containsCnvOptionInfo |
|
603 && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0)) |
|
604 || !containsCnvOptionInfo); |
|
605 } |
|
606 return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK; |
|
607 } |
|
608 } |
|
609 |
|
610 return UINT32_MAX; |
|
611 } |
|
612 |
|
613 /* |
|
614 * Is this alias in this list? |
|
615 * alias and listOffset should be non-NULL. |
|
616 */ |
|
617 static inline UBool |
|
618 isAliasInList(const char *alias, uint32_t listOffset) { |
|
619 if (listOffset) { |
|
620 uint32_t currAlias; |
|
621 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; |
|
622 /* +1 to skip listCount */ |
|
623 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; |
|
624 for (currAlias = 0; currAlias < listCount; currAlias++) { |
|
625 if (currList[currAlias] |
|
626 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0) |
|
627 { |
|
628 return TRUE; |
|
629 } |
|
630 } |
|
631 } |
|
632 return FALSE; |
|
633 } |
|
634 |
|
635 /* |
|
636 * Search for an standard name of an alias (what is the default name |
|
637 * that this standard uses?) |
|
638 * return the listOffset for gTaggedAliasLists. If it's 0, |
|
639 * the it couldn't be found, but the parameters are valid. |
|
640 */ |
|
641 static uint32_t |
|
642 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) { |
|
643 uint32_t idx; |
|
644 uint32_t listOffset; |
|
645 uint32_t convNum; |
|
646 UErrorCode myErr = U_ZERO_ERROR; |
|
647 uint32_t tagNum = getTagNumber(standard); |
|
648 |
|
649 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ |
|
650 convNum = findConverter(alias, NULL, &myErr); |
|
651 if (myErr != U_ZERO_ERROR) { |
|
652 *pErrorCode = myErr; |
|
653 } |
|
654 |
|
655 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { |
|
656 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; |
|
657 if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) { |
|
658 return listOffset; |
|
659 } |
|
660 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { |
|
661 /* Uh Oh! They used an ambiguous alias. |
|
662 We have to search the whole swiss cheese starting |
|
663 at the highest standard affinity. |
|
664 This may take a while. |
|
665 */ |
|
666 for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) { |
|
667 listOffset = gMainTable.taggedAliasArray[idx]; |
|
668 if (listOffset && isAliasInList(alias, listOffset)) { |
|
669 uint32_t currTagNum = idx/gMainTable.converterListSize; |
|
670 uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize); |
|
671 uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum]; |
|
672 if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) { |
|
673 return tempListOffset; |
|
674 } |
|
675 /* else keep on looking */ |
|
676 /* We could speed this up by starting on the next row |
|
677 because an alias is unique per row, right now. |
|
678 This would change if alias versioning appears. */ |
|
679 } |
|
680 } |
|
681 /* The standard doesn't know about the alias */ |
|
682 } |
|
683 /* else no default name */ |
|
684 return 0; |
|
685 } |
|
686 /* else converter or tag not found */ |
|
687 |
|
688 return UINT32_MAX; |
|
689 } |
|
690 |
|
691 /* Return the canonical name */ |
|
692 static uint32_t |
|
693 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) { |
|
694 uint32_t idx; |
|
695 uint32_t listOffset; |
|
696 uint32_t convNum; |
|
697 UErrorCode myErr = U_ZERO_ERROR; |
|
698 uint32_t tagNum = getTagNumber(standard); |
|
699 |
|
700 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ |
|
701 convNum = findConverter(alias, NULL, &myErr); |
|
702 if (myErr != U_ZERO_ERROR) { |
|
703 *pErrorCode = myErr; |
|
704 } |
|
705 |
|
706 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { |
|
707 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; |
|
708 if (listOffset && isAliasInList(alias, listOffset)) { |
|
709 return convNum; |
|
710 } |
|
711 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { |
|
712 /* Uh Oh! They used an ambiguous alias. |
|
713 We have to search one slice of the swiss cheese. |
|
714 We search only in the requested tag, not the whole thing. |
|
715 This may take a while. |
|
716 */ |
|
717 uint32_t convStart = (tagNum)*gMainTable.converterListSize; |
|
718 uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize; |
|
719 for (idx = convStart; idx < convLimit; idx++) { |
|
720 listOffset = gMainTable.taggedAliasArray[idx]; |
|
721 if (listOffset && isAliasInList(alias, listOffset)) { |
|
722 return idx-convStart; |
|
723 } |
|
724 } |
|
725 /* The standard doesn't know about the alias */ |
|
726 } |
|
727 /* else no canonical name */ |
|
728 } |
|
729 /* else converter or tag not found */ |
|
730 |
|
731 return UINT32_MAX; |
|
732 } |
|
733 |
|
734 |
|
735 |
|
736 U_CFUNC const char * |
|
737 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { |
|
738 const char *aliasTmp = alias; |
|
739 int32_t i = 0; |
|
740 for (i = 0; i < 2; i++) { |
|
741 if (i == 1) { |
|
742 /* |
|
743 * After the first unsuccess converter lookup, check to see if |
|
744 * the name begins with 'x-'. If it does, strip it off and try |
|
745 * again. This behaviour is similar to how ICU4J does it. |
|
746 */ |
|
747 if (aliasTmp[0] == 'x' || aliasTmp[1] == '-') { |
|
748 aliasTmp = aliasTmp+2; |
|
749 } else { |
|
750 break; |
|
751 } |
|
752 } |
|
753 if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) { |
|
754 uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode); |
|
755 if (convNum < gMainTable.converterListSize) { |
|
756 return GET_STRING(gMainTable.converterList[convNum]); |
|
757 } |
|
758 /* else converter not found */ |
|
759 } else { |
|
760 break; |
|
761 } |
|
762 } |
|
763 |
|
764 return NULL; |
|
765 } |
|
766 |
|
767 static int32_t U_CALLCONV |
|
768 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { |
|
769 int32_t value = 0; |
|
770 UAliasContext *myContext = (UAliasContext *)(enumerator->context); |
|
771 uint32_t listOffset = myContext->listOffset; |
|
772 |
|
773 if (listOffset) { |
|
774 value = gMainTable.taggedAliasLists[listOffset]; |
|
775 } |
|
776 return value; |
|
777 } |
|
778 |
|
779 static const char* U_CALLCONV |
|
780 ucnv_io_nextStandardAliases(UEnumeration *enumerator, |
|
781 int32_t* resultLength, |
|
782 UErrorCode * /*pErrorCode*/) |
|
783 { |
|
784 UAliasContext *myContext = (UAliasContext *)(enumerator->context); |
|
785 uint32_t listOffset = myContext->listOffset; |
|
786 |
|
787 if (listOffset) { |
|
788 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; |
|
789 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; |
|
790 |
|
791 if (myContext->listIdx < listCount) { |
|
792 const char *myStr = GET_STRING(currList[myContext->listIdx++]); |
|
793 if (resultLength) { |
|
794 *resultLength = (int32_t)uprv_strlen(myStr); |
|
795 } |
|
796 return myStr; |
|
797 } |
|
798 } |
|
799 /* Either we accessed a zero length list, or we enumerated too far. */ |
|
800 if (resultLength) { |
|
801 *resultLength = 0; |
|
802 } |
|
803 return NULL; |
|
804 } |
|
805 |
|
806 static void U_CALLCONV |
|
807 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { |
|
808 ((UAliasContext *)(enumerator->context))->listIdx = 0; |
|
809 } |
|
810 |
|
811 static void U_CALLCONV |
|
812 ucnv_io_closeUEnumeration(UEnumeration *enumerator) { |
|
813 uprv_free(enumerator->context); |
|
814 uprv_free(enumerator); |
|
815 } |
|
816 |
|
817 /* Enumerate the aliases for the specified converter and standard tag */ |
|
818 static const UEnumeration gEnumAliases = { |
|
819 NULL, |
|
820 NULL, |
|
821 ucnv_io_closeUEnumeration, |
|
822 ucnv_io_countStandardAliases, |
|
823 uenum_unextDefault, |
|
824 ucnv_io_nextStandardAliases, |
|
825 ucnv_io_resetStandardAliases |
|
826 }; |
|
827 |
|
828 U_CAPI UEnumeration * U_EXPORT2 |
|
829 ucnv_openStandardNames(const char *convName, |
|
830 const char *standard, |
|
831 UErrorCode *pErrorCode) |
|
832 { |
|
833 UEnumeration *myEnum = NULL; |
|
834 if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) { |
|
835 uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode); |
|
836 |
|
837 /* When listOffset == 0, we want to acknowledge that the |
|
838 converter name and standard are okay, but there |
|
839 is nothing to enumerate. */ |
|
840 if (listOffset < gMainTable.taggedAliasListsSize) { |
|
841 UAliasContext *myContext; |
|
842 |
|
843 myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))); |
|
844 if (myEnum == NULL) { |
|
845 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
|
846 return NULL; |
|
847 } |
|
848 uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration)); |
|
849 myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext))); |
|
850 if (myContext == NULL) { |
|
851 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
|
852 uprv_free(myEnum); |
|
853 return NULL; |
|
854 } |
|
855 myContext->listOffset = listOffset; |
|
856 myContext->listIdx = 0; |
|
857 myEnum->context = myContext; |
|
858 } |
|
859 /* else converter or tag not found */ |
|
860 } |
|
861 return myEnum; |
|
862 } |
|
863 |
|
864 static uint16_t |
|
865 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) { |
|
866 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { |
|
867 uint32_t convNum = findConverter(alias, NULL, pErrorCode); |
|
868 if (convNum < gMainTable.converterListSize) { |
|
869 /* tagListNum - 1 is the ALL tag */ |
|
870 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; |
|
871 |
|
872 if (listOffset) { |
|
873 return gMainTable.taggedAliasLists[listOffset]; |
|
874 } |
|
875 /* else this shouldn't happen. internal program error */ |
|
876 } |
|
877 /* else converter not found */ |
|
878 } |
|
879 return 0; |
|
880 } |
|
881 |
|
882 static uint16_t |
|
883 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) { |
|
884 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { |
|
885 uint32_t currAlias; |
|
886 uint32_t convNum = findConverter(alias, NULL, pErrorCode); |
|
887 if (convNum < gMainTable.converterListSize) { |
|
888 /* tagListNum - 1 is the ALL tag */ |
|
889 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; |
|
890 |
|
891 if (listOffset) { |
|
892 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; |
|
893 /* +1 to skip listCount */ |
|
894 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; |
|
895 |
|
896 for (currAlias = start; currAlias < listCount; currAlias++) { |
|
897 aliases[currAlias] = GET_STRING(currList[currAlias]); |
|
898 } |
|
899 } |
|
900 /* else this shouldn't happen. internal program error */ |
|
901 } |
|
902 /* else converter not found */ |
|
903 } |
|
904 return 0; |
|
905 } |
|
906 |
|
907 static const char * |
|
908 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) { |
|
909 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { |
|
910 uint32_t convNum = findConverter(alias, NULL, pErrorCode); |
|
911 if (convNum < gMainTable.converterListSize) { |
|
912 /* tagListNum - 1 is the ALL tag */ |
|
913 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; |
|
914 |
|
915 if (listOffset) { |
|
916 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; |
|
917 /* +1 to skip listCount */ |
|
918 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; |
|
919 |
|
920 if (n < listCount) { |
|
921 return GET_STRING(currList[n]); |
|
922 } |
|
923 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; |
|
924 } |
|
925 /* else this shouldn't happen. internal program error */ |
|
926 } |
|
927 /* else converter not found */ |
|
928 } |
|
929 return NULL; |
|
930 } |
|
931 |
|
932 static uint16_t |
|
933 ucnv_io_countStandards(UErrorCode *pErrorCode) { |
|
934 if (haveAliasData(pErrorCode)) { |
|
935 /* Don't include the empty list */ |
|
936 return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS); |
|
937 } |
|
938 |
|
939 return 0; |
|
940 } |
|
941 |
|
942 U_CAPI const char * U_EXPORT2 |
|
943 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) { |
|
944 if (haveAliasData(pErrorCode)) { |
|
945 if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) { |
|
946 return GET_STRING(gMainTable.tagList[n]); |
|
947 } |
|
948 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; |
|
949 } |
|
950 |
|
951 return NULL; |
|
952 } |
|
953 |
|
954 U_CAPI const char * U_EXPORT2 |
|
955 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) { |
|
956 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { |
|
957 uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode); |
|
958 |
|
959 if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) { |
|
960 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; |
|
961 |
|
962 /* Get the preferred name from this list */ |
|
963 if (currList[0]) { |
|
964 return GET_STRING(currList[0]); |
|
965 } |
|
966 /* else someone screwed up the alias table. */ |
|
967 /* *pErrorCode = U_INVALID_FORMAT_ERROR */ |
|
968 } |
|
969 } |
|
970 |
|
971 return NULL; |
|
972 } |
|
973 |
|
974 U_CAPI uint16_t U_EXPORT2 |
|
975 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode) |
|
976 { |
|
977 return ucnv_io_countAliases(alias, pErrorCode); |
|
978 } |
|
979 |
|
980 |
|
981 U_CAPI const char* U_EXPORT2 |
|
982 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) |
|
983 { |
|
984 return ucnv_io_getAlias(alias, n, pErrorCode); |
|
985 } |
|
986 |
|
987 U_CAPI void U_EXPORT2 |
|
988 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) |
|
989 { |
|
990 ucnv_io_getAliases(alias, 0, aliases, pErrorCode); |
|
991 } |
|
992 |
|
993 U_CAPI uint16_t U_EXPORT2 |
|
994 ucnv_countStandards(void) |
|
995 { |
|
996 UErrorCode err = U_ZERO_ERROR; |
|
997 return ucnv_io_countStandards(&err); |
|
998 } |
|
999 |
|
1000 U_CAPI const char * U_EXPORT2 |
|
1001 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) { |
|
1002 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { |
|
1003 uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode); |
|
1004 |
|
1005 if (convNum < gMainTable.converterListSize) { |
|
1006 return GET_STRING(gMainTable.converterList[convNum]); |
|
1007 } |
|
1008 } |
|
1009 |
|
1010 return NULL; |
|
1011 } |
|
1012 |
|
1013 static int32_t U_CALLCONV |
|
1014 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) { |
|
1015 return gMainTable.converterListSize; |
|
1016 } |
|
1017 |
|
1018 static const char* U_CALLCONV |
|
1019 ucnv_io_nextAllConverters(UEnumeration *enumerator, |
|
1020 int32_t* resultLength, |
|
1021 UErrorCode * /*pErrorCode*/) |
|
1022 { |
|
1023 uint16_t *myContext = (uint16_t *)(enumerator->context); |
|
1024 |
|
1025 if (*myContext < gMainTable.converterListSize) { |
|
1026 const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]); |
|
1027 if (resultLength) { |
|
1028 *resultLength = (int32_t)uprv_strlen(myStr); |
|
1029 } |
|
1030 return myStr; |
|
1031 } |
|
1032 /* Either we accessed a zero length list, or we enumerated too far. */ |
|
1033 if (resultLength) { |
|
1034 *resultLength = 0; |
|
1035 } |
|
1036 return NULL; |
|
1037 } |
|
1038 |
|
1039 static void U_CALLCONV |
|
1040 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { |
|
1041 *((uint16_t *)(enumerator->context)) = 0; |
|
1042 } |
|
1043 |
|
1044 static const UEnumeration gEnumAllConverters = { |
|
1045 NULL, |
|
1046 NULL, |
|
1047 ucnv_io_closeUEnumeration, |
|
1048 ucnv_io_countAllConverters, |
|
1049 uenum_unextDefault, |
|
1050 ucnv_io_nextAllConverters, |
|
1051 ucnv_io_resetAllConverters |
|
1052 }; |
|
1053 |
|
1054 U_CAPI UEnumeration * U_EXPORT2 |
|
1055 ucnv_openAllNames(UErrorCode *pErrorCode) { |
|
1056 UEnumeration *myEnum = NULL; |
|
1057 if (haveAliasData(pErrorCode)) { |
|
1058 uint16_t *myContext; |
|
1059 |
|
1060 myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))); |
|
1061 if (myEnum == NULL) { |
|
1062 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
|
1063 return NULL; |
|
1064 } |
|
1065 uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration)); |
|
1066 myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t))); |
|
1067 if (myContext == NULL) { |
|
1068 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
|
1069 uprv_free(myEnum); |
|
1070 return NULL; |
|
1071 } |
|
1072 *myContext = 0; |
|
1073 myEnum->context = myContext; |
|
1074 } |
|
1075 return myEnum; |
|
1076 } |
|
1077 |
|
1078 U_CFUNC uint16_t |
|
1079 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) { |
|
1080 if (haveAliasData(pErrorCode)) { |
|
1081 return (uint16_t)gMainTable.converterListSize; |
|
1082 } |
|
1083 return 0; |
|
1084 } |
|
1085 |
|
1086 /* alias table swapping ----------------------------------------------------- */ |
|
1087 |
|
1088 typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name); |
|
1089 |
|
1090 /* |
|
1091 * row of a temporary array |
|
1092 * |
|
1093 * gets platform-endian charset string indexes and sorting indexes; |
|
1094 * after sorting this array by strings, the actual arrays are permutated |
|
1095 * according to the sorting indexes |
|
1096 */ |
|
1097 typedef struct TempRow { |
|
1098 uint16_t strIndex, sortIndex; |
|
1099 } TempRow; |
|
1100 |
|
1101 typedef struct TempAliasTable { |
|
1102 const char *chars; |
|
1103 TempRow *rows; |
|
1104 uint16_t *resort; |
|
1105 StripForCompareFn *stripForCompare; |
|
1106 } TempAliasTable; |
|
1107 |
|
/*
 * Number of rows (and resort entries) that ucnv_swapAliases() keeps on the
 * stack; larger alias lists use heap memory instead.
 */
enum {
    STACK_ROW_CAPACITY=500
};
|
1111 |
|
1112 static int32_t |
|
1113 io_compareRows(const void *context, const void *left, const void *right) { |
|
1114 char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH], |
|
1115 strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
|
1116 |
|
1117 TempAliasTable *tempTable=(TempAliasTable *)context; |
|
1118 const char *chars=tempTable->chars; |
|
1119 |
|
1120 return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex), |
|
1121 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex)); |
|
1122 } |
|
1123 |
|
1124 U_CAPI int32_t U_EXPORT2 |
|
1125 ucnv_swapAliases(const UDataSwapper *ds, |
|
1126 const void *inData, int32_t length, void *outData, |
|
1127 UErrorCode *pErrorCode) { |
|
1128 const UDataInfo *pInfo; |
|
1129 int32_t headerSize; |
|
1130 |
|
1131 const uint16_t *inTable; |
|
1132 const uint32_t *inSectionSizes; |
|
1133 uint32_t toc[offsetsCount]; |
|
1134 uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */ |
|
1135 uint32_t i, count, tocLength, topOffset; |
|
1136 |
|
1137 TempRow rows[STACK_ROW_CAPACITY]; |
|
1138 uint16_t resort[STACK_ROW_CAPACITY]; |
|
1139 TempAliasTable tempTable; |
|
1140 |
|
1141 /* udata_swapDataHeader checks the arguments */ |
|
1142 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); |
|
1143 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
1144 return 0; |
|
1145 } |
|
1146 |
|
1147 /* check data format and format version */ |
|
1148 pInfo=(const UDataInfo *)((const char *)inData+4); |
|
1149 if(!( |
|
1150 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ |
|
1151 pInfo->dataFormat[1]==0x76 && |
|
1152 pInfo->dataFormat[2]==0x41 && |
|
1153 pInfo->dataFormat[3]==0x6c && |
|
1154 pInfo->formatVersion[0]==3 |
|
1155 )) { |
|
1156 udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n", |
|
1157 pInfo->dataFormat[0], pInfo->dataFormat[1], |
|
1158 pInfo->dataFormat[2], pInfo->dataFormat[3], |
|
1159 pInfo->formatVersion[0]); |
|
1160 *pErrorCode=U_UNSUPPORTED_ERROR; |
|
1161 return 0; |
|
1162 } |
|
1163 |
|
1164 /* an alias table must contain at least the table of contents array */ |
|
1165 if(length>=0 && (length-headerSize)<4*(1+minTocLength)) { |
|
1166 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", |
|
1167 length-headerSize); |
|
1168 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
1169 return 0; |
|
1170 } |
|
1171 |
|
1172 inSectionSizes=(const uint32_t *)((const char *)inData+headerSize); |
|
1173 inTable=(const uint16_t *)inSectionSizes; |
|
1174 uprv_memset(toc, 0, sizeof(toc)); |
|
1175 toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]); |
|
1176 if(tocLength<minTocLength || offsetsCount<=tocLength) { |
|
1177 udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength); |
|
1178 *pErrorCode=U_INVALID_FORMAT_ERROR; |
|
1179 return 0; |
|
1180 } |
|
1181 |
|
1182 /* read the known part of the table of contents */ |
|
1183 for(i=converterListIndex; i<=tocLength; ++i) { |
|
1184 toc[i]=ds->readUInt32(inSectionSizes[i]); |
|
1185 } |
|
1186 |
|
1187 /* compute offsets */ |
|
1188 uprv_memset(offsets, 0, sizeof(offsets)); |
|
1189 offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */ |
|
1190 for(i=tagListIndex; i<=tocLength; ++i) { |
|
1191 offsets[i]=offsets[i-1]+toc[i-1]; |
|
1192 } |
|
1193 |
|
1194 /* compute the overall size of the after-header data, in numbers of 16-bit units */ |
|
1195 topOffset=offsets[i-1]+toc[i-1]; |
|
1196 |
|
1197 if(length>=0) { |
|
1198 uint16_t *outTable; |
|
1199 const uint16_t *p, *p2; |
|
1200 uint16_t *q, *q2; |
|
1201 uint16_t oldIndex; |
|
1202 |
|
1203 if((length-headerSize)<(2*(int32_t)topOffset)) { |
|
1204 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", |
|
1205 length-headerSize); |
|
1206 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
|
1207 return 0; |
|
1208 } |
|
1209 |
|
1210 outTable=(uint16_t *)((char *)outData+headerSize); |
|
1211 |
|
1212 /* swap the entire table of contents */ |
|
1213 ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode); |
|
1214 |
|
1215 /* swap unormalized strings & normalized strings */ |
|
1216 ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]), |
|
1217 outTable+offsets[stringTableIndex], pErrorCode); |
|
1218 if(U_FAILURE(*pErrorCode)) { |
|
1219 udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n"); |
|
1220 return 0; |
|
1221 } |
|
1222 |
|
1223 if(ds->inCharset==ds->outCharset) { |
|
1224 /* no need to sort, just swap all 16-bit values together */ |
|
1225 ds->swapArray16(ds, |
|
1226 inTable+offsets[converterListIndex], |
|
1227 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]), |
|
1228 outTable+offsets[converterListIndex], |
|
1229 pErrorCode); |
|
1230 } else { |
|
1231 /* allocate the temporary table for sorting */ |
|
1232 count=toc[aliasListIndex]; |
|
1233 |
|
1234 tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */ |
|
1235 |
|
1236 if(count<=STACK_ROW_CAPACITY) { |
|
1237 tempTable.rows=rows; |
|
1238 tempTable.resort=resort; |
|
1239 } else { |
|
1240 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2); |
|
1241 if(tempTable.rows==NULL) { |
|
1242 udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n", |
|
1243 count); |
|
1244 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
1245 return 0; |
|
1246 } |
|
1247 tempTable.resort=(uint16_t *)(tempTable.rows+count); |
|
1248 } |
|
1249 |
|
1250 if(ds->outCharset==U_ASCII_FAMILY) { |
|
1251 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare; |
|
1252 } else /* U_EBCDIC_FAMILY */ { |
|
1253 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare; |
|
1254 } |
|
1255 |
|
1256 /* |
|
1257 * Sort unique aliases+mapped names. |
|
1258 * |
|
1259 * We need to sort the list again by outCharset strings because they |
|
1260 * sort differently for different charset families. |
|
1261 * First we set up a temporary table with the string indexes and |
|
1262 * sorting indexes and sort that. |
|
1263 * Then we permutate and copy/swap the actual values. |
|
1264 */ |
|
1265 p=inTable+offsets[aliasListIndex]; |
|
1266 q=outTable+offsets[aliasListIndex]; |
|
1267 |
|
1268 p2=inTable+offsets[untaggedConvArrayIndex]; |
|
1269 q2=outTable+offsets[untaggedConvArrayIndex]; |
|
1270 |
|
1271 for(i=0; i<count; ++i) { |
|
1272 tempTable.rows[i].strIndex=ds->readUInt16(p[i]); |
|
1273 tempTable.rows[i].sortIndex=(uint16_t)i; |
|
1274 } |
|
1275 |
|
1276 uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow), |
|
1277 io_compareRows, &tempTable, |
|
1278 FALSE, pErrorCode); |
|
1279 |
|
1280 if(U_SUCCESS(*pErrorCode)) { |
|
1281 /* copy/swap/permutate items */ |
|
1282 if(p!=q) { |
|
1283 for(i=0; i<count; ++i) { |
|
1284 oldIndex=tempTable.rows[i].sortIndex; |
|
1285 ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode); |
|
1286 ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode); |
|
1287 } |
|
1288 } else { |
|
1289 /* |
|
1290 * If we swap in-place, then the permutation must use another |
|
1291 * temporary array (tempTable.resort) |
|
1292 * before the results are copied to the outBundle. |
|
1293 */ |
|
1294 uint16_t *r=tempTable.resort; |
|
1295 |
|
1296 for(i=0; i<count; ++i) { |
|
1297 oldIndex=tempTable.rows[i].sortIndex; |
|
1298 ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode); |
|
1299 } |
|
1300 uprv_memcpy(q, r, 2*count); |
|
1301 |
|
1302 for(i=0; i<count; ++i) { |
|
1303 oldIndex=tempTable.rows[i].sortIndex; |
|
1304 ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode); |
|
1305 } |
|
1306 uprv_memcpy(q2, r, 2*count); |
|
1307 } |
|
1308 } |
|
1309 |
|
1310 if(tempTable.rows!=rows) { |
|
1311 uprv_free(tempTable.rows); |
|
1312 } |
|
1313 |
|
1314 if(U_FAILURE(*pErrorCode)) { |
|
1315 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n", |
|
1316 count); |
|
1317 return 0; |
|
1318 } |
|
1319 |
|
1320 /* swap remaining 16-bit values */ |
|
1321 ds->swapArray16(ds, |
|
1322 inTable+offsets[converterListIndex], |
|
1323 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]), |
|
1324 outTable+offsets[converterListIndex], |
|
1325 pErrorCode); |
|
1326 ds->swapArray16(ds, |
|
1327 inTable+offsets[taggedAliasArrayIndex], |
|
1328 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]), |
|
1329 outTable+offsets[taggedAliasArrayIndex], |
|
1330 pErrorCode); |
|
1331 } |
|
1332 } |
|
1333 |
|
1334 return headerSize+2*(int32_t)topOffset; |
|
1335 } |
|
1336 |
|
1337 #endif |
|
1338 |
|
1339 |
|
1340 /* |
|
1341 * Hey, Emacs, please set the following: |
|
1342 * |
|
1343 * Local Variables: |
|
1344 * indent-tabs-mode: nil |
|
1345 * End: |
|
1346 * |
|
1347 */ |