michael@0: /* michael@0: ****************************************************************************** michael@0: * michael@0: * Copyright (C) 1999-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ****************************************************************************** michael@0: * michael@0: * michael@0: * ucnv_io.cpp: michael@0: * initializes global variables and defines functions pertaining to converter michael@0: * name resolution aspect of the conversion code. michael@0: * michael@0: * new implementation: michael@0: * michael@0: * created on: 1999nov22 michael@0: * created by: Markus W. Scherer michael@0: * michael@0: * Use the binary cnvalias.icu (created from convrtrs.txt) to work michael@0: * with aliases for converter names. michael@0: * michael@0: * Date Name Description michael@0: * 11/22/1999 markus Created michael@0: * 06/28/2002 grhoten Major overhaul of the converter alias design. michael@0: * Now an alias can map to different converters michael@0: * depending on the specified standard. michael@0: ******************************************************************************* michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_CONVERSION michael@0: michael@0: #include "unicode/ucnv.h" michael@0: #include "unicode/udata.h" michael@0: michael@0: #include "umutex.h" michael@0: #include "uarrsort.h" michael@0: #include "uassert.h" michael@0: #include "udataswp.h" michael@0: #include "cstring.h" michael@0: #include "cmemory.h" michael@0: #include "ucnv_io.h" michael@0: #include "uenumimp.h" michael@0: #include "ucln_cmn.h" michael@0: michael@0: /* Format of cnvalias.icu ----------------------------------------------------- michael@0: * michael@0: * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt. michael@0: * This binary form contains several tables. All indexes are to uint16_t michael@0: * units, and not to the bytes (uint8_t units). Addressing everything on michael@0: * 16-bit boundaries allows us to store more information with small index michael@0: * numbers, which are also 16-bit in size. The majority of the table (except michael@0: * the string table) are 16-bit numbers. michael@0: * michael@0: * First there is the size of the Table of Contents (TOC). The TOC michael@0: * entries contain the size of each section. In order to find the offset michael@0: * you just need to sum up the previous offsets. michael@0: * The TOC length and entries are an array of uint32_t values. michael@0: * The first section after the TOC starts immediately after the TOC. michael@0: * michael@0: * 1) This section contains a list of converters. This list contains indexes michael@0: * into the string table for the converter name. The index of this list is michael@0: * also used by other sections, which are mentioned later on. michael@0: * This list is not sorted. michael@0: * michael@0: * 2) This section contains a list of tags. This list contains indexes michael@0: * into the string table for the tag name. The index of this list is michael@0: * also used by other sections, which are mentioned later on. michael@0: * This list is in priority order of standards. michael@0: * michael@0: * 3) This section contains a list of sorted unique aliases. This michael@0: * list contains indexes into the string table for the alias name. The michael@0: * index of this list is also used by other sections, like the 4th section. michael@0: * The index for the 3rd and 4th section is used to get the michael@0: * alias -> converter name mapping. Section 3 and 4 form a two column table. michael@0: * Some of the most significant bits of each index may contain other michael@0: * information (see findConverter for details). michael@0: * michael@0: * 4) This section contains a list of mapped converter names. Consider this michael@0: * as a table that maps the 3rd section to the 1st section. This list contains michael@0: * indexes into the 1st section. The index of this list is the same index in michael@0: * the 3rd section. There is also some extra information in the high bits of michael@0: * each converter index in this table. Currently it's only used to say that michael@0: * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK michael@0: * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is michael@0: * the predigested form of the 5th section so that an alias lookup can be fast. michael@0: * michael@0: * 5) This section contains a 2D array with indexes to the 6th section. This michael@0: * section is the full form of all alias mappings. The column index is the michael@0: * index into the converter list (column header). The row index is the index michael@0: * to tag list (row header). This 2D array is the top part a 3D array. The michael@0: * third dimension is in the 6th section. michael@0: * michael@0: * 6) This is blob of variable length arrays. Each array starts with a size, michael@0: * and is followed by indexes to alias names in the string table. This is michael@0: * the third dimension to the section 5. No other section should be referencing michael@0: * this section. michael@0: * michael@0: * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its michael@0: * presence indicates that a section 9 exists. UConverterAliasOptions specifies michael@0: * what type of string normalization is used among other potential things in the michael@0: * future. michael@0: * michael@0: * 8) This is the string table. All strings are indexed on an even address. michael@0: * There are two reasons for this. First many chip architectures locate strings michael@0: * faster on even address boundaries. Second, since all indexes are 16-bit michael@0: * numbers, this string table can be 128KB in size instead of 64KB when we michael@0: * only have strings starting on an even address. michael@0: * michael@0: * 9) When present this is a set of prenormalized strings from section 8. This michael@0: * table contains normalized strings with the dashes and spaces stripped out, michael@0: * and all strings lowercased. In the future, the options in section 7 may state michael@0: * other types of normalization. michael@0: * michael@0: * Here is the concept of section 5 and 6. It's a 3D cube. Each tag michael@0: * has a unique alias among all converters. That same alias can michael@0: * be mentioned in other standards on different converters, michael@0: * but only one alias per tag can be unique. michael@0: * michael@0: * michael@0: * Converter Names (Usually in TR22 form) michael@0: * -------------------------------------------. michael@0: * T / /| michael@0: * a / / | michael@0: * g / / | michael@0: * s / / | michael@0: * / / | michael@0: * ------------------------------------------/ | michael@0: * A | | | michael@0: * l | | | michael@0: * i | | / michael@0: * a | | / michael@0: * s | | / michael@0: * e | | / michael@0: * s | |/ michael@0: * ------------------------------------------- michael@0: * michael@0: * michael@0: * michael@0: * Here is what it really looks like. It's like swiss cheese. michael@0: * There are holes. Some converters aren't recognized by michael@0: * a standard, or they are really old converters that the michael@0: * standard doesn't recognize anymore. michael@0: * michael@0: * Converter Names (Usually in TR22 form) michael@0: * -------------------------------------------. michael@0: * T /##########################################/| michael@0: * a / # # /# michael@0: * g / # ## ## ### # ### ### ### #/ michael@0: * s / # ##### #### ## ## #/# michael@0: * / ### # # ## # # # ### # # #/## michael@0: * ------------------------------------------/# # michael@0: * A |### # # ## # # # ### # # #|# # michael@0: * l |# # # # # ## # #|# # michael@0: * i |# # # # # # #|# michael@0: * a |# #|# michael@0: * s | #|# michael@0: * e michael@0: * s michael@0: * michael@0: */ michael@0: michael@0: /** michael@0: * Used by the UEnumeration API michael@0: */ michael@0: typedef struct UAliasContext { michael@0: uint32_t listOffset; michael@0: uint32_t listIdx; michael@0: } UAliasContext; michael@0: michael@0: static const char DATA_NAME[] = "cnvalias"; michael@0: static const char DATA_TYPE[] = "icu"; michael@0: michael@0: static UDataMemory *gAliasData=NULL; michael@0: static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER; michael@0: michael@0: enum { michael@0: tocLengthIndex=0, michael@0: converterListIndex=1, michael@0: tagListIndex=2, michael@0: aliasListIndex=3, michael@0: untaggedConvArrayIndex=4, michael@0: taggedAliasArrayIndex=5, michael@0: taggedAliasListsIndex=6, michael@0: tableOptionsIndex=7, michael@0: stringTableIndex=8, michael@0: normalizedStringTableIndex=9, michael@0: offsetsCount, /* length of the swapper's temporary offsets[] */ michael@0: minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */ michael@0: }; michael@0: michael@0: static const UConverterAliasOptions defaultTableOptions = { michael@0: UCNV_IO_UNNORMALIZED, michael@0: 0 /* containsCnvOptionInfo */ michael@0: }; michael@0: static UConverterAlias gMainTable; michael@0: michael@0: #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx)) michael@0: #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx)) michael@0: michael@0: static UBool U_CALLCONV michael@0: isAcceptable(void * /*context*/, michael@0: const char * /*type*/, const char * /*name*/, michael@0: const UDataInfo *pInfo) { michael@0: return (UBool)( michael@0: pInfo->size>=20 && michael@0: pInfo->isBigEndian==U_IS_BIG_ENDIAN && michael@0: pInfo->charsetFamily==U_CHARSET_FAMILY && michael@0: pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ michael@0: pInfo->dataFormat[1]==0x76 && michael@0: pInfo->dataFormat[2]==0x41 && michael@0: pInfo->dataFormat[3]==0x6c && michael@0: pInfo->formatVersion[0]==3); michael@0: } michael@0: michael@0: static UBool U_CALLCONV ucnv_io_cleanup(void) michael@0: { michael@0: if (gAliasData) { michael@0: udata_close(gAliasData); michael@0: gAliasData = NULL; michael@0: } michael@0: gAliasDataInitOnce.reset(); michael@0: michael@0: uprv_memset(&gMainTable, 0, sizeof(gMainTable)); michael@0: michael@0: return TRUE; /* Everything was cleaned up */ michael@0: } michael@0: michael@0: static void U_CALLCONV initAliasData(UErrorCode &errCode) { michael@0: UDataMemory *data; michael@0: const uint16_t *table; michael@0: const uint32_t *sectionSizes; michael@0: uint32_t tableStart; michael@0: uint32_t currOffset; michael@0: michael@0: ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup); michael@0: michael@0: U_ASSERT(gAliasData == NULL); michael@0: data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode); michael@0: if(U_FAILURE(errCode)) { michael@0: return; michael@0: } michael@0: michael@0: sectionSizes = (const uint32_t *)udata_getMemory(data); michael@0: table = (const uint16_t *)sectionSizes; michael@0: michael@0: tableStart = sectionSizes[0]; michael@0: if (tableStart < minTocLength) { michael@0: errCode = U_INVALID_FORMAT_ERROR; michael@0: udata_close(data); michael@0: return; michael@0: } michael@0: gAliasData = data; michael@0: michael@0: gMainTable.converterListSize = sectionSizes[1]; michael@0: gMainTable.tagListSize = sectionSizes[2]; michael@0: gMainTable.aliasListSize = sectionSizes[3]; michael@0: gMainTable.untaggedConvArraySize = sectionSizes[4]; michael@0: gMainTable.taggedAliasArraySize = sectionSizes[5]; michael@0: gMainTable.taggedAliasListsSize = sectionSizes[6]; michael@0: gMainTable.optionTableSize = sectionSizes[7]; michael@0: gMainTable.stringTableSize = sectionSizes[8]; michael@0: michael@0: if (tableStart > 8) { michael@0: gMainTable.normalizedStringTableSize = sectionSizes[9]; michael@0: } michael@0: michael@0: currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t)); michael@0: gMainTable.converterList = table + currOffset; michael@0: michael@0: currOffset += gMainTable.converterListSize; michael@0: gMainTable.tagList = table + currOffset; michael@0: michael@0: currOffset += gMainTable.tagListSize; michael@0: gMainTable.aliasList = table + currOffset; michael@0: michael@0: currOffset += gMainTable.aliasListSize; michael@0: gMainTable.untaggedConvArray = table + currOffset; michael@0: michael@0: currOffset += gMainTable.untaggedConvArraySize; michael@0: gMainTable.taggedAliasArray = table + currOffset; michael@0: michael@0: /* aliasLists is a 1's based array, but it has a padding character */ michael@0: currOffset += gMainTable.taggedAliasArraySize; michael@0: gMainTable.taggedAliasLists = table + currOffset; michael@0: michael@0: currOffset += gMainTable.taggedAliasListsSize; michael@0: if (gMainTable.optionTableSize > 0 michael@0: && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT) michael@0: { michael@0: /* Faster table */ michael@0: gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset); michael@0: } michael@0: else { michael@0: /* Smaller table, or I can't handle this normalization mode! michael@0: Use the original slower table lookup. */ michael@0: gMainTable.optionTable = &defaultTableOptions; michael@0: } michael@0: michael@0: currOffset += gMainTable.optionTableSize; michael@0: gMainTable.stringTable = table + currOffset; michael@0: michael@0: currOffset += gMainTable.stringTableSize; michael@0: gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED) michael@0: ? gMainTable.stringTable : (table + currOffset)); michael@0: } michael@0: michael@0: michael@0: static UBool michael@0: haveAliasData(UErrorCode *pErrorCode) { michael@0: umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode); michael@0: return U_SUCCESS(*pErrorCode); michael@0: } michael@0: michael@0: static inline UBool michael@0: isAlias(const char *alias, UErrorCode *pErrorCode) { michael@0: if(alias==NULL) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return FALSE; michael@0: } michael@0: return (UBool)(*alias!=0); michael@0: } michael@0: michael@0: static uint32_t getTagNumber(const char *tagname) { michael@0: if (gMainTable.tagList) { michael@0: uint32_t tagNum; michael@0: for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) { michael@0: if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) { michael@0: return tagNum; michael@0: } michael@0: } michael@0: } michael@0: michael@0: return UINT32_MAX; michael@0: } michael@0: michael@0: /* character types relevant for ucnv_compareNames() */ michael@0: enum { michael@0: UIGNORE, michael@0: ZERO, michael@0: NONZERO, michael@0: MINLETTER /* any values from here on are lowercase letter mappings */ michael@0: }; michael@0: michael@0: /* character types for ASCII 00..7F */ michael@0: static const uint8_t asciiTypes[128] = { michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, michael@0: ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0, michael@0: 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, michael@0: 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0, michael@0: 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, michael@0: 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0 michael@0: }; michael@0: michael@0: #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)UIGNORE) michael@0: michael@0: /* character types for EBCDIC 80..FF */ michael@0: static const uint8_t ebcdicTypes[128] = { michael@0: 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, michael@0: 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, michael@0: 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, michael@0: 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, michael@0: 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, michael@0: ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0 michael@0: }; michael@0: michael@0: #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE) michael@0: michael@0: #if U_CHARSET_FAMILY==U_ASCII_FAMILY michael@0: # define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c) michael@0: #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY michael@0: # define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c) michael@0: #else michael@0: # error U_CHARSET_FAMILY is not valid michael@0: #endif michael@0: michael@0: /* @see ucnv_compareNames */ michael@0: U_CFUNC char * U_EXPORT2 michael@0: ucnv_io_stripASCIIForCompare(char *dst, const char *name) { michael@0: char *dstItr = dst; michael@0: uint8_t type, nextType; michael@0: char c1; michael@0: UBool afterDigit = FALSE; michael@0: michael@0: while ((c1 = *name++) != 0) { michael@0: type = GET_ASCII_TYPE(c1); michael@0: switch (type) { michael@0: case UIGNORE: michael@0: afterDigit = FALSE; michael@0: continue; /* ignore all but letters and digits */ michael@0: case ZERO: michael@0: if (!afterDigit) { michael@0: nextType = GET_ASCII_TYPE(*name); michael@0: if (nextType == ZERO || nextType == NONZERO) { michael@0: continue; /* ignore leading zero before another digit */ michael@0: } michael@0: } michael@0: break; michael@0: case NONZERO: michael@0: afterDigit = TRUE; michael@0: break; michael@0: default: michael@0: c1 = (char)type; /* lowercased letter */ michael@0: afterDigit = FALSE; michael@0: break; michael@0: } michael@0: *dstItr++ = c1; michael@0: } michael@0: *dstItr = 0; michael@0: return dst; michael@0: } michael@0: michael@0: U_CFUNC char * U_EXPORT2 michael@0: ucnv_io_stripEBCDICForCompare(char *dst, const char *name) { michael@0: char *dstItr = dst; michael@0: uint8_t type, nextType; michael@0: char c1; michael@0: UBool afterDigit = FALSE; michael@0: michael@0: while ((c1 = *name++) != 0) { michael@0: type = GET_EBCDIC_TYPE(c1); michael@0: switch (type) { michael@0: case UIGNORE: michael@0: afterDigit = FALSE; michael@0: continue; /* ignore all but letters and digits */ michael@0: case ZERO: michael@0: if (!afterDigit) { michael@0: nextType = GET_EBCDIC_TYPE(*name); michael@0: if (nextType == ZERO || nextType == NONZERO) { michael@0: continue; /* ignore leading zero before another digit */ michael@0: } michael@0: } michael@0: break; michael@0: case NONZERO: michael@0: afterDigit = TRUE; michael@0: break; michael@0: default: michael@0: c1 = (char)type; /* lowercased letter */ michael@0: afterDigit = FALSE; michael@0: break; michael@0: } michael@0: *dstItr++ = c1; michael@0: } michael@0: *dstItr = 0; michael@0: return dst; michael@0: } michael@0: michael@0: /** michael@0: * Do a fuzzy compare of two converter/alias names. michael@0: * The comparison is case-insensitive, ignores leading zeroes if they are not michael@0: * followed by further digits, and ignores all but letters and digits. michael@0: * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent. michael@0: * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22 michael@0: * at http://www.unicode.org/reports/tr22/ michael@0: * michael@0: * This is a symmetrical (commutative) operation; order of arguments michael@0: * is insignificant. This is an important property for sorting the michael@0: * list (when the list is preprocessed into binary form) and for michael@0: * performing binary searches on it at run time. michael@0: * michael@0: * @param name1 a converter name or alias, zero-terminated michael@0: * @param name2 a converter name or alias, zero-terminated michael@0: * @return 0 if the names match, or a negative value if the name1 michael@0: * lexically precedes name2, or a positive value if the name1 michael@0: * lexically follows name2. michael@0: * michael@0: * @see ucnv_io_stripForCompare michael@0: */ michael@0: U_CAPI int U_EXPORT2 michael@0: ucnv_compareNames(const char *name1, const char *name2) { michael@0: int rc; michael@0: uint8_t type, nextType; michael@0: char c1, c2; michael@0: UBool afterDigit1 = FALSE, afterDigit2 = FALSE; michael@0: michael@0: for (;;) { michael@0: while ((c1 = *name1++) != 0) { michael@0: type = GET_CHAR_TYPE(c1); michael@0: switch (type) { michael@0: case UIGNORE: michael@0: afterDigit1 = FALSE; michael@0: continue; /* ignore all but letters and digits */ michael@0: case ZERO: michael@0: if (!afterDigit1) { michael@0: nextType = GET_CHAR_TYPE(*name1); michael@0: if (nextType == ZERO || nextType == NONZERO) { michael@0: continue; /* ignore leading zero before another digit */ michael@0: } michael@0: } michael@0: break; michael@0: case NONZERO: michael@0: afterDigit1 = TRUE; michael@0: break; michael@0: default: michael@0: c1 = (char)type; /* lowercased letter */ michael@0: afterDigit1 = FALSE; michael@0: break; michael@0: } michael@0: break; /* deliver c1 */ michael@0: } michael@0: while ((c2 = *name2++) != 0) { michael@0: type = GET_CHAR_TYPE(c2); michael@0: switch (type) { michael@0: case UIGNORE: michael@0: afterDigit2 = FALSE; michael@0: continue; /* ignore all but letters and digits */ michael@0: case ZERO: michael@0: if (!afterDigit2) { michael@0: nextType = GET_CHAR_TYPE(*name2); michael@0: if (nextType == ZERO || nextType == NONZERO) { michael@0: continue; /* ignore leading zero before another digit */ michael@0: } michael@0: } michael@0: break; michael@0: case NONZERO: michael@0: afterDigit2 = TRUE; michael@0: break; michael@0: default: michael@0: c2 = (char)type; /* lowercased letter */ michael@0: afterDigit2 = FALSE; michael@0: break; michael@0: } michael@0: break; /* deliver c2 */ michael@0: } michael@0: michael@0: /* If we reach the ends of both strings then they match */ michael@0: if ((c1|c2)==0) { michael@0: return 0; michael@0: } michael@0: michael@0: /* Case-insensitive comparison */ michael@0: rc = (int)(unsigned char)c1 - (int)(unsigned char)c2; michael@0: if (rc != 0) { michael@0: return rc; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * search for an alias michael@0: * return the converter number index for gConverterList michael@0: */ michael@0: static inline uint32_t michael@0: findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { michael@0: uint32_t mid, start, limit; michael@0: uint32_t lastMid; michael@0: int result; michael@0: int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED); michael@0: char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; michael@0: michael@0: if (!isUnnormalized) { michael@0: if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) { michael@0: *pErrorCode = U_BUFFER_OVERFLOW_ERROR; michael@0: return UINT32_MAX; michael@0: } michael@0: michael@0: /* Lower case and remove ignoreable characters. */ michael@0: ucnv_io_stripForCompare(strippedName, alias); michael@0: alias = strippedName; michael@0: } michael@0: michael@0: /* do a binary search for the alias */ michael@0: start = 0; michael@0: limit = gMainTable.untaggedConvArraySize; michael@0: mid = limit; michael@0: lastMid = UINT32_MAX; michael@0: michael@0: for (;;) { michael@0: mid = (uint32_t)((start + limit) / 2); michael@0: if (lastMid == mid) { /* Have we moved? */ michael@0: break; /* We haven't moved, and it wasn't found. */ michael@0: } michael@0: lastMid = mid; michael@0: if (isUnnormalized) { michael@0: result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid])); michael@0: } michael@0: else { michael@0: result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid])); michael@0: } michael@0: michael@0: if (result < 0) { michael@0: limit = mid; michael@0: } else if (result > 0) { michael@0: start = mid; michael@0: } else { michael@0: /* Since the gencnval tool folds duplicates into one entry, michael@0: * this alias in gAliasList is unique, but different standards michael@0: * may map an alias to different converters. michael@0: */ michael@0: if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) { michael@0: *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING; michael@0: } michael@0: /* State whether the canonical converter name contains an option. michael@0: This information is contained in this list in order to maintain backward & forward compatibility. */ michael@0: if (containsOption) { michael@0: UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo; michael@0: *containsOption = (UBool)((containsCnvOptionInfo michael@0: && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0)) michael@0: || !containsCnvOptionInfo); michael@0: } michael@0: return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK; michael@0: } michael@0: } michael@0: michael@0: return UINT32_MAX; michael@0: } michael@0: michael@0: /* michael@0: * Is this alias in this list? michael@0: * alias and listOffset should be non-NULL. michael@0: */ michael@0: static inline UBool michael@0: isAliasInList(const char *alias, uint32_t listOffset) { michael@0: if (listOffset) { michael@0: uint32_t currAlias; michael@0: uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; michael@0: /* +1 to skip listCount */ michael@0: const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; michael@0: for (currAlias = 0; currAlias < listCount; currAlias++) { michael@0: if (currList[currAlias] michael@0: && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0) michael@0: { michael@0: return TRUE; michael@0: } michael@0: } michael@0: } michael@0: return FALSE; michael@0: } michael@0: michael@0: /* michael@0: * Search for an standard name of an alias (what is the default name michael@0: * that this standard uses?) michael@0: * return the listOffset for gTaggedAliasLists. If it's 0, michael@0: * the it couldn't be found, but the parameters are valid. michael@0: */ michael@0: static uint32_t michael@0: findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) { michael@0: uint32_t idx; michael@0: uint32_t listOffset; michael@0: uint32_t convNum; michael@0: UErrorCode myErr = U_ZERO_ERROR; michael@0: uint32_t tagNum = getTagNumber(standard); michael@0: michael@0: /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ michael@0: convNum = findConverter(alias, NULL, &myErr); michael@0: if (myErr != U_ZERO_ERROR) { michael@0: *pErrorCode = myErr; michael@0: } michael@0: michael@0: if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { michael@0: listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; michael@0: if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) { michael@0: return listOffset; michael@0: } michael@0: if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { michael@0: /* Uh Oh! They used an ambiguous alias. michael@0: We have to search the whole swiss cheese starting michael@0: at the highest standard affinity. michael@0: This may take a while. michael@0: */ michael@0: for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) { michael@0: listOffset = gMainTable.taggedAliasArray[idx]; michael@0: if (listOffset && isAliasInList(alias, listOffset)) { michael@0: uint32_t currTagNum = idx/gMainTable.converterListSize; michael@0: uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize); michael@0: uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum]; michael@0: if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) { michael@0: return tempListOffset; michael@0: } michael@0: /* else keep on looking */ michael@0: /* We could speed this up by starting on the next row michael@0: because an alias is unique per row, right now. michael@0: This would change if alias versioning appears. */ michael@0: } michael@0: } michael@0: /* The standard doesn't know about the alias */ michael@0: } michael@0: /* else no default name */ michael@0: return 0; michael@0: } michael@0: /* else converter or tag not found */ michael@0: michael@0: return UINT32_MAX; michael@0: } michael@0: michael@0: /* Return the canonical name */ michael@0: static uint32_t michael@0: findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) { michael@0: uint32_t idx; michael@0: uint32_t listOffset; michael@0: uint32_t convNum; michael@0: UErrorCode myErr = U_ZERO_ERROR; michael@0: uint32_t tagNum = getTagNumber(standard); michael@0: michael@0: /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ michael@0: convNum = findConverter(alias, NULL, &myErr); michael@0: if (myErr != U_ZERO_ERROR) { michael@0: *pErrorCode = myErr; michael@0: } michael@0: michael@0: if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { michael@0: listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; michael@0: if (listOffset && isAliasInList(alias, listOffset)) { michael@0: return convNum; michael@0: } michael@0: if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { michael@0: /* Uh Oh! They used an ambiguous alias. michael@0: We have to search one slice of the swiss cheese. michael@0: We search only in the requested tag, not the whole thing. michael@0: This may take a while. michael@0: */ michael@0: uint32_t convStart = (tagNum)*gMainTable.converterListSize; michael@0: uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize; michael@0: for (idx = convStart; idx < convLimit; idx++) { michael@0: listOffset = gMainTable.taggedAliasArray[idx]; michael@0: if (listOffset && isAliasInList(alias, listOffset)) { michael@0: return idx-convStart; michael@0: } michael@0: } michael@0: /* The standard doesn't know about the alias */ michael@0: } michael@0: /* else no canonical name */ michael@0: } michael@0: /* else converter or tag not found */ michael@0: michael@0: return UINT32_MAX; michael@0: } michael@0: michael@0: michael@0: michael@0: U_CFUNC const char * michael@0: ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { michael@0: const char *aliasTmp = alias; michael@0: int32_t i = 0; michael@0: for (i = 0; i < 2; i++) { michael@0: if (i == 1) { michael@0: /* michael@0: * After the first unsuccess converter lookup, check to see if michael@0: * the name begins with 'x-'. If it does, strip it off and try michael@0: * again. This behaviour is similar to how ICU4J does it. michael@0: */ michael@0: if (aliasTmp[0] == 'x' || aliasTmp[1] == '-') { michael@0: aliasTmp = aliasTmp+2; michael@0: } else { michael@0: break; michael@0: } michael@0: } michael@0: if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) { michael@0: uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode); michael@0: if (convNum < gMainTable.converterListSize) { michael@0: return GET_STRING(gMainTable.converterList[convNum]); michael@0: } michael@0: /* else converter not found */ michael@0: } else { michael@0: break; michael@0: } michael@0: } michael@0: michael@0: return NULL; michael@0: } michael@0: michael@0: static int32_t U_CALLCONV michael@0: ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { michael@0: int32_t value = 0; michael@0: UAliasContext *myContext = (UAliasContext *)(enumerator->context); michael@0: uint32_t listOffset = myContext->listOffset; michael@0: michael@0: if (listOffset) { michael@0: value = gMainTable.taggedAliasLists[listOffset]; michael@0: } michael@0: return value; michael@0: } michael@0: michael@0: static const char* U_CALLCONV michael@0: ucnv_io_nextStandardAliases(UEnumeration *enumerator, michael@0: int32_t* resultLength, michael@0: UErrorCode * /*pErrorCode*/) michael@0: { michael@0: UAliasContext *myContext = (UAliasContext *)(enumerator->context); michael@0: uint32_t listOffset = myContext->listOffset; michael@0: michael@0: if (listOffset) { michael@0: uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; michael@0: const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; michael@0: michael@0: if (myContext->listIdx < listCount) { michael@0: const char *myStr = GET_STRING(currList[myContext->listIdx++]); michael@0: if (resultLength) { michael@0: *resultLength = (int32_t)uprv_strlen(myStr); michael@0: } michael@0: return myStr; michael@0: } michael@0: } michael@0: /* Either we accessed a zero length list, or we enumerated too far. */ michael@0: if (resultLength) { michael@0: *resultLength = 0; michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: static void U_CALLCONV michael@0: ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { michael@0: ((UAliasContext *)(enumerator->context))->listIdx = 0; michael@0: } michael@0: michael@0: static void U_CALLCONV michael@0: ucnv_io_closeUEnumeration(UEnumeration *enumerator) { michael@0: uprv_free(enumerator->context); michael@0: uprv_free(enumerator); michael@0: } michael@0: michael@0: /* Enumerate the aliases for the specified converter and standard tag */ michael@0: static const UEnumeration gEnumAliases = { michael@0: NULL, michael@0: NULL, michael@0: ucnv_io_closeUEnumeration, michael@0: ucnv_io_countStandardAliases, michael@0: uenum_unextDefault, michael@0: ucnv_io_nextStandardAliases, michael@0: ucnv_io_resetStandardAliases michael@0: }; michael@0: michael@0: U_CAPI UEnumeration * U_EXPORT2 michael@0: ucnv_openStandardNames(const char *convName, michael@0: const char *standard, michael@0: UErrorCode *pErrorCode) michael@0: { michael@0: UEnumeration *myEnum = NULL; michael@0: if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) { michael@0: uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode); michael@0: michael@0: /* When listOffset == 0, we want to acknowledge that the michael@0: converter name and standard are okay, but there michael@0: is nothing to enumerate. */ michael@0: if (listOffset < gMainTable.taggedAliasListsSize) { michael@0: UAliasContext *myContext; michael@0: michael@0: myEnum = static_cast(uprv_malloc(sizeof(UEnumeration))); michael@0: if (myEnum == NULL) { michael@0: *pErrorCode = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration)); michael@0: myContext = static_cast(uprv_malloc(sizeof(UAliasContext))); michael@0: if (myContext == NULL) { michael@0: *pErrorCode = U_MEMORY_ALLOCATION_ERROR; michael@0: uprv_free(myEnum); michael@0: return NULL; michael@0: } michael@0: myContext->listOffset = listOffset; michael@0: myContext->listIdx = 0; michael@0: myEnum->context = myContext; michael@0: } michael@0: /* else converter or tag not found */ michael@0: } michael@0: return myEnum; michael@0: } michael@0: michael@0: static uint16_t michael@0: ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) { michael@0: if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { michael@0: uint32_t convNum = findConverter(alias, NULL, pErrorCode); michael@0: if (convNum < gMainTable.converterListSize) { michael@0: /* tagListNum - 1 is the ALL tag */ michael@0: int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; michael@0: michael@0: if (listOffset) { michael@0: return gMainTable.taggedAliasLists[listOffset]; michael@0: } michael@0: /* else this shouldn't happen. internal program error */ michael@0: } michael@0: /* else converter not found */ michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: static uint16_t michael@0: ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) { michael@0: if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { michael@0: uint32_t currAlias; michael@0: uint32_t convNum = findConverter(alias, NULL, pErrorCode); michael@0: if (convNum < gMainTable.converterListSize) { michael@0: /* tagListNum - 1 is the ALL tag */ michael@0: int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; michael@0: michael@0: if (listOffset) { michael@0: uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; michael@0: /* +1 to skip listCount */ michael@0: const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; michael@0: michael@0: for (currAlias = start; currAlias < listCount; currAlias++) { michael@0: aliases[currAlias] = GET_STRING(currList[currAlias]); michael@0: } michael@0: } michael@0: /* else this shouldn't happen. internal program error */ michael@0: } michael@0: /* else converter not found */ michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: static const char * michael@0: ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) { michael@0: if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { michael@0: uint32_t convNum = findConverter(alias, NULL, pErrorCode); michael@0: if (convNum < gMainTable.converterListSize) { michael@0: /* tagListNum - 1 is the ALL tag */ michael@0: int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; michael@0: michael@0: if (listOffset) { michael@0: uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; michael@0: /* +1 to skip listCount */ michael@0: const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; michael@0: michael@0: if (n < listCount) { michael@0: return GET_STRING(currList[n]); michael@0: } michael@0: *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; michael@0: } michael@0: /* else this shouldn't happen. internal program error */ michael@0: } michael@0: /* else converter not found */ michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: static uint16_t michael@0: ucnv_io_countStandards(UErrorCode *pErrorCode) { michael@0: if (haveAliasData(pErrorCode)) { michael@0: /* Don't include the empty list */ michael@0: return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS); michael@0: } michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: U_CAPI const char * U_EXPORT2 michael@0: ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) { michael@0: if (haveAliasData(pErrorCode)) { michael@0: if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) { michael@0: return GET_STRING(gMainTable.tagList[n]); michael@0: } michael@0: *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; michael@0: } michael@0: michael@0: return NULL; michael@0: } michael@0: michael@0: U_CAPI const char * U_EXPORT2 michael@0: ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) { michael@0: if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { michael@0: uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode); michael@0: michael@0: if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) { michael@0: const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; michael@0: michael@0: /* Get the preferred name from this list */ michael@0: if (currList[0]) { michael@0: return GET_STRING(currList[0]); michael@0: } michael@0: /* else someone screwed up the alias table. */ michael@0: /* *pErrorCode = U_INVALID_FORMAT_ERROR */ michael@0: } michael@0: } michael@0: michael@0: return NULL; michael@0: } michael@0: michael@0: U_CAPI uint16_t U_EXPORT2 michael@0: ucnv_countAliases(const char *alias, UErrorCode *pErrorCode) michael@0: { michael@0: return ucnv_io_countAliases(alias, pErrorCode); michael@0: } michael@0: michael@0: michael@0: U_CAPI const char* U_EXPORT2 michael@0: ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) michael@0: { michael@0: return ucnv_io_getAlias(alias, n, pErrorCode); michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) michael@0: { michael@0: ucnv_io_getAliases(alias, 0, aliases, pErrorCode); michael@0: } michael@0: michael@0: U_CAPI uint16_t U_EXPORT2 michael@0: ucnv_countStandards(void) michael@0: { michael@0: UErrorCode err = U_ZERO_ERROR; michael@0: return ucnv_io_countStandards(&err); michael@0: } michael@0: michael@0: U_CAPI const char * U_EXPORT2 michael@0: ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) { michael@0: if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { michael@0: uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode); michael@0: michael@0: if (convNum < gMainTable.converterListSize) { michael@0: return GET_STRING(gMainTable.converterList[convNum]); michael@0: } michael@0: } michael@0: michael@0: return NULL; michael@0: } michael@0: michael@0: static int32_t U_CALLCONV michael@0: ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) { michael@0: return gMainTable.converterListSize; michael@0: } michael@0: michael@0: static const char* U_CALLCONV michael@0: ucnv_io_nextAllConverters(UEnumeration *enumerator, michael@0: int32_t* resultLength, michael@0: UErrorCode * /*pErrorCode*/) michael@0: { michael@0: uint16_t *myContext = (uint16_t *)(enumerator->context); michael@0: michael@0: if (*myContext < gMainTable.converterListSize) { michael@0: const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]); michael@0: if (resultLength) { michael@0: *resultLength = (int32_t)uprv_strlen(myStr); michael@0: } michael@0: return myStr; michael@0: } michael@0: /* Either we accessed a zero length list, or we enumerated too far. */ michael@0: if (resultLength) { michael@0: *resultLength = 0; michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: static void U_CALLCONV michael@0: ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { michael@0: *((uint16_t *)(enumerator->context)) = 0; michael@0: } michael@0: michael@0: static const UEnumeration gEnumAllConverters = { michael@0: NULL, michael@0: NULL, michael@0: ucnv_io_closeUEnumeration, michael@0: ucnv_io_countAllConverters, michael@0: uenum_unextDefault, michael@0: ucnv_io_nextAllConverters, michael@0: ucnv_io_resetAllConverters michael@0: }; michael@0: michael@0: U_CAPI UEnumeration * U_EXPORT2 michael@0: ucnv_openAllNames(UErrorCode *pErrorCode) { michael@0: UEnumeration *myEnum = NULL; michael@0: if (haveAliasData(pErrorCode)) { michael@0: uint16_t *myContext; michael@0: michael@0: myEnum = static_cast(uprv_malloc(sizeof(UEnumeration))); michael@0: if (myEnum == NULL) { michael@0: *pErrorCode = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration)); michael@0: myContext = static_cast(uprv_malloc(sizeof(uint16_t))); michael@0: if (myContext == NULL) { michael@0: *pErrorCode = U_MEMORY_ALLOCATION_ERROR; michael@0: uprv_free(myEnum); michael@0: return NULL; michael@0: } michael@0: *myContext = 0; michael@0: myEnum->context = myContext; michael@0: } michael@0: return myEnum; michael@0: } michael@0: michael@0: U_CFUNC uint16_t michael@0: ucnv_io_countKnownConverters(UErrorCode *pErrorCode) { michael@0: if (haveAliasData(pErrorCode)) { michael@0: return (uint16_t)gMainTable.converterListSize; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: /* alias table swapping ----------------------------------------------------- */ michael@0: michael@0: typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name); michael@0: michael@0: /* michael@0: * row of a temporary array michael@0: * michael@0: * gets platform-endian charset string indexes and sorting indexes; michael@0: * after sorting this array by strings, the actual arrays are permutated michael@0: * according to the sorting indexes michael@0: */ michael@0: typedef struct TempRow { michael@0: uint16_t strIndex, sortIndex; michael@0: } TempRow; michael@0: michael@0: typedef struct TempAliasTable { michael@0: const char *chars; michael@0: TempRow *rows; michael@0: uint16_t *resort; michael@0: StripForCompareFn *stripForCompare; michael@0: } TempAliasTable; michael@0: michael@0: enum { michael@0: STACK_ROW_CAPACITY=500 michael@0: }; michael@0: michael@0: static int32_t michael@0: io_compareRows(const void *context, const void *left, const void *right) { michael@0: char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH], michael@0: strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH]; michael@0: michael@0: TempAliasTable *tempTable=(TempAliasTable *)context; michael@0: const char *chars=tempTable->chars; michael@0: michael@0: return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex), michael@0: tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex)); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucnv_swapAliases(const UDataSwapper *ds, michael@0: const void *inData, int32_t length, void *outData, michael@0: UErrorCode *pErrorCode) { michael@0: const UDataInfo *pInfo; michael@0: int32_t headerSize; michael@0: michael@0: const uint16_t *inTable; michael@0: const uint32_t *inSectionSizes; michael@0: uint32_t toc[offsetsCount]; michael@0: uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */ michael@0: uint32_t i, count, tocLength, topOffset; michael@0: michael@0: TempRow rows[STACK_ROW_CAPACITY]; michael@0: uint16_t resort[STACK_ROW_CAPACITY]; michael@0: TempAliasTable tempTable; michael@0: michael@0: /* udata_swapDataHeader checks the arguments */ michael@0: headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* check data format and format version */ michael@0: pInfo=(const UDataInfo *)((const char *)inData+4); michael@0: if(!( michael@0: pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ michael@0: pInfo->dataFormat[1]==0x76 && michael@0: pInfo->dataFormat[2]==0x41 && michael@0: pInfo->dataFormat[3]==0x6c && michael@0: pInfo->formatVersion[0]==3 michael@0: )) { michael@0: udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n", michael@0: pInfo->dataFormat[0], pInfo->dataFormat[1], michael@0: pInfo->dataFormat[2], pInfo->dataFormat[3], michael@0: pInfo->formatVersion[0]); michael@0: *pErrorCode=U_UNSUPPORTED_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* an alias table must contain at least the table of contents array */ michael@0: if(length>=0 && (length-headerSize)<4*(1+minTocLength)) { michael@0: udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", michael@0: length-headerSize); michael@0: *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: inSectionSizes=(const uint32_t *)((const char *)inData+headerSize); michael@0: inTable=(const uint16_t *)inSectionSizes; michael@0: uprv_memset(toc, 0, sizeof(toc)); michael@0: toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]); michael@0: if(tocLengthreadUInt32(inSectionSizes[i]); michael@0: } michael@0: michael@0: /* compute offsets */ michael@0: uprv_memset(offsets, 0, sizeof(offsets)); michael@0: offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */ michael@0: for(i=tagListIndex; i<=tocLength; ++i) { michael@0: offsets[i]=offsets[i-1]+toc[i-1]; michael@0: } michael@0: michael@0: /* compute the overall size of the after-header data, in numbers of 16-bit units */ michael@0: topOffset=offsets[i-1]+toc[i-1]; michael@0: michael@0: if(length>=0) { michael@0: uint16_t *outTable; michael@0: const uint16_t *p, *p2; michael@0: uint16_t *q, *q2; michael@0: uint16_t oldIndex; michael@0: michael@0: if((length-headerSize)<(2*(int32_t)topOffset)) { michael@0: udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", michael@0: length-headerSize); michael@0: *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: outTable=(uint16_t *)((char *)outData+headerSize); michael@0: michael@0: /* swap the entire table of contents */ michael@0: ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode); michael@0: michael@0: /* swap unormalized strings & normalized strings */ michael@0: ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]), michael@0: outTable+offsets[stringTableIndex], pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n"); michael@0: return 0; michael@0: } michael@0: michael@0: if(ds->inCharset==ds->outCharset) { michael@0: /* no need to sort, just swap all 16-bit values together */ michael@0: ds->swapArray16(ds, michael@0: inTable+offsets[converterListIndex], michael@0: 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]), michael@0: outTable+offsets[converterListIndex], michael@0: pErrorCode); michael@0: } else { michael@0: /* allocate the temporary table for sorting */ michael@0: count=toc[aliasListIndex]; michael@0: michael@0: tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */ michael@0: michael@0: if(count<=STACK_ROW_CAPACITY) { michael@0: tempTable.rows=rows; michael@0: tempTable.resort=resort; michael@0: } else { michael@0: tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2); michael@0: if(tempTable.rows==NULL) { michael@0: udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n", michael@0: count); michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return 0; michael@0: } michael@0: tempTable.resort=(uint16_t *)(tempTable.rows+count); michael@0: } michael@0: michael@0: if(ds->outCharset==U_ASCII_FAMILY) { michael@0: tempTable.stripForCompare=ucnv_io_stripASCIIForCompare; michael@0: } else /* U_EBCDIC_FAMILY */ { michael@0: tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare; michael@0: } michael@0: michael@0: /* michael@0: * Sort unique aliases+mapped names. michael@0: * michael@0: * We need to sort the list again by outCharset strings because they michael@0: * sort differently for different charset families. michael@0: * First we set up a temporary table with the string indexes and michael@0: * sorting indexes and sort that. michael@0: * Then we permutate and copy/swap the actual values. michael@0: */ michael@0: p=inTable+offsets[aliasListIndex]; michael@0: q=outTable+offsets[aliasListIndex]; michael@0: michael@0: p2=inTable+offsets[untaggedConvArrayIndex]; michael@0: q2=outTable+offsets[untaggedConvArrayIndex]; michael@0: michael@0: for(i=0; ireadUInt16(p[i]); michael@0: tempTable.rows[i].sortIndex=(uint16_t)i; michael@0: } michael@0: michael@0: uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow), michael@0: io_compareRows, &tempTable, michael@0: FALSE, pErrorCode); michael@0: michael@0: if(U_SUCCESS(*pErrorCode)) { michael@0: /* copy/swap/permutate items */ michael@0: if(p!=q) { michael@0: for(i=0; iswapArray16(ds, p+oldIndex, 2, q+i, pErrorCode); michael@0: ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode); michael@0: } michael@0: } else { michael@0: /* michael@0: * If we swap in-place, then the permutation must use another michael@0: * temporary array (tempTable.resort) michael@0: * before the results are copied to the outBundle. michael@0: */ michael@0: uint16_t *r=tempTable.resort; michael@0: michael@0: for(i=0; iswapArray16(ds, p+oldIndex, 2, r+i, pErrorCode); michael@0: } michael@0: uprv_memcpy(q, r, 2*count); michael@0: michael@0: for(i=0; iswapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode); michael@0: } michael@0: uprv_memcpy(q2, r, 2*count); michael@0: } michael@0: } michael@0: michael@0: if(tempTable.rows!=rows) { michael@0: uprv_free(tempTable.rows); michael@0: } michael@0: michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n", michael@0: count); michael@0: return 0; michael@0: } michael@0: michael@0: /* swap remaining 16-bit values */ michael@0: ds->swapArray16(ds, michael@0: inTable+offsets[converterListIndex], michael@0: 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]), michael@0: outTable+offsets[converterListIndex], michael@0: pErrorCode); michael@0: ds->swapArray16(ds, michael@0: inTable+offsets[taggedAliasArrayIndex], michael@0: 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]), michael@0: outTable+offsets[taggedAliasArrayIndex], michael@0: pErrorCode); michael@0: } michael@0: } michael@0: michael@0: return headerSize+2*(int32_t)topOffset; michael@0: } michael@0: michael@0: #endif michael@0: michael@0: michael@0: /* michael@0: * Hey, Emacs, please set the following: michael@0: * michael@0: * Local Variables: michael@0: * indent-tabs-mode: nil michael@0: * End: michael@0: * michael@0: */