intl/icu/source/common/ucnv_io.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 ******************************************************************************
     3 *
     4 *   Copyright (C) 1999-2013, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 ******************************************************************************
     8 *
     9 *
    10 *  ucnv_io.cpp:
    11 *  initializes global variables and defines functions pertaining to converter 
    12 *  name resolution aspect of the conversion code.
    13 *
    14 *   new implementation:
    15 *
    16 *   created on: 1999nov22
    17 *   created by: Markus W. Scherer
    18 *
    19 *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
    20 *   with aliases for converter names.
    21 *
    22 *   Date        Name        Description
    23 *   11/22/1999  markus      Created
    24 *   06/28/2002  grhoten     Major overhaul of the converter alias design.
    25 *                           Now an alias can map to different converters
    26 *                           depending on the specified standard.
    27 *******************************************************************************
    28 */
    30 #include "unicode/utypes.h"
    32 #if !UCONFIG_NO_CONVERSION
    34 #include "unicode/ucnv.h"
    35 #include "unicode/udata.h"
    37 #include "umutex.h"
    38 #include "uarrsort.h"
    39 #include "uassert.h"
    40 #include "udataswp.h"
    41 #include "cstring.h"
    42 #include "cmemory.h"
    43 #include "ucnv_io.h"
    44 #include "uenumimp.h"
    45 #include "ucln_cmn.h"
    47 /* Format of cnvalias.icu -----------------------------------------------------
    48  *
    49  * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
    50  * This binary form contains several tables. All indexes are to uint16_t
    51  * units, and not to the bytes (uint8_t units). Addressing everything on
    52  * 16-bit boundaries allows us to store more information with small index
    53  * numbers, which are also 16-bit in size. The majority of the table (except
    54  * the string table) are 16-bit numbers.
    55  *
    56  * First there is the size of the Table of Contents (TOC). The TOC
    57  * entries contain the size of each section. In order to find the offset
    58  * of a section you just need to sum up the sizes of the preceding sections.
    59  * The TOC length and entries are an array of uint32_t values.
    60  * The first section after the TOC starts immediately after the TOC.
    61  *
    62  * 1) This section contains a list of converters. This list contains indexes
    63  * into the string table for the converter name. The index of this list is
    64  * also used by other sections, which are mentioned later on.
    65  * This list is not sorted.
    66  *
    67  * 2) This section contains a list of tags. This list contains indexes
    68  * into the string table for the tag name. The index of this list is
    69  * also used by other sections, which are mentioned later on.
    70  * This list is in priority order of standards.
    71  *
    72  * 3) This section contains a list of sorted unique aliases. This
    73  * list contains indexes into the string table for the alias name. The
    74  * index of this list is also used by other sections, like the 4th section.
    75  * The index for the 3rd and 4th section is used to get the
    76  * alias -> converter name mapping. Section 3 and 4 form a two column table.
    77  * Some of the most significant bits of each index may contain other
    78  * information (see findConverter for details).
    79  *
    80  * 4) This section contains a list of mapped converter names. Consider this
    81  * as a table that maps the 3rd section to the 1st section. This list contains
    82  * indexes into the 1st section. The index of this list is the same index in
    83  * the 3rd section. There is also some extra information in the high bits of
    84  * each converter index in this table. Currently it's only used to say that
    85  * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
    86  * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
    87  * the predigested form of the 5th section so that an alias lookup can be fast.
    88  *
    89  * 5) This section contains a 2D array with indexes to the 6th section. This
    90  * section is the full form of all alias mappings. The column index is the
    91  * index into the converter list (column header). The row index is the index
    92  * to tag list (row header). This 2D array is the top part of a 3D array. The
    93  * third dimension is in the 6th section.
    94  *
    95  * 6) This is a blob of variable-length arrays. Each array starts with a size,
    96  * and is followed by indexes to alias names in the string table. This is
    97  * the third dimension to the section 5. No other section should be referencing
    98  * this section.
    99  *
   100  * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
   101  * presence indicates that a section 9 exists. UConverterAliasOptions specifies
   102  * what type of string normalization is used among other potential things in the
   103  * future.
   104  *
   105  * 8) This is the string table. All strings are indexed on an even address.
   106  * There are two reasons for this. First many chip architectures locate strings
   107  * faster on even address boundaries. Second, since all indexes are 16-bit
   108  * numbers, this string table can be 128KB in size instead of 64KB when we
   109  * only have strings starting on an even address.
   110  *
   111  * 9) When present this is a set of prenormalized strings from section 8. This
   112  * table contains normalized strings with the dashes and spaces stripped out,
   113  * and all strings lowercased. In the future, the options in section 7 may state
   114  * other types of normalization.
   115  *
   116  * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
   117  * has a unique alias among all converters. That same alias can
   118  * be mentioned in other standards on different converters,
   119  * but only one alias per tag can be unique.
   120  *
   121  *
   122  *              Converter Names (Usually in TR22 form)
   123  *           -------------------------------------------.
   124  *     T    /                                          /|
   125  *     a   /                                          / |
   126  *     g  /                                          /  |
   127  *     s /                                          /   |
   128  *      /                                          /    |
   129  *      ------------------------------------------/     |
   130  *    A |                                         |     |
   131  *    l |                                         |     |
   132  *    i |                                         |    /
   133  *    a |                                         |   /
   134  *    s |                                         |  /
   135  *    e |                                         | /
   136  *    s |                                         |/
   137  *      -------------------------------------------
   138  *
   139  *
   140  *
   141  * Here is what it really looks like. It's like swiss cheese.
   142  * There are holes. Some converters aren't recognized by
   143  * a standard, or they are really old converters that the
   144  * standard doesn't recognize anymore.
   145  *
   146  *              Converter Names (Usually in TR22 form)
   147  *           -------------------------------------------.
   148  *     T    /##########################################/|
   149  *     a   /     #            #                       /#
   150  *     g  /  #      ##     ##     ### # ### ### ### #/
   151  *     s / #             #####  ####        ##  ## #/#
   152  *      / ### # # ##  #  #   #          ### # #   #/##
   153  *      ------------------------------------------/# #
   154  *    A |### # # ##  #  #   #          ### # #   #|# #
   155  *    l |# # #    #     #               ## #     #|# #
   156  *    i |# # #    #     #                #       #|#
   157  *    a |#                                       #|#
   158  *    s |                                        #|#
   159  *    e
   160  *    s
   161  *
   162  */
   164 /**
   165  * Used by the UEnumeration API
   166  */
   167 typedef struct UAliasContext {
   168     uint32_t listOffset;
   169     uint32_t listIdx;
   170 } UAliasContext;
   172 static const char DATA_NAME[] = "cnvalias";
   173 static const char DATA_TYPE[] = "icu";
   175 static UDataMemory *gAliasData=NULL;
   176 static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER;
   178 enum {
   179     tocLengthIndex=0,
   180     converterListIndex=1,
   181     tagListIndex=2,
   182     aliasListIndex=3,
   183     untaggedConvArrayIndex=4,
   184     taggedAliasArrayIndex=5,
   185     taggedAliasListsIndex=6,
   186     tableOptionsIndex=7,
   187     stringTableIndex=8,
   188     normalizedStringTableIndex=9,
   189     offsetsCount,    /* length of the swapper's temporary offsets[] */
   190     minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
   191 };
   193 static const UConverterAliasOptions defaultTableOptions = {
   194     UCNV_IO_UNNORMALIZED,
   195     0 /* containsCnvOptionInfo */
   196 };
   197 static UConverterAlias gMainTable;
   199 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
   200 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
   202 static UBool U_CALLCONV
   203 isAcceptable(void * /*context*/,
   204              const char * /*type*/, const char * /*name*/,
   205              const UDataInfo *pInfo) {
   206     return (UBool)(
   207         pInfo->size>=20 &&
   208         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
   209         pInfo->charsetFamily==U_CHARSET_FAMILY &&
   210         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
   211         pInfo->dataFormat[1]==0x76 &&
   212         pInfo->dataFormat[2]==0x41 &&
   213         pInfo->dataFormat[3]==0x6c &&
   214         pInfo->formatVersion[0]==3);
   215 }
   217 static UBool U_CALLCONV ucnv_io_cleanup(void)
   218 {
   219     if (gAliasData) {
   220         udata_close(gAliasData);
   221         gAliasData = NULL;
   222     }
   223     gAliasDataInitOnce.reset();
   225     uprv_memset(&gMainTable, 0, sizeof(gMainTable));
   227     return TRUE;                   /* Everything was cleaned up */
   228 }
   230 static void U_CALLCONV initAliasData(UErrorCode &errCode) {
   231     UDataMemory *data;
   232     const uint16_t *table;
   233     const uint32_t *sectionSizes;
   234     uint32_t tableStart;
   235     uint32_t currOffset;
   237     ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
   239     U_ASSERT(gAliasData == NULL);
   240     data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
   241     if(U_FAILURE(errCode)) {
   242         return;
   243     }
   245     sectionSizes = (const uint32_t *)udata_getMemory(data);
   246     table = (const uint16_t *)sectionSizes;
   248     tableStart      = sectionSizes[0];
   249     if (tableStart < minTocLength) {
   250         errCode = U_INVALID_FORMAT_ERROR;
   251         udata_close(data);
   252         return;
   253     }
   254     gAliasData = data;
   256     gMainTable.converterListSize      = sectionSizes[1];
   257     gMainTable.tagListSize            = sectionSizes[2];
   258     gMainTable.aliasListSize          = sectionSizes[3];
   259     gMainTable.untaggedConvArraySize  = sectionSizes[4];
   260     gMainTable.taggedAliasArraySize   = sectionSizes[5];
   261     gMainTable.taggedAliasListsSize   = sectionSizes[6];
   262     gMainTable.optionTableSize        = sectionSizes[7];
   263     gMainTable.stringTableSize        = sectionSizes[8];
   265     if (tableStart > 8) {
   266         gMainTable.normalizedStringTableSize = sectionSizes[9];
   267     }
   269     currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
   270     gMainTable.converterList = table + currOffset;
   272     currOffset += gMainTable.converterListSize;
   273     gMainTable.tagList = table + currOffset;
   275     currOffset += gMainTable.tagListSize;
   276     gMainTable.aliasList = table + currOffset;
   278     currOffset += gMainTable.aliasListSize;
   279     gMainTable.untaggedConvArray = table + currOffset;
   281     currOffset += gMainTable.untaggedConvArraySize;
   282     gMainTable.taggedAliasArray = table + currOffset;
   284     /* aliasLists is a 1's based array, but it has a padding character */
   285     currOffset += gMainTable.taggedAliasArraySize;
   286     gMainTable.taggedAliasLists = table + currOffset;
   288     currOffset += gMainTable.taggedAliasListsSize;
   289     if (gMainTable.optionTableSize > 0
   290         && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
   291     {
   292         /* Faster table */
   293         gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
   294     }
   295     else {
   296         /* Smaller table, or I can't handle this normalization mode!
   297         Use the original slower table lookup. */
   298         gMainTable.optionTable = &defaultTableOptions;
   299     }
   301     currOffset += gMainTable.optionTableSize;
   302     gMainTable.stringTable = table + currOffset;
   304     currOffset += gMainTable.stringTableSize;
   305     gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
   306         ? gMainTable.stringTable : (table + currOffset));
   307 }
   310 static UBool
   311 haveAliasData(UErrorCode *pErrorCode) {
   312     umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
   313     return U_SUCCESS(*pErrorCode);
   314 }
   316 static inline UBool
   317 isAlias(const char *alias, UErrorCode *pErrorCode) {
   318     if(alias==NULL) {
   319         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   320         return FALSE;
   321     }
   322     return (UBool)(*alias!=0);
   323 }
   325 static uint32_t getTagNumber(const char *tagname) {
   326     if (gMainTable.tagList) {
   327         uint32_t tagNum;
   328         for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
   329             if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
   330                 return tagNum;
   331             }
   332         }
   333     }
   335     return UINT32_MAX;
   336 }
   338 /* character types relevant for ucnv_compareNames() */
   339 enum {
   340     UIGNORE,
   341     ZERO,
   342     NONZERO,
   343     MINLETTER /* any values from here on are lowercase letter mappings */
   344 };
   346 /* character types for ASCII 00..7F */
   347 static const uint8_t asciiTypes[128] = {
   348     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   349     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   350     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   351     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
   352     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
   353     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
   354     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
   355     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
   356 };
   358 #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)UIGNORE)
   360 /* character types for EBCDIC 80..FF */
   361 static const uint8_t ebcdicTypes[128] = {
   362     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
   363     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
   364     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
   365     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   366     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
   367     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
   368     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
   369     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
   370 };
   372 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
   374 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
   375 #   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
   376 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
   377 #   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
   378 #else
   379 #   error U_CHARSET_FAMILY is not valid
   380 #endif
   382 /* @see ucnv_compareNames */
   383 U_CFUNC char * U_EXPORT2
   384 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
   385     char *dstItr = dst;
   386     uint8_t type, nextType;
   387     char c1;
   388     UBool afterDigit = FALSE;
   390     while ((c1 = *name++) != 0) {
   391         type = GET_ASCII_TYPE(c1);
   392         switch (type) {
   393         case UIGNORE:
   394             afterDigit = FALSE;
   395             continue; /* ignore all but letters and digits */
   396         case ZERO:
   397             if (!afterDigit) {
   398                 nextType = GET_ASCII_TYPE(*name);
   399                 if (nextType == ZERO || nextType == NONZERO) {
   400                     continue; /* ignore leading zero before another digit */
   401                 }
   402             }
   403             break;
   404         case NONZERO:
   405             afterDigit = TRUE;
   406             break;
   407         default:
   408             c1 = (char)type; /* lowercased letter */
   409             afterDigit = FALSE;
   410             break;
   411         }
   412         *dstItr++ = c1;
   413     }
   414     *dstItr = 0;
   415     return dst;
   416 }
   418 U_CFUNC char * U_EXPORT2
   419 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
   420     char *dstItr = dst;
   421     uint8_t type, nextType;
   422     char c1;
   423     UBool afterDigit = FALSE;
   425     while ((c1 = *name++) != 0) {
   426         type = GET_EBCDIC_TYPE(c1);
   427         switch (type) {
   428         case UIGNORE:
   429             afterDigit = FALSE;
   430             continue; /* ignore all but letters and digits */
   431         case ZERO:
   432             if (!afterDigit) {
   433                 nextType = GET_EBCDIC_TYPE(*name);
   434                 if (nextType == ZERO || nextType == NONZERO) {
   435                     continue; /* ignore leading zero before another digit */
   436                 }
   437             }
   438             break;
   439         case NONZERO:
   440             afterDigit = TRUE;
   441             break;
   442         default:
   443             c1 = (char)type; /* lowercased letter */
   444             afterDigit = FALSE;
   445             break;
   446         }
   447         *dstItr++ = c1;
   448     }
   449     *dstItr = 0;
   450     return dst;
   451 }
   453 /**
   454  * Do a fuzzy compare of two converter/alias names.
   455  * The comparison is case-insensitive, ignores leading zeroes if they are not
   456  * followed by further digits, and ignores all but letters and digits.
   457  * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
   458  * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
   459  * at http://www.unicode.org/reports/tr22/
   460  *
   461  * This is a symmetrical (commutative) operation; order of arguments
   462  * is insignificant.  This is an important property for sorting the
   463  * list (when the list is preprocessed into binary form) and for
   464  * performing binary searches on it at run time.
   465  *
   466  * @param name1 a converter name or alias, zero-terminated
   467  * @param name2 a converter name or alias, zero-terminated
   468  * @return 0 if the names match, or a negative value if the name1
   469  * lexically precedes name2, or a positive value if the name1
   470  * lexically follows name2.
   471  *
   472  * @see ucnv_io_stripForCompare
   473  */
   474 U_CAPI int U_EXPORT2
   475 ucnv_compareNames(const char *name1, const char *name2) {
   476     int rc;
   477     uint8_t type, nextType;
   478     char c1, c2;
   479     UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
   481     for (;;) {
   482         while ((c1 = *name1++) != 0) {
   483             type = GET_CHAR_TYPE(c1);
   484             switch (type) {
   485             case UIGNORE:
   486                 afterDigit1 = FALSE;
   487                 continue; /* ignore all but letters and digits */
   488             case ZERO:
   489                 if (!afterDigit1) {
   490                     nextType = GET_CHAR_TYPE(*name1);
   491                     if (nextType == ZERO || nextType == NONZERO) {
   492                         continue; /* ignore leading zero before another digit */
   493                     }
   494                 }
   495                 break;
   496             case NONZERO:
   497                 afterDigit1 = TRUE;
   498                 break;
   499             default:
   500                 c1 = (char)type; /* lowercased letter */
   501                 afterDigit1 = FALSE;
   502                 break;
   503             }
   504             break; /* deliver c1 */
   505         }
   506         while ((c2 = *name2++) != 0) {
   507             type = GET_CHAR_TYPE(c2);
   508             switch (type) {
   509             case UIGNORE:
   510                 afterDigit2 = FALSE;
   511                 continue; /* ignore all but letters and digits */
   512             case ZERO:
   513                 if (!afterDigit2) {
   514                     nextType = GET_CHAR_TYPE(*name2);
   515                     if (nextType == ZERO || nextType == NONZERO) {
   516                         continue; /* ignore leading zero before another digit */
   517                     }
   518                 }
   519                 break;
   520             case NONZERO:
   521                 afterDigit2 = TRUE;
   522                 break;
   523             default:
   524                 c2 = (char)type; /* lowercased letter */
   525                 afterDigit2 = FALSE;
   526                 break;
   527             }
   528             break; /* deliver c2 */
   529         }
   531         /* If we reach the ends of both strings then they match */
   532         if ((c1|c2)==0) {
   533             return 0;
   534         }
   536         /* Case-insensitive comparison */
   537         rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
   538         if (rc != 0) {
   539             return rc;
   540         }
   541     }
   542 }
   544 /*
   545  * search for an alias
   546  * return the converter number index for gConverterList
   547  */
   548 static inline uint32_t
   549 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
   550     uint32_t mid, start, limit;
   551     uint32_t lastMid;
   552     int result;
   553     int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
   554     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
   556     if (!isUnnormalized) {
   557         if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
   558             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
   559             return UINT32_MAX;
   560         }
   562         /* Lower case and remove ignoreable characters. */
   563         ucnv_io_stripForCompare(strippedName, alias);
   564         alias = strippedName;
   565     }
   567     /* do a binary search for the alias */
   568     start = 0;
   569     limit = gMainTable.untaggedConvArraySize;
   570     mid = limit;
   571     lastMid = UINT32_MAX;
   573     for (;;) {
   574         mid = (uint32_t)((start + limit) / 2);
   575         if (lastMid == mid) {   /* Have we moved? */
   576             break;  /* We haven't moved, and it wasn't found. */
   577         }
   578         lastMid = mid;
   579         if (isUnnormalized) {
   580             result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
   581         }
   582         else {
   583             result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
   584         }
   586         if (result < 0) {
   587             limit = mid;
   588         } else if (result > 0) {
   589             start = mid;
   590         } else {
   591             /* Since the gencnval tool folds duplicates into one entry,
   592              * this alias in gAliasList is unique, but different standards
   593              * may map an alias to different converters.
   594              */
   595             if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
   596                 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
   597             }
   598             /* State whether the canonical converter name contains an option.
   599             This information is contained in this list in order to maintain backward & forward compatibility. */
   600             if (containsOption) {
   601                 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
   602                 *containsOption = (UBool)((containsCnvOptionInfo
   603                     && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
   604                     || !containsCnvOptionInfo);
   605             }
   606             return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
   607         }
   608     }
   610     return UINT32_MAX;
   611 }
   613 /*
   614  * Is this alias in this list?
   615  * alias and listOffset should be non-NULL.
   616  */
   617 static inline UBool
   618 isAliasInList(const char *alias, uint32_t listOffset) {
   619     if (listOffset) {
   620         uint32_t currAlias;
   621         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
   622         /* +1 to skip listCount */
   623         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
   624         for (currAlias = 0; currAlias < listCount; currAlias++) {
   625             if (currList[currAlias]
   626                 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
   627             {
   628                 return TRUE;
   629             }
   630         }
   631     }
   632     return FALSE;
   633 }
   635 /*
   636  * Search for an standard name of an alias (what is the default name
   637  * that this standard uses?)
   638  * return the listOffset for gTaggedAliasLists. If it's 0,
   639  * the it couldn't be found, but the parameters are valid.
   640  */
   641 static uint32_t
   642 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
   643     uint32_t idx;
   644     uint32_t listOffset;
   645     uint32_t convNum;
   646     UErrorCode myErr = U_ZERO_ERROR;
   647     uint32_t tagNum = getTagNumber(standard);
   649     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
   650     convNum = findConverter(alias, NULL, &myErr);
   651     if (myErr != U_ZERO_ERROR) {
   652         *pErrorCode = myErr;
   653     }
   655     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
   656         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
   657         if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
   658             return listOffset;
   659         }
   660         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
   661             /* Uh Oh! They used an ambiguous alias.
   662                We have to search the whole swiss cheese starting
   663                at the highest standard affinity.
   664                This may take a while.
   665             */
   666             for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
   667                 listOffset = gMainTable.taggedAliasArray[idx];
   668                 if (listOffset && isAliasInList(alias, listOffset)) {
   669                     uint32_t currTagNum = idx/gMainTable.converterListSize;
   670                     uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
   671                     uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
   672                     if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
   673                         return tempListOffset;
   674                     }
   675                     /* else keep on looking */
   676                     /* We could speed this up by starting on the next row
   677                        because an alias is unique per row, right now.
   678                        This would change if alias versioning appears. */
   679                 }
   680             }
   681             /* The standard doesn't know about the alias */
   682         }
   683         /* else no default name */
   684         return 0;
   685     }
   686     /* else converter or tag not found */
   688     return UINT32_MAX;
   689 }
   691 /* Return the canonical name */
   692 static uint32_t
   693 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
   694     uint32_t idx;
   695     uint32_t listOffset;
   696     uint32_t convNum;
   697     UErrorCode myErr = U_ZERO_ERROR;
   698     uint32_t tagNum = getTagNumber(standard);
   700     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
   701     convNum = findConverter(alias, NULL, &myErr);
   702     if (myErr != U_ZERO_ERROR) {
   703         *pErrorCode = myErr;
   704     }
   706     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
   707         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
   708         if (listOffset && isAliasInList(alias, listOffset)) {
   709             return convNum;
   710         }
   711         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
   712             /* Uh Oh! They used an ambiguous alias.
   713                We have to search one slice of the swiss cheese.
   714                We search only in the requested tag, not the whole thing.
   715                This may take a while.
   716             */
   717             uint32_t convStart = (tagNum)*gMainTable.converterListSize;
   718             uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
   719             for (idx = convStart; idx < convLimit; idx++) {
   720                 listOffset = gMainTable.taggedAliasArray[idx];
   721                 if (listOffset && isAliasInList(alias, listOffset)) {
   722                     return idx-convStart;
   723                 }
   724             }
   725             /* The standard doesn't know about the alias */
   726         }
   727         /* else no canonical name */
   728     }
   729     /* else converter or tag not found */
   731     return UINT32_MAX;
   732 }
   736 U_CFUNC const char *
   737 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
   738     const char *aliasTmp = alias;
   739     int32_t i = 0;
   740     for (i = 0; i < 2; i++) {
   741         if (i == 1) {
   742             /*
   743              * After the first unsuccess converter lookup, check to see if
   744              * the name begins with 'x-'. If it does, strip it off and try
   745              * again.  This behaviour is similar to how ICU4J does it.
   746              */
   747             if (aliasTmp[0] == 'x' || aliasTmp[1] == '-') {
   748                 aliasTmp = aliasTmp+2;
   749             } else {
   750                 break;
   751             }
   752         }
   753         if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
   754             uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
   755             if (convNum < gMainTable.converterListSize) {
   756                 return GET_STRING(gMainTable.converterList[convNum]);
   757             }
   758             /* else converter not found */
   759         } else {
   760             break;
   761         }
   762     }
   764     return NULL;
   765 }
   767 static int32_t U_CALLCONV
   768 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
   769     int32_t value = 0;
   770     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
   771     uint32_t listOffset = myContext->listOffset;
   773     if (listOffset) {
   774         value = gMainTable.taggedAliasLists[listOffset];
   775     }
   776     return value;
   777 }
   779 static const char* U_CALLCONV
   780 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
   781                             int32_t* resultLength,
   782                             UErrorCode * /*pErrorCode*/)
   783 {
   784     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
   785     uint32_t listOffset = myContext->listOffset;
   787     if (listOffset) {
   788         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
   789         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
   791         if (myContext->listIdx < listCount) {
   792             const char *myStr = GET_STRING(currList[myContext->listIdx++]);
   793             if (resultLength) {
   794                 *resultLength = (int32_t)uprv_strlen(myStr);
   795             }
   796             return myStr;
   797         }
   798     }
   799     /* Either we accessed a zero length list, or we enumerated too far. */
   800     if (resultLength) {
   801         *resultLength = 0;
   802     }
   803     return NULL;
   804 }
   806 static void U_CALLCONV
   807 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
   808     ((UAliasContext *)(enumerator->context))->listIdx = 0;
   809 }
   811 static void U_CALLCONV
   812 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
   813     uprv_free(enumerator->context);
   814     uprv_free(enumerator);
   815 }
   817 /* Enumerate the aliases for the specified converter and standard tag */
   818 static const UEnumeration gEnumAliases = {
   819     NULL,
   820     NULL,
   821     ucnv_io_closeUEnumeration,
   822     ucnv_io_countStandardAliases,
   823     uenum_unextDefault,
   824     ucnv_io_nextStandardAliases,
   825     ucnv_io_resetStandardAliases
   826 };
   828 U_CAPI UEnumeration * U_EXPORT2
   829 ucnv_openStandardNames(const char *convName,
   830                        const char *standard,
   831                        UErrorCode *pErrorCode)
   832 {
   833     UEnumeration *myEnum = NULL;
   834     if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
   835         uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
   837         /* When listOffset == 0, we want to acknowledge that the
   838            converter name and standard are okay, but there
   839            is nothing to enumerate. */
   840         if (listOffset < gMainTable.taggedAliasListsSize) {
   841             UAliasContext *myContext;
   843             myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
   844             if (myEnum == NULL) {
   845                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   846                 return NULL;
   847             }
   848             uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
   849             myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
   850             if (myContext == NULL) {
   851                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   852                 uprv_free(myEnum);
   853                 return NULL;
   854             }
   855             myContext->listOffset = listOffset;
   856             myContext->listIdx = 0;
   857             myEnum->context = myContext;
   858         }
   859         /* else converter or tag not found */
   860     }
   861     return myEnum;
   862 }
   864 static uint16_t
   865 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
   866     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
   867         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
   868         if (convNum < gMainTable.converterListSize) {
   869             /* tagListNum - 1 is the ALL tag */
   870             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
   872             if (listOffset) {
   873                 return gMainTable.taggedAliasLists[listOffset];
   874             }
   875             /* else this shouldn't happen. internal program error */
   876         }
   877         /* else converter not found */
   878     }
   879     return 0;
   880 }
   882 static uint16_t
   883 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
   884     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
   885         uint32_t currAlias;
   886         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
   887         if (convNum < gMainTable.converterListSize) {
   888             /* tagListNum - 1 is the ALL tag */
   889             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
   891             if (listOffset) {
   892                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
   893                 /* +1 to skip listCount */
   894                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
   896                 for (currAlias = start; currAlias < listCount; currAlias++) {
   897                     aliases[currAlias] = GET_STRING(currList[currAlias]);
   898                 }
   899             }
   900             /* else this shouldn't happen. internal program error */
   901         }
   902         /* else converter not found */
   903     }
   904     return 0;
   905 }
   907 static const char *
   908 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
   909     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
   910         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
   911         if (convNum < gMainTable.converterListSize) {
   912             /* tagListNum - 1 is the ALL tag */
   913             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
   915             if (listOffset) {
   916                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
   917                 /* +1 to skip listCount */
   918                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
   920                 if (n < listCount)  {
   921                     return GET_STRING(currList[n]);
   922                 }
   923                 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
   924             }
   925             /* else this shouldn't happen. internal program error */
   926         }
   927         /* else converter not found */
   928     }
   929     return NULL;
   930 }
   932 static uint16_t
   933 ucnv_io_countStandards(UErrorCode *pErrorCode) {
   934     if (haveAliasData(pErrorCode)) {
   935         /* Don't include the empty list */
   936         return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
   937     }
   939     return 0;
   940 }
   942 U_CAPI const char * U_EXPORT2
   943 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
   944     if (haveAliasData(pErrorCode)) {
   945         if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
   946             return GET_STRING(gMainTable.tagList[n]);
   947         }
   948         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
   949     }
   951     return NULL;
   952 }
   954 U_CAPI const char * U_EXPORT2
   955 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
   956     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
   957         uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
   959         if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
   960             const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
   962             /* Get the preferred name from this list */
   963             if (currList[0]) {
   964                 return GET_STRING(currList[0]);
   965             }
   966             /* else someone screwed up the alias table. */
   967             /* *pErrorCode = U_INVALID_FORMAT_ERROR */
   968         }
   969     }
   971     return NULL;
   972 }
   974 U_CAPI uint16_t U_EXPORT2
   975 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
   976 {
   977     return ucnv_io_countAliases(alias, pErrorCode);
   978 }
   981 U_CAPI const char* U_EXPORT2
   982 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
   983 {
   984     return ucnv_io_getAlias(alias, n, pErrorCode);
   985 }
   987 U_CAPI void U_EXPORT2
   988 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
   989 {
   990     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
   991 }
   993 U_CAPI uint16_t U_EXPORT2
   994 ucnv_countStandards(void)
   995 {
   996     UErrorCode err = U_ZERO_ERROR;
   997     return ucnv_io_countStandards(&err);
   998 }
  1000 U_CAPI const char * U_EXPORT2
  1001 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
  1002     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
  1003         uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
  1005         if (convNum < gMainTable.converterListSize) {
  1006             return GET_STRING(gMainTable.converterList[convNum]);
  1010     return NULL;
  1013 static int32_t U_CALLCONV
  1014 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
  1015     return gMainTable.converterListSize;
  1018 static const char* U_CALLCONV
  1019 ucnv_io_nextAllConverters(UEnumeration *enumerator,
  1020                             int32_t* resultLength,
  1021                             UErrorCode * /*pErrorCode*/)
  1023     uint16_t *myContext = (uint16_t *)(enumerator->context);
  1025     if (*myContext < gMainTable.converterListSize) {
  1026         const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
  1027         if (resultLength) {
  1028             *resultLength = (int32_t)uprv_strlen(myStr);
  1030         return myStr;
  1032     /* Either we accessed a zero length list, or we enumerated too far. */
  1033     if (resultLength) {
  1034         *resultLength = 0;
  1036     return NULL;
  1039 static void U_CALLCONV
  1040 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
  1041     *((uint16_t *)(enumerator->context)) = 0;
  1044 static const UEnumeration gEnumAllConverters = {
  1045     NULL,
  1046     NULL,
  1047     ucnv_io_closeUEnumeration,
  1048     ucnv_io_countAllConverters,
  1049     uenum_unextDefault,
  1050     ucnv_io_nextAllConverters,
  1051     ucnv_io_resetAllConverters
  1052 };
  1054 U_CAPI UEnumeration * U_EXPORT2
  1055 ucnv_openAllNames(UErrorCode *pErrorCode) {
  1056     UEnumeration *myEnum = NULL;
  1057     if (haveAliasData(pErrorCode)) {
  1058         uint16_t *myContext;
  1060         myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
  1061         if (myEnum == NULL) {
  1062             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
  1063             return NULL;
  1065         uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
  1066         myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
  1067         if (myContext == NULL) {
  1068             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
  1069             uprv_free(myEnum);
  1070             return NULL;
  1072         *myContext = 0;
  1073         myEnum->context = myContext;
  1075     return myEnum;
  1078 U_CFUNC uint16_t
  1079 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
  1080     if (haveAliasData(pErrorCode)) {
  1081         return (uint16_t)gMainTable.converterListSize;
  1083     return 0;
  1086 /* alias table swapping ----------------------------------------------------- */
  1088 typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
  1090 /*
  1091  * row of a temporary array
  1093  * gets platform-endian charset string indexes and sorting indexes;
  1094  * after sorting this array by strings, the actual arrays are permutated
  1095  * according to the sorting indexes
  1096  */
  1097 typedef struct TempRow {
  1098     uint16_t strIndex, sortIndex;
  1099 } TempRow;
  1101 typedef struct TempAliasTable {
  1102     const char *chars;
  1103     TempRow *rows;
  1104     uint16_t *resort;
  1105     StripForCompareFn *stripForCompare;
  1106 } TempAliasTable;
  1108 enum {
  1109     STACK_ROW_CAPACITY=500
  1110 };
  1112 static int32_t
  1113 io_compareRows(const void *context, const void *left, const void *right) {
  1114     char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
  1115          strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
  1117     TempAliasTable *tempTable=(TempAliasTable *)context;
  1118     const char *chars=tempTable->chars;
  1120     return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
  1121                                 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
  1124 U_CAPI int32_t U_EXPORT2
  1125 ucnv_swapAliases(const UDataSwapper *ds,
  1126                  const void *inData, int32_t length, void *outData,
  1127                  UErrorCode *pErrorCode) {
  1128     const UDataInfo *pInfo;
  1129     int32_t headerSize;
  1131     const uint16_t *inTable;
  1132     const uint32_t *inSectionSizes;
  1133     uint32_t toc[offsetsCount];
  1134     uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
  1135     uint32_t i, count, tocLength, topOffset;
  1137     TempRow rows[STACK_ROW_CAPACITY];
  1138     uint16_t resort[STACK_ROW_CAPACITY];
  1139     TempAliasTable tempTable;
  1141     /* udata_swapDataHeader checks the arguments */
  1142     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
  1143     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  1144         return 0;
  1147     /* check data format and format version */
  1148     pInfo=(const UDataInfo *)((const char *)inData+4);
  1149     if(!(
  1150         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
  1151         pInfo->dataFormat[1]==0x76 &&
  1152         pInfo->dataFormat[2]==0x41 &&
  1153         pInfo->dataFormat[3]==0x6c &&
  1154         pInfo->formatVersion[0]==3
  1155     )) {
  1156         udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
  1157                          pInfo->dataFormat[0], pInfo->dataFormat[1],
  1158                          pInfo->dataFormat[2], pInfo->dataFormat[3],
  1159                          pInfo->formatVersion[0]);
  1160         *pErrorCode=U_UNSUPPORTED_ERROR;
  1161         return 0;
  1164     /* an alias table must contain at least the table of contents array */
  1165     if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
  1166         udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
  1167                          length-headerSize);
  1168         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  1169         return 0;
  1172     inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
  1173     inTable=(const uint16_t *)inSectionSizes;
  1174     uprv_memset(toc, 0, sizeof(toc));
  1175     toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
  1176     if(tocLength<minTocLength || offsetsCount<=tocLength) {
  1177         udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
  1178         *pErrorCode=U_INVALID_FORMAT_ERROR;
  1179         return 0;
  1182     /* read the known part of the table of contents */
  1183     for(i=converterListIndex; i<=tocLength; ++i) {
  1184         toc[i]=ds->readUInt32(inSectionSizes[i]);
  1187     /* compute offsets */
  1188     uprv_memset(offsets, 0, sizeof(offsets));
  1189     offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
  1190     for(i=tagListIndex; i<=tocLength; ++i) {
  1191         offsets[i]=offsets[i-1]+toc[i-1];
  1194     /* compute the overall size of the after-header data, in numbers of 16-bit units */
  1195     topOffset=offsets[i-1]+toc[i-1];
  1197     if(length>=0) {
  1198         uint16_t *outTable;
  1199         const uint16_t *p, *p2;
  1200         uint16_t *q, *q2;
  1201         uint16_t oldIndex;
  1203         if((length-headerSize)<(2*(int32_t)topOffset)) {
  1204             udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
  1205                              length-headerSize);
  1206             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  1207             return 0;
  1210         outTable=(uint16_t *)((char *)outData+headerSize);
  1212         /* swap the entire table of contents */
  1213         ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
  1215         /* swap unormalized strings & normalized strings */
  1216         ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
  1217                              outTable+offsets[stringTableIndex], pErrorCode);
  1218         if(U_FAILURE(*pErrorCode)) {
  1219             udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
  1220             return 0;
  1223         if(ds->inCharset==ds->outCharset) {
  1224             /* no need to sort, just swap all 16-bit values together */
  1225             ds->swapArray16(ds,
  1226                             inTable+offsets[converterListIndex],
  1227                             2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
  1228                             outTable+offsets[converterListIndex],
  1229                             pErrorCode);
  1230         } else {
  1231             /* allocate the temporary table for sorting */
  1232             count=toc[aliasListIndex];
  1234             tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
  1236             if(count<=STACK_ROW_CAPACITY) {
  1237                 tempTable.rows=rows;
  1238                 tempTable.resort=resort;
  1239             } else {
  1240                 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
  1241                 if(tempTable.rows==NULL) {
  1242                     udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
  1243                                      count);
  1244                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  1245                     return 0;
  1247                 tempTable.resort=(uint16_t *)(tempTable.rows+count);
  1250             if(ds->outCharset==U_ASCII_FAMILY) {
  1251                 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
  1252             } else /* U_EBCDIC_FAMILY */ {
  1253                 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
  1256             /*
  1257              * Sort unique aliases+mapped names.
  1259              * We need to sort the list again by outCharset strings because they
  1260              * sort differently for different charset families.
  1261              * First we set up a temporary table with the string indexes and
  1262              * sorting indexes and sort that.
  1263              * Then we permutate and copy/swap the actual values.
  1264              */
  1265             p=inTable+offsets[aliasListIndex];
  1266             q=outTable+offsets[aliasListIndex];
  1268             p2=inTable+offsets[untaggedConvArrayIndex];
  1269             q2=outTable+offsets[untaggedConvArrayIndex];
  1271             for(i=0; i<count; ++i) {
  1272                 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
  1273                 tempTable.rows[i].sortIndex=(uint16_t)i;
  1276             uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
  1277                            io_compareRows, &tempTable,
  1278                            FALSE, pErrorCode);
  1280             if(U_SUCCESS(*pErrorCode)) {
  1281                 /* copy/swap/permutate items */
  1282                 if(p!=q) {
  1283                     for(i=0; i<count; ++i) {
  1284                         oldIndex=tempTable.rows[i].sortIndex;
  1285                         ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
  1286                         ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
  1288                 } else {
  1289                     /*
  1290                      * If we swap in-place, then the permutation must use another
  1291                      * temporary array (tempTable.resort)
  1292                      * before the results are copied to the outBundle.
  1293                      */
  1294                     uint16_t *r=tempTable.resort;
  1296                     for(i=0; i<count; ++i) {
  1297                         oldIndex=tempTable.rows[i].sortIndex;
  1298                         ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
  1300                     uprv_memcpy(q, r, 2*count);
  1302                     for(i=0; i<count; ++i) {
  1303                         oldIndex=tempTable.rows[i].sortIndex;
  1304                         ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
  1306                     uprv_memcpy(q2, r, 2*count);
  1310             if(tempTable.rows!=rows) {
  1311                 uprv_free(tempTable.rows);
  1314             if(U_FAILURE(*pErrorCode)) {
  1315                 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
  1316                                  count);
  1317                 return 0;
  1320             /* swap remaining 16-bit values */
  1321             ds->swapArray16(ds,
  1322                             inTable+offsets[converterListIndex],
  1323                             2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
  1324                             outTable+offsets[converterListIndex],
  1325                             pErrorCode);
  1326             ds->swapArray16(ds,
  1327                             inTable+offsets[taggedAliasArrayIndex],
  1328                             2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
  1329                             outTable+offsets[taggedAliasArrayIndex],
  1330                             pErrorCode);
  1334     return headerSize+2*(int32_t)topOffset;
  1337 #endif
  1340 /*
  1341  * Hey, Emacs, please set the following:
  1343  * Local Variables:
  1344  * indent-tabs-mode: nil
  1345  * End:
  1347  */

mercurial