The Tor Browser: intl/icu/source/common/ucol_swp.cpp

Cloned from the upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
(revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f) for hacking purposes.

     1 /*

     2 *******************************************************************************

3 *

     4 *   Copyright (C) 2003-2012, International Business Machines

     5 *   Corporation and others.  All Rights Reserved.

6 *

     7 *******************************************************************************

     8 *   file name:  ucol_swp.cpp

     9 *   encoding:   US-ASCII

    10 *   tab size:   8 (not used)

    11 *   indentation:4

    12 *

    13 *   created on: 2003sep10

    14 *   created by: Markus W. Scherer

    15 *

    16 *   Swap collation binaries.

    17 */

    19 #include "unicode/udata.h" /* UDataInfo */

    20 #include "utrie.h"

    21 #include "udataswp.h"

    22 #include "cmemory.h"

    23 #include "ucol_data.h"

    24 #include "ucol_swp.h"

    26 /* swapping ----------------------------------------------------------------- */

    28 /*

    29  * This performs data swapping for a folded trie (see utrie.c for details).

    30  */

    32 U_CAPI int32_t U_EXPORT2

    33 utrie_swap(const UDataSwapper *ds,

    34            const void *inData, int32_t length, void *outData,

    35            UErrorCode *pErrorCode) {

    36     const UTrieHeader *inTrie;

    37     UTrieHeader trie;

    38     int32_t size;

    39     UBool dataIs32;

    41     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

    42         return 0;

    43     }

    44     if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {

    45         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

    46         return 0;

    47     }

    49     /* setup and swapping */

    50     if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {

    51         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

    52         return 0;

    53     }

    55     inTrie=(const UTrieHeader *)inData;

    56     trie.signature=ds->readUInt32(inTrie->signature);

    57     trie.options=ds->readUInt32(inTrie->options);

    58     trie.indexLength=udata_readInt32(ds, inTrie->indexLength);

    59     trie.dataLength=udata_readInt32(ds, inTrie->dataLength);

    61     if( trie.signature!=0x54726965 ||

    62         (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||

    63         ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||

    64         trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||

    65         (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||

    66         trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||

    67         (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||

    68         ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))

    69     ) {

    70         *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */

    71         return 0;

    72     }

    74     dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);

    75     size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);

    77     if(length>=0) {

    78         UTrieHeader *outTrie;

    80         if(length<size) {

    81             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

    82             return 0;

    83         }

    85         outTrie=(UTrieHeader *)outData;

    87         /* swap the header */

    88         ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);

    90         /* swap the index and the data */

    91         if(dataIs32) {

    92             ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);

    93             ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,

    94                                      (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);

    95         } else {

    96             ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);

    97         }

    98     }

   100     return size;

   101 }

   103 #if !UCONFIG_NO_COLLATION

   105 /* Modified copy of the beginning of ucol_swapBinary(). */

   106 U_CAPI UBool U_EXPORT2

   107 ucol_looksLikeCollationBinary(const UDataSwapper *ds,

   108                               const void *inData, int32_t length) {

   109     const UCATableHeader *inHeader;

   110     UCATableHeader header;

   112     if(ds==NULL || inData==NULL || length<-1) {

   113         return FALSE;

   114     }

   116     inHeader=(const UCATableHeader *)inData;

   118     /*

   119      * The collation binary must contain at least the UCATableHeader,

   120      * starting with its size field.

   121      * sizeof(UCATableHeader)==42*4 in ICU 2.8

   122      * check the length against the header size before reading the size field

   123      */

   124     uprv_memset(&header, 0, sizeof(header));

   125     if(length<0) {

   126         header.size=udata_readInt32(ds, inHeader->size);

   127     } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {

   128         return FALSE;

   129     }

   131     header.magic=ds->readUInt32(inHeader->magic);

   132     if(!(

   133         header.magic==UCOL_HEADER_MAGIC &&

   134         inHeader->formatVersion[0]==3 /*&&

   135         inHeader->formatVersion[1]>=0*/

   136     )) {

   137         return FALSE;

   138     }

   140     if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {

   141         return FALSE;

   142     }

   144     return TRUE;

   145 }

   147 /* swap a header-less collation binary, inside a resource bundle or ucadata.icu */

   148 U_CAPI int32_t U_EXPORT2

   149 ucol_swapBinary(const UDataSwapper *ds,

   150                 const void *inData, int32_t length, void *outData,

   151                 UErrorCode *pErrorCode) {

   152     const uint8_t *inBytes;

   153     uint8_t *outBytes;

   155     const UCATableHeader *inHeader;

   156     UCATableHeader *outHeader;

   157     UCATableHeader header;

   159     uint32_t count;

   161     /* argument checking in case we were not called from ucol_swap() */

   162     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

   163         return 0;

   164     }

   165     if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {

   166         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

   167         return 0;

   168     }

   170     inBytes=(const uint8_t *)inData;

   171     outBytes=(uint8_t *)outData;

   173     inHeader=(const UCATableHeader *)inData;

   174     outHeader=(UCATableHeader *)outData;

   176     /*

   177      * The collation binary must contain at least the UCATableHeader,

   178      * starting with its size field.

   179      * sizeof(UCATableHeader)==42*4 in ICU 2.8

   180      * check the length against the header size before reading the size field

   181      */

   182     uprv_memset(&header, 0, sizeof(header));

   183     if(length<0) {

   184         header.size=udata_readInt32(ds, inHeader->size);

   185     } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {

   186         udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",

   187                          length);

   188         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

   189         return 0;

   190     }

   192     header.magic=ds->readUInt32(inHeader->magic);

   193     if(!(

   194         header.magic==UCOL_HEADER_MAGIC &&

   195         inHeader->formatVersion[0]==3 /*&&

   196         inHeader->formatVersion[1]>=0*/

   197     )) {

   198         udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",

   199                          header.magic,

   200                          inHeader->formatVersion[0], inHeader->formatVersion[1]);

   201         *pErrorCode=U_UNSUPPORTED_ERROR;

   202         return 0;

   203     }

   205     if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {

   206         udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",

   207                          inHeader->isBigEndian, inHeader->charSetFamily);

   208         *pErrorCode=U_INVALID_FORMAT_ERROR;

   209         return 0;

   210     }

   212     if(length>=0) {

   213         /* copy everything, takes care of data that needs no swapping */

   214         if(inBytes!=outBytes) {

   215             uprv_memcpy(outBytes, inBytes, header.size);

   216         }

   218         /* swap the necessary pieces in the order of their occurrence in the data */

   220         /* read more of the UCATableHeader (the size field was read above) */

   221         header.options=                 ds->readUInt32(inHeader->options);

   222         header.UCAConsts=               ds->readUInt32(inHeader->UCAConsts);

   223         header.contractionUCACombos=    ds->readUInt32(inHeader->contractionUCACombos);

   224         header.mappingPosition=         ds->readUInt32(inHeader->mappingPosition);

   225         header.expansion=               ds->readUInt32(inHeader->expansion);

   226         header.contractionIndex=        ds->readUInt32(inHeader->contractionIndex);

   227         header.contractionCEs=          ds->readUInt32(inHeader->contractionCEs);

   228         header.contractionSize=         ds->readUInt32(inHeader->contractionSize);

   229         header.endExpansionCE=          ds->readUInt32(inHeader->endExpansionCE);

   230         header.expansionCESize=         ds->readUInt32(inHeader->expansionCESize);

   231         header.endExpansionCECount=     udata_readInt32(ds, inHeader->endExpansionCECount);

   232         header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);

   233         header.scriptToLeadByte=        ds->readUInt32(inHeader->scriptToLeadByte);

   234         header.leadByteToScript=        ds->readUInt32(inHeader->leadByteToScript);

   236         /* swap the 32-bit integers in the header */

   237         ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),

   238                            outHeader, pErrorCode);

   239         ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),

   240                            &(outHeader->scriptToLeadByte), pErrorCode);

   241         /* set the output platform properties */

   242         outHeader->isBigEndian=ds->outIsBigEndian;

   243         outHeader->charSetFamily=ds->outCharset;

   245         /* swap the options */

   246         if(header.options!=0) {

   247             ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,

   248                                outBytes+header.options, pErrorCode);

   249         }

   251         /* swap the expansions */

   252         if(header.mappingPosition!=0 && header.expansion!=0) {

   253             if(header.contractionIndex!=0) {

   254                 /* expansions bounded by contractions */

   255                 count=header.contractionIndex-header.expansion;

   256             } else {

   257                 /* no contractions: expansions bounded by the main trie */

   258                 count=header.mappingPosition-header.expansion;

   259             }

   260             ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,

   261                                outBytes+header.expansion, pErrorCode);

   262         }

   264         /* swap the contractions */

   265         if(header.contractionSize!=0) {

   266             /* contractionIndex: UChar[] */

   267             ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,

   268                                outBytes+header.contractionIndex, pErrorCode);

   270             /* contractionCEs: CEs[] */

   271             ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,

   272                                outBytes+header.contractionCEs, pErrorCode);

   273         }

   275         /* swap the main trie */

   276         if(header.mappingPosition!=0) {

   277             count=header.endExpansionCE-header.mappingPosition;

   278             utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,

   279                           outBytes+header.mappingPosition, pErrorCode);

   280         }

   282         /* swap the max expansion table */

   283         if(header.endExpansionCECount!=0) {

   284             ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,

   285                                outBytes+header.endExpansionCE, pErrorCode);

   286         }

   288         /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */

   290         /* swap UCA constants */

   291         if(header.UCAConsts!=0) {

   292             /*

   293              * if UCAConsts!=0 then contractionUCACombos because we are swapping

   294              * the UCA data file, and we know that the UCA contains contractions

   295              */

   296             count=header.contractionUCACombos-header.UCAConsts;

   297             ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,

   298                                outBytes+header.UCAConsts, pErrorCode);

   299         }

   301         /* swap UCA contractions */

   302         if(header.contractionUCACombosSize!=0) {

   303             count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;

   304             ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,

   305                                outBytes+header.contractionUCACombos, pErrorCode);

   306         }

   308         /* swap the script to lead bytes */

   309         if(header.scriptToLeadByte!=0) {

   310             int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16

   311             int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16

   312             ds->swapArray16(ds, inBytes+header.scriptToLeadByte,

   313                                 4 + (4 * indexCount) + (2 * dataCount),

   314                                 outBytes+header.scriptToLeadByte, pErrorCode);

   315         }

   317         /* swap the lead byte to scripts */

   318         if(header.leadByteToScript!=0) {

   319             int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16

   320             int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16

   321             ds->swapArray16(ds, inBytes+header.leadByteToScript,

   322                                 4 + (2 * indexCount) + (2 * dataCount),

   323                                 outBytes+header.leadByteToScript, pErrorCode);

   324         }

   325     }

   327     return header.size;

   328 }

   330 /* swap ICU collation data like ucadata.icu */

   331 U_CAPI int32_t U_EXPORT2

   332 ucol_swap(const UDataSwapper *ds,

   333           const void *inData, int32_t length, void *outData,

   334           UErrorCode *pErrorCode) {

   336     const UDataInfo *pInfo;

   337     int32_t headerSize, collationSize;

   339     /* udata_swapDataHeader checks the arguments */

   340     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);

   341     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

   342         return 0;

   343     }

   345     /* check data format and format version */

   346     pInfo=(const UDataInfo *)((const char *)inData+4);

   347     if(!(

   348         pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UCol" */

   349         pInfo->dataFormat[1]==0x43 &&

   350         pInfo->dataFormat[2]==0x6f &&

   351         pInfo->dataFormat[3]==0x6c &&

   352         pInfo->formatVersion[0]==3 /*&&

   353         pInfo->formatVersion[1]>=0*/

   354     )) {

   355         udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",

   356                          pInfo->dataFormat[0], pInfo->dataFormat[1],

   357                          pInfo->dataFormat[2], pInfo->dataFormat[3],

   358                          pInfo->formatVersion[0], pInfo->formatVersion[1]);

   359         *pErrorCode=U_UNSUPPORTED_ERROR;

   360         return 0;

   361     }

   363     collationSize=ucol_swapBinary(ds,

   364                         (const char *)inData+headerSize,

   365                         length>=0 ? length-headerSize : -1,

   366                         (char *)outData+headerSize,

   367                         pErrorCode);

   368     if(U_SUCCESS(*pErrorCode)) {

   369         return headerSize+collationSize;

   370     } else {

   371         return 0;

   372     }

   373 }

   375 /* swap inverse UCA collation data (invuca.icu) */

   376 U_CAPI int32_t U_EXPORT2

   377 ucol_swapInverseUCA(const UDataSwapper *ds,

   378                     const void *inData, int32_t length, void *outData,

   379                     UErrorCode *pErrorCode) {

   380     const UDataInfo *pInfo;

   381     int32_t headerSize;

   383     const uint8_t *inBytes;

   384     uint8_t *outBytes;

   386     const InverseUCATableHeader *inHeader;

   387     InverseUCATableHeader *outHeader;

   388     InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };

   390     /* udata_swapDataHeader checks the arguments */

   391     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);

   392     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

   393         return 0;

   394     }

   396     /* check data format and format version */

   397     pInfo=(const UDataInfo *)((const char *)inData+4);

   398     if(!(

   399         pInfo->dataFormat[0]==0x49 &&   /* dataFormat="InvC" */

   400         pInfo->dataFormat[1]==0x6e &&

   401         pInfo->dataFormat[2]==0x76 &&

   402         pInfo->dataFormat[3]==0x43 &&

   403         pInfo->formatVersion[0]==2 &&

   404         pInfo->formatVersion[1]>=1

   405     )) {

   406         udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",

   407                          pInfo->dataFormat[0], pInfo->dataFormat[1],

   408                          pInfo->dataFormat[2], pInfo->dataFormat[3],

   409                          pInfo->formatVersion[0], pInfo->formatVersion[1]);

   410         *pErrorCode=U_UNSUPPORTED_ERROR;

   411         return 0;

   412     }

   414     inBytes=(const uint8_t *)inData+headerSize;

   415     outBytes=(uint8_t *)outData+headerSize;

   417     inHeader=(const InverseUCATableHeader *)inBytes;

   418     outHeader=(InverseUCATableHeader *)outBytes;

   420     /*

   421      * The inverse UCA collation binary must contain at least the InverseUCATableHeader,

   422      * starting with its size field.

   423      * sizeof(UCATableHeader)==8*4 in ICU 2.8

   424      * check the length against the header size before reading the size field

   425      */

   426     if(length<0) {

   427         header.byteSize=udata_readInt32(ds, inHeader->byteSize);

   428     } else if(

   429         ((length-headerSize)<(8*4) ||

   430          (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))

   431     ) {

   432         udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",

   433                          length);

   434         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

   435         return 0;

   436     }

   438     if(length>=0) {

   439         /* copy everything, takes care of data that needs no swapping */

   440         if(inBytes!=outBytes) {

   441             uprv_memcpy(outBytes, inBytes, header.byteSize);

   442         }

   444         /* swap the necessary pieces in the order of their occurrence in the data */

   446         /* read more of the InverseUCATableHeader (the byteSize field was read above) */

   447         header.tableSize=   ds->readUInt32(inHeader->tableSize);

   448         header.contsSize=   ds->readUInt32(inHeader->contsSize);

   449         header.table=       ds->readUInt32(inHeader->table);

   450         header.conts=       ds->readUInt32(inHeader->conts);

   452         /* swap the 32-bit integers in the header */

   453         ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);

   455         /* swap the inverse table; tableSize counts uint32_t[3] rows */

   456         ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,

   457                            outBytes+header.table, pErrorCode);

   459         /* swap the continuation table; contsSize counts UChars */

   460         ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,

   461                            outBytes+header.conts, pErrorCode);

   462     }

   464     return headerSize+header.byteSize;

   465 }

   467 #endif /* #if !UCONFIG_NO_COLLATION */

The Tor Browser / file revision

intl/icu/source/common/ucol_swp.cpp@6474c204b198

intl/icu/source/common/ucol_swp.cpp