intl/icu/source/tools/toolutil/swapimpl.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2005-2012, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  swapimpl.cpp
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2005may05
    14 *   created by: Markus W. Scherer
    15 *
    16 *   Data file swapping functions moved here from the common library
    17 *   because some data is hardcoded in ICU4C and needs not be swapped any more.
    18 *   Moving the functions here simplifies testing (for code coverage) because
    19 *   we need not jump through hoops (like adding snapshots of these files
    20 *   to testdata).
    21 *
    22 *   The declarations for these functions remain in the internal header files
    23 *   in icu/source/common/
    24 */
    26 #include "unicode/utypes.h"
    27 #include "unicode/putil.h"
    28 #include "unicode/udata.h"
    30 /* Explicit include statement for std_string.h is needed
    31  * for compilation on certain platforms. (e.g. AIX/VACPP)
    32  */
    33 #include "unicode/std_string.h"
    35 #include "cmemory.h"
    36 #include "cstring.h"
    37 #include "uinvchar.h"
    38 #include "uassert.h"
    39 #include "uarrsort.h"
    40 #include "ucmndata.h"
    41 #include "udataswp.h"
    43 /* swapping implementations in common */
    45 #include "uresdata.h"
    46 #include "ucnv_io.h"
    47 #include "uprops.h"
    48 #include "ucase.h"
    49 #include "ubidi_props.h"
    50 #include "ucol_swp.h"
    51 #include "ucnv_bld.h"
    52 #include "unormimp.h"
    53 #include "normalizer2impl.h"
    54 #include "sprpimpl.h"
    55 #include "propname.h"
    56 #include "rbbidata.h"
    57 #include "utrie2.h"
    58 #include "dictionarydata.h"
    60 /* swapping implementations in i18n */
    62 #if !UCONFIG_NO_NORMALIZATION
    63 #include "uspoof_impl.h"
    64 #endif
    66 U_NAMESPACE_USE
    68 /* definitions */
    /*
     * Number of elements of a C array, as a signed 32-bit count.
     * The whole expansion is parenthesized so that the cast cannot be split
     * from its operand by an adjacent postfix operator or by sizeof.
     * Only valid for true arrays, not for pointers.
     */
    #define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
    72 /* Unicode property (value) aliases data swapping --------------------------- */
    74 static int32_t U_CALLCONV
    75 upname_swap(const UDataSwapper *ds,
    76             const void *inData, int32_t length, void *outData,
    77             UErrorCode *pErrorCode) {
    78     /* udata_swapDataHeader checks the arguments */
    79     int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    80     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    81         return 0;
    82     }
    84     /* check data format and format version */
    85     const UDataInfo *pInfo=
    86         reinterpret_cast<const UDataInfo *>(
    87             static_cast<const char *>(inData)+4);
    88     if(!(
    89         pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
    90         pInfo->dataFormat[1]==0x6e &&
    91         pInfo->dataFormat[2]==0x61 &&
    92         pInfo->dataFormat[3]==0x6d &&
    93         pInfo->formatVersion[0]==2
    94     )) {
    95         udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
    96                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    97                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    98                          pInfo->formatVersion[0]);
    99         *pErrorCode=U_UNSUPPORTED_ERROR;
   100         return 0;
   101     }
   103     const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
   104     uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;
   106     if(length>=0) {
   107         length-=headerSize;
   108         // formatVersion 2 initially has indexes[8], 32 bytes.
   109         if(length<32) {
   110             udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
   111                              (int)length);
   112             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   113             return 0;
   114         }
   115     }
   117     const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
   118     int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
   119     if(length>=0) {
   120         if(length<totalSize) {
   121             udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
   122                              "for pnames.icu\n",
   123                              (int)length, (int)totalSize);
   124             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   125             return 0;
   126         }
   128         int32_t numBytesIndexesAndValueMaps=
   129             udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
   131         // Swap the indexes[] and the valueMaps[].
   132         ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
   134         // Copy the rest of the data.
   135         if(inBytes!=outBytes) {
   136             uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
   137                         inBytes+numBytesIndexesAndValueMaps,
   138                         totalSize-numBytesIndexesAndValueMaps);
   139         }
   141         // We need not swap anything else:
   142         //
   143         // The ByteTries are already byte-serialized, and are fixed on ASCII.
   144         // (On an EBCDIC machine, the input string is converted to lowercase ASCII
   145         // while matching.)
   146         //
   147         // The name groups are mostly invariant characters, but since we only
   148         // generate, and keep in subversion, ASCII versions of pnames.icu,
   149         // and since only ICU4J uses the pnames.icu data file
   150         // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
   151         // we just copy those bytes too.
   152     }
   154     return headerSize+totalSize;
   155 }
   157 /* Unicode properties data swapping ----------------------------------------- */
   159 static int32_t U_CALLCONV
   160 uprops_swap(const UDataSwapper *ds,
   161             const void *inData, int32_t length, void *outData,
   162             UErrorCode *pErrorCode) {
   163     const UDataInfo *pInfo;
   164     int32_t headerSize, i;
   166     int32_t dataIndexes[UPROPS_INDEX_COUNT];
   167     const int32_t *inData32;
   169     /* udata_swapDataHeader checks the arguments */
   170     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   171     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   172         return 0;
   173     }
   175     /* check data format and format version */
   176     pInfo=(const UDataInfo *)((const char *)inData+4);
   177     if(!(
   178         pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
   179         pInfo->dataFormat[1]==0x50 &&
   180         pInfo->dataFormat[2]==0x72 &&
   181         pInfo->dataFormat[3]==0x6f &&
   182         (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
   183         (pInfo->formatVersion[0]>=7 ||
   184             (pInfo->formatVersion[2]==UTRIE_SHIFT &&
   185              pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
   186     )) {
   187         udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
   188                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   189                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   190                          pInfo->formatVersion[0]);
   191         *pErrorCode=U_UNSUPPORTED_ERROR;
   192         return 0;
   193     }
   195     /* the properties file must contain at least the indexes array */
   196     if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
   197         udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
   198                          length-headerSize);
   199         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   200         return 0;
   201     }
   203     /* read the indexes */
   204     inData32=(const int32_t *)((const char *)inData+headerSize);
   205     for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
   206         dataIndexes[i]=udata_readInt32(ds, inData32[i]);
   207     }
   209     /*
   210      * comments are copied from the data format description in genprops/store.c
   211      * indexes[] constants are in uprops.h
   212      */
   213     int32_t dataTop;
   214     if(length>=0) {
   215         int32_t *outData32;
   217         /*
   218          * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
   219          * In earlier formatVersions, it is 0 and a lower dataIndexes entry
   220          * has the top of the last item.
   221          */
   222         for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
   224         if((length-headerSize)<(4*dataTop)) {
   225             udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
   226                              length-headerSize);
   227             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   228             return 0;
   229         }
   231         outData32=(int32_t *)((char *)outData+headerSize);
   233         /* copy everything for inaccessible data (padding) */
   234         if(inData32!=outData32) {
   235             uprv_memcpy(outData32, inData32, 4*dataTop);
   236         }
   238         /* swap the indexes[16] */
   239         ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
   241         /*
   242          * swap the main properties UTrie
   243          * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
   244          */
   245         utrie2_swapAnyVersion(ds,
   246             inData32+UPROPS_INDEX_COUNT,
   247             4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
   248             outData32+UPROPS_INDEX_COUNT,
   249             pErrorCode);
   251         /*
   252          * swap the properties and exceptions words
   253          * P  const uint32_t props32[i1-i0];
   254          * E  const uint32_t exceptions[i2-i1];
   255          */
   256         ds->swapArray32(ds,
   257             inData32+dataIndexes[UPROPS_PROPS32_INDEX],
   258             4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
   259             outData32+dataIndexes[UPROPS_PROPS32_INDEX],
   260             pErrorCode);
   262         /*
   263          * swap the UChars
   264          * U  const UChar uchars[2*(i3-i2)];
   265          */
   266         ds->swapArray16(ds,
   267             inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
   268             4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
   269             outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
   270             pErrorCode);
   272         /*
   273          * swap the additional UTrie
   274          * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
   275          */
   276         utrie2_swapAnyVersion(ds,
   277             inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
   278             4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
   279             outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
   280             pErrorCode);
   282         /*
   283          * swap the properties vectors
   284          * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
   285          */
   286         ds->swapArray32(ds,
   287             inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
   288             4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
   289             outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
   290             pErrorCode);
   292         // swap the Script_Extensions data
   293         // SCX const uint16_t scriptExtensions[2*(i7-i6)];
   294         ds->swapArray16(ds,
   295             inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
   296             4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
   297             outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
   298             pErrorCode);
   299     }
   301     /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
   302     return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
   303 }
   305 /* Unicode case mapping data swapping --------------------------------------- */
   307 static int32_t U_CALLCONV
   308 ucase_swap(const UDataSwapper *ds,
   309            const void *inData, int32_t length, void *outData,
   310            UErrorCode *pErrorCode) {
   311     const UDataInfo *pInfo;
   312     int32_t headerSize;
   314     const uint8_t *inBytes;
   315     uint8_t *outBytes;
   317     const int32_t *inIndexes;
   318     int32_t indexes[16];
   320     int32_t i, offset, count, size;
   322     /* udata_swapDataHeader checks the arguments */
   323     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   324     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   325         return 0;
   326     }
   328     /* check data format and format version */
   329     pInfo=(const UDataInfo *)((const char *)inData+4);
   330     if(!(
   331         pInfo->dataFormat[0]==UCASE_FMT_0 &&    /* dataFormat="cAsE" */
   332         pInfo->dataFormat[1]==UCASE_FMT_1 &&
   333         pInfo->dataFormat[2]==UCASE_FMT_2 &&
   334         pInfo->dataFormat[3]==UCASE_FMT_3 &&
   335         ((pInfo->formatVersion[0]==1 &&
   336           pInfo->formatVersion[2]==UTRIE_SHIFT &&
   337           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
   338          pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3)
   339     )) {
   340         udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
   341                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   342                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   343                          pInfo->formatVersion[0]);
   344         *pErrorCode=U_UNSUPPORTED_ERROR;
   345         return 0;
   346     }
   348     inBytes=(const uint8_t *)inData+headerSize;
   349     outBytes=(uint8_t *)outData+headerSize;
   351     inIndexes=(const int32_t *)inBytes;
   353     if(length>=0) {
   354         length-=headerSize;
   355         if(length<16*4) {
   356             udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
   357                              length);
   358             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   359             return 0;
   360         }
   361     }
   363     /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
   364     for(i=0; i<16; ++i) {
   365         indexes[i]=udata_readInt32(ds, inIndexes[i]);
   366     }
   368     /* get the total length of the data */
   369     size=indexes[UCASE_IX_LENGTH];
   371     if(length>=0) {
   372         if(length<size) {
   373             udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
   374                              length);
   375             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   376             return 0;
   377         }
   379         /* copy the data for inaccessible bytes */
   380         if(inBytes!=outBytes) {
   381             uprv_memcpy(outBytes, inBytes, size);
   382         }
   384         offset=0;
   386         /* swap the int32_t indexes[] */
   387         count=indexes[UCASE_IX_INDEX_TOP]*4;
   388         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
   389         offset+=count;
   391         /* swap the UTrie */
   392         count=indexes[UCASE_IX_TRIE_SIZE];
   393         utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   394         offset+=count;
   396         /* swap the uint16_t exceptions[] and unfold[] */
   397         count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
   398         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   399         offset+=count;
   401         U_ASSERT(offset==size);
   402     }
   404     return headerSize+size;
   405 }
   407 /* Unicode bidi/shaping data swapping --------------------------------------- */
   409 static int32_t U_CALLCONV
   410 ubidi_swap(const UDataSwapper *ds,
   411            const void *inData, int32_t length, void *outData,
   412            UErrorCode *pErrorCode) {
   413     const UDataInfo *pInfo;
   414     int32_t headerSize;
   416     const uint8_t *inBytes;
   417     uint8_t *outBytes;
   419     const int32_t *inIndexes;
   420     int32_t indexes[16];
   422     int32_t i, offset, count, size;
   424     /* udata_swapDataHeader checks the arguments */
   425     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   426     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   427         return 0;
   428     }
   430     /* check data format and format version */
   431     pInfo=(const UDataInfo *)((const char *)inData+4);
   432     if(!(
   433         pInfo->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
   434         pInfo->dataFormat[1]==UBIDI_FMT_1 &&
   435         pInfo->dataFormat[2]==UBIDI_FMT_2 &&
   436         pInfo->dataFormat[3]==UBIDI_FMT_3 &&
   437         ((pInfo->formatVersion[0]==1 &&
   438           pInfo->formatVersion[2]==UTRIE_SHIFT &&
   439           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
   440          pInfo->formatVersion[0]==2)
   441     )) {
   442         udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
   443                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   444                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   445                          pInfo->formatVersion[0]);
   446         *pErrorCode=U_UNSUPPORTED_ERROR;
   447         return 0;
   448     }
   450     inBytes=(const uint8_t *)inData+headerSize;
   451     outBytes=(uint8_t *)outData+headerSize;
   453     inIndexes=(const int32_t *)inBytes;
   455     if(length>=0) {
   456         length-=headerSize;
   457         if(length<16*4) {
   458             udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
   459                              length);
   460             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   461             return 0;
   462         }
   463     }
   465     /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
   466     for(i=0; i<16; ++i) {
   467         indexes[i]=udata_readInt32(ds, inIndexes[i]);
   468     }
   470     /* get the total length of the data */
   471     size=indexes[UBIDI_IX_LENGTH];
   473     if(length>=0) {
   474         if(length<size) {
   475             udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
   476                              length);
   477             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   478             return 0;
   479         }
   481         /* copy the data for inaccessible bytes */
   482         if(inBytes!=outBytes) {
   483             uprv_memcpy(outBytes, inBytes, size);
   484         }
   486         offset=0;
   488         /* swap the int32_t indexes[] */
   489         count=indexes[UBIDI_IX_INDEX_TOP]*4;
   490         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
   491         offset+=count;
   493         /* swap the UTrie */
   494         count=indexes[UBIDI_IX_TRIE_SIZE];
   495         utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   496         offset+=count;
   498         /* swap the uint32_t mirrors[] */
   499         count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
   500         ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   501         offset+=count;
   503         /* just skip the uint8_t jgArray[] */
   504         count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
   505         offset+=count;
   507         U_ASSERT(offset==size);
   508     }
   510     return headerSize+size;
   511 }
   513 /* Unicode normalization data swapping -------------------------------------- */
   515 #if !UCONFIG_NO_NORMALIZATION
   517 static int32_t U_CALLCONV
   518 unorm_swap(const UDataSwapper *ds,
   519            const void *inData, int32_t length, void *outData,
   520            UErrorCode *pErrorCode) {
   521     const UDataInfo *pInfo;
   522     int32_t headerSize;
   524     const uint8_t *inBytes;
   525     uint8_t *outBytes;
   527     const int32_t *inIndexes;
   528     int32_t indexes[32];
   530     int32_t i, offset, count, size;
   532     /* udata_swapDataHeader checks the arguments */
   533     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   534     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   535         return 0;
   536     }
   538     /* check data format and format version */
   539     pInfo=(const UDataInfo *)((const char *)inData+4);
   540     if(!(
   541         pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
   542         pInfo->dataFormat[1]==0x6f &&
   543         pInfo->dataFormat[2]==0x72 &&
   544         pInfo->dataFormat[3]==0x6d &&
   545         pInfo->formatVersion[0]==2
   546     )) {
   547         udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
   548                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   549                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   550                          pInfo->formatVersion[0]);
   551         *pErrorCode=U_UNSUPPORTED_ERROR;
   552         return 0;
   553     }
   555     inBytes=(const uint8_t *)inData+headerSize;
   556     outBytes=(uint8_t *)outData+headerSize;
   558     inIndexes=(const int32_t *)inBytes;
   560     if(length>=0) {
   561         length-=headerSize;
   562         if(length<32*4) {
   563             udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
   564                              length);
   565             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   566             return 0;
   567         }
   568     }
   570     /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
   571     for(i=0; i<32; ++i) {
   572         indexes[i]=udata_readInt32(ds, inIndexes[i]);
   573     }
   575     /* calculate the total length of the data */
   576     size=
   577         32*4+ /* size of indexes[] */
   578         indexes[_NORM_INDEX_TRIE_SIZE]+
   579         indexes[_NORM_INDEX_UCHAR_COUNT]*2+
   580         indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
   581         indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
   582         indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
   583         indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
   585     if(length>=0) {
   586         if(length<size) {
   587             udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
   588                              length);
   589             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   590             return 0;
   591         }
   593         /* copy the data for inaccessible bytes */
   594         if(inBytes!=outBytes) {
   595             uprv_memcpy(outBytes, inBytes, size);
   596         }
   598         offset=0;
   600         /* swap the indexes[] */
   601         count=32*4;
   602         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
   603         offset+=count;
   605         /* swap the main UTrie */
   606         count=indexes[_NORM_INDEX_TRIE_SIZE];
   607         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   608         offset+=count;
   610         /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
   611         count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
   612         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   613         offset+=count;
   615         /* swap the FCD UTrie */
   616         count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
   617         if(count!=0) {
   618             utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   619             offset+=count;
   620         }
   622         /* swap the aux UTrie */
   623         count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
   624         if(count!=0) {
   625             utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   626             offset+=count;
   627         }
   629         /* swap the uint16_t combiningTable[] */
   630         count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
   631         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
   632         offset+=count;
   633     }
   635     return headerSize+size;
   636 }
   638 #endif
   640 /* Swap 'Test' data from gentest */
   641 static int32_t U_CALLCONV
   642 test_swap(const UDataSwapper *ds,
   643            const void *inData, int32_t length, void *outData,
   644            UErrorCode *pErrorCode) {
   645     const UDataInfo *pInfo;
   646     int32_t headerSize;
   648     const uint8_t *inBytes;
   649     uint8_t *outBytes;
   651     int32_t offset;
   653     /* udata_swapDataHeader checks the arguments */
   654     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   655     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   656         udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL");
   657         return 0;
   658     }
   660     /* check data format and format version */
   661     pInfo=(const UDataInfo *)((const char *)inData+4);
   662     if(!(
   663         pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Norm" */
   664         pInfo->dataFormat[1]==0x65 &&
   665         pInfo->dataFormat[2]==0x73 &&
   666         pInfo->dataFormat[3]==0x74 &&
   667         pInfo->formatVersion[0]==1
   668     )) {
   669         udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
   670                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   671                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   672                          pInfo->formatVersion[0]);
   673         *pErrorCode=U_UNSUPPORTED_ERROR;
   674         return 0;
   675     }
   677     inBytes=(const uint8_t *)inData+headerSize;
   678     outBytes=(uint8_t *)outData+headerSize;
   680     int32_t size16 = 2; // 16bit plus padding
   681     int32_t sizeStr = 5; // 4 char inv-str plus null
   682     int32_t size = size16 + sizeStr;
   684     if(length>=0) {
   685         if(length<size) {
   686             udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
   687                              length, size);
   688             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   689             return 0;
   690         }
   692 	offset =0;
   693 	/* swap a 1 entry array */
   694         ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
   695 	offset+=size16;
   696 	ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
   697     }
   699     return headerSize+size;
   700 }
   702 /* swap any data (except a .dat package) ------------------------------------ */
   704 static const struct {
   705     uint8_t dataFormat[4];
   706     UDataSwapFn *swapFn;
   707 } swapFns[]={
   708     { { 0x52, 0x65, 0x73, 0x42 }, ures_swap },          /* dataFormat="ResB" */
   709 #if !UCONFIG_NO_LEGACY_CONVERSION
   710     { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap },          /* dataFormat="cnvt" */
   711 #endif
   712 #if !UCONFIG_NO_CONVERSION
   713     { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases },   /* dataFormat="CvAl" */
   714 #endif
   715 #if !UCONFIG_NO_IDNA
   716     { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap },        /* dataFormat="SPRP" */
   717 #endif
   718     /* insert data formats here, descending by expected frequency of occurrence */
   719     { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap },        /* dataFormat="UPro" */
   721     { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
   722                                   ucase_swap },         /* dataFormat="cAsE" */
   724     { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
   725                                   ubidi_swap },         /* dataFormat="BiDi" */
   727 #if !UCONFIG_NO_NORMALIZATION
   728     { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap },         /* dataFormat="Norm" */
   729     { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap },        /* dataFormat="Nrm2" */
   730 #endif
   731 #if !UCONFIG_NO_COLLATION
   732     { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap },          /* dataFormat="UCol" */
   733     { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
   734 #endif
   735 #if !UCONFIG_NO_BREAK_ITERATION
   736     { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap },          /* dataFormat="Brk " */
   737     { { 0x44, 0x69, 0x63, 0x74 }, udict_swap },         /* dataFormat="Dict" */
   738 #endif
   739     { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap },        /* dataFormat="pnam" */
   740     { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames },    /* dataFormat="unam" */
   741 #if !UCONFIG_NO_NORMALIZATION
   742     { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap },         /* dataFormat="Cfu " */
   743 #endif
   744     { { 0x54, 0x65, 0x73, 0x74 }, test_swap }            /* dataFormat="Test" */
   745 };
   747 U_CAPI int32_t U_EXPORT2
   748 udata_swap(const UDataSwapper *ds,
   749            const void *inData, int32_t length, void *outData,
   750            UErrorCode *pErrorCode) {
   751     char dataFormatChars[4];
   752     const UDataInfo *pInfo;
   753     int32_t i, swappedLength;
   755     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   756         return 0;
   757     }
   759     /*
   760      * Preflight the header first; checks for illegal arguments, too.
   761      * Do not swap the header right away because the format-specific swapper
   762      * will swap it, get the headerSize again, and also use the header
   763      * information. Otherwise we would have to pass some of the information
   764      * and not be able to use the UDataSwapFn signature.
   765      */
   766     udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
   768     /*
   769      * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
   770      * then we could check here for further known magic values and structures.
   771      */
   772     if(U_FAILURE(*pErrorCode)) {
   773         return 0; /* the data format was not recognized */
   774     }
   776     pInfo=(const UDataInfo *)((const char *)inData+4);
   778     {
   779         /* convert the data format from ASCII to Unicode to the system charset */
   780         UChar u[4]={
   781              pInfo->dataFormat[0], pInfo->dataFormat[1],
   782              pInfo->dataFormat[2], pInfo->dataFormat[3]
   783         };
   785         if(uprv_isInvariantUString(u, 4)) {
   786             u_UCharsToChars(u, dataFormatChars, 4);
   787         } else {
   788             dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
   789         }
   790     }
   792     /* dispatch to the swap function for the dataFormat */
   793     for(i=0; i<LENGTHOF(swapFns); ++i) {
   794         if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
   795             swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
   797             if(U_FAILURE(*pErrorCode)) {
   798                 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
   799                                  pInfo->dataFormat[0], pInfo->dataFormat[1],
   800                                  pInfo->dataFormat[2], pInfo->dataFormat[3],
   801                                  dataFormatChars[0], dataFormatChars[1],
   802                                  dataFormatChars[2], dataFormatChars[3],
   803                                  u_errorName(*pErrorCode));
   804             } else if(swappedLength<(length-15)) {
   805                 /* swapped less than expected */
   806                 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
   807                                  swappedLength, length,
   808                                  pInfo->dataFormat[0], pInfo->dataFormat[1],
   809                                  pInfo->dataFormat[2], pInfo->dataFormat[3],
   810                                  dataFormatChars[0], dataFormatChars[1],
   811                                  dataFormatChars[2], dataFormatChars[3],
   812                                  u_errorName(*pErrorCode));
   813             }
   815             return swappedLength;
   816         }
   817     }
   819     /* the dataFormat was not recognized */
   820     udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
   821                      pInfo->dataFormat[0], pInfo->dataFormat[1],
   822                      pInfo->dataFormat[2], pInfo->dataFormat[3],
   823                      dataFormatChars[0], dataFormatChars[1],
   824                      dataFormatChars[2], dataFormatChars[3]);
   826     *pErrorCode=U_UNSUPPORTED_ERROR;
   827     return 0;
   828 }

mercurial