intl/icu/source/tools/toolutil/swapimpl.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2005-2012, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: swapimpl.cpp
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2005may05
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * Data file swapping functions moved here from the common library
michael@0 17 * because some data is hardcoded in ICU4C and need not be swapped any more.
michael@0 18 * Moving the functions here simplifies testing (for code coverage) because
michael@0 19 * we need not jump through hoops (like adding snapshots of these files
michael@0 20 * to testdata).
michael@0 21 *
michael@0 22 * The declarations for these functions remain in the internal header files
michael@0 23 * in icu/source/common/
michael@0 24 */
michael@0 25
michael@0 26 #include "unicode/utypes.h"
michael@0 27 #include "unicode/putil.h"
michael@0 28 #include "unicode/udata.h"
michael@0 29
michael@0 30 /* Explicit include statement for std_string.h is needed
michael@0 31 * for compilation on certain platforms. (e.g. AIX/VACPP)
michael@0 32 */
michael@0 33 #include "unicode/std_string.h"
michael@0 34
michael@0 35 #include "cmemory.h"
michael@0 36 #include "cstring.h"
michael@0 37 #include "uinvchar.h"
michael@0 38 #include "uassert.h"
michael@0 39 #include "uarrsort.h"
michael@0 40 #include "ucmndata.h"
michael@0 41 #include "udataswp.h"
michael@0 42
michael@0 43 /* swapping implementations in common */
michael@0 44
michael@0 45 #include "uresdata.h"
michael@0 46 #include "ucnv_io.h"
michael@0 47 #include "uprops.h"
michael@0 48 #include "ucase.h"
michael@0 49 #include "ubidi_props.h"
michael@0 50 #include "ucol_swp.h"
michael@0 51 #include "ucnv_bld.h"
michael@0 52 #include "unormimp.h"
michael@0 53 #include "normalizer2impl.h"
michael@0 54 #include "sprpimpl.h"
michael@0 55 #include "propname.h"
michael@0 56 #include "rbbidata.h"
michael@0 57 #include "utrie2.h"
michael@0 58 #include "dictionarydata.h"
michael@0 59
michael@0 60 /* swapping implementations in i18n */
michael@0 61
michael@0 62 #if !UCONFIG_NO_NORMALIZATION
michael@0 63 #include "uspoof_impl.h"
michael@0 64 #endif
michael@0 65
michael@0 66 U_NAMESPACE_USE
michael@0 67
michael@0 68 /* definitions */
michael@0 69
/* Element count of a true array (not a pointer), converted to int32_t. */
#define LENGTHOF(arr) (int32_t)(sizeof(arr)/sizeof((arr)[0]))
michael@0 71
michael@0 72 /* Unicode property (value) aliases data swapping --------------------------- */
michael@0 73
michael@0 74 static int32_t U_CALLCONV
michael@0 75 upname_swap(const UDataSwapper *ds,
michael@0 76 const void *inData, int32_t length, void *outData,
michael@0 77 UErrorCode *pErrorCode) {
michael@0 78 /* udata_swapDataHeader checks the arguments */
michael@0 79 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
michael@0 80 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 81 return 0;
michael@0 82 }
michael@0 83
michael@0 84 /* check data format and format version */
michael@0 85 const UDataInfo *pInfo=
michael@0 86 reinterpret_cast<const UDataInfo *>(
michael@0 87 static_cast<const char *>(inData)+4);
michael@0 88 if(!(
michael@0 89 pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */
michael@0 90 pInfo->dataFormat[1]==0x6e &&
michael@0 91 pInfo->dataFormat[2]==0x61 &&
michael@0 92 pInfo->dataFormat[3]==0x6d &&
michael@0 93 pInfo->formatVersion[0]==2
michael@0 94 )) {
michael@0 95 udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
michael@0 96 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 97 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 98 pInfo->formatVersion[0]);
michael@0 99 *pErrorCode=U_UNSUPPORTED_ERROR;
michael@0 100 return 0;
michael@0 101 }
michael@0 102
michael@0 103 const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
michael@0 104 uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;
michael@0 105
michael@0 106 if(length>=0) {
michael@0 107 length-=headerSize;
michael@0 108 // formatVersion 2 initially has indexes[8], 32 bytes.
michael@0 109 if(length<32) {
michael@0 110 udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
michael@0 111 (int)length);
michael@0 112 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 113 return 0;
michael@0 114 }
michael@0 115 }
michael@0 116
michael@0 117 const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
michael@0 118 int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
michael@0 119 if(length>=0) {
michael@0 120 if(length<totalSize) {
michael@0 121 udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
michael@0 122 "for pnames.icu\n",
michael@0 123 (int)length, (int)totalSize);
michael@0 124 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 125 return 0;
michael@0 126 }
michael@0 127
michael@0 128 int32_t numBytesIndexesAndValueMaps=
michael@0 129 udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
michael@0 130
michael@0 131 // Swap the indexes[] and the valueMaps[].
michael@0 132 ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
michael@0 133
michael@0 134 // Copy the rest of the data.
michael@0 135 if(inBytes!=outBytes) {
michael@0 136 uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
michael@0 137 inBytes+numBytesIndexesAndValueMaps,
michael@0 138 totalSize-numBytesIndexesAndValueMaps);
michael@0 139 }
michael@0 140
michael@0 141 // We need not swap anything else:
michael@0 142 //
michael@0 143 // The ByteTries are already byte-serialized, and are fixed on ASCII.
michael@0 144 // (On an EBCDIC machine, the input string is converted to lowercase ASCII
michael@0 145 // while matching.)
michael@0 146 //
michael@0 147 // The name groups are mostly invariant characters, but since we only
michael@0 148 // generate, and keep in subversion, ASCII versions of pnames.icu,
michael@0 149 // and since only ICU4J uses the pnames.icu data file
michael@0 150 // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
michael@0 151 // we just copy those bytes too.
michael@0 152 }
michael@0 153
michael@0 154 return headerSize+totalSize;
michael@0 155 }
michael@0 156
michael@0 157 /* Unicode properties data swapping ----------------------------------------- */
michael@0 158
michael@0 159 static int32_t U_CALLCONV
michael@0 160 uprops_swap(const UDataSwapper *ds,
michael@0 161 const void *inData, int32_t length, void *outData,
michael@0 162 UErrorCode *pErrorCode) {
michael@0 163 const UDataInfo *pInfo;
michael@0 164 int32_t headerSize, i;
michael@0 165
michael@0 166 int32_t dataIndexes[UPROPS_INDEX_COUNT];
michael@0 167 const int32_t *inData32;
michael@0 168
michael@0 169 /* udata_swapDataHeader checks the arguments */
michael@0 170 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
michael@0 171 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 172 return 0;
michael@0 173 }
michael@0 174
michael@0 175 /* check data format and format version */
michael@0 176 pInfo=(const UDataInfo *)((const char *)inData+4);
michael@0 177 if(!(
michael@0 178 pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */
michael@0 179 pInfo->dataFormat[1]==0x50 &&
michael@0 180 pInfo->dataFormat[2]==0x72 &&
michael@0 181 pInfo->dataFormat[3]==0x6f &&
michael@0 182 (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
michael@0 183 (pInfo->formatVersion[0]>=7 ||
michael@0 184 (pInfo->formatVersion[2]==UTRIE_SHIFT &&
michael@0 185 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
michael@0 186 )) {
michael@0 187 udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
michael@0 188 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 189 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 190 pInfo->formatVersion[0]);
michael@0 191 *pErrorCode=U_UNSUPPORTED_ERROR;
michael@0 192 return 0;
michael@0 193 }
michael@0 194
michael@0 195 /* the properties file must contain at least the indexes array */
michael@0 196 if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
michael@0 197 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
michael@0 198 length-headerSize);
michael@0 199 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 200 return 0;
michael@0 201 }
michael@0 202
michael@0 203 /* read the indexes */
michael@0 204 inData32=(const int32_t *)((const char *)inData+headerSize);
michael@0 205 for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
michael@0 206 dataIndexes[i]=udata_readInt32(ds, inData32[i]);
michael@0 207 }
michael@0 208
michael@0 209 /*
michael@0 210 * comments are copied from the data format description in genprops/store.c
michael@0 211 * indexes[] constants are in uprops.h
michael@0 212 */
michael@0 213 int32_t dataTop;
michael@0 214 if(length>=0) {
michael@0 215 int32_t *outData32;
michael@0 216
michael@0 217 /*
michael@0 218 * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
michael@0 219 * In earlier formatVersions, it is 0 and a lower dataIndexes entry
michael@0 220 * has the top of the last item.
michael@0 221 */
michael@0 222 for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
michael@0 223
michael@0 224 if((length-headerSize)<(4*dataTop)) {
michael@0 225 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
michael@0 226 length-headerSize);
michael@0 227 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 228 return 0;
michael@0 229 }
michael@0 230
michael@0 231 outData32=(int32_t *)((char *)outData+headerSize);
michael@0 232
michael@0 233 /* copy everything for inaccessible data (padding) */
michael@0 234 if(inData32!=outData32) {
michael@0 235 uprv_memcpy(outData32, inData32, 4*dataTop);
michael@0 236 }
michael@0 237
michael@0 238 /* swap the indexes[16] */
michael@0 239 ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
michael@0 240
michael@0 241 /*
michael@0 242 * swap the main properties UTrie
michael@0 243 * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
michael@0 244 */
michael@0 245 utrie2_swapAnyVersion(ds,
michael@0 246 inData32+UPROPS_INDEX_COUNT,
michael@0 247 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
michael@0 248 outData32+UPROPS_INDEX_COUNT,
michael@0 249 pErrorCode);
michael@0 250
michael@0 251 /*
michael@0 252 * swap the properties and exceptions words
michael@0 253 * P const uint32_t props32[i1-i0];
michael@0 254 * E const uint32_t exceptions[i2-i1];
michael@0 255 */
michael@0 256 ds->swapArray32(ds,
michael@0 257 inData32+dataIndexes[UPROPS_PROPS32_INDEX],
michael@0 258 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
michael@0 259 outData32+dataIndexes[UPROPS_PROPS32_INDEX],
michael@0 260 pErrorCode);
michael@0 261
michael@0 262 /*
michael@0 263 * swap the UChars
michael@0 264 * U const UChar uchars[2*(i3-i2)];
michael@0 265 */
michael@0 266 ds->swapArray16(ds,
michael@0 267 inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
michael@0 268 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
michael@0 269 outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
michael@0 270 pErrorCode);
michael@0 271
michael@0 272 /*
michael@0 273 * swap the additional UTrie
michael@0 274 * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
michael@0 275 */
michael@0 276 utrie2_swapAnyVersion(ds,
michael@0 277 inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
michael@0 278 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
michael@0 279 outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
michael@0 280 pErrorCode);
michael@0 281
michael@0 282 /*
michael@0 283 * swap the properties vectors
michael@0 284 * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
michael@0 285 */
michael@0 286 ds->swapArray32(ds,
michael@0 287 inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
michael@0 288 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
michael@0 289 outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
michael@0 290 pErrorCode);
michael@0 291
michael@0 292 // swap the Script_Extensions data
michael@0 293 // SCX const uint16_t scriptExtensions[2*(i7-i6)];
michael@0 294 ds->swapArray16(ds,
michael@0 295 inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
michael@0 296 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
michael@0 297 outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
michael@0 298 pErrorCode);
michael@0 299 }
michael@0 300
michael@0 301 /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
michael@0 302 return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
michael@0 303 }
michael@0 304
michael@0 305 /* Unicode case mapping data swapping --------------------------------------- */
michael@0 306
michael@0 307 static int32_t U_CALLCONV
michael@0 308 ucase_swap(const UDataSwapper *ds,
michael@0 309 const void *inData, int32_t length, void *outData,
michael@0 310 UErrorCode *pErrorCode) {
michael@0 311 const UDataInfo *pInfo;
michael@0 312 int32_t headerSize;
michael@0 313
michael@0 314 const uint8_t *inBytes;
michael@0 315 uint8_t *outBytes;
michael@0 316
michael@0 317 const int32_t *inIndexes;
michael@0 318 int32_t indexes[16];
michael@0 319
michael@0 320 int32_t i, offset, count, size;
michael@0 321
michael@0 322 /* udata_swapDataHeader checks the arguments */
michael@0 323 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
michael@0 324 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 325 return 0;
michael@0 326 }
michael@0 327
michael@0 328 /* check data format and format version */
michael@0 329 pInfo=(const UDataInfo *)((const char *)inData+4);
michael@0 330 if(!(
michael@0 331 pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */
michael@0 332 pInfo->dataFormat[1]==UCASE_FMT_1 &&
michael@0 333 pInfo->dataFormat[2]==UCASE_FMT_2 &&
michael@0 334 pInfo->dataFormat[3]==UCASE_FMT_3 &&
michael@0 335 ((pInfo->formatVersion[0]==1 &&
michael@0 336 pInfo->formatVersion[2]==UTRIE_SHIFT &&
michael@0 337 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
michael@0 338 pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3)
michael@0 339 )) {
michael@0 340 udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
michael@0 341 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 342 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 343 pInfo->formatVersion[0]);
michael@0 344 *pErrorCode=U_UNSUPPORTED_ERROR;
michael@0 345 return 0;
michael@0 346 }
michael@0 347
michael@0 348 inBytes=(const uint8_t *)inData+headerSize;
michael@0 349 outBytes=(uint8_t *)outData+headerSize;
michael@0 350
michael@0 351 inIndexes=(const int32_t *)inBytes;
michael@0 352
michael@0 353 if(length>=0) {
michael@0 354 length-=headerSize;
michael@0 355 if(length<16*4) {
michael@0 356 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
michael@0 357 length);
michael@0 358 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 359 return 0;
michael@0 360 }
michael@0 361 }
michael@0 362
michael@0 363 /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
michael@0 364 for(i=0; i<16; ++i) {
michael@0 365 indexes[i]=udata_readInt32(ds, inIndexes[i]);
michael@0 366 }
michael@0 367
michael@0 368 /* get the total length of the data */
michael@0 369 size=indexes[UCASE_IX_LENGTH];
michael@0 370
michael@0 371 if(length>=0) {
michael@0 372 if(length<size) {
michael@0 373 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
michael@0 374 length);
michael@0 375 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 376 return 0;
michael@0 377 }
michael@0 378
michael@0 379 /* copy the data for inaccessible bytes */
michael@0 380 if(inBytes!=outBytes) {
michael@0 381 uprv_memcpy(outBytes, inBytes, size);
michael@0 382 }
michael@0 383
michael@0 384 offset=0;
michael@0 385
michael@0 386 /* swap the int32_t indexes[] */
michael@0 387 count=indexes[UCASE_IX_INDEX_TOP]*4;
michael@0 388 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
michael@0 389 offset+=count;
michael@0 390
michael@0 391 /* swap the UTrie */
michael@0 392 count=indexes[UCASE_IX_TRIE_SIZE];
michael@0 393 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
michael@0 394 offset+=count;
michael@0 395
michael@0 396 /* swap the uint16_t exceptions[] and unfold[] */
michael@0 397 count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
michael@0 398 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
michael@0 399 offset+=count;
michael@0 400
michael@0 401 U_ASSERT(offset==size);
michael@0 402 }
michael@0 403
michael@0 404 return headerSize+size;
michael@0 405 }
michael@0 406
michael@0 407 /* Unicode bidi/shaping data swapping --------------------------------------- */
michael@0 408
michael@0 409 static int32_t U_CALLCONV
michael@0 410 ubidi_swap(const UDataSwapper *ds,
michael@0 411 const void *inData, int32_t length, void *outData,
michael@0 412 UErrorCode *pErrorCode) {
michael@0 413 const UDataInfo *pInfo;
michael@0 414 int32_t headerSize;
michael@0 415
michael@0 416 const uint8_t *inBytes;
michael@0 417 uint8_t *outBytes;
michael@0 418
michael@0 419 const int32_t *inIndexes;
michael@0 420 int32_t indexes[16];
michael@0 421
michael@0 422 int32_t i, offset, count, size;
michael@0 423
michael@0 424 /* udata_swapDataHeader checks the arguments */
michael@0 425 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
michael@0 426 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 427 return 0;
michael@0 428 }
michael@0 429
michael@0 430 /* check data format and format version */
michael@0 431 pInfo=(const UDataInfo *)((const char *)inData+4);
michael@0 432 if(!(
michael@0 433 pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */
michael@0 434 pInfo->dataFormat[1]==UBIDI_FMT_1 &&
michael@0 435 pInfo->dataFormat[2]==UBIDI_FMT_2 &&
michael@0 436 pInfo->dataFormat[3]==UBIDI_FMT_3 &&
michael@0 437 ((pInfo->formatVersion[0]==1 &&
michael@0 438 pInfo->formatVersion[2]==UTRIE_SHIFT &&
michael@0 439 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
michael@0 440 pInfo->formatVersion[0]==2)
michael@0 441 )) {
michael@0 442 udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
michael@0 443 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 444 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 445 pInfo->formatVersion[0]);
michael@0 446 *pErrorCode=U_UNSUPPORTED_ERROR;
michael@0 447 return 0;
michael@0 448 }
michael@0 449
michael@0 450 inBytes=(const uint8_t *)inData+headerSize;
michael@0 451 outBytes=(uint8_t *)outData+headerSize;
michael@0 452
michael@0 453 inIndexes=(const int32_t *)inBytes;
michael@0 454
michael@0 455 if(length>=0) {
michael@0 456 length-=headerSize;
michael@0 457 if(length<16*4) {
michael@0 458 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
michael@0 459 length);
michael@0 460 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 461 return 0;
michael@0 462 }
michael@0 463 }
michael@0 464
michael@0 465 /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
michael@0 466 for(i=0; i<16; ++i) {
michael@0 467 indexes[i]=udata_readInt32(ds, inIndexes[i]);
michael@0 468 }
michael@0 469
michael@0 470 /* get the total length of the data */
michael@0 471 size=indexes[UBIDI_IX_LENGTH];
michael@0 472
michael@0 473 if(length>=0) {
michael@0 474 if(length<size) {
michael@0 475 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
michael@0 476 length);
michael@0 477 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 478 return 0;
michael@0 479 }
michael@0 480
michael@0 481 /* copy the data for inaccessible bytes */
michael@0 482 if(inBytes!=outBytes) {
michael@0 483 uprv_memcpy(outBytes, inBytes, size);
michael@0 484 }
michael@0 485
michael@0 486 offset=0;
michael@0 487
michael@0 488 /* swap the int32_t indexes[] */
michael@0 489 count=indexes[UBIDI_IX_INDEX_TOP]*4;
michael@0 490 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
michael@0 491 offset+=count;
michael@0 492
michael@0 493 /* swap the UTrie */
michael@0 494 count=indexes[UBIDI_IX_TRIE_SIZE];
michael@0 495 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
michael@0 496 offset+=count;
michael@0 497
michael@0 498 /* swap the uint32_t mirrors[] */
michael@0 499 count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
michael@0 500 ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
michael@0 501 offset+=count;
michael@0 502
michael@0 503 /* just skip the uint8_t jgArray[] */
michael@0 504 count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
michael@0 505 offset+=count;
michael@0 506
michael@0 507 U_ASSERT(offset==size);
michael@0 508 }
michael@0 509
michael@0 510 return headerSize+size;
michael@0 511 }
michael@0 512
michael@0 513 /* Unicode normalization data swapping -------------------------------------- */
michael@0 514
michael@0 515 #if !UCONFIG_NO_NORMALIZATION
michael@0 516
michael@0 517 static int32_t U_CALLCONV
michael@0 518 unorm_swap(const UDataSwapper *ds,
michael@0 519 const void *inData, int32_t length, void *outData,
michael@0 520 UErrorCode *pErrorCode) {
michael@0 521 const UDataInfo *pInfo;
michael@0 522 int32_t headerSize;
michael@0 523
michael@0 524 const uint8_t *inBytes;
michael@0 525 uint8_t *outBytes;
michael@0 526
michael@0 527 const int32_t *inIndexes;
michael@0 528 int32_t indexes[32];
michael@0 529
michael@0 530 int32_t i, offset, count, size;
michael@0 531
michael@0 532 /* udata_swapDataHeader checks the arguments */
michael@0 533 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
michael@0 534 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 535 return 0;
michael@0 536 }
michael@0 537
michael@0 538 /* check data format and format version */
michael@0 539 pInfo=(const UDataInfo *)((const char *)inData+4);
michael@0 540 if(!(
michael@0 541 pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */
michael@0 542 pInfo->dataFormat[1]==0x6f &&
michael@0 543 pInfo->dataFormat[2]==0x72 &&
michael@0 544 pInfo->dataFormat[3]==0x6d &&
michael@0 545 pInfo->formatVersion[0]==2
michael@0 546 )) {
michael@0 547 udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
michael@0 548 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 549 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 550 pInfo->formatVersion[0]);
michael@0 551 *pErrorCode=U_UNSUPPORTED_ERROR;
michael@0 552 return 0;
michael@0 553 }
michael@0 554
michael@0 555 inBytes=(const uint8_t *)inData+headerSize;
michael@0 556 outBytes=(uint8_t *)outData+headerSize;
michael@0 557
michael@0 558 inIndexes=(const int32_t *)inBytes;
michael@0 559
michael@0 560 if(length>=0) {
michael@0 561 length-=headerSize;
michael@0 562 if(length<32*4) {
michael@0 563 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
michael@0 564 length);
michael@0 565 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 566 return 0;
michael@0 567 }
michael@0 568 }
michael@0 569
michael@0 570 /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
michael@0 571 for(i=0; i<32; ++i) {
michael@0 572 indexes[i]=udata_readInt32(ds, inIndexes[i]);
michael@0 573 }
michael@0 574
michael@0 575 /* calculate the total length of the data */
michael@0 576 size=
michael@0 577 32*4+ /* size of indexes[] */
michael@0 578 indexes[_NORM_INDEX_TRIE_SIZE]+
michael@0 579 indexes[_NORM_INDEX_UCHAR_COUNT]*2+
michael@0 580 indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
michael@0 581 indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
michael@0 582 indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
michael@0 583 indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
michael@0 584
michael@0 585 if(length>=0) {
michael@0 586 if(length<size) {
michael@0 587 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
michael@0 588 length);
michael@0 589 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 590 return 0;
michael@0 591 }
michael@0 592
michael@0 593 /* copy the data for inaccessible bytes */
michael@0 594 if(inBytes!=outBytes) {
michael@0 595 uprv_memcpy(outBytes, inBytes, size);
michael@0 596 }
michael@0 597
michael@0 598 offset=0;
michael@0 599
michael@0 600 /* swap the indexes[] */
michael@0 601 count=32*4;
michael@0 602 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
michael@0 603 offset+=count;
michael@0 604
michael@0 605 /* swap the main UTrie */
michael@0 606 count=indexes[_NORM_INDEX_TRIE_SIZE];
michael@0 607 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
michael@0 608 offset+=count;
michael@0 609
michael@0 610 /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
michael@0 611 count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
michael@0 612 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
michael@0 613 offset+=count;
michael@0 614
michael@0 615 /* swap the FCD UTrie */
michael@0 616 count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
michael@0 617 if(count!=0) {
michael@0 618 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
michael@0 619 offset+=count;
michael@0 620 }
michael@0 621
michael@0 622 /* swap the aux UTrie */
michael@0 623 count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
michael@0 624 if(count!=0) {
michael@0 625 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
michael@0 626 offset+=count;
michael@0 627 }
michael@0 628
michael@0 629 /* swap the uint16_t combiningTable[] */
michael@0 630 count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
michael@0 631 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
michael@0 632 offset+=count;
michael@0 633 }
michael@0 634
michael@0 635 return headerSize+size;
michael@0 636 }
michael@0 637
michael@0 638 #endif
michael@0 639
michael@0 640 /* Swap 'Test' data from gentest */
michael@0 641 static int32_t U_CALLCONV
michael@0 642 test_swap(const UDataSwapper *ds,
michael@0 643 const void *inData, int32_t length, void *outData,
michael@0 644 UErrorCode *pErrorCode) {
michael@0 645 const UDataInfo *pInfo;
michael@0 646 int32_t headerSize;
michael@0 647
michael@0 648 const uint8_t *inBytes;
michael@0 649 uint8_t *outBytes;
michael@0 650
michael@0 651 int32_t offset;
michael@0 652
michael@0 653 /* udata_swapDataHeader checks the arguments */
michael@0 654 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
michael@0 655 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 656 udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL");
michael@0 657 return 0;
michael@0 658 }
michael@0 659
michael@0 660 /* check data format and format version */
michael@0 661 pInfo=(const UDataInfo *)((const char *)inData+4);
michael@0 662 if(!(
michael@0 663 pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */
michael@0 664 pInfo->dataFormat[1]==0x65 &&
michael@0 665 pInfo->dataFormat[2]==0x73 &&
michael@0 666 pInfo->dataFormat[3]==0x74 &&
michael@0 667 pInfo->formatVersion[0]==1
michael@0 668 )) {
michael@0 669 udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
michael@0 670 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 671 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 672 pInfo->formatVersion[0]);
michael@0 673 *pErrorCode=U_UNSUPPORTED_ERROR;
michael@0 674 return 0;
michael@0 675 }
michael@0 676
michael@0 677 inBytes=(const uint8_t *)inData+headerSize;
michael@0 678 outBytes=(uint8_t *)outData+headerSize;
michael@0 679
michael@0 680 int32_t size16 = 2; // 16bit plus padding
michael@0 681 int32_t sizeStr = 5; // 4 char inv-str plus null
michael@0 682 int32_t size = size16 + sizeStr;
michael@0 683
michael@0 684 if(length>=0) {
michael@0 685 if(length<size) {
michael@0 686 udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
michael@0 687 length, size);
michael@0 688 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 689 return 0;
michael@0 690 }
michael@0 691
michael@0 692 offset =0;
michael@0 693 /* swap a 1 entry array */
michael@0 694 ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
michael@0 695 offset+=size16;
michael@0 696 ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
michael@0 697 }
michael@0 698
michael@0 699 return headerSize+size;
michael@0 700 }
michael@0 701
michael@0 702 /* swap any data (except a .dat package) ------------------------------------ */
michael@0 703
michael@0 704 static const struct {
michael@0 705 uint8_t dataFormat[4];
michael@0 706 UDataSwapFn *swapFn;
michael@0 707 } swapFns[]={
michael@0 708 { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */
michael@0 709 #if !UCONFIG_NO_LEGACY_CONVERSION
michael@0 710 { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */
michael@0 711 #endif
michael@0 712 #if !UCONFIG_NO_CONVERSION
michael@0 713 { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */
michael@0 714 #endif
michael@0 715 #if !UCONFIG_NO_IDNA
michael@0 716 { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */
michael@0 717 #endif
michael@0 718 /* insert data formats here, descending by expected frequency of occurrence */
michael@0 719 { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */
michael@0 720
michael@0 721 { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
michael@0 722 ucase_swap }, /* dataFormat="cAsE" */
michael@0 723
michael@0 724 { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
michael@0 725 ubidi_swap }, /* dataFormat="BiDi" */
michael@0 726
michael@0 727 #if !UCONFIG_NO_NORMALIZATION
michael@0 728 { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */
michael@0 729 { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */
michael@0 730 #endif
michael@0 731 #if !UCONFIG_NO_COLLATION
michael@0 732 { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */
michael@0 733 { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
michael@0 734 #endif
michael@0 735 #if !UCONFIG_NO_BREAK_ITERATION
michael@0 736 { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */
michael@0 737 { { 0x44, 0x69, 0x63, 0x74 }, udict_swap }, /* dataFormat="Dict" */
michael@0 738 #endif
michael@0 739 { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */
michael@0 740 { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */
michael@0 741 #if !UCONFIG_NO_NORMALIZATION
michael@0 742 { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */
michael@0 743 #endif
michael@0 744 { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */
michael@0 745 };
michael@0 746
michael@0 747 U_CAPI int32_t U_EXPORT2
michael@0 748 udata_swap(const UDataSwapper *ds,
michael@0 749 const void *inData, int32_t length, void *outData,
michael@0 750 UErrorCode *pErrorCode) {
michael@0 751 char dataFormatChars[4];
michael@0 752 const UDataInfo *pInfo;
michael@0 753 int32_t i, swappedLength;
michael@0 754
michael@0 755 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 756 return 0;
michael@0 757 }
michael@0 758
michael@0 759 /*
michael@0 760 * Preflight the header first; checks for illegal arguments, too.
michael@0 761 * Do not swap the header right away because the format-specific swapper
michael@0 762 * will swap it, get the headerSize again, and also use the header
michael@0 763 * information. Otherwise we would have to pass some of the information
michael@0 764 * and not be able to use the UDataSwapFn signature.
michael@0 765 */
michael@0 766 udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
michael@0 767
michael@0 768 /*
michael@0 769 * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
michael@0 770 * then we could check here for further known magic values and structures.
michael@0 771 */
michael@0 772 if(U_FAILURE(*pErrorCode)) {
michael@0 773 return 0; /* the data format was not recognized */
michael@0 774 }
michael@0 775
michael@0 776 pInfo=(const UDataInfo *)((const char *)inData+4);
michael@0 777
michael@0 778 {
michael@0 779 /* convert the data format from ASCII to Unicode to the system charset */
michael@0 780 UChar u[4]={
michael@0 781 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 782 pInfo->dataFormat[2], pInfo->dataFormat[3]
michael@0 783 };
michael@0 784
michael@0 785 if(uprv_isInvariantUString(u, 4)) {
michael@0 786 u_UCharsToChars(u, dataFormatChars, 4);
michael@0 787 } else {
michael@0 788 dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
michael@0 789 }
michael@0 790 }
michael@0 791
michael@0 792 /* dispatch to the swap function for the dataFormat */
michael@0 793 for(i=0; i<LENGTHOF(swapFns); ++i) {
michael@0 794 if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
michael@0 795 swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
michael@0 796
michael@0 797 if(U_FAILURE(*pErrorCode)) {
michael@0 798 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
michael@0 799 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 800 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 801 dataFormatChars[0], dataFormatChars[1],
michael@0 802 dataFormatChars[2], dataFormatChars[3],
michael@0 803 u_errorName(*pErrorCode));
michael@0 804 } else if(swappedLength<(length-15)) {
michael@0 805 /* swapped less than expected */
michael@0 806 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
michael@0 807 swappedLength, length,
michael@0 808 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 809 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 810 dataFormatChars[0], dataFormatChars[1],
michael@0 811 dataFormatChars[2], dataFormatChars[3],
michael@0 812 u_errorName(*pErrorCode));
michael@0 813 }
michael@0 814
michael@0 815 return swappedLength;
michael@0 816 }
michael@0 817 }
michael@0 818
michael@0 819 /* the dataFormat was not recognized */
michael@0 820 udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
michael@0 821 pInfo->dataFormat[0], pInfo->dataFormat[1],
michael@0 822 pInfo->dataFormat[2], pInfo->dataFormat[3],
michael@0 823 dataFormatChars[0], dataFormatChars[1],
michael@0 824 dataFormatChars[2], dataFormatChars[3]);
michael@0 825
michael@0 826 *pErrorCode=U_UNSUPPORTED_ERROR;
michael@0 827 return 0;
michael@0 828 }

mercurial