intl/icu/source/common/udata.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1999-2013, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ******************************************************************************
michael@0 8 * file name: udata.cpp
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 1999oct25
michael@0 14 * created by: Markus W. Scherer
michael@0 15 */
michael@0 16
michael@0 17 #include "unicode/utypes.h" /* U_PLATFORM etc. */
michael@0 18
michael@0 19 #ifdef __GNUC__
michael@0 20 /* if gcc
michael@0 21 #define ATTRIBUTE_WEAK __attribute__ ((weak))
michael@0 22 might have to #include some other header
michael@0 23 */
michael@0 24 #endif
michael@0 25
michael@0 26 #include "unicode/putil.h"
michael@0 27 #include "unicode/udata.h"
michael@0 28 #include "unicode/uversion.h"
michael@0 29 #include "charstr.h"
michael@0 30 #include "cmemory.h"
michael@0 31 #include "cstring.h"
michael@0 32 #include "putilimp.h"
michael@0 33 #include "uassert.h"
michael@0 34 #include "ucln_cmn.h"
michael@0 35 #include "ucmndata.h"
michael@0 36 #include "udatamem.h"
michael@0 37 #include "uhash.h"
michael@0 38 #include "umapfile.h"
michael@0 39 #include "umutex.h"
michael@0 40
michael@0 41 /***********************************************************************
michael@0 42 *
michael@0 43 * Notes on the organization of the ICU data implementation
michael@0 44 *
michael@0 45 * All of the public API is defined in udata.h
michael@0 46 *
michael@0 47 * The implementation is split into several files...
michael@0 48 *
michael@0 49 * - udata.cpp (this file) contains higher level code that knows about
michael@0 50 * the search paths for locating data, caching opened data, etc.
michael@0 51 *
michael@0 52 * - umapfile.c contains the low level platform-specific code for actually loading
michael@0 53 * (memory mapping, file reading, whatever) data into memory.
michael@0 54 *
michael@0 55 * - ucmndata.c deals with the tables of contents of ICU data items within
michael@0 56 * an ICU common format data file. The implementation includes
michael@0 57 * an abstract interface and support for multiple TOC formats.
michael@0 58 * All knowledge of any specific TOC format is encapsulated here.
michael@0 59 *
michael@0 60 * - udatamem.c has code for managing UDataMemory structs. These are little
michael@0 61 * descriptor objects for blocks of memory holding ICU data of
michael@0 62 * various types.
michael@0 63 */
michael@0 64
michael@0 65 /* configuration ---------------------------------------------------------- */
michael@0 66
michael@0 67 /* If you are excruciatingly bored turn this on .. */
michael@0 68 /* #define UDATA_DEBUG 1 */
michael@0 69
michael@0 70 #if defined(UDATA_DEBUG)
michael@0 71 # include <stdio.h>
michael@0 72 #endif
michael@0 73
michael@0 74 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
michael@0 75
michael@0 76 U_NAMESPACE_USE
michael@0 77
michael@0 78 /*
michael@0 79 * Forward declarations
michael@0 80 */
michael@0 81 static UDataMemory *udata_findCachedData(const char *path);
michael@0 82
michael@0 83 /***********************************************************************
michael@0 84 *
michael@0 85 * static (Global) data
michael@0 86 *
michael@0 87 ************************************************************************/
michael@0 88
michael@0 89 /*
michael@0 90 * Pointers to the common ICU data.
michael@0 91 *
michael@0 92 * We store multiple pointers to ICU data packages and iterate through them
michael@0 93 * when looking for a data item.
michael@0 94 *
michael@0 95 * It is possible to combine this with dependency inversion:
michael@0 96 * One or more data package libraries may export
michael@0 97 * functions that each return a pointer to their piece of the ICU data,
michael@0 98 * and this file would import them as weak functions, without a
michael@0 99 * strong linker dependency from the common library on the data library.
michael@0 100 *
michael@0 101 * Then we can have applications depend on only that part of ICU's data
michael@0 102 * that they really need, reducing the size of binaries that take advantage
michael@0 103 * of this.
michael@0 104 */
michael@0 105 static UDataMemory *gCommonICUDataArray[10] = { NULL };
michael@0 106
michael@0 107 static UBool gHaveTriedToLoadCommonData = FALSE; /* See extendICUData(). */
michael@0 108
michael@0 109 static UHashtable *gCommonDataCache = NULL; /* Global hash table of opened ICU data files. */
michael@0 110 static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER;
michael@0 111
michael@0 112 static UDataFileAccess gDataFileAccess = UDATA_DEFAULT_ACCESS;
michael@0 113
michael@0 114 static UBool U_CALLCONV
michael@0 115 udata_cleanup(void)
michael@0 116 {
michael@0 117 int32_t i;
michael@0 118
michael@0 119 if (gCommonDataCache) { /* Delete the cache of user data mappings. */
michael@0 120 uhash_close(gCommonDataCache); /* Table owns the contents, and will delete them. */
michael@0 121 gCommonDataCache = NULL; /* Cleanup is not thread safe. */
michael@0 122 }
michael@0 123 gCommonDataCacheInitOnce.reset();
michael@0 124
michael@0 125 for (i = 0; i < LENGTHOF(gCommonICUDataArray) && gCommonICUDataArray[i] != NULL; ++i) {
michael@0 126 udata_close(gCommonICUDataArray[i]);
michael@0 127 gCommonICUDataArray[i] = NULL;
michael@0 128 }
michael@0 129 gHaveTriedToLoadCommonData = FALSE;
michael@0 130
michael@0 131 return TRUE; /* Everything was cleaned up */
michael@0 132 }
michael@0 133
michael@0 134 static UBool U_CALLCONV
michael@0 135 findCommonICUDataByName(const char *inBasename)
michael@0 136 {
michael@0 137 UBool found = FALSE;
michael@0 138 int32_t i;
michael@0 139
michael@0 140 UDataMemory *pData = udata_findCachedData(inBasename);
michael@0 141 if (pData == NULL)
michael@0 142 return FALSE;
michael@0 143
michael@0 144 for (i = 0; i < LENGTHOF(gCommonICUDataArray); ++i) {
michael@0 145 if ((gCommonICUDataArray[i] != NULL) && (gCommonICUDataArray[i]->pHeader == pData->pHeader)) {
michael@0 146 /* The data pointer is already in the array. */
michael@0 147 found = TRUE;
michael@0 148 break;
michael@0 149 }
michael@0 150 }
michael@0 151
michael@0 152 return found;
michael@0 153 }
michael@0 154
michael@0 155
michael@0 156 /*
michael@0 157 * setCommonICUData. Set a UDataMemory to be the global ICU Data
michael@0 158 */
michael@0 159 static UBool
michael@0 160 setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to caller, we copy it. */
michael@0 161 UBool warn, /* If true, set USING_DEFAULT warning if ICUData was */
michael@0 162 /* changed by another thread before we got to it. */
michael@0 163 UErrorCode *pErr)
michael@0 164 {
michael@0 165 UDataMemory *newCommonData = UDataMemory_createNewInstance(pErr);
michael@0 166 int32_t i;
michael@0 167 UBool didUpdate = FALSE;
michael@0 168 if (U_FAILURE(*pErr)) {
michael@0 169 return FALSE;
michael@0 170 }
michael@0 171
michael@0 172 /* For the assignment, other threads must cleanly see either the old */
michael@0 173 /* or the new, not some partially initialized new. The old can not be */
michael@0 174 /* deleted - someone may still have a pointer to it lying around in */
michael@0 175 /* their locals. */
michael@0 176 UDatamemory_assign(newCommonData, pData);
michael@0 177 umtx_lock(NULL);
michael@0 178 for (i = 0; i < LENGTHOF(gCommonICUDataArray); ++i) {
michael@0 179 if (gCommonICUDataArray[i] == NULL) {
michael@0 180 gCommonICUDataArray[i] = newCommonData;
michael@0 181 ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
michael@0 182 didUpdate = TRUE;
michael@0 183 break;
michael@0 184 } else if (gCommonICUDataArray[i]->pHeader == pData->pHeader) {
michael@0 185 /* The same data pointer is already in the array. */
michael@0 186 break;
michael@0 187 }
michael@0 188 }
michael@0 189 umtx_unlock(NULL);
michael@0 190
michael@0 191 if (i == LENGTHOF(gCommonICUDataArray) && warn) {
michael@0 192 *pErr = U_USING_DEFAULT_WARNING;
michael@0 193 }
michael@0 194 if (!didUpdate) {
michael@0 195 uprv_free(newCommonData);
michael@0 196 }
michael@0 197 return didUpdate;
michael@0 198 }
michael@0 199
michael@0 200 static UBool
michael@0 201 setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
michael@0 202 UDataMemory tData;
michael@0 203 UDataMemory_init(&tData);
michael@0 204 UDataMemory_setData(&tData, pData);
michael@0 205 udata_checkCommonData(&tData, pErrorCode);
michael@0 206 return setCommonICUData(&tData, FALSE, pErrorCode);
michael@0 207 }
michael@0 208
michael@0 209 static const char *
michael@0 210 findBasename(const char *path) {
michael@0 211 const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
michael@0 212 if(basename==NULL) {
michael@0 213 return path;
michael@0 214 } else {
michael@0 215 return basename+1;
michael@0 216 }
michael@0 217 }
michael@0 218
michael@0 219 #ifdef UDATA_DEBUG
michael@0 220 static const char *
michael@0 221 packageNameFromPath(const char *path)
michael@0 222 {
michael@0 223 if((path == NULL) || (*path == 0)) {
michael@0 224 return U_ICUDATA_NAME;
michael@0 225 }
michael@0 226
michael@0 227 path = findBasename(path);
michael@0 228
michael@0 229 if((path == NULL) || (*path == 0)) {
michael@0 230 return U_ICUDATA_NAME;
michael@0 231 }
michael@0 232
michael@0 233 return path;
michael@0 234 }
michael@0 235 #endif
michael@0 236
michael@0 237 /*----------------------------------------------------------------------*
michael@0 238 * *
michael@0 239 * Cache for common data *
michael@0 240 * Functions for looking up or adding entries to a cache of *
michael@0 241 * data that has been previously opened. Avoids a potentially *
michael@0 242 * expensive operation of re-opening the data for subsequent *
michael@0 243 * uses. *
michael@0 244 * *
michael@0 245 * Data remains cached for the duration of the process. *
michael@0 246 * *
michael@0 247 *----------------------------------------------------------------------*/
michael@0 248
/*
 * One entry of the common-data cache hash table.
 * Both members are heap-owned by the entry and released by
 * DataCacheElement_deleter().
 */
michael@0 249 typedef struct DataCacheElement {
/* Hash key: a private copy of the data file's base name. */
michael@0 250 char *name;
/* The cached data; closed with udata_close() when the entry is deleted. */
michael@0 251 UDataMemory *item;
michael@0 252 } DataCacheElement;
michael@0 253
michael@0 254
michael@0 255
michael@0 256 /*
michael@0 257 * Deleter function for DataCacheElements.
michael@0 258 * udata cleanup function closes the hash table; hash table in turn calls back to
michael@0 259 * here for each entry.
michael@0 260 */
michael@0 261 static void U_CALLCONV DataCacheElement_deleter(void *pDCEl) {
michael@0 262 DataCacheElement *p = (DataCacheElement *)pDCEl;
michael@0 263 udata_close(p->item); /* unmaps storage */
michael@0 264 uprv_free(p->name); /* delete the hash key string. */
michael@0 265 uprv_free(pDCEl); /* delete 'this' */
michael@0 266 }
michael@0 267
michael@0 268 static void udata_initHashTable() {
michael@0 269 UErrorCode err = U_ZERO_ERROR;
michael@0 270 U_ASSERT(gCommonDataCache == NULL);
michael@0 271 gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &err);
michael@0 272 if (U_FAILURE(err)) {
michael@0 273 // TODO: handle errors better.
michael@0 274 gCommonDataCache = NULL;
michael@0 275 }
michael@0 276 if (gCommonDataCache != NULL) {
michael@0 277 uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter);
michael@0 278 ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
michael@0 279 }
michael@0 280 }
michael@0 281
michael@0 282 /* udata_getCacheHashTable()
michael@0 283 * Get the hash table used to store the data cache entries.
michael@0 284 * Lazy create it if it doesn't yet exist.
michael@0 285 */
michael@0 286 static UHashtable *udata_getHashTable() {
michael@0 287 umtx_initOnce(gCommonDataCacheInitOnce, &udata_initHashTable);
michael@0 288 return gCommonDataCache;
michael@0 289 }
michael@0 290
michael@0 291
michael@0 292
michael@0 293 static UDataMemory *udata_findCachedData(const char *path)
michael@0 294 {
michael@0 295 UHashtable *htable;
michael@0 296 UDataMemory *retVal = NULL;
michael@0 297 DataCacheElement *el;
michael@0 298 const char *baseName;
michael@0 299
michael@0 300 baseName = findBasename(path); /* Cache remembers only the base name, not the full path. */
michael@0 301 htable = udata_getHashTable();
michael@0 302 umtx_lock(NULL);
michael@0 303 el = (DataCacheElement *)uhash_get(htable, baseName);
michael@0 304 umtx_unlock(NULL);
michael@0 305 if (el != NULL) {
michael@0 306 retVal = el->item;
michael@0 307 }
michael@0 308 #ifdef UDATA_DEBUG
michael@0 309 fprintf(stderr, "Cache: [%s] -> %p\n", baseName, retVal);
michael@0 310 #endif
michael@0 311 return retVal;
michael@0 312 }
michael@0 313
michael@0 314
michael@0 315 static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) {
michael@0 316 DataCacheElement *newElement;
michael@0 317 const char *baseName;
michael@0 318 int32_t nameLen;
michael@0 319 UHashtable *htable;
michael@0 320 DataCacheElement *oldValue = NULL;
michael@0 321 UErrorCode subErr = U_ZERO_ERROR;
michael@0 322
michael@0 323 if (U_FAILURE(*pErr)) {
michael@0 324 return NULL;
michael@0 325 }
michael@0 326
michael@0 327 /* Create a new DataCacheElement - the thingy we store in the hash table -
michael@0 328 * and copy the supplied path and UDataMemoryItems into it.
michael@0 329 */
michael@0 330 newElement = (DataCacheElement *)uprv_malloc(sizeof(DataCacheElement));
michael@0 331 if (newElement == NULL) {
michael@0 332 *pErr = U_MEMORY_ALLOCATION_ERROR;
michael@0 333 return NULL;
michael@0 334 }
michael@0 335 newElement->item = UDataMemory_createNewInstance(pErr);
michael@0 336 if (U_FAILURE(*pErr)) {
michael@0 337 uprv_free(newElement);
michael@0 338 return NULL;
michael@0 339 }
michael@0 340 UDatamemory_assign(newElement->item, item);
michael@0 341
michael@0 342 baseName = findBasename(path);
michael@0 343 nameLen = (int32_t)uprv_strlen(baseName);
michael@0 344 newElement->name = (char *)uprv_malloc(nameLen+1);
michael@0 345 if (newElement->name == NULL) {
michael@0 346 *pErr = U_MEMORY_ALLOCATION_ERROR;
michael@0 347 uprv_free(newElement->item);
michael@0 348 uprv_free(newElement);
michael@0 349 return NULL;
michael@0 350 }
michael@0 351 uprv_strcpy(newElement->name, baseName);
michael@0 352
michael@0 353 /* Stick the new DataCacheElement into the hash table.
michael@0 354 */
michael@0 355 htable = udata_getHashTable();
michael@0 356 umtx_lock(NULL);
michael@0 357 oldValue = (DataCacheElement *)uhash_get(htable, path);
michael@0 358 if (oldValue != NULL) {
michael@0 359 subErr = U_USING_DEFAULT_WARNING;
michael@0 360 }
michael@0 361 else {
michael@0 362 uhash_put(
michael@0 363 htable,
michael@0 364 newElement->name, /* Key */
michael@0 365 newElement, /* Value */
michael@0 366 &subErr);
michael@0 367 }
michael@0 368 umtx_unlock(NULL);
michael@0 369
michael@0 370 #ifdef UDATA_DEBUG
michael@0 371 fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name,
michael@0 372 newElement->item, u_errorName(subErr), newElement->item->vFuncs);
michael@0 373 #endif
michael@0 374
michael@0 375 if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) {
michael@0 376 *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */
michael@0 377 uprv_free(newElement->name);
michael@0 378 uprv_free(newElement->item);
michael@0 379 uprv_free(newElement);
michael@0 380 return oldValue ? oldValue->item : NULL;
michael@0 381 }
michael@0 382
michael@0 383 return newElement->item;
michael@0 384 }
michael@0 385
michael@0 386 /*----------------------------------------------------------------------*
michael@0 387 * *
michael@0 388 * Path management. Could be shared with other tools/etc if need be *
michael@0 389 * later on. *
michael@0 390 * *
michael@0 391 *----------------------------------------------------------------------*/
michael@0 392
michael@0 393 #define U_DATA_PATHITER_BUFSIZ 128 /* Size of local buffer for paths */
michael@0 394 /* Overflow causes malloc of larger buf */
michael@0 395
michael@0 396 U_NAMESPACE_BEGIN
michael@0 397
/*
 * Iterator over candidate file paths for a data item.
 * The constructor records the search path, package and item; each call to
 * next() builds one candidate path in an internal buffer and returns it.
 */
michael@0 398 class UDataPathIterator
michael@0 399 {
michael@0 400 public:
/* Set up the iteration; see the constructor definition for the parameters. */
michael@0 401 UDataPathIterator(const char *path, const char *pkg,
michael@0 402 const char *item, const char *suffix, UBool doCheckLastFour,
michael@0 403 UErrorCode *pErrorCode);
/* Return the next candidate path, or NULL when there are no more. */
/* The returned pointer refers to pathBuffer, which is rewritten by each call. */
michael@0 404 const char *next(UErrorCode *pErrorCode);
michael@0 405
michael@0 406 private:
michael@0 407 const char *path; /* working path (u_icudata_Dir) */
michael@0 408 const char *nextPath; /* path following this one */
michael@0 409 const char *basename; /* item's basename (icudt22e_mt.res)*/
michael@0 410 const char *suffix; /* item suffix (can be null) */
michael@0 411
michael@0 412 uint32_t basenameLen; /* length of basename */
michael@0 413
michael@0 414 CharString itemPath; /* path passed in with item name */
michael@0 415 CharString pathBuffer; /* output path for this it'ion */
michael@0 416 CharString packageStub; /* example: "/icudt28b". Will ignore that leaf in set paths. */
michael@0 417
michael@0 418 UBool checkLastFour; /* if TRUE then allow paths such as '/foo/myapp.dat'
michael@0 419 * to match, checks last 4 chars of suffix with
michael@0 420 * last 4 of path, then previous chars. */
michael@0 421 };
michael@0 422
michael@0 423 /**
michael@0 424 * @param iter The iterator to be initialized. Its current state does not matter.
michael@0 425 * @param path The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME
michael@0 426 * @param pkg Package which is being searched for, ex "icudt28l". Will ignore leave directories such as /icudt28l
michael@0 427 * @param item Item to be searched for. Can include full path, such as /a/b/foo.dat
michael@0 428 * @param suffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
michael@0 429 * Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
michael@0 430 * '/blarg/stuff.dat' would also be found.
michael@0 431 */
michael@0 432 UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg,
michael@0 433 const char *item, const char *inSuffix, UBool doCheckLastFour,
michael@0 434 UErrorCode *pErrorCode)
michael@0 435 {
michael@0 436 #ifdef UDATA_DEBUG
michael@0 437 fprintf(stderr, "SUFFIX1=%s PATH=%s\n", inSuffix, inPath);
michael@0 438 #endif
michael@0 439 /** Path **/
michael@0 440 if(inPath == NULL) {
michael@0 441 path = u_getDataDirectory();
michael@0 442 } else {
michael@0 443 path = inPath;
michael@0 444 }
michael@0 445
michael@0 446 /** Package **/
michael@0 447 if(pkg != NULL) {
michael@0 448 packageStub.append(U_FILE_SEP_CHAR, *pErrorCode).append(pkg, *pErrorCode);
michael@0 449 #ifdef UDATA_DEBUG
michael@0 450 fprintf(stderr, "STUB=%s [%d]\n", packageStub.data(), packageStub.length());
michael@0 451 #endif
michael@0 452 }
michael@0 453
michael@0 454 /** Item **/
michael@0 455 basename = findBasename(item);
michael@0 456 basenameLen = (int32_t)uprv_strlen(basename);
michael@0 457
michael@0 458 /** Item path **/
michael@0 459 if(basename == item) {
michael@0 460 nextPath = path;
michael@0 461 } else {
michael@0 462 itemPath.append(item, (int32_t)(basename-item), *pErrorCode);
michael@0 463 nextPath = itemPath.data();
michael@0 464 }
michael@0 465 #ifdef UDATA_DEBUG
michael@0 466 fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, inSuffix);
michael@0 467 #endif
michael@0 468
michael@0 469 /** Suffix **/
michael@0 470 if(inSuffix != NULL) {
michael@0 471 suffix = inSuffix;
michael@0 472 } else {
michael@0 473 suffix = "";
michael@0 474 }
michael@0 475
michael@0 476 checkLastFour = doCheckLastFour;
michael@0 477
michael@0 478 /* pathBuffer will hold the output path strings returned by this iterator */
michael@0 479
michael@0 480 #ifdef UDATA_DEBUG
michael@0 481 fprintf(stderr, "%p: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
michael@0 482 iter,
michael@0 483 item,
michael@0 484 path,
michael@0 485 basename,
michael@0 486 suffix,
michael@0 487 itemPath.data(),
michael@0 488 nextPath,
michael@0 489 checkLastFour?"TRUE":"false");
michael@0 490 #endif
michael@0 491 }
michael@0 492
michael@0 493 /**
michael@0 494 * Get the next path on the list.
michael@0 495 *
michael@0 496 * @param iter The Iter to be used
michael@0 497 * @param len If set, pointer to the length of the returned path, for convenience.
michael@0 498 * @return Pointer to the next path segment, or NULL if there are no more.
michael@0 499 */
michael@0 500 const char *UDataPathIterator::next(UErrorCode *pErrorCode)
michael@0 501 {
michael@0 502 if(U_FAILURE(*pErrorCode)) {
michael@0 503 return NULL;
michael@0 504 }
michael@0 505
michael@0 506 const char *currentPath = NULL;
michael@0 507 int32_t pathLen = 0;
michael@0 508 const char *pathBasename;
michael@0 509
michael@0 510 do
michael@0 511 {
michael@0 512 if( nextPath == NULL ) {
michael@0 513 break;
michael@0 514 }
michael@0 515 currentPath = nextPath;
michael@0 516
michael@0 517 if(nextPath == itemPath.data()) { /* we were processing item's path. */
michael@0 518 nextPath = path; /* start with regular path next tm. */
michael@0 519 pathLen = (int32_t)uprv_strlen(currentPath);
michael@0 520 } else {
michael@0 521 /* fix up next for next time */
michael@0 522 nextPath = uprv_strchr(currentPath, U_PATH_SEP_CHAR);
michael@0 523 if(nextPath == NULL) {
michael@0 524 /* segment: entire path */
michael@0 525 pathLen = (int32_t)uprv_strlen(currentPath);
michael@0 526 } else {
michael@0 527 /* segment: until next segment */
michael@0 528 pathLen = (int32_t)(nextPath - currentPath);
michael@0 529 /* skip divider */
michael@0 530 nextPath ++;
michael@0 531 }
michael@0 532 }
michael@0 533
michael@0 534 if(pathLen == 0) {
michael@0 535 continue;
michael@0 536 }
michael@0 537
michael@0 538 #ifdef UDATA_DEBUG
michael@0 539 fprintf(stderr, "rest of path (IDD) = %s\n", currentPath);
michael@0 540 fprintf(stderr, " ");
michael@0 541 {
michael@0 542 uint32_t qqq;
michael@0 543 for(qqq=0;qqq<pathLen;qqq++)
michael@0 544 {
michael@0 545 fprintf(stderr, " ");
michael@0 546 }
michael@0 547
michael@0 548 fprintf(stderr, "^\n");
michael@0 549 }
michael@0 550 #endif
michael@0 551 pathBuffer.clear().append(currentPath, pathLen, *pErrorCode);
michael@0 552
michael@0 553 /* check for .dat files */
michael@0 554 pathBasename = findBasename(pathBuffer.data());
michael@0 555
michael@0 556 if(checkLastFour == TRUE &&
michael@0 557 (pathLen>=4) &&
michael@0 558 uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix, 4)==0 && /* suffix matches */
michael@0 559 uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0 && /* base matches */
michael@0 560 uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
michael@0 561
michael@0 562 #ifdef UDATA_DEBUG
michael@0 563 fprintf(stderr, "Have %s file on the path: %s\n", suffix, pathBuffer.data());
michael@0 564 #endif
michael@0 565 /* do nothing */
michael@0 566 }
michael@0 567 else
michael@0 568 { /* regular dir path */
michael@0 569 if(pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) {
michael@0 570 if((pathLen>=4) &&
michael@0 571 uprv_strncmp(pathBuffer.data()+(pathLen-4), ".dat", 4) == 0)
michael@0 572 {
michael@0 573 #ifdef UDATA_DEBUG
michael@0 574 fprintf(stderr, "skipping non-directory .dat file %s\n", pathBuffer.data());
michael@0 575 #endif
michael@0 576 continue;
michael@0 577 }
michael@0 578
michael@0 579 /* Check if it is a directory with the same name as our package */
michael@0 580 if(!packageStub.isEmpty() &&
michael@0 581 (pathLen > packageStub.length()) &&
michael@0 582 !uprv_strcmp(pathBuffer.data() + pathLen - packageStub.length(), packageStub.data())) {
michael@0 583 #ifdef UDATA_DEBUG
michael@0 584 fprintf(stderr, "Found stub %s (will add package %s of len %d)\n", packageStub.data(), basename, basenameLen);
michael@0 585 #endif
michael@0 586 pathBuffer.truncate(pathLen - packageStub.length());
michael@0 587 }
michael@0 588 pathBuffer.append(U_FILE_SEP_CHAR, *pErrorCode);
michael@0 589 }
michael@0 590
michael@0 591 /* + basename */
michael@0 592 pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode);
michael@0 593
michael@0 594 if(*suffix) /* tack on suffix */
michael@0 595 {
michael@0 596 pathBuffer.append(suffix, *pErrorCode);
michael@0 597 }
michael@0 598 }
michael@0 599
michael@0 600 #ifdef UDATA_DEBUG
michael@0 601 fprintf(stderr, " --> %s\n", pathBuffer.data());
michael@0 602 #endif
michael@0 603
michael@0 604 return pathBuffer.data();
michael@0 605
michael@0 606 } while(path);
michael@0 607
michael@0 608 /* fell way off the end */
michael@0 609 return NULL;
michael@0 610 }
michael@0 611
michael@0 612 U_NAMESPACE_END
michael@0 613
michael@0 614 /* ==================================================================================*/
michael@0 615
michael@0 616
michael@0 617 /*----------------------------------------------------------------------*
michael@0 618 * *
michael@0 619 * Add a static reference to the common data library *
michael@0 620 * Unless overridden by an explicit udata_setCommonData, this will be *
michael@0 621 * our common data. *
michael@0 622 * *
michael@0 623 *----------------------------------------------------------------------*/
michael@0 624 extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT;
michael@0 625
michael@0 626 /*
michael@0 627 * This would be a good place for weak-linkage declarations of
michael@0 628 * partial-data-library access functions where each returns a pointer
michael@0 629 * to its data package, if it is linked in.
michael@0 630 */
michael@0 631 /*
michael@0 632 extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK;
michael@0 633 extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK;
michael@0 634 */
michael@0 635
michael@0 636 /*----------------------------------------------------------------------*
michael@0 637 * *
michael@0 638 * openCommonData Attempt to open a common format (.dat) file *
michael@0 639 * Map it into memory (if it's not there already) *
michael@0 640 * and return a UDataMemory object for it. *
michael@0 641 * *
michael@0 642 * If the requested data is already open and cached *
michael@0 643 * just return the cached UDataMem object. *
michael@0 644 * *
michael@0 645 *----------------------------------------------------------------------*/
michael@0 646 static UDataMemory *
michael@0 647 openCommonData(const char *path, /* Path from OpenChoice? */
michael@0 648 int32_t commonDataIndex, /* ICU Data (index >= 0) if path == NULL */
michael@0 649 UErrorCode *pErrorCode)
michael@0 650 {
michael@0 651 UDataMemory tData;
michael@0 652 const char *pathBuffer;
michael@0 653 const char *inBasename;
michael@0 654
michael@0 655 if (U_FAILURE(*pErrorCode)) {
michael@0 656 return NULL;
michael@0 657 }
michael@0 658
michael@0 659 UDataMemory_init(&tData);
michael@0 660
michael@0 661 /* ??????? TODO revisit this */
michael@0 662 if (commonDataIndex >= 0) {
michael@0 663 /* "mini-cache" for common ICU data */
michael@0 664 if(commonDataIndex >= LENGTHOF(gCommonICUDataArray)) {
michael@0 665 return NULL;
michael@0 666 }
michael@0 667 if(gCommonICUDataArray[commonDataIndex] == NULL) {
michael@0 668 int32_t i;
michael@0 669 for(i = 0; i < commonDataIndex; ++i) {
michael@0 670 if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) {
michael@0 671 /* The linked-in data is already in the list. */
michael@0 672 return NULL;
michael@0 673 }
michael@0 674 }
michael@0 675
michael@0 676 /* Add the linked-in data to the list. */
michael@0 677 /*
michael@0 678 * This is where we would check and call weakly linked partial-data-library
michael@0 679 * access functions.
michael@0 680 */
michael@0 681 /*
michael@0 682 if (uprv_getICUData_collation) {
michael@0 683 setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode);
michael@0 684 }
michael@0 685 if (uprv_getICUData_conversion) {
michael@0 686 setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode);
michael@0 687 }
michael@0 688 */
michael@0 689 setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode);
michael@0 690 }
michael@0 691 return gCommonICUDataArray[commonDataIndex];
michael@0 692 }
michael@0 693
michael@0 694
michael@0 695 /* request is NOT for ICU Data. */
michael@0 696
michael@0 697 /* Find the base name portion of the supplied path. */
michael@0 698 /* inBasename will be left pointing somewhere within the original path string. */
michael@0 699 inBasename = findBasename(path);
michael@0 700 #ifdef UDATA_DEBUG
michael@0 701 fprintf(stderr, "inBasename = %s\n", inBasename);
michael@0 702 #endif
michael@0 703
michael@0 704 if(*inBasename==0) {
michael@0 705 /* no basename. This will happen if the original path was a directory name, */
michael@0 706 /* like "a/b/c/". (Fallback to separate files will still work.) */
michael@0 707 #ifdef UDATA_DEBUG
michael@0 708 fprintf(stderr, "ocd: no basename in %s, bailing.\n", path);
michael@0 709 #endif
michael@0 710 *pErrorCode=U_FILE_ACCESS_ERROR;
michael@0 711 return NULL;
michael@0 712 }
michael@0 713
michael@0 714 /* Is the requested common data file already open and cached? */
michael@0 715 /* Note that the cache is keyed by the base name only. The rest of the path, */
michael@0 716 /* if any, is not considered. */
michael@0 717 {
michael@0 718 UDataMemory *dataToReturn = udata_findCachedData(inBasename);
michael@0 719 if (dataToReturn != NULL) {
michael@0 720 return dataToReturn;
michael@0 721 }
michael@0 722 }
michael@0 723
michael@0 724 /* Requested item is not in the cache.
michael@0 725 * Hunt it down, trying all the path locations
michael@0 726 */
michael@0 727
michael@0 728 UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode);
michael@0 729
michael@0 730 while((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL)
michael@0 731 {
michael@0 732 #ifdef UDATA_DEBUG
michael@0 733 fprintf(stderr, "ocd: trying path %s - ", pathBuffer);
michael@0 734 #endif
michael@0 735 uprv_mapFile(&tData, pathBuffer);
michael@0 736 #ifdef UDATA_DEBUG
michael@0 737 fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded");
michael@0 738 #endif
michael@0 739 }
michael@0 740
michael@0 741 #if defined(OS390_STUBDATA) && defined(OS390BATCH)
michael@0 742 if (!UDataMemory_isLoaded(&tData)) {
michael@0 743 char ourPathBuffer[1024];
michael@0 744 /* One more chance, for extendCommonData() */
michael@0 745 uprv_strncpy(ourPathBuffer, path, 1019);
michael@0 746 ourPathBuffer[1019]=0;
michael@0 747 uprv_strcat(ourPathBuffer, ".dat");
michael@0 748 uprv_mapFile(&tData, ourPathBuffer);
michael@0 749 }
michael@0 750 #endif
michael@0 751
michael@0 752 if (!UDataMemory_isLoaded(&tData)) {
michael@0 753 /* no common data */
michael@0 754 *pErrorCode=U_FILE_ACCESS_ERROR;
michael@0 755 return NULL;
michael@0 756 }
michael@0 757
michael@0 758 /* we have mapped a file, check its header */
michael@0 759 udata_checkCommonData(&tData, pErrorCode);
michael@0 760
michael@0 761
michael@0 762 /* Cache the UDataMemory struct for this .dat file,
michael@0 763 * so we won't need to hunt it down and map it again next time
michael@0 764 * something is needed from it. */
michael@0 765 return udata_cacheDataItem(inBasename, &tData, pErrorCode);
michael@0 766 }
michael@0 767
michael@0 768
michael@0 769 /*----------------------------------------------------------------------*
michael@0 770 * *
michael@0 771 * extendICUData If the full set of ICU data was not loaded at *
michael@0 772 * program startup, load it now. This function will *
michael@0 773 * be called when the lookup of an ICU data item in *
michael@0 774 * the common ICU data fails. *
michael@0 775 * *
michael@0 776 * return true if the extended data is available afterwards, false otherwise. *
michael@0 777 * *
michael@0 778 *----------------------------------------------------------------------*/
michael@0 779 static UBool extendICUData(UErrorCode *pErr)
michael@0 780 {
michael@0 781 UDataMemory *pData;
michael@0 782 UDataMemory copyPData;
michael@0 783 UBool didUpdate = FALSE;
michael@0 784
michael@0 785 /*
michael@0 786 * There is a chance for a race condition here.
michael@0 787 * Normally, ICU data is loaded from a DLL or via mmap() and
michael@0 788 * setCommonICUData() will detect if the same address is set twice.
michael@0 789 * If ICU is built with data loading via fread() then the address will
michael@0 790 * be different each time the common data is loaded and we may add
michael@0 791 * multiple copies of the data.
michael@0 792 * In this case, use a mutex to prevent the race.
michael@0 793 * Use a specific mutex to avoid nested locks of the global mutex.
michael@0 794 */
michael@0 795 #if MAP_IMPLEMENTATION==MAP_STDIO
michael@0 796 static UMutex extendICUDataMutex = U_MUTEX_INITIALIZER;
michael@0 797 umtx_lock(&extendICUDataMutex);
michael@0 798 #endif
michael@0 799 if(!gHaveTriedToLoadCommonData) {
michael@0 800 /* See if we can explicitly open a .dat file for the ICUData. */
michael@0 801 pData = openCommonData(
michael@0 802 U_ICUDATA_NAME, /* "icudt20l" , for example. */
michael@0 803 -1, /* Pretend we're not opening ICUData */
michael@0 804 pErr);
michael@0 805
michael@0 806 /* If pData is NULL nothing is installed below; the attempt is still recorded. */
michael@0 807
michael@0 808 UDataMemory_init(&copyPData);
michael@0 809 if(pData != NULL) {
michael@0 810 UDatamemory_assign(&copyPData, pData);
michael@0 811 copyPData.map = 0; /* The mapping for this data is owned by the hash table */
michael@0 812 copyPData.mapAddr = 0; /* which will unmap it when ICU is shut down. */
michael@0 813 /* CommonICUData is also unmapped when ICU is shut down.*/
michael@0 814 /* To avoid unmapping the data twice, zero out the map */
michael@0 815 /* fields in the UDataMemory that we're assigning */
michael@0 816 /* to CommonICUData. */
michael@0 817
michael@0 818 didUpdate = /* this result is overwritten by the lookup below */
michael@0 819 setCommonICUData(&copyPData,/* The new common data. */
michael@0 820 FALSE, /* No warnings if write didn't happen */
michael@0 821 pErr); /* setCommonICUData honors errors; NOP if error set */
michael@0 822 }
michael@0 823
michael@0 824 gHaveTriedToLoadCommonData = TRUE;
michael@0 825 }
michael@0 826
michael@0 827 didUpdate = findCommonICUDataByName(U_ICUDATA_NAME); /* Return 'true' when a racing thread writes out the extended */
michael@0 828 /* data after another thread has failed to see it (in openCommonData), so */
michael@0 829 /* extended data can be examined. */
michael@0 830 /* Also handles a race through here before gHaveTriedToLoadCommonData is set. */
michael@0 831
michael@0 832 #if MAP_IMPLEMENTATION==MAP_STDIO
michael@0 833 umtx_unlock(&extendICUDataMutex);
michael@0 834 #endif
michael@0 835 return didUpdate; /* Return true if ICUData pointer was updated. */
michael@0 836 /* (Could potentially have been done by another thread racing */
michael@0 837 /* us through here, but that's fine, we still return true */
michael@0 838 /* so that current thread will also examine extended data. */
michael@0 839 }
michael@0 840
michael@0 841 /*----------------------------------------------------------------------*
michael@0 842 * *
michael@0 843 * udata_setCommonData *
michael@0 844 * *
michael@0 845 *----------------------------------------------------------------------*/
michael@0 846 U_CAPI void U_EXPORT2
michael@0 847 udata_setCommonData(const void *data, UErrorCode *pErrorCode) {
michael@0 848 UDataMemory dataMemory;
michael@0 849
michael@0 850 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 851 return;
michael@0 852 }
michael@0 853
michael@0 854 if(data==NULL) {
michael@0 855 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 856 return;
michael@0 857 }
michael@0 858
michael@0 859 /* set the data pointer and test for validity */
michael@0 860 UDataMemory_init(&dataMemory);
michael@0 861 UDataMemory_setData(&dataMemory, data);
michael@0 862 udata_checkCommonData(&dataMemory, pErrorCode);
michael@0 863 if (U_FAILURE(*pErrorCode)) {return;}
michael@0 864
michael@0 865 /* we have good data */
michael@0 866 /* Set it up as the ICU Common Data. */
michael@0 867 setCommonICUData(&dataMemory, TRUE, pErrorCode);
michael@0 868 }
michael@0 869
michael@0 870 /*---------------------------------------------------------------------------
michael@0 871 *
michael@0 872 * udata_setAppData
michael@0 873 *
michael@0 874 *---------------------------------------------------------------------------- */
michael@0 875 U_CAPI void U_EXPORT2
michael@0 876 udata_setAppData(const char *path, const void *data, UErrorCode *err)
michael@0 877 {
michael@0 878 UDataMemory udm;
michael@0 879
michael@0 880 if(err==NULL || U_FAILURE(*err)) {
michael@0 881 return;
michael@0 882 }
michael@0 883 if(data==NULL) {
michael@0 884 *err=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 885 return;
michael@0 886 }
michael@0 887
michael@0 888 UDataMemory_init(&udm);
michael@0 889 UDataMemory_setData(&udm, data);
michael@0 890 udata_checkCommonData(&udm, err);
michael@0 891 udata_cacheDataItem(path, &udm, err);
michael@0 892 }
michael@0 893
michael@0 894 /*----------------------------------------------------------------------------*
michael@0 895 * *
michael@0 896 * checkDataItem Given a freshly located/loaded data item, either *
michael@0 897 * an entry in a common file or a separately loaded file, *
michael@0 898 * sanity check its header, and see if the data is *
michael@0 899 * acceptable to the app. *
michael@0 900 * If the data is good, create and return a UDataMemory *
michael@0 901 * object that can be returned to the application. *
michael@0 902 * Return NULL on any sort of failure. *
michael@0 903 * *
michael@0 904 *----------------------------------------------------------------------------*/
michael@0 905 static UDataMemory *
michael@0 906 checkDataItem
michael@0 907 (
michael@0 908 const DataHeader *pHeader, /* The data item to be checked. */
michael@0 909 UDataMemoryIsAcceptable *isAcceptable, /* App's call-back function */
michael@0 910 void *context, /* pass-thru param for above. */
michael@0 911 const char *type, /* pass-thru param for above. */
michael@0 912 const char *name, /* pass-thru param for above. */
michael@0 913 UErrorCode *nonFatalErr, /* Error code if this data was not acceptable */
michael@0 914 /* but openChoice should continue with */
michael@0 915 /* trying to get data from fallback path. */
michael@0 916 UErrorCode *fatalErr /* Bad error, caller should return immediately */
michael@0 917 )
michael@0 918 {
michael@0 919 UDataMemory *rDataMem = NULL; /* the new UDataMemory, to be returned. */
michael@0 920
michael@0 921 if (U_FAILURE(*fatalErr)) {
michael@0 922 return NULL;
michael@0 923 }
michael@0 924
michael@0 925 if(pHeader->dataHeader.magic1==0xda &&
michael@0 926 pHeader->dataHeader.magic2==0x27 &&
michael@0 927 (isAcceptable==NULL || isAcceptable(context, type, name, &pHeader->info))
michael@0 928 ) {
michael@0 929 rDataMem=UDataMemory_createNewInstance(fatalErr);
michael@0 930 if (U_FAILURE(*fatalErr)) {
michael@0 931 return NULL;
michael@0 932 }
michael@0 933 rDataMem->pHeader = pHeader;
michael@0 934 } else {
michael@0 935 /* the data is not acceptable, look further */
michael@0 936 /* If we eventually find something good, this errorcode will be */
michael@0 937 /* cleared out. */
michael@0 938 *nonFatalErr=U_INVALID_FORMAT_ERROR;
michael@0 939 }
michael@0 940 return rDataMem;
michael@0 941 }
michael@0 942
michael@0 943 /**
michael@0 944 * @return 0 if not loaded, 1 if loaded or err
michael@0 945 */
michael@0 946 static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
michael@0 947 const char *dataPath, const char *tocEntryPathSuffix,
michael@0 948 /* following arguments are the same as doOpenChoice itself */
michael@0 949 const char *path, const char *type, const char *name,
michael@0 950 UDataMemoryIsAcceptable *isAcceptable, void *context,
michael@0 951 UErrorCode *subErrorCode,
michael@0 952 UErrorCode *pErrorCode)
michael@0 953 {
michael@0 954 const char *pathBuffer;
michael@0 955 UDataMemory dataMemory;
michael@0 956 UDataMemory *pEntryData;
michael@0 957
michael@0 958 /* look in ind. files: package\nam.typ ========================= */
michael@0 959 /* init path iterator for individual files */
michael@0 960 UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
michael@0 961
michael@0 962 while((pathBuffer = iter.next(pErrorCode)))
michael@0 963 {
michael@0 964 #ifdef UDATA_DEBUG
michael@0 965 fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
michael@0 966 #endif
michael@0 967 if(uprv_mapFile(&dataMemory, pathBuffer))
michael@0 968 {
michael@0 969 pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
michael@0 970 if (pEntryData != NULL) {
michael@0 971 /* Data is good.
michael@0 972 * Hand off ownership of the backing memory to the user's UDataMemory.
michael@0 973 * and return it. */
michael@0 974 pEntryData->mapAddr = dataMemory.mapAddr;
michael@0 975 pEntryData->map = dataMemory.map;
michael@0 976
michael@0 977 #ifdef UDATA_DEBUG
michael@0 978 fprintf(stderr, "** Mapped file: %s\n", pathBuffer);
michael@0 979 #endif
michael@0 980 return pEntryData;
michael@0 981 }
michael@0 982
michael@0 983 /* the data is not acceptable, or some error occured. Either way, unmap the memory */
michael@0 984 udata_close(&dataMemory);
michael@0 985
michael@0 986 /* If we had a nasty error, bail out completely. */
michael@0 987 if (U_FAILURE(*pErrorCode)) {
michael@0 988 return NULL;
michael@0 989 }
michael@0 990
michael@0 991 /* Otherwise remember that we found data but didn't like it for some reason */
michael@0 992 *subErrorCode=U_INVALID_FORMAT_ERROR;
michael@0 993 }
michael@0 994 #ifdef UDATA_DEBUG
michael@0 995 fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded");
michael@0 996 #endif
michael@0 997 }
michael@0 998 return NULL;
michael@0 999 }
michael@0 1000
michael@0 1001 /**
michael@0 1002 * @return the accepted data item, or NULL if it was not found, was rejected, or a fatal error was set in *pErrorCode
michael@0 1003 */
michael@0 1004 static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName*/,
michael@0 1005 const char * /*dataPath*/, const char * /*tocEntryPathSuffix*/, const char *tocEntryName,
michael@0 1006 /* following arguments are the same as doOpenChoice itself */
michael@0 1007 const char *path, const char *type, const char *name,
michael@0 1008 UDataMemoryIsAcceptable *isAcceptable, void *context,
michael@0 1009 UErrorCode *subErrorCode,
michael@0 1010 UErrorCode *pErrorCode)
michael@0 1011 {
michael@0 1012 UDataMemory *pEntryData;
michael@0 1013 const DataHeader *pHeader;
michael@0 1014 UDataMemory *pCommonData;
michael@0 1015 int32_t commonDataIndex;
michael@0 1016 UBool checkedExtendedICUData = FALSE;
michael@0 1017 /* try to get common data. The loop is for platforms such as the 390 that do
michael@0 1018 * not initially load the full set of ICU data. If the lookup of an ICU data item
michael@0 1019 * fails, the full (but slower to load) set is loaded, and the loop repeats,
michael@0 1020 * trying the lookup again. Once the full set of ICU data is loaded, the loop won't
michael@0 1021 * repeat because the full set will be checked the first time through.
michael@0 1022 *
michael@0 1023 * The loop also handles the fallback to a .dat file if the application linked
michael@0 1024 * to the stub data library rather than a real library.
michael@0 1025 */
michael@0 1026 for (commonDataIndex = isICUData ? 0 : -1;;) { /* ICU data starts at package slot 0; any other package uses -1 */
michael@0 1027 pCommonData=openCommonData(path, commonDataIndex, subErrorCode); /** search for pkg **/
michael@0 1028
michael@0 1029 if(U_SUCCESS(*subErrorCode) && pCommonData!=NULL) {
michael@0 1030 int32_t length;
michael@0 1031
michael@0 1032 /* look up the data piece in the common data */
michael@0 1033 pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode);
michael@0 1034 #ifdef UDATA_DEBUG
michael@0 1035 fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, pHeader, u_errorName(*subErrorCode));
michael@0 1036 #endif
michael@0 1037
michael@0 1038 if(pHeader!=NULL) {
michael@0 1039 pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
michael@0 1040 #ifdef UDATA_DEBUG
michael@0 1041 fprintf(stderr, "pEntryData=%p\n", pEntryData);
michael@0 1042 #endif
michael@0 1043 if (U_FAILURE(*pErrorCode)) {
michael@0 1044 return NULL;
michael@0 1045 }
michael@0 1046 if (pEntryData != NULL) {
michael@0 1047 pEntryData->length = length;
michael@0 1048 return pEntryData;
michael@0 1049 }
michael@0 1050 }
michael@0 1051 }
michael@0 1052 /* Data wasn't found. If we were looking for an ICUData item and there is
michael@0 1053 * more data available, load it and try again,
michael@0 1054 * otherwise break out of this loop. */
michael@0 1055 if (!isICUData) {
michael@0 1056 return NULL;
michael@0 1057 } else if (pCommonData != NULL) {
michael@0 1058 ++commonDataIndex; /* try the next data package */
michael@0 1059 } else if ((!checkedExtendedICUData) && extendICUData(subErrorCode)) {
michael@0 1060 checkedExtendedICUData = TRUE;
michael@0 1061 /* try this data package slot again: it changed from NULL to non-NULL */
michael@0 1062 } else {
michael@0 1063 return NULL;
michael@0 1064 }
michael@0 1065 }
michael@0 1066 }
michael@0 1067
michael@0 1068 /*
michael@0 1069 * A note on the ownership of Mapped Memory
michael@0 1070 *
michael@0 1071 * For common format files, ownership resides with the UDataMemory object
michael@0 1072 * that lives in the cache of opened common data. These UDataMemorys are private
michael@0 1073 * to the udata implementation, and are never seen directly by users.
michael@0 1074 *
michael@0 1075 * The UDataMemory objects returned to users will have the address of some desired
michael@0 1076 * data within the mapped region, but they won't have the mapping info itself, and thus
michael@0 1077 * won't cause anything to be removed from memory when they are closed.
michael@0 1078 *
michael@0 1079 * For individual data files, the UDataMemory returned to the user holds the
michael@0 1080 * information necessary to unmap the data on close. If the user independently
michael@0 1081 * opens the same data file twice, two completely independent mappings will be made.
michael@0 1082 * (There is no cache of opened data items from individual files, only a cache of
michael@0 1083 * opened Common Data files, that is, files containing a collection of data items.)
michael@0 1084 *
michael@0 1085 * For common data passed in from the user via udata_setAppData() or
michael@0 1086 * udata_setCommonData(), ownership remains with the user.
michael@0 1087 *
michael@0 1088 * UDataMemory objects themselves, as opposed to the memory they describe,
michael@0 1089 * can be anywhere - heap, stack/local or global.
michael@0 1090 * They have a flag to indicate when they're heap allocated and thus
michael@0 1091 * must be deleted when closed.
michael@0 1092 */
michael@0 1093
michael@0 1094
michael@0 1095 /*----------------------------------------------------------------------------*
michael@0 1096 * *
michael@0 1097 * main data loading functions *
michael@0 1098 * *
michael@0 1099 *----------------------------------------------------------------------------*/
michael@0 1100 static UDataMemory *
michael@0 1101 doOpenChoice(const char *path, const char *type, const char *name,
michael@0 1102 UDataMemoryIsAcceptable *isAcceptable, void *context,
michael@0 1103 UErrorCode *pErrorCode)
michael@0 1104 {
michael@0 1105 UDataMemory *retVal = NULL;
michael@0 1106
michael@0 1107 const char *dataPath;
michael@0 1108
michael@0 1109 int32_t tocEntrySuffixIndex;
michael@0 1110 const char *tocEntryPathSuffix;
michael@0 1111 UErrorCode subErrorCode=U_ZERO_ERROR;
michael@0 1112 const char *treeChar;
michael@0 1113
michael@0 1114 UBool isICUData = FALSE;
michael@0 1115
michael@0 1116
michael@0 1117 /* Is this path ICU data? */
michael@0 1118 if(path == NULL ||
michael@0 1119 !strcmp(path, U_ICUDATA_ALIAS) || /* "ICUDATA" */
michael@0 1120 !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, /* "icudt26e-" */
michael@0 1121 uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) ||
michael@0 1122 !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, /* "ICUDATA-" */
michael@0 1123 uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) {
michael@0 1124 isICUData = TRUE;
michael@0 1125 }
michael@0 1126
michael@0 1127 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) /* Windows: try "foo\bar" and "foo/bar" */
michael@0 1128 /* remap from alternate path char to the main one */
michael@0 1129 CharString altSepPath;
michael@0 1130 if(path) {
michael@0 1131 if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) {
michael@0 1132 altSepPath.append(path, *pErrorCode);
michael@0 1133 char *p;
michael@0 1134 while((p=uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR))) {
michael@0 1135 *p = U_FILE_SEP_CHAR;
michael@0 1136 }
michael@0 1137 #if defined (UDATA_DEBUG)
michael@0 1138 fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s);
michael@0 1139 #endif
michael@0 1140 path = altSepPath.data();
michael@0 1141 }
michael@0 1142 }
michael@0 1143 #endif
michael@0 1144
michael@0 1145 CharString tocEntryName; /* entry name in tree format. ex: 'icudt28b/coll/ar.res' */
michael@0 1146 CharString tocEntryPath; /* entry name in path format. ex: 'icudt28b\\coll\\ar.res' */
michael@0 1147
michael@0 1148 CharString pkgName;
michael@0 1149 CharString treeName;
michael@0 1150
michael@0 1151 /* ======= Set up strings */
michael@0 1152 if(path==NULL) {
michael@0 1153 pkgName.append(U_ICUDATA_NAME, *pErrorCode);
michael@0 1154 } else {
michael@0 1155 const char *pkg;
michael@0 1156 const char *first;
michael@0 1157 pkg = uprv_strrchr(path, U_FILE_SEP_CHAR);
michael@0 1158 first = uprv_strchr(path, U_FILE_SEP_CHAR);
michael@0 1159 if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */
michael@0 1160 /* see if this is an /absolute/path/to/package path */
michael@0 1161 if(pkg) {
michael@0 1162 pkgName.append(pkg+1, *pErrorCode);
michael@0 1163 } else {
michael@0 1164 pkgName.append(path, *pErrorCode);
michael@0 1165 }
michael@0 1166 } else {
michael@0 1167 treeChar = uprv_strchr(path, U_TREE_SEPARATOR);
michael@0 1168 if(treeChar) {
michael@0 1169 treeName.append(treeChar+1, *pErrorCode); /* following '-' */
michael@0 1170 if(isICUData) {
michael@0 1171 pkgName.append(U_ICUDATA_NAME, *pErrorCode);
michael@0 1172 } else {
michael@0 1173 pkgName.append(path, (int32_t)(treeChar-path), *pErrorCode);
michael@0 1174 if (first == NULL) {
michael@0 1175 /*
michael@0 1176 This user data has no path, but there is a tree name.
michael@0 1177 Look up the correct path from the data cache later.
michael@0 1178 */
michael@0 1179 path = pkgName.data();
michael@0 1180 }
michael@0 1181 }
michael@0 1182 } else {
michael@0 1183 if(isICUData) {
michael@0 1184 pkgName.append(U_ICUDATA_NAME, *pErrorCode);
michael@0 1185 } else {
michael@0 1186 pkgName.append(path, *pErrorCode);
michael@0 1187 }
michael@0 1188 }
michael@0 1189 }
michael@0 1190 }
michael@0 1191
michael@0 1192 #ifdef UDATA_DEBUG
michael@0 1193 fprintf(stderr, " P=%s T=%s\n", pkgName.data(), treeName.data());
michael@0 1194 #endif
michael@0 1195
michael@0 1196 /* setting up the entry name and file name
michael@0 1197 * Make up a full name by appending the type to the supplied
michael@0 1198 * name, assuming that a type was supplied.
michael@0 1199 */
michael@0 1200
michael@0 1201 /* prepend the package */
michael@0 1202 tocEntryName.append(pkgName, *pErrorCode);
michael@0 1203 tocEntryPath.append(pkgName, *pErrorCode);
michael@0 1204 tocEntrySuffixIndex = tocEntryName.length();
michael@0 1205
michael@0 1206 if(!treeName.isEmpty()) {
michael@0 1207 tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
michael@0 1208 tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
michael@0 1209 }
michael@0 1210
michael@0 1211 tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
michael@0 1212 tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
michael@0 1213 if(type!=NULL && *type!=0) {
michael@0 1214 tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode);
michael@0 1215 tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode);
michael@0 1216 }
michael@0 1217 tocEntryPathSuffix = tocEntryPath.data()+tocEntrySuffixIndex; /* suffix starts here */
michael@0 1218
michael@0 1219 #ifdef UDATA_DEBUG
michael@0 1220 fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data());
michael@0 1221 fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data());
michael@0 1222 #endif
michael@0 1223
michael@0 1224 if(path == NULL) {
michael@0 1225 path = COMMON_DATA_NAME; /* "icudt26e" */
michael@0 1226 }
michael@0 1227
michael@0 1228 /************************ Begin loop looking for ind. files ***************/
michael@0 1229 #ifdef UDATA_DEBUG
michael@0 1230 fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", "(n/a)", packageNameFromPath(path));
michael@0 1231 #endif
michael@0 1232
michael@0 1233 /* End of dealing with a null basename */
michael@0 1234 dataPath = u_getDataDirectory();
michael@0 1235
michael@0 1236 /**** COMMON PACKAGE - only if packages are first. */
michael@0 1237 if(gDataFileAccess == UDATA_PACKAGES_FIRST) {
michael@0 1238 #ifdef UDATA_DEBUG
michael@0 1239 fprintf(stderr, "Trying packages (UDATA_PACKAGES_FIRST)\n");
michael@0 1240 #endif
michael@0 1241 /* #2 */
michael@0 1242 retVal = doLoadFromCommonData(isICUData,
michael@0 1243 pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
michael@0 1244 path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
michael@0 1245 if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
michael@0 1246 return retVal;
michael@0 1247 }
michael@0 1248 }
michael@0 1249
michael@0 1250 /**** INDIVIDUAL FILES */
michael@0 1251 if((gDataFileAccess==UDATA_PACKAGES_FIRST) ||
michael@0 1252 (gDataFileAccess==UDATA_FILES_FIRST)) {
michael@0 1253 #ifdef UDATA_DEBUG
michael@0 1254 fprintf(stderr, "Trying individual files\n");
michael@0 1255 #endif
michael@0 1256 /* Check to make sure that there is a dataPath to iterate over */
michael@0 1257 if ((dataPath && *dataPath) || !isICUData) {
michael@0 1258 retVal = doLoadFromIndividualFiles(pkgName.data(), dataPath, tocEntryPathSuffix,
michael@0 1259 path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
michael@0 1260 if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
michael@0 1261 return retVal;
michael@0 1262 }
michael@0 1263 }
michael@0 1264 }
michael@0 1265
michael@0 1266 /**** COMMON PACKAGE */
michael@0 1267 if((gDataFileAccess==UDATA_ONLY_PACKAGES) ||
michael@0 1268 (gDataFileAccess==UDATA_FILES_FIRST)) {
michael@0 1269 #ifdef UDATA_DEBUG
michael@0 1270 fprintf(stderr, "Trying packages (UDATA_ONLY_PACKAGES || UDATA_FILES_FIRST)\n");
michael@0 1271 #endif
michael@0 1272 retVal = doLoadFromCommonData(isICUData,
michael@0 1273 pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
michael@0 1274 path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
michael@0 1275 if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
michael@0 1276 return retVal;
michael@0 1277 }
michael@0 1278 }
michael@0 1279
michael@0 1280 /* Load from DLL. If we haven't attempted package load, we also haven't had any chance to
michael@0 1281 try a DLL (static or setCommonData/etc) load.
michael@0 1282 If we ever have a "UDATA_ONLY_FILES", add it to the or list here. */
michael@0 1283 if(gDataFileAccess==UDATA_NO_FILES) {
michael@0 1284 #ifdef UDATA_DEBUG
michael@0 1285 fprintf(stderr, "Trying common data (UDATA_NO_FILES)\n");
michael@0 1286 #endif
michael@0 1287 retVal = doLoadFromCommonData(isICUData,
michael@0 1288 pkgName.data(), "", tocEntryPathSuffix, tocEntryName.data(),
michael@0 1289 path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
michael@0 1290 if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
michael@0 1291 return retVal;
michael@0 1292 }
michael@0 1293 }
michael@0 1294
michael@0 1295 /* data not found */
michael@0 1296 if(U_SUCCESS(*pErrorCode)) {
michael@0 1297 if(U_SUCCESS(subErrorCode)) {
michael@0 1298 /* file not found */
michael@0 1299 *pErrorCode=U_FILE_ACCESS_ERROR;
michael@0 1300 } else {
michael@0 1301 /* entry point not found or rejected */
michael@0 1302 *pErrorCode=subErrorCode;
michael@0 1303 }
michael@0 1304 }
michael@0 1305 return retVal;
michael@0 1306 }
michael@0 1307
michael@0 1308
michael@0 1309
michael@0 1310 /* API ---------------------------------------------------------------------- */
michael@0 1311
michael@0 1312 U_CAPI UDataMemory * U_EXPORT2
michael@0 1313 udata_open(const char *path, const char *type, const char *name,
michael@0 1314 UErrorCode *pErrorCode) {
michael@0 1315 #ifdef UDATA_DEBUG
michael@0 1316 fprintf(stderr, "udata_open(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type);
michael@0 1317 fflush(stderr);
michael@0 1318 #endif
michael@0 1319
michael@0 1320 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 1321 return NULL;
michael@0 1322 } else if(name==NULL || *name==0) {
michael@0 1323 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1324 return NULL;
michael@0 1325 } else {
michael@0 1326 return doOpenChoice(path, type, name, NULL, NULL, pErrorCode);
michael@0 1327 }
michael@0 1328 }
michael@0 1329
michael@0 1330
michael@0 1331
michael@0 1332 U_CAPI UDataMemory * U_EXPORT2
michael@0 1333 udata_openChoice(const char *path, const char *type, const char *name,
michael@0 1334 UDataMemoryIsAcceptable *isAcceptable, void *context,
michael@0 1335 UErrorCode *pErrorCode) {
michael@0 1336 #ifdef UDATA_DEBUG
michael@0 1337 fprintf(stderr, "udata_openChoice(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type);
michael@0 1338 #endif
michael@0 1339
michael@0 1340 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 1341 return NULL;
michael@0 1342 } else if(name==NULL || *name==0 || isAcceptable==NULL) {
michael@0 1343 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1344 return NULL;
michael@0 1345 } else {
michael@0 1346 return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode);
michael@0 1347 }
michael@0 1348 }
michael@0 1349
michael@0 1350
michael@0 1351
michael@0 1352 U_CAPI void U_EXPORT2
michael@0 1353 udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
michael@0 1354 if(pInfo!=NULL) {
michael@0 1355 if(pData!=NULL && pData->pHeader!=NULL) {
michael@0 1356 const UDataInfo *info=&pData->pHeader->info;
michael@0 1357 uint16_t dataInfoSize=udata_getInfoSize(info);
michael@0 1358 if(pInfo->size>dataInfoSize) {
michael@0 1359 pInfo->size=dataInfoSize;
michael@0 1360 }
michael@0 1361 uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2);
michael@0 1362 if(info->isBigEndian!=U_IS_BIG_ENDIAN) {
michael@0 1363 /* opposite endianness */
michael@0 1364 uint16_t x=info->reservedWord;
michael@0 1365 pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8));
michael@0 1366 }
michael@0 1367 } else {
michael@0 1368 pInfo->size=0;
michael@0 1369 }
michael@0 1370 }
michael@0 1371 }
michael@0 1372
michael@0 1373
michael@0 1374 U_CAPI void U_EXPORT2 udata_setFileAccess(UDataFileAccess access, UErrorCode * /*status*/)
michael@0 1375 {
michael@0 1376 gDataFileAccess = access;
michael@0 1377 }

mercurial