michael@0: /*
michael@0: ******************************************************************************
michael@0: *
michael@0: *   Copyright (C) 1997-2013, International Business Machines
michael@0: *   Corporation and others.  All Rights Reserved.
michael@0: *
michael@0: ******************************************************************************
michael@0: *
michael@0: *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
michael@0: *
michael@0: *   Date        Name        Description
michael@0: *   04/14/97    aliu        Creation.
michael@0: *   04/24/97    aliu        Added getDefaultDataDirectory() and
michael@0: *                            getDefaultLocaleID().
michael@0: *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
michael@0: *                            for assumed case.  Non-UNIX platforms must be
michael@0: *                            special-cased.  Rewrote numeric methods dealing
michael@0: *                            with NaN and Infinity to be platform independent
michael@0: *                             over all IEEE 754 platforms.
michael@0: *   05/13/97    aliu        Restored sign of timezone
michael@0: *                            (semantics are hours West of GMT)
michael@0: *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
michael@0: *                             nextDouble..
michael@0: *   07/22/98    stephen     Added remainder, max, min, trunc
michael@0: *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
michael@0: *   08/24/98    stephen     Added longBitsFromDouble
michael@0: *   09/08/98    stephen     Minor changes for Mac Port
michael@0: *   03/02/99    stephen     Removed openFile().  Added AS400 support.
michael@0: *                            Fixed EBCDIC tables
michael@0: *   04/15/99    stephen     Converted to C.
michael@0: *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
michael@0: *   08/04/99    jeffrey R.  Added OS/2 changes
michael@0: *   11/15/99    helena      Integrated S/390 IEEE support.
michael@0: *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
michael@0: *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
michael@0: *   01/03/08    Steven L.   Fake Time Support
michael@0: ******************************************************************************
michael@0: */
michael@0: 
michael@0: // Defines _XOPEN_SOURCE for access to POSIX functions.
michael@0: // Must be before any other #includes.
michael@0: #include "uposixdefs.h"
michael@0: 
michael@0: /* include ICU headers */
michael@0: #include "unicode/utypes.h"
michael@0: #include "unicode/putil.h"
michael@0: #include "unicode/ustring.h"
michael@0: #include "putilimp.h"
michael@0: #include "uassert.h"
michael@0: #include "umutex.h"
michael@0: #include "cmemory.h"
michael@0: #include "cstring.h"
michael@0: #include "locmap.h"
michael@0: #include "ucln_cmn.h"
michael@0: 
michael@0: /* Include standard headers. */
michael@0: #include <stdio.h>
michael@0: #include <stdlib.h>
michael@0: #include <string.h>
michael@0: #include <math.h>
michael@0: #include <locale.h>
michael@0: #include <float.h>
michael@0: 
michael@0: #ifndef U_COMMON_IMPLEMENTATION
michael@0: #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
michael@0: #endif
michael@0: 
michael@0: 
michael@0: /* include system headers */
michael@0: #if U_PLATFORM_USES_ONLY_WIN32_API
michael@0:     /*
michael@0:      * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
michael@0:      * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
michael@0:      * to use native APIs as much as possible?
michael@0:      */
michael@0: #   define WIN32_LEAN_AND_MEAN
michael@0: #   define VC_EXTRALEAN
michael@0: #   define NOUSER
michael@0: #   define NOSERVICE
michael@0: #   define NOIME
michael@0: #   define NOMCX
michael@0: #   include <windows.h>
michael@0: #   include "wintz.h"
michael@0: #elif U_PLATFORM == U_PF_OS400
michael@0: #   include <float.h>
michael@0: #   include <qusec.h>       /* error code structure */
michael@0: #   include <qusrjobi.h>
michael@0: #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
michael@0: #   include <mih/testptr.h> /* For uprv_maximumPtr */
michael@0: #elif U_PLATFORM == U_PF_CLASSIC_MACOS
michael@0: #   include <Files.h>
michael@0: #   include <IntlResources.h>
michael@0: #   include <Script.h>
michael@0: #   include <Folders.h>
michael@0: #   include <MacTypes.h>
michael@0: #   include <TextUtils.h>
michael@0: #   define ICU_NO_USER_DATA_OVERRIDE 1
michael@0: #elif U_PLATFORM == U_PF_OS390
michael@0: #   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
michael@0: #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
michael@0: #   include <limits.h>
michael@0: #   include <unistd.h>
michael@0: #   if U_PLATFORM == U_PF_SOLARIS
michael@0: #       ifndef _XPG4_2
michael@0: #           define _XPG4_2
michael@0: #       endif
michael@0: #   endif
michael@0: #elif U_PLATFORM == U_PF_QNX
michael@0: #   include <sys/neutrino.h>
michael@0: #endif
michael@0: 
michael@0: #if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__)
michael@0: /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
michael@0: #undef __STRICT_ANSI__
michael@0: #endif
michael@0: 
/*
 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
 */
michael@0: #include <time.h>
michael@0: 
michael@0: #if !U_PLATFORM_USES_ONLY_WIN32_API
michael@0: #include <sys/time.h>
michael@0: #endif
michael@0: 
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
michael@0: 
michael@0: #if U_HAVE_NL_LANGINFO_CODESET
michael@0: #include <langinfo.h>
michael@0: #endif
michael@0: 
michael@0: /**
michael@0:  * Simple things (presence of functions, etc) should just go in configure.in and be added to
michael@0:  * icucfg.h via autoheader.
michael@0:  */
michael@0: #if U_PLATFORM_IMPLEMENTS_POSIX
michael@0: #   if U_PLATFORM == U_PF_OS400
michael@0: #    define HAVE_DLFCN_H 0
michael@0: #    define HAVE_DLOPEN 0
michael@0: #   else
michael@0: #   ifndef HAVE_DLFCN_H
michael@0: #    define HAVE_DLFCN_H 1
michael@0: #   endif
michael@0: #   ifndef HAVE_DLOPEN
michael@0: #    define HAVE_DLOPEN 1
michael@0: #   endif
michael@0: #   endif
michael@0: #   ifndef HAVE_GETTIMEOFDAY
michael@0: #    define HAVE_GETTIMEOFDAY 1
michael@0: #   endif
michael@0: #else
michael@0: #   define HAVE_DLFCN_H 0
michael@0: #   define HAVE_DLOPEN 0
michael@0: #   define HAVE_GETTIMEOFDAY 0
michael@0: #endif
michael@0: 
michael@0: #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
michael@0: 
michael@0: /* Define the extension for data files, again... */
michael@0: #define DATA_TYPE "dat"
michael@0: 
michael@0: /* Leave this copyright notice here! */
michael@0: static const char copyright[] = U_COPYRIGHT_STRING;
michael@0: 
michael@0: /* floating point implementations ------------------------------------------- */
michael@0: 
michael@0: /* We return QNAN rather than SNAN*/
michael@0: #define SIGN 0x80000000U
michael@0: 
michael@0: /* Make it easy to define certain types of constants */
/*
 * Helper union for spelling double constants as raw 64-bit patterns.
 * The integer member is declared first on purpose: a C89 brace initializer
 * sets the first member of a union, and the constants below rely on that.
 */
typedef union {
    int64_t i64;
    double d64;
} BitPatternConversion;
/* Bit pattern handed out as infinity. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
/* Bit pattern handed out as NaN (a quiet NaN rather than a signaling one). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
michael@0: 
michael@0: /*---------------------------------------------------------------------------
michael@0:   Platform utilities
michael@0:   Our general strategy is to assume we're on a POSIX platform.  Platforms which
michael@0:   are non-POSIX must declare themselves so.  The default POSIX implementation
michael@0:   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
michael@0:   functions).
michael@0:   ---------------------------------------------------------------------------*/
michael@0: 
michael@0: #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS || U_PLATFORM == U_PF_OS400
michael@0: #   undef U_POSIX_LOCALE
michael@0: #else
michael@0: #   define U_POSIX_LOCALE    1
michael@0: #endif
michael@0: 
michael@0: /*
michael@0:     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
michael@0:     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
michael@0: */
michael@0: #if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of the double at d.
 * On big-endian targets those are the leading bytes of the object;
 * otherwise they are the trailing n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *topBytes;
#if U_IS_BIG_ENDIAN
    topBytes = (char*)d;
#else
    /* Step one past the object, then back up n bytes. */
    topBytes = (char*)(d + 1) - n;
#endif
    return topBytes;
}
michael@0: 
/*
 * Returns a pointer to the n least significant bytes of the double at d.
 * On big-endian targets those are the trailing n bytes of the object;
 * otherwise they are the leading bytes.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *bottomBytes;
#if U_IS_BIG_ENDIAN
    /* Step one past the object, then back up n bytes. */
    bottomBytes = (char*)(d + 1) - n;
#else
    bottomBytes = (char*)d;
#endif
    return bottomBytes;
}
michael@0: #endif   /* !IEEE_754 */
michael@0: 
michael@0: #if IEEE_754
michael@0: static UBool
michael@0: u_signBit(double d) {
michael@0:     uint8_t hiByte;
michael@0: #if U_IS_BIG_ENDIAN
michael@0:     hiByte = *(uint8_t *)&d;
michael@0: #else
michael@0:     hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
michael@0: #endif
michael@0:     return (hiByte & 0x80) != 0;
michael@0: }
michael@0: #endif
michael@0: 
michael@0: 
michael@0: 
michael@0: #if defined (U_DEBUG_FAKETIME)
michael@0: /* Override the clock to test things without having to move the system clock.
michael@0:  * Assumes POSIX gettimeofday() will function
michael@0:  */
michael@0: UDate fakeClock_t0 = 0; /** Time to start the clock from **/
michael@0: UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
michael@0: UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
michael@0: static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
michael@0: 
michael@0: static UDate getUTCtime_real() {
michael@0:     struct timeval posixTime;
michael@0:     gettimeofday(&posixTime, NULL);
michael@0:     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
michael@0: }
michael@0: 
michael@0: static UDate getUTCtime_fake() {
michael@0:     umtx_lock(&fakeClockMutex);
michael@0:     if(!fakeClock_set) {
michael@0:         UDate real = getUTCtime_real();
michael@0:         const char *fake_start = getenv("U_FAKETIME_START");
michael@0:         if((fake_start!=NULL) && (fake_start[0]!=0)) {
michael@0:             sscanf(fake_start,"%lf",&fakeClock_t0);
michael@0:             fakeClock_dt = fakeClock_t0 - real;
michael@0:             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
michael@0:                     "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
michael@0:                     fakeClock_t0, fake_start, fakeClock_dt, real);
michael@0:         } else {
michael@0:           fakeClock_dt = 0;
michael@0:             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
michael@0:                     "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
michael@0:         }
michael@0:         fakeClock_set = TRUE;
michael@0:     }
michael@0:     umtx_unlock(&fakeClockMutex);
michael@0: 
michael@0:     return getUTCtime_real() + fakeClock_dt;
michael@0: }
michael@0: #endif
michael@0: 
michael@0: #if U_PLATFORM_USES_ONLY_WIN32_API
michael@0: typedef union {
michael@0:     int64_t int64;
michael@0:     FILETIME fileTime;
michael@0: } FileTimeConversion;   /* This is like a ULARGE_INTEGER */
michael@0: 
michael@0: /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
michael@0: #define EPOCH_BIAS  INT64_C(116444736000000000)
michael@0: #define HECTONANOSECOND_PER_MILLISECOND   10000
michael@0: 
michael@0: #endif
michael@0: 
michael@0: /*---------------------------------------------------------------------------
michael@0:   Universal Implementations
michael@0:   These are designed to work on all platforms.  Try these, and if they
michael@0:   don't work on your platform, then special case your platform with new
michael@0:   implementations.
michael@0: ---------------------------------------------------------------------------*/
michael@0: 
michael@0: U_CAPI UDate U_EXPORT2
michael@0: uprv_getUTCtime()
michael@0: {
michael@0: #if defined(U_DEBUG_FAKETIME)
michael@0:     return getUTCtime_fake(); /* Hook for overriding the clock */
michael@0: #else
michael@0:     return uprv_getRawUTCtime();
michael@0: #endif
michael@0: }
michael@0: 
michael@0: /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
michael@0: U_CAPI UDate U_EXPORT2
michael@0: uprv_getRawUTCtime()
michael@0: {
michael@0: #if U_PLATFORM == U_PF_CLASSIC_MACOS
michael@0:     time_t t, t1, t2;
michael@0:     struct tm tmrec;
michael@0: 
michael@0:     uprv_memset( &tmrec, 0, sizeof(tmrec) );
michael@0:     tmrec.tm_year = 70;
michael@0:     tmrec.tm_mon = 0;
michael@0:     tmrec.tm_mday = 1;
michael@0:     t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
michael@0: 
michael@0:     time(&t);
michael@0:     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
michael@0:     t2 = mktime(&tmrec);    /* seconds of current GMT*/
michael@0:     return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
michael@0: #elif U_PLATFORM_USES_ONLY_WIN32_API
michael@0: 
michael@0:     FileTimeConversion winTime;
michael@0:     GetSystemTimeAsFileTime(&winTime.fileTime);
michael@0:     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
michael@0: #else
michael@0: 
michael@0: #if HAVE_GETTIMEOFDAY
michael@0:     struct timeval posixTime;
michael@0:     gettimeofday(&posixTime, NULL);
michael@0:     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
michael@0: #else
michael@0:     time_t epochtime;
michael@0:     time(&epochtime);
michael@0:     return (UDate)epochtime * U_MILLIS_PER_SECOND;
michael@0: #endif
michael@0: 
michael@0: #endif
michael@0: }
michael@0: 
michael@0: /*-----------------------------------------------------------------------------
michael@0:   IEEE 754
michael@0:   These methods detect and return NaN and infinity values for doubles
michael@0:   conforming to IEEE 754.  Platforms which support this standard include X86,
michael@0:   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
michael@0:   If this doesn't work on your platform, you have non-IEEE floating-point, and
michael@0:   will need to code your own versions.  A naive implementation is to return 0.0
michael@0:   for getNaN and getInfinity, and false for isNaN and isInfinite.
michael@0:   ---------------------------------------------------------------------------*/
michael@0: 
michael@0: U_CAPI UBool U_EXPORT2
michael@0: uprv_isNaN(double number)
michael@0: {
michael@0: #if IEEE_754
michael@0:     BitPatternConversion convertedNumber;
michael@0:     convertedNumber.d64 = number;
michael@0:     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
michael@0:     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
michael@0: 
michael@0: #elif U_PLATFORM == U_PF_OS390
michael@0:     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
michael@0:                         sizeof(uint32_t));
michael@0:     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
michael@0:                         sizeof(uint32_t));
michael@0: 
michael@0:     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
michael@0:       (lowBits == 0x00000000L);
michael@0: 
michael@0: #else
michael@0:     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
michael@0:     /* you'll need to replace this default implementation with what's correct*/
michael@0:     /* for your platform.*/
michael@0:     return number != number;
michael@0: #endif
michael@0: }
michael@0: 
michael@0: U_CAPI UBool U_EXPORT2
michael@0: uprv_isInfinite(double number)
michael@0: {
michael@0: #if IEEE_754
michael@0:     BitPatternConversion convertedNumber;
michael@0:     convertedNumber.d64 = number;
michael@0:     /* Infinity is exactly 0x7FF0000000000000U. */
michael@0:     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
michael@0: #elif U_PLATFORM == U_PF_OS390
michael@0:     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
michael@0:                         sizeof(uint32_t));
michael@0:     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
michael@0:                         sizeof(uint32_t));
michael@0: 
michael@0:     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
michael@0: 
michael@0: #else
michael@0:     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
michael@0:     /* value, you'll need to replace this default implementation with what's*/
michael@0:     /* correct for your platform.*/
michael@0:     return number == (2.0 * number);
michael@0: #endif
michael@0: }
michael@0: 
michael@0: U_CAPI UBool U_EXPORT2
michael@0: uprv_isPositiveInfinity(double number)
michael@0: {
michael@0: #if IEEE_754 || U_PLATFORM == U_PF_OS390
michael@0:     return (UBool)(number > 0 && uprv_isInfinite(number));
michael@0: #else
michael@0:     return uprv_isInfinite(number);
michael@0: #endif
michael@0: }
michael@0: 
michael@0: U_CAPI UBool U_EXPORT2
michael@0: uprv_isNegativeInfinity(double number)
michael@0: {
michael@0: #if IEEE_754 || U_PLATFORM == U_PF_OS390
michael@0:     return (UBool)(number < 0 && uprv_isInfinite(number));
michael@0: 
michael@0: #else
michael@0:     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
michael@0:                         sizeof(uint32_t));
michael@0:     return((highBits & SIGN) && uprv_isInfinite(number));
michael@0: 
michael@0: #endif
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_getNaN()
michael@0: {
michael@0: #if IEEE_754 || U_PLATFORM == U_PF_OS390
michael@0:     return gNan.d64;
michael@0: #else
michael@0:     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
michael@0:     /* you'll need to replace this default implementation with what's correct*/
michael@0:     /* for your platform.*/
michael@0:     return 0.0;
michael@0: #endif
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_getInfinity()
michael@0: {
michael@0: #if IEEE_754 || U_PLATFORM == U_PF_OS390
michael@0:     return gInf.d64;
michael@0: #else
michael@0:     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
michael@0:     /* value, you'll need to replace this default implementation with what's*/
michael@0:     /* correct for your platform.*/
michael@0:     return 0.0;
michael@0: #endif
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_floor(double x)
michael@0: {
michael@0:     return floor(x);
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_ceil(double x)
michael@0: {
michael@0:     return ceil(x);
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_round(double x)
michael@0: {
michael@0:     return uprv_floor(x + 0.5);
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_fabs(double x)
michael@0: {
michael@0:     return fabs(x);
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_modf(double x, double* y)
michael@0: {
michael@0:     return modf(x, y);
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_fmod(double x, double y)
michael@0: {
michael@0:     return fmod(x, y);
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_pow(double x, double y)
michael@0: {
michael@0:     /* This is declared as "double pow(double x, double y)" */
michael@0:     return pow(x, y);
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_pow10(int32_t x)
michael@0: {
michael@0:     return pow(10.0, (double)x);
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_fmax(double x, double y)
michael@0: {
michael@0: #if IEEE_754
michael@0:     /* first handle NaN*/
michael@0:     if(uprv_isNaN(x) || uprv_isNaN(y))
michael@0:         return uprv_getNaN();
michael@0: 
michael@0:     /* check for -0 and 0*/
michael@0:     if(x == 0.0 && y == 0.0 && u_signBit(x))
michael@0:         return y;
michael@0: 
michael@0: #endif
michael@0: 
michael@0:     /* this should work for all flt point w/o NaN and Inf special cases */
michael@0:     return (x > y ? x : y);
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_fmin(double x, double y)
michael@0: {
michael@0: #if IEEE_754
michael@0:     /* first handle NaN*/
michael@0:     if(uprv_isNaN(x) || uprv_isNaN(y))
michael@0:         return uprv_getNaN();
michael@0: 
michael@0:     /* check for -0 and 0*/
michael@0:     if(x == 0.0 && y == 0.0 && u_signBit(y))
michael@0:         return y;
michael@0: 
michael@0: #endif
michael@0: 
michael@0:     /* this should work for all flt point w/o NaN and Inf special cases */
michael@0:     return (x > y ? y : x);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Truncates the given double.
michael@0:  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
michael@0:  * This is different than calling floor() or ceil():
michael@0:  * floor(3.3) = 3, floor(-3.3) = -4
michael@0:  * ceil(3.3) = 4, ceil(-3.3) = -3
michael@0:  */
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_trunc(double d)
michael@0: {
michael@0: #if IEEE_754
michael@0:     /* handle error cases*/
michael@0:     if(uprv_isNaN(d))
michael@0:         return uprv_getNaN();
michael@0:     if(uprv_isInfinite(d))
michael@0:         return uprv_getInfinity();
michael@0: 
michael@0:     if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
michael@0:         return ceil(d);
michael@0:     else
michael@0:         return floor(d);
michael@0: 
michael@0: #else
michael@0:     return d >= 0 ? floor(d) : ceil(d);
michael@0: 
michael@0: #endif
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Return the largest positive number that can be represented by an integer
michael@0:  * type of arbitrary bit length.
michael@0:  */
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_maxMantissa(void)
michael@0: {
michael@0:     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
michael@0: }
michael@0: 
michael@0: U_CAPI double U_EXPORT2
michael@0: uprv_log(double d)
michael@0: {
michael@0:     return log(d);
michael@0: }
michael@0: 
michael@0: U_CAPI void * U_EXPORT2
michael@0: uprv_maximumPtr(void * base)
michael@0: {
michael@0: #if U_PLATFORM == U_PF_OS400
michael@0:     /*
michael@0:      * With the provided function we should never be out of range of a given segment
michael@0:      * (a traditional/typical segment that is).  Our segments have 5 bytes for the
michael@0:      * id and 3 bytes for the offset.  The key is that the casting takes care of
michael@0:      * only retrieving the offset portion minus x1000.  Hence, the smallest offset
michael@0:      * seen in a program is x001000 and when casted to an int would be 0.
michael@0:      * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
michael@0:      *
michael@0:      * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
michael@0:      * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
michael@0:      * This function determines the activation based on the pointer that is passed in and
michael@0:      * calculates the appropriate maximum available size for
michael@0:      * each pointer type (TERASPACE and non-TERASPACE)
michael@0:      *
michael@0:      * Unlike other operating systems, the pointer model isn't determined at
michael@0:      * compile time on i5/OS.
michael@0:      */
michael@0:     if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
michael@0:         /* if it is a TERASPACE pointer the max is 2GB - 4k */
michael@0:         return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
michael@0:     }
michael@0:     /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
michael@0:     return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
michael@0: 
michael@0: #else
michael@0:     return U_MAX_PTR(base);
michael@0: #endif
michael@0: }
michael@0: 
michael@0: /*---------------------------------------------------------------------------
michael@0:   Platform-specific Implementations
michael@0:   Try these, and if they don't work on your platform, then special case your
michael@0:   platform with new implementations.
michael@0:   ---------------------------------------------------------------------------*/
michael@0: 
michael@0: /* Generic time zone layer -------------------------------------------------- */
michael@0: 
michael@0: /* Time zone utilities */
michael@0: U_CAPI void U_EXPORT2
michael@0: uprv_tzset()
michael@0: {
michael@0: #if defined(U_TZSET)
michael@0:     U_TZSET();
michael@0: #else
michael@0:     /* no initialization*/
michael@0: #endif
michael@0: }
michael@0: 
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: uprv_timezone()
michael@0: {
michael@0: #ifdef U_TIMEZONE
michael@0:     return U_TIMEZONE;
michael@0: #else
michael@0:     time_t t, t1, t2;
michael@0:     struct tm tmrec;
michael@0:     int32_t tdiff = 0;
michael@0: 
michael@0:     time(&t);
michael@0:     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
michael@0: #if U_PLATFORM != U_PF_IPHONE
michael@0:     UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
michael@0: #endif
michael@0:     t1 = mktime(&tmrec);                 /* local time in seconds*/
michael@0:     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
michael@0:     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
michael@0:     tdiff = t2 - t1;
michael@0: 
michael@0: #if U_PLATFORM != U_PF_IPHONE
michael@0:     /* imitate NT behaviour, which returns same timezone offset to GMT for
michael@0:        winter and summer.
michael@0:        This does not work on all platforms. For instance, on glibc on Linux
michael@0:        and on Mac OS 10.5, tdiff calculated above remains the same
michael@0:        regardless of whether DST is in effect or not. iOS is another
michael@0:        platform where this does not work. Linux + glibc and Mac OS 10.5
michael@0:        have U_TIMEZONE defined so that this code is not reached.
michael@0:     */
michael@0:     if (dst_checked)
michael@0:         tdiff += 3600;
michael@0: #endif
michael@0:     return tdiff;
michael@0: #endif
michael@0: }
michael@0: 
michael@0: /* Note that U_TZNAME does *not* have to be tzname, but if it is,
michael@0:    some platforms need to have it declared here. */
michael@0: 
michael@0: #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API))
michael@0: /* RS6000 and others reject char **tzname.  */
michael@0: extern U_IMPORT char *U_TZNAME[];
michael@0: #endif
michael@0: 
michael@0: #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
michael@0: /* These platforms are likely to use Olson timezone IDs. */
michael@0: #define CHECK_LOCALTIME_LINK 1
michael@0: #if U_PLATFORM_IS_DARWIN_BASED
michael@0: #include <tzfile.h>
michael@0: #define TZZONEINFO      (TZDIR "/")
michael@0: #elif U_PLATFORM == U_PF_SOLARIS
michael@0: #define TZDEFAULT       "/etc/localtime"
michael@0: #define TZZONEINFO      "/usr/share/lib/zoneinfo/"
michael@0: #define TZZONEINFO2     "../usr/share/lib/zoneinfo/"
michael@0: #define TZ_ENV_CHECK    "localtime"
michael@0: #else
michael@0: #define TZDEFAULT       "/etc/localtime"
michael@0: #define TZZONEINFO      "/usr/share/zoneinfo/"
michael@0: #endif
michael@0: #if U_HAVE_DIRENT_H
michael@0: #define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
michael@0: /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
michael@0:    symlinked to /etc/localtime, which makes searchForTZFile return
michael@0:    'localtime' when it's the first match. */
michael@0: #define TZFILE_SKIP2    "localtime"
michael@0: #define SEARCH_TZFILE
michael@0: #include <dirent.h>  /* Needed to search through system timezone files */
michael@0: #endif
michael@0: static char gTimeZoneBuffer[PATH_MAX];
michael@0: static char *gTimeZoneBufferPtr = NULL;
michael@0: #endif
michael@0: 
michael@0: #if !U_PLATFORM_USES_ONLY_WIN32_API
michael@0: #define isNonDigit(ch) (ch < '0' || '9' < ch)
michael@0: static UBool isValidOlsonID(const char *id) {
michael@0:     int32_t idx = 0;
michael@0: 
michael@0:     /* Determine if this is something like Iceland (Olson ID)
michael@0:     or AST4ADT (non-Olson ID) */
michael@0:     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
michael@0:         idx++;
michael@0:     }
michael@0: 
michael@0:     /* If we went through the whole string, then it might be okay.
michael@0:     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
michael@0:     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
michael@0:     The rest of the time it could be an Olson ID. George */
michael@0:     return (UBool)(id[idx] == 0
michael@0:         || uprv_strcmp(id, "PST8PDT") == 0
michael@0:         || uprv_strcmp(id, "MST7MDT") == 0
michael@0:         || uprv_strcmp(id, "CST6CDT") == 0
michael@0:         || uprv_strcmp(id, "EST5EDT") == 0);
michael@0: }
michael@0: 
michael@0: /* On some Unix-like OS, 'posix' subdirectory in
michael@0:    /usr/share/zoneinfo replicates the top-level contents. 'right'
michael@0:    subdirectory has the same set of files, but individual files
michael@0:    are different from those in the top-level directory or 'posix'
michael@0:    because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
michael@0:    has files for UTC.
michael@0:    When the first match for /etc/localtime is in either of them
michael@0:    (usually in posix because 'right' has different file contents),
michael@0:    or TZ environment variable points to one of them, createTimeZone
michael@0:    fails because, say, 'posix/America/New_York' is not an Olson
michael@0:    timezone id ('America/New_York' is). So, we have to skip
michael@0:    'posix/' and 'right/' at the beginning. */
michael@0: static void skipZoneIDPrefix(const char** id) {
michael@0:     if (uprv_strncmp(*id, "posix/", 6) == 0
michael@0:         || uprv_strncmp(*id, "right/", 6) == 0)
michael@0:     {
michael@0:         *id += 6;
michael@0:     }
michael@0: }
michael@0: #endif
michael@0: 
michael@0: #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
michael@0: 
/* Converts an offset in hours to whole seconds. The argument is parenthesized
   so that compound expressions such as a+b are scaled as a whole. */
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)((offset)*3600)

/* One row of the table that maps an offset plus abbreviated zone names to an
   Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* offset in seconds */
    int32_t daylightType;   /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    const char *stdID;      /* abbreviated standard-time ID */
    const char *dstID;      /* abbreviated daylight-time ID */
    const char *olsonID;    /* Olson ID the row maps to */
} OffsetZoneMapping;

/* Values stored in OffsetZoneMapping.daylightType. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
michael@0: 
michael@0: /*
michael@0: Lookup table used to disambiguate abbreviated timezone IDs: an offset, a
michael@0: daylight-savings pattern and the standard/daylight abbreviations together
michael@0: select one Olson ID.
michael@0: Before adding anything to this list, take a look at
michael@0: icu/source/tools/tzcode/tz.alias
michael@0: Entries without daylight savings (U_DAYLIGHT_NONE) are sometimes important
michael@0: to define because of aliases.
michael@0: This list can be tested with icu/source/test/compat/tzone.pl
michael@0: More values could be added to daylightType to increase precision.
michael@0: */
michael@0: static const OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
michael@0:     {-45900, U_DAYLIGHT_DECEMBER, "CHAST", "CHADT", "Pacific/Chatham"},
michael@0:     {-43200, U_DAYLIGHT_JUNE, "PETT", "PETST", "Asia/Kamchatka"},
michael@0:     {-43200, U_DAYLIGHT_DECEMBER, "NZST", "NZDT", "Pacific/Auckland"},
michael@0:     {-43200, U_DAYLIGHT_JUNE, "ANAT", "ANAST", "Asia/Anadyr"},
michael@0:     {-39600, U_DAYLIGHT_JUNE, "MAGT", "MAGST", "Asia/Magadan"},
michael@0:     {-37800, U_DAYLIGHT_DECEMBER, "LHST", "LHST", "Australia/Lord_Howe"},
michael@0:     {-36000, U_DAYLIGHT_DECEMBER, "EST", "EST", "Australia/Sydney"},
michael@0:     {-36000, U_DAYLIGHT_JUNE, "SAKT", "SAKST", "Asia/Sakhalin"},
michael@0:     {-36000, U_DAYLIGHT_JUNE, "VLAT", "VLAST", "Asia/Vladivostok"},
michael@0:     {-34200, U_DAYLIGHT_DECEMBER, "CST", "CST", "Australia/South"},
michael@0:     {-32400, U_DAYLIGHT_JUNE, "YAKT", "YAKST", "Asia/Yakutsk"},
michael@0:     {-32400, U_DAYLIGHT_JUNE, "CHOT", "CHOST", "Asia/Choibalsan"},
michael@0:     {-31500, U_DAYLIGHT_DECEMBER, "CWST", "CWST", "Australia/Eucla"},
michael@0:     {-28800, U_DAYLIGHT_JUNE, "IRKT", "IRKST", "Asia/Irkutsk"},
michael@0:     {-28800, U_DAYLIGHT_JUNE, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
michael@0:     {-28800, U_DAYLIGHT_DECEMBER, "WST", "WST", "Australia/West"},
michael@0:     {-25200, U_DAYLIGHT_JUNE, "HOVT", "HOVST", "Asia/Hovd"},
michael@0:     {-25200, U_DAYLIGHT_JUNE, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
michael@0:     {-21600, U_DAYLIGHT_JUNE, "NOVT", "NOVST", "Asia/Novosibirsk"},
michael@0:     {-21600, U_DAYLIGHT_JUNE, "OMST", "OMSST", "Asia/Omsk"},
michael@0:     {-18000, U_DAYLIGHT_JUNE, "YEKT", "YEKST", "Asia/Yekaterinburg"},
michael@0:     {-14400, U_DAYLIGHT_JUNE, "SAMT", "SAMST", "Europe/Samara"},
michael@0:     {-14400, U_DAYLIGHT_JUNE, "AMT", "AMST", "Asia/Yerevan"},
michael@0:     {-14400, U_DAYLIGHT_JUNE, "AZT", "AZST", "Asia/Baku"},
michael@0:     {-10800, U_DAYLIGHT_JUNE, "AST", "ADT", "Asia/Baghdad"},
michael@0:     {-10800, U_DAYLIGHT_JUNE, "MSK", "MSD", "Europe/Moscow"},
michael@0:     {-10800, U_DAYLIGHT_JUNE, "VOLT", "VOLST", "Europe/Volgograd"},
michael@0:     {-7200, U_DAYLIGHT_NONE, "EET", "CEST", "Africa/Tripoli"},
michael@0:     {-7200, U_DAYLIGHT_JUNE, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
michael@0:     {-7200, U_DAYLIGHT_JUNE, "IST", "IDT", "Asia/Jerusalem"},
michael@0:     {-3600, U_DAYLIGHT_NONE, "CET", "WEST", "Africa/Algiers"},
michael@0:     {-3600, U_DAYLIGHT_DECEMBER, "WAT", "WAST", "Africa/Windhoek"},
michael@0:     {0, U_DAYLIGHT_JUNE, "GMT", "IST", "Europe/Dublin"},
michael@0:     {0, U_DAYLIGHT_JUNE, "GMT", "BST", "Europe/London"},
michael@0:     {0, U_DAYLIGHT_NONE, "WET", "WEST", "Africa/Casablanca"},
michael@0:     {0, U_DAYLIGHT_NONE, "WET", "WET", "Africa/El_Aaiun"},
michael@0:     {3600, U_DAYLIGHT_JUNE, "AZOT", "AZOST", "Atlantic/Azores"},
michael@0:     {3600, U_DAYLIGHT_JUNE, "EGT", "EGST", "America/Scoresbysund"},
michael@0:     {10800, U_DAYLIGHT_JUNE, "PMST", "PMDT", "America/Miquelon"},
michael@0:     {10800, U_DAYLIGHT_DECEMBER, "UYT", "UYST", "America/Montevideo"},
michael@0:     {10800, U_DAYLIGHT_JUNE, "WGT", "WGST", "America/Godthab"},
michael@0:     {10800, U_DAYLIGHT_DECEMBER, "BRT", "BRST", "Brazil/East"},
michael@0:     {12600, U_DAYLIGHT_JUNE, "NST", "NDT", "America/St_Johns"},
michael@0:     {14400, U_DAYLIGHT_JUNE, "AST", "ADT", "Canada/Atlantic"},
michael@0:     {14400, U_DAYLIGHT_DECEMBER, "AMT", "AMST", "America/Cuiaba"},
michael@0:     {14400, U_DAYLIGHT_DECEMBER, "CLT", "CLST", "Chile/Continental"},
michael@0:     {14400, U_DAYLIGHT_DECEMBER, "FKT", "FKST", "Atlantic/Stanley"},
michael@0:     {14400, U_DAYLIGHT_DECEMBER, "PYT", "PYST", "America/Asuncion"},
michael@0:     {18000, U_DAYLIGHT_JUNE, "CST", "CDT", "America/Havana"},
michael@0:     {18000, U_DAYLIGHT_JUNE, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
michael@0:     {21600, U_DAYLIGHT_DECEMBER, "EAST", "EASST", "Chile/EasterIsland"},
michael@0:     {21600, U_DAYLIGHT_NONE, "CST", "MDT", "Canada/Saskatchewan"},
michael@0:     {21600, U_DAYLIGHT_NONE, "CST", "CDT", "America/Guatemala"},
michael@0:     {21600, U_DAYLIGHT_JUNE, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
michael@0:     {25200, U_DAYLIGHT_JUNE, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
michael@0:     {28800, U_DAYLIGHT_NONE, "PST", "PST", "Pacific/Pitcairn"},
michael@0:     {28800, U_DAYLIGHT_JUNE, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
michael@0:     {32400, U_DAYLIGHT_JUNE, "AKST", "AKDT", "US/Alaska"},
michael@0:     {36000, U_DAYLIGHT_JUNE, "HAST", "HADT", "US/Aleutian"}
michael@0: };
michael@0: 
michael@0: /*#define DEBUG_TZNAME*/
michael@0: 
michael@0: /*
michael@0:  * Looks up the Olson ID for a pair of zone abbreviations.
michael@0:  *
michael@0:  * @param stdID        Standard-time abbreviation; must not be NULL.
michael@0:  * @param dstID        Daylight-time abbreviation; must not be NULL.
michael@0:  * @param daylightType One of the U_DAYLIGHT_* values.
michael@0:  * @param offset       Offset in seconds, in the units of OffsetZoneMapping.offsetSeconds.
michael@0:  * @return The olsonID of the first OFFSET_ZONE_MAPPINGS row whose four
michael@0:  *         fields all match, or NULL when no row matches.
michael@0:  */
michael@0: static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
michael@0: {
michael@0:     int32_t row;
michael@0: #ifdef DEBUG_TZNAME
michael@0:     fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
michael@0: #endif
michael@0:     for (row = 0; row < LENGTHOF(OFFSET_ZONE_MAPPINGS); row++)
michael@0:     {
michael@0:         const struct OffsetZoneMapping *candidate = &OFFSET_ZONE_MAPPINGS[row];
michael@0: 
michael@0:         /* Cheap integer comparisons first, string comparisons last. */
michael@0:         if (offset != candidate->offsetSeconds) {
michael@0:             continue;
michael@0:         }
michael@0:         if (daylightType != candidate->daylightType) {
michael@0:             continue;
michael@0:         }
michael@0:         if (strcmp(candidate->stdID, stdID) != 0) {
michael@0:             continue;
michael@0:         }
michael@0:         if (strcmp(candidate->dstID, dstID) != 0) {
michael@0:             continue;
michael@0:         }
michael@0:         return candidate->olsonID;
michael@0:     }
michael@0:     return NULL;
michael@0: }
michael@0: #endif
michael@0: 
michael@0: #ifdef SEARCH_TZFILE
michael@0: #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
michael@0: #define MAX_READ_SIZE 512
michael@0: 
michael@0: typedef struct DefaultTZInfo { /* State about the default TZ file that compareBinaryFiles() caches between calls. */
michael@0:     char* defaultTZBuffer; /* Contents of the default TZ file; allocated on first need, NULL before that. */
michael@0:     int64_t defaultTZFileSize; /* Size of the default TZ file as reported by ftell(); 0 means not measured yet. */
michael@0:     FILE* defaultTZFilePtr; /* Open handle on the default TZ file; NULL until the first comparison opens it. */
michael@0:     UBool defaultTZstatus; /* Initialized to FALSE by uprv_tzname(); not read by the code in this section. */
michael@0:     int32_t defaultTZPosition; /* Offset into defaultTZBuffer of the next chunk to compare; reset to 0 by every comparison. */
michael@0: } DefaultTZInfo;
michael@0: 
michael@0: /*
michael@0:  * This method compares the two files given to see if they are a match.
michael@0:  * It is currently use to compare two TZ files.
michael@0:  */
michael@0: static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
michael@0:     FILE* file; 
michael@0:     int64_t sizeFile;
michael@0:     int64_t sizeFileLeft;
michael@0:     int32_t sizeFileRead;
michael@0:     int32_t sizeFileToRead;
michael@0:     char bufferFile[MAX_READ_SIZE];
michael@0:     UBool result = TRUE;
michael@0: 
michael@0:     if (tzInfo->defaultTZFilePtr == NULL) {
michael@0:         tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
michael@0:     }
michael@0:     file = fopen(TZFileName, "r");
michael@0: 
michael@0:     tzInfo->defaultTZPosition = 0; /* reset position to begin search */
michael@0: 
michael@0:     if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
michael@0:         /* First check that the file size are equal. */
michael@0:         if (tzInfo->defaultTZFileSize == 0) {
michael@0:             fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
michael@0:             tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
michael@0:         }
michael@0:         fseek(file, 0, SEEK_END);
michael@0:         sizeFile = ftell(file);
michael@0:         sizeFileLeft = sizeFile;
michael@0: 
michael@0:         if (sizeFile != tzInfo->defaultTZFileSize) {
michael@0:             result = FALSE;
michael@0:         } else {
michael@0:             /* Store the data from the files in seperate buffers and
michael@0:              * compare each byte to determine equality.
michael@0:              */
michael@0:             if (tzInfo->defaultTZBuffer == NULL) {
michael@0:                 rewind(tzInfo->defaultTZFilePtr);
michael@0:                 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
michael@0:                 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
michael@0:             }
michael@0:             rewind(file);
michael@0:             while(sizeFileLeft > 0) {
michael@0:                 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
michael@0:                 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
michael@0: 
michael@0:                 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
michael@0:                 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
michael@0:                     result = FALSE;
michael@0:                     break;
michael@0:                 }
michael@0:                 sizeFileLeft -= sizeFileRead;
michael@0:                 tzInfo->defaultTZPosition += sizeFileRead;
michael@0:             }
michael@0:         }
michael@0:     } else {
michael@0:         result = FALSE;
michael@0:     }
michael@0: 
michael@0:     if (file != NULL) {
michael@0:         fclose(file);
michael@0:     }
michael@0: 
michael@0:     return result;
michael@0: }
michael@0: /* dirent also lists two entries: "." and ".." that we can safely ignore. */
michael@0: #define SKIP1 "."
michael@0: #define SKIP2 ".."
michael@0: /* Receives the zone ID of a match; searchForTZFile() returns a pointer to it. */
michael@0: static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
michael@0: /*
michael@0:  * Recursively traverses the given directory for a file whose contents match
michael@0:  * the default TZ file (TZDEFAULT) and returns the first match.
michael@0:  *
michael@0:  * @param path   Directory to search. It must end with '/' because entry
michael@0:  *               names are appended to it directly, and it is expected to
michael@0:  *               start with TZZONEINFO because that many characters are
michael@0:  *               dropped from a matching path to form the zone ID.
michael@0:  * @param tzInfo Cache handed on to compareBinaryFiles().
michael@0:  * @return Pointer to SEARCH_TZFILE_RESULT holding the zone ID (with any
michael@0:  *         "posix/" or "right/" prefix removed), or NULL when the directory
michael@0:  *         cannot be opened or holds no match. Entries whose full path would
michael@0:  *         not fit into MAX_PATH_SIZE are skipped instead of overflowing the
michael@0:  *         path buffer.
michael@0:  */
michael@0: static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
michael@0:     DIR* dirp = opendir(path);
michael@0:     DIR* subDirp = NULL;
michael@0:     struct dirent* dirEntry = NULL;
michael@0:     size_t pathLen;
michael@0: 
michael@0:     char* result = NULL;
michael@0:     if (dirp == NULL) {
michael@0:         return result;
michael@0:     }
michael@0: 
michael@0:     pathLen = uprv_strlen(path);
michael@0: 
michael@0:     /* Check each entry in the directory. */
michael@0:     while((dirEntry = readdir(dirp)) != NULL) {
michael@0:         const char* dirName = dirEntry->d_name;
michael@0:         if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
michael@0:             /* Create a newpath with the new entry to test each entry in the directory. */
michael@0:             char newpath[MAX_PATH_SIZE];
michael@0: 
michael@0:             /* newpath has to hold path, the entry name, a possible trailing
michael@0:                '/' for the recursive call and the terminating NUL. */
michael@0:             if (pathLen + uprv_strlen(dirName) + 2 > (size_t)MAX_PATH_SIZE) {
michael@0:                 continue;
michael@0:             }
michael@0:             uprv_strcpy(newpath, path);
michael@0:             uprv_strcat(newpath, dirName);
michael@0: 
michael@0:             if ((subDirp = opendir(newpath)) != NULL) {
michael@0:                 /* If this new path is a directory, make a recursive call with the newpath. */
michael@0:                 closedir(subDirp);
michael@0:                 uprv_strcat(newpath, "/");
michael@0:                 result = searchForTZFile(newpath, tzInfo);
michael@0:                 /*
michael@0:                  Have to get out here. Otherwise, we'd keep looking
michael@0:                  and return the first match in the top-level directory
michael@0:                  if there's a match in the top-level. If not, this function
michael@0:                  would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
michael@0:                  It worked without this in most cases because we have a fallback of calling
michael@0:                  localtime_r to figure out the default timezone.
michael@0:                 */
michael@0:                 if (result != NULL)
michael@0:                     break;
michael@0:             } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
michael@0:                 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
michael@0:                     /* Drop the leading TZZONEINFO directory to obtain the zone ID. */
michael@0:                     const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
michael@0:                     skipZoneIDPrefix(&zoneid);
michael@0:                     uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
michael@0:                     result = SEARCH_TZFILE_RESULT;
michael@0:                     /* Get out after the first one found. */
michael@0:                     break;
michael@0:                 }
michael@0:             }
michael@0:         }
michael@0:     }
michael@0:     closedir(dirp);
michael@0:     return result;
michael@0: }
michael@0: #endif
michael@0: /*
michael@0:  * Returns an identifier for the system default time zone.
michael@0:  *
michael@0:  * On platforms that use only the Win32 API the result of
michael@0:  * uprv_detectWindowsTimeZone() is returned when it is not NULL. Elsewhere
michael@0:  * the following sources are tried in order, subject to the build macros:
michael@0:  *   1. the TZ environment variable, when it looks like an Olson ID;
michael@0:  *   2. the target of the TZDEFAULT symbolic link (CHECK_LOCALTIME_LINK),
michael@0:  *      or, when that cannot be read as a link and SEARCH_TZFILE is defined,
michael@0:  *      a file below TZZONEINFO whose contents equal TZDEFAULT; a successful
michael@0:  *      result is remembered in gTimeZoneBufferPtr for later calls;
michael@0:  *   3. the U_TZNAME abbreviations remapped through remapShortTimeZone().
michael@0:  * When none of these gives an answer, U_TZNAME[n] is returned, or "" when
michael@0:  * U_TZNAME is not defined.
michael@0:  *
michael@0:  * @param n Index into U_TZNAME; only used for the final fallback.
michael@0:  * @return The time zone identifier. As noted below, the value returned on
michael@0:  *         Windows is freed by timezone.cpp.
michael@0:  */
michael@0: U_CAPI const char* U_EXPORT2
michael@0: uprv_tzname(int n)
michael@0: {
michael@0:     const char *tzid = NULL;
michael@0: #if U_PLATFORM_USES_ONLY_WIN32_API
michael@0:     tzid = uprv_detectWindowsTimeZone();
michael@0: 
michael@0:     if (tzid != NULL) {
michael@0:         return tzid;
michael@0:     }
michael@0: #else
michael@0: 
michael@0: /*#if U_PLATFORM_IS_DARWIN_BASED
michael@0:     int ret;
michael@0: 
michael@0:     tzid = getenv("TZFILE");
michael@0:     if (tzid != NULL) {
michael@0:         return tzid;
michael@0:     }
michael@0: #endif*/
michael@0: 
michael@0: /* This code can be temporarily disabled to test tzname resolution later on. */
michael@0: #ifndef DEBUG_TZNAME
michael@0:     tzid = getenv("TZ");
michael@0:     if (tzid != NULL && isValidOlsonID(tzid)
michael@0: #if U_PLATFORM == U_PF_SOLARIS
michael@0:     /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
michael@0:         && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
michael@0: #endif
michael@0:     ) {
michael@0:         /* This might be a good Olson ID. */
michael@0:         skipZoneIDPrefix(&tzid);
michael@0:         return tzid;
michael@0:     }
michael@0:     /* else U_TZNAME will give a better result. */
michael@0: #endif
michael@0: 
michael@0: #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
michael@0:     /* Caller must handle threading issues */
michael@0:     if (gTimeZoneBufferPtr == NULL) {
michael@0:         /*
michael@0:         This is a trick to look at the name of the link to get the Olson ID
michael@0:         because the tzfile contents is underspecified.
michael@0:         This isn't guaranteed to work because it may not be a symlink.
michael@0:         */
michael@0:         /* readlink() does not NUL-terminate and may fill the whole size it is
michael@0:            given, so one byte is reserved for the terminator written below. */
michael@0:         int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
michael@0:         if (0 < ret) {
michael@0:             int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
michael@0:             gTimeZoneBuffer[ret] = 0;
michael@0:             if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
michael@0:                 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
michael@0:             {
michael@0:                 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
michael@0:             }
michael@0: #if U_PLATFORM == U_PF_SOLARIS
michael@0:             else
michael@0:             {
michael@0:                 tzZoneInfoLen = uprv_strlen(TZZONEINFO2);
michael@0:                 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0
michael@0:                                 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
michael@0:                 {
michael@0:                     return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
michael@0:                 }
michael@0:             }
michael@0: #endif
michael@0:         } else {
michael@0: #if defined(SEARCH_TZFILE)
michael@0:             DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
michael@0:             if (tzInfo != NULL) {
michael@0:                 tzInfo->defaultTZBuffer = NULL;
michael@0:                 tzInfo->defaultTZFileSize = 0;
michael@0:                 tzInfo->defaultTZFilePtr = NULL;
michael@0:                 tzInfo->defaultTZstatus = FALSE;
michael@0:                 tzInfo->defaultTZPosition = 0;
michael@0: 
michael@0:                 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
michael@0: 
michael@0:                 /* Free previously allocated memory */
michael@0:                 if (tzInfo->defaultTZBuffer != NULL) {
michael@0:                     uprv_free(tzInfo->defaultTZBuffer);
michael@0:                 }
michael@0:                 if (tzInfo->defaultTZFilePtr != NULL) {
michael@0:                     fclose(tzInfo->defaultTZFilePtr);
michael@0:                 }
michael@0:                 uprv_free(tzInfo);
michael@0:             }
michael@0: 
michael@0:             if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
michael@0:                 return gTimeZoneBufferPtr;
michael@0:             }
michael@0: #endif
michael@0:         }
michael@0:     }
michael@0:     else {
michael@0:         return gTimeZoneBufferPtr;
michael@0:     }
michael@0: #endif
michael@0: #endif
michael@0: 
michael@0: #ifdef U_TZNAME
michael@0: #if U_PLATFORM_USES_ONLY_WIN32_API
michael@0:     /* The return value is free'd in timezone.cpp on Windows because
michael@0:      * the other code path returns a pointer to a heap location. */
michael@0:     return uprv_strdup(U_TZNAME[n]);
michael@0: #else
michael@0:     /*
michael@0:     U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
michael@0:     So we remap the abbreviation to an olson ID.
michael@0: 
michael@0:     Since Windows exposes a little more timezone information,
michael@0:     we normally don't use this code on Windows because
michael@0:     uprv_detectWindowsTimeZone should have already given the correct answer.
michael@0:     */
michael@0:     {
michael@0:         struct tm juneSol, decemberSol;
michael@0:         int daylightType;
michael@0:         static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
michael@0:         static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
michael@0: 
michael@0:         /* This probing will tell us when daylight savings occurs.  */
michael@0:         localtime_r(&juneSolstice, &juneSol);
michael@0:         localtime_r(&decemberSolstice, &decemberSol);
michael@0:         if(decemberSol.tm_isdst > 0) {
michael@0:           daylightType = U_DAYLIGHT_DECEMBER;
michael@0:         } else if(juneSol.tm_isdst > 0) {
michael@0:           daylightType = U_DAYLIGHT_JUNE;
michael@0:         } else {
michael@0:           daylightType = U_DAYLIGHT_NONE;
michael@0:         }
michael@0:         tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
michael@0:         if (tzid != NULL) {
michael@0:             return tzid;
michael@0:         }
michael@0:     }
michael@0:     return U_TZNAME[n];
michael@0: #endif
michael@0: #else
michael@0:     return "";
michael@0: #endif
michael@0: }
michael@0: 
michael@0: /* Get and set the ICU data directory --------------------------------------- */
michael@0: 
michael@0: static char *gDataDirectory = NULL; /* NULL until u_setDataDirectory() runs; afterwards either a heap copy of the path or the literal "", which must not be freed. */
michael@0: #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
michael@0:  static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated; released by putil_cleanup(). */
michael@0: #endif
michael@0: 
michael@0: /*
michael@0:  * Cleanup callback for this file: releases the data directory string and,
michael@0:  * where it is compiled in, the corrected POSIX locale, leaving both
michael@0:  * pointers NULL. Always returns TRUE.
michael@0:  */
michael@0: static UBool U_CALLCONV putil_cleanup(void)
michael@0: {
michael@0:     char *oldDataDirectory = gDataDirectory;
michael@0: 
michael@0:     gDataDirectory = NULL;
michael@0:     /* An empty string is the shared "" literal rather than heap memory,
michael@0:        so only non-empty values are handed to uprv_free(). */
michael@0:     if (oldDataDirectory != NULL && oldDataDirectory[0] != 0) {
michael@0:         uprv_free(oldDataDirectory);
michael@0:     }
michael@0: #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
michael@0:     if (gCorrectedPOSIXLocale != NULL) {
michael@0:         uprv_free(gCorrectedPOSIXLocale);
michael@0:         gCorrectedPOSIXLocale = NULL;
michael@0:     }
michael@0: #endif
michael@0:     return TRUE;
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Set the data directory.
michael@0:  *    Stores a private copy of the passed string in the global data dir,
michael@0:  *    with alternate file separators replaced by the primary one where the
michael@0:  *    two differ. NULL and "" both select the shared empty string. If the
michael@0:  *    copy cannot be allocated the current setting is left unchanged.
michael@0:  */
michael@0: U_CAPI void U_EXPORT2
michael@0: u_setDataDirectory(const char *directory) {
michael@0:     char *dirCopy;
michael@0: 
michael@0:     if(directory!=NULL && *directory!=0) {
michael@0:         int32_t dirLength=(int32_t)uprv_strlen(directory);
michael@0: 
michael@0:         dirCopy = (char *)uprv_malloc(dirLength + 2);
michael@0:         /* Exit out if the copy could not be created. */
michael@0:         if (dirCopy == NULL) {
michael@0:             return;
michael@0:         }
michael@0:         uprv_strcpy(dirCopy, directory);
michael@0: 
michael@0: #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
michael@0:         {
michael@0:             /* Each search resumes right after the separator just replaced. */
michael@0:             char *sep;
michael@0:             for(sep = uprv_strchr(dirCopy, U_FILE_ALT_SEP_CHAR);
michael@0:                 sep != NULL;
michael@0:                 sep = uprv_strchr(sep + 1, U_FILE_ALT_SEP_CHAR))
michael@0:             {
michael@0:                 *sep = U_FILE_SEP_CHAR;
michael@0:             }
michael@0:         }
michael@0: #endif
michael@0:     }
michael@0:     else {
michael@0:         /* A small optimization that avoids the malloc and copy when the
michael@0:         shared library is used; it also makes sure that NULL is never
michael@0:         stored.
michael@0:         */
michael@0:         dirCopy = (char *)"";
michael@0:     }
michael@0: 
michael@0:     /* Only a non-empty previous value lives on the heap. */
michael@0:     if (gDataDirectory != NULL && gDataDirectory[0] != 0) {
michael@0:         uprv_free(gDataDirectory);
michael@0:     }
michael@0:     gDataDirectory = dirCopy;
michael@0:     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Tells whether a path is absolute.
michael@0:  *
michael@0:  * A path counts as absolute when it starts with the file separator, with
michael@0:  * the alternate separator (where that differs), or, on Win32-only
michael@0:  * platforms, with an ASCII drive letter followed by ':'.
michael@0:  *
michael@0:  * @param path Path to test; may be NULL.
michael@0:  * @return TRUE for an absolute path, FALSE otherwise, including for NULL
michael@0:  *         and "".
michael@0:  */
michael@0: U_CAPI UBool U_EXPORT2
michael@0: uprv_pathIsAbsolute(const char *path)
michael@0: {
michael@0:     UBool isAbsolute = FALSE;
michael@0: 
michael@0:     if (path != NULL && path[0] != 0) {
michael@0:         const char first = path[0];
michael@0: 
michael@0:         if (first == U_FILE_SEP_CHAR) {
michael@0:             isAbsolute = TRUE;
michael@0:         }
michael@0: #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
michael@0:         else if (first == U_FILE_ALT_SEP_CHAR) {
michael@0:             isAbsolute = TRUE;
michael@0:         }
michael@0: #endif
michael@0: #if U_PLATFORM_USES_ONLY_WIN32_API
michael@0:         /* Reading path[1] is safe because path[0] is not the terminator. */
michael@0:         else if ((('A' <= first && first <= 'Z') || ('a' <= first && first <= 'z'))
michael@0:                  && path[1] == ':') {
michael@0:             isAbsolute = TRUE;
michael@0:         }
michael@0: #endif
michael@0:     }
michael@0: 
michael@0:     return isAbsolute;
michael@0: }
michael@0: 
michael@0: /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
michael@0:    until some client wrapper makefiles are updated */
michael@0: #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
michael@0: # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
michael@0: #  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
michael@0: # endif
michael@0: #endif
michael@0: 
michael@0: /*
michael@0:  * Returns the ICU data directory, determining and storing it on first use.
michael@0:  *
michael@0:  * A directory that has already been set is returned as is. Otherwise the
michael@0:  * ICU_DATA environment variable is consulted (unless disabled at build
michael@0:  * time), then the compiled-in ICU_DATA_DIR or U_ICU_DATA_DEFAULT_DIR, and
michael@0:  * finally the empty string. The choice is stored through
michael@0:  * u_setDataDirectory().
michael@0:  */
michael@0: U_CAPI const char * U_EXPORT2
michael@0: u_getDataDirectory(void) {
michael@0:     const char *dataDir = NULL;
michael@0: #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
michael@0:     char prefixedDataDir[PATH_MAX];
michael@0: #endif
michael@0: 
michael@0:     /* Nothing to work out once a directory has been stored. */
michael@0:     if(gDataDirectory != NULL) {
michael@0:         return gDataDirectory;
michael@0:     }
michael@0: 
michael@0:     /*
michael@0:     When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
michael@0:     override ICU's data with the ICU_DATA environment variable. This prevents
michael@0:     problems where multiple custom copies of ICU's specific version of data
michael@0:     are installed on a system. Either the application must define the data
michael@0:     directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
michael@0:     ICU, set the data with udata_setCommonData or trust that all of the
michael@0:     required data is contained in ICU's data library that contains
michael@0:     the entry point defined by U_ICUDATA_ENTRY_POINT.
michael@0: 
michael@0:     There may also be some platforms where environment variables
michael@0:     are not allowed.
michael@0:     */
michael@0: #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
michael@0:     /* The environment variable has first say. */
michael@0:     dataDir=getenv("ICU_DATA");
michael@0: #   endif
michael@0: 
michael@0:     /* ICU_DATA_DIR may be set as a compile option.
michael@0:      * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
michael@0:      * and is used only when data is built in archive mode eliminating the need
michael@0:      * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
michael@0:      * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
michael@0:      * set their own path.
michael@0:      */
michael@0: #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
michael@0:     if(dataDir==NULL || dataDir[0]==0) {
michael@0: # ifdef ICU_DATA_DIR
michael@0:         dataDir=ICU_DATA_DIR;
michael@0: # else
michael@0:         dataDir=U_ICU_DATA_DEFAULT_DIR;
michael@0: # endif
michael@0: # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
michael@0:         {
michael@0:             /* Optionally prepend the value of the prefix environment variable. */
michael@0:             const char *rootPrefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
michael@0:             if (rootPrefix != NULL) {
michael@0:                 snprintf(prefixedDataDir, PATH_MAX, "%s%s", rootPrefix, dataDir);
michael@0:                 dataDir=prefixedDataDir;
michael@0:             }
michael@0:         }
michael@0: # endif
michael@0:     }
michael@0: #endif
michael@0: 
michael@0:     /* It looks really bad if nothing was found; fall back to "". */
michael@0:     u_setDataDirectory(dataDir != NULL ? dataDir : "");
michael@0:     return gDataDirectory;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: 
michael@0: 
michael@0: /* Macintosh-specific locale information ------------------------------------ */
michael@0: #if U_PLATFORM == U_PF_CLASSIC_MACOS
michael@0: 
michael@0: typedef struct { /* One row of the mac_lc_recs table that pairs Macintosh locale codes with a POSIX locale ID. */
michael@0:     int32_t script; /* Set to MAC_LC_MAGIC_NUMBER in every table row. */
michael@0:     int32_t region; /* Set to MAC_LC_MAGIC_NUMBER in every table row. */
michael@0:     int32_t lang; /* Set to MAC_LC_MAGIC_NUMBER in every table row. */
michael@0:     int32_t date_region; /* Numeric code that distinguishes the rows; presumably the Mac OS region code -- confirm against the lookup code. */
michael@0:     const char* posixID; /* POSIX locale ID for the row, e.g. "en_US". */
michael@0: } mac_lc_rec;
michael@0: 
michael@0: /* Todo: This will be updated with a newer version from www.unicode.org web
michael@0:    page when it's available.*/
michael@0: #define MAC_LC_MAGIC_NUMBER -5
michael@0: #define MAC_LC_INIT_NUMBER -9
michael@0: 
michael@0: /*
michael@0:  * Table of Macintosh locale records. Every row leaves script, region and
michael@0:  * lang at MAC_LC_MAGIC_NUMBER and identifies itself through date_region;
michael@0:  * the last row, whose date_region is MAC_LC_MAGIC_NUMBER as well, is the
michael@0:  * en_US fallback. Rows that are commented out have no POSIX ID assigned yet.
michael@0:  */
michael@0: static const mac_lc_rec mac_lc_recs[] = {
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },
michael@0:     /* United States*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },
michael@0:     /* France*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },
michael@0:     /* Great Britain*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },
michael@0:     /* Germany*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },
michael@0:     /* Italy*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },
michael@0:     /* Netherlands*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },
michael@0:     /* French for Belgium or Luxembourg*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },
michael@0:     /* Sweden*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },
michael@0:     /* Denmark*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" },
michael@0:     /* Portugal*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" },
michael@0:     /* French Canada*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" },
michael@0:     /* Israel (Hebrew; this row used to carry the Icelandic ID "is_IS")*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" },
michael@0:     /* Japan*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" },
michael@0:     /* Australia*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" },
michael@0:     /* the Arabic world (?)*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" },
michael@0:     /* Finland*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" },
michael@0:     /* French for Switzerland*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" },
michael@0:     /* German for Switzerland*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" },
michael@0:     /* Greece*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" },
michael@0:     /* Iceland ===*/
michael@0:     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
michael@0:     /* Malta ===*/
michael@0:     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
michael@0:     /* Cyprus ===*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" },
michael@0:     /* Turkey ===*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" },
michael@0:     /* Croatian system for Yugoslavia*/
michael@0:     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
michael@0:     /* Hindi system for India*/
michael@0:     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
michael@0:     /* Pakistan*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" },
michael@0:     /* Lithuania*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" },
michael@0:     /* Poland*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" },
michael@0:     /* Hungary*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" },
michael@0:     /* Estonia*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" },
michael@0:     /* Latvia*/
michael@0:     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
michael@0:     /* Lapland  [Ask Rich for the data. HS]*/
michael@0:     /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
michael@0:     /* Faeroe Islands*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" },
michael@0:     /* Iran*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" },
michael@0:     /* Russia*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" },
michael@0:     /* Ireland*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" },
michael@0:     /* Korea*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" },
michael@0:     /* People's Republic of China*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" },
michael@0:     /* Taiwan*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" },
michael@0:     /* Thailand*/
michael@0: 
michael@0:     /* fallback is en_US*/
michael@0:     { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
michael@0:       MAC_LC_MAGIC_NUMBER, "en_US" }
michael@0: };
michael@0: 
michael@0: #endif
michael@0: 
michael@0: #if U_POSIX_LOCALE
michael@0: /* Tells whether a POSIX locale ID carries no usable information: it is
michael@0:  * either missing or names the "C" / "POSIX" locale.
michael@0:  */
michael@0: static UBool putil_isUnsetOrCLocale(const char *id)
michael@0: {
michael@0:     if (id == NULL) {
michael@0:         return TRUE;
michael@0:     }
michael@0:     if (uprv_strcmp("C", id) == 0) {
michael@0:         return TRUE;
michael@0:     }
michael@0:     return (UBool)(uprv_strcmp("POSIX", id) == 0);
michael@0: }
michael@0: 
michael@0: /* Shared helper of uprv_getPOSIXIDForDefaultLocale and
michael@0:  * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
michael@0:  * LC_CTYPE and LC_MESSAGES; for any other category, and whenever nothing
michael@0:  * better than "C"/"POSIX" can be found, it returns "en_US_POSIX".
michael@0:  */
michael@0: static const char *uprv_getPOSIXIDForCategory(int category)
michael@0: {
michael@0:     const char* localeID = NULL;
michael@0: 
michael@0:     if (category == LC_MESSAGES || category == LC_CTYPE) {
michael@0:         /*
michael@0:         * On Solaris two different calls to setlocale can result in
michael@0:         * different values. Only get this value once.
michael@0:         *
michael@0:         * setlocale is asked first because an application can set this.
michael@0:         *
michael@0:         * LC_ALL can't be used because it's platform dependent. The LANG
michael@0:         * environment variable seems to affect LC_CTYPE variable by default.
michael@0:         * Here is what setlocale(LC_ALL, NULL) can return.
michael@0:         * HPUX can return 'C C C C C C C'
michael@0:         * Solaris can return /en_US/C/C/C/C/C on the second try.
michael@0:         * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
michael@0:         *
michael@0:         * The default codepage detection also needs to use LC_CTYPE.
michael@0:         *
michael@0:         * Do not call setlocale(LC_*, "")! Using an empty string instead
michael@0:         * of NULL, will modify the libc behavior.
michael@0:         */
michael@0:         localeID = setlocale(category, NULL);
michael@0:         if (putil_isUnsetOrCLocale(localeID)) {
michael@0:             /* Maybe we got some garbage. Fall back on the environment:
michael@0:                LC_ALL, then the category's own variable, then LANG. */
michael@0:             localeID = getenv("LC_ALL");
michael@0:             if (localeID == NULL) {
michael@0:                 localeID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
michael@0:             }
michael@0:             if (localeID == NULL) {
michael@0:                 localeID = getenv("LANG");
michael@0:             }
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /* Nothing worked? Give it a nice POSIX default value. */
michael@0:     return putil_isUnsetOrCLocale(localeID) ? "en_US_POSIX" : localeID;
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Returns the raw POSIX ID of the default locale, exactly as found. The value
michael@0:  * comes from LC_MESSAGES (and indirectly from LC_ALL and LANG) and is looked
michael@0:  * up only on the first call; later calls return the remembered pointer.
michael@0:  */
michael@0: static const char *uprv_getPOSIXIDForDefaultLocale(void)
michael@0: {
michael@0:     static const char *cachedID = NULL;
michael@0: 
michael@0:     if (cachedID != 0) {
michael@0:         return cachedID;
michael@0:     }
michael@0:     cachedID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
michael@0:     return cachedID;
michael@0: }
michael@0: 
michael@0: #if !U_CHARSET_IS_UTF8
michael@0: /*
michael@0:  * Returns the raw POSIX ID used for default codepage detection, exactly as
michael@0:  * found. The value comes from LC_CTYPE (and indirectly from LC_ALL and LANG)
michael@0:  * and is looked up only on the first call; later calls return the remembered
michael@0:  * pointer.
michael@0:  */
michael@0: static const char *uprv_getPOSIXIDForDefaultCodepage(void)
michael@0: {
michael@0:     static const char *cachedID = NULL;
michael@0: 
michael@0:     if (cachedID != 0) {
michael@0:         return cachedID;
michael@0:     }
michael@0:     cachedID = uprv_getPOSIXIDForCategory(LC_CTYPE);
michael@0:     return cachedID;
michael@0: }
michael@0: #endif
michael@0: #endif
michael@0: 
michael@0: /*
michael@0:  * Returns the default locale ID for the current platform.
michael@0:  *
michael@0:  * - POSIX: converts the POSIX ID from uprv_getPOSIXIDForDefaultLocale() into
michael@0:  *   an ICU-style ID (see the table below), stores it in heap memory owned by
michael@0:  *   gCorrectedPOSIXLocale and registers putil_cleanup. Returns NULL if the
michael@0:  *   allocation fails.
michael@0:  * - Win32: converts the thread LCID with uprv_convertToPosix and caches the
michael@0:  *   result in gCorrectedPOSIXLocale; returns "en_US" if that fails.
michael@0:  * - Classic MacOS: returns the matching entry of mac_lc_recs.
michael@0:  * - OS/400: builds the ID in a function-static buffer.
michael@0:  *
michael@0:  * NOTE: The caller should handle thread safety
michael@0:  */
michael@0: U_CAPI const char* U_EXPORT2
michael@0: uprv_getDefaultLocaleID()
michael@0: {
michael@0: #if U_POSIX_LOCALE
michael@0: /*
michael@0:   Note that:  (a '!' means the ID is improper somehow)
michael@0:      LC_ALL  ---->     default_loc          codepage
michael@0: --------------------------------------------------------
michael@0:      ab.CD             ab                   CD
michael@0:      ab@CD             ab__CD               -
michael@0:      ab@CD.EF          ab__CD               EF
michael@0: 
michael@0:      ab_CD.EF@GH       ab_CD_GH             EF
michael@0: 
michael@0: Some 'improper' ways to do the same as above:
michael@0:   !  ab_CD@GH.EF       ab_CD_GH             EF
michael@0:   !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
michael@0:   !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
michael@0: 
michael@0:      _CD@GH            _CD_GH               -
michael@0:      _CD.EF@GH         _CD_GH               EF
michael@0: 
michael@0: The variant cannot have dots in it.
michael@0: The 'rightmost' variant (@xxx) wins.
michael@0: The leftmost codepage (.xxx) wins.
michael@0: */
michael@0:     char *correctedPOSIXLocale = 0;
michael@0:     const char* posixID = uprv_getPOSIXIDForDefaultLocale();
michael@0:     const char *p;
michael@0:     const char *q;
michael@0:     int32_t len;
michael@0: 
michael@0:     /* Format: (no spaces)
michael@0:     ll [ _CC ] [ . MM ] [ @ VV]
michael@0: 
michael@0:       l = lang, C = ctry, M = charmap, V = variant
michael@0:     */
michael@0: 
michael@0:     if (gCorrectedPOSIXLocale != NULL) {
michael@0:         return gCorrectedPOSIXLocale;
michael@0:     }
michael@0: 
michael@0:     if ((p = uprv_strchr(posixID, '.')) != NULL) {
michael@0:         /* The ID contains a '.', which is always dropped, so even after a
michael@0:          * later '@' -> "__" expansion the result fits in strlen(posixID)+1.
michael@0:          */
michael@0:         correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
michael@0:         /* Exit on memory allocation error. */
michael@0:         if (correctedPOSIXLocale == NULL) {
michael@0:             return NULL;
michael@0:         }
michael@0:         uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
michael@0:         correctedPOSIXLocale[p-posixID] = 0;
michael@0: 
michael@0:         /* do not copy after the @ */
michael@0:         if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
michael@0:             correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /* Note that we scan the *uncorrected* ID. */
michael@0:     if ((p = uprv_strrchr(posixID, '@')) != NULL) {
michael@0:         if (correctedPOSIXLocale == NULL) {
michael@0:             /* The new locale can be one char longer than the old one when
michael@0:              * '@' becomes "__" (aa@b -> aa__b), hence +2 instead of +1.
michael@0:              */
michael@0:             correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+2));
michael@0:             /* Exit on memory allocation error. */
michael@0:             if (correctedPOSIXLocale == NULL) {
michael@0:                 return NULL;
michael@0:             }
michael@0:             uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
michael@0:             correctedPOSIXLocale[p-posixID] = 0;
michael@0:         }
michael@0:         p++;
michael@0: 
michael@0:         /* Take care of any special cases here.. */
michael@0:         if (!uprv_strcmp(p, "nynorsk")) {
michael@0:             p = "NY";
michael@0:             /* Don't worry about no__NY. In practice, it won't appear. */
michael@0:         }
michael@0: 
michael@0:         if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
michael@0:             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
michael@0:         }
michael@0:         else {
michael@0:             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
michael@0:         }
michael@0: 
michael@0:         if ((q = uprv_strchr(p, '.')) != NULL) {
michael@0:             /* How big will the resulting string be? */
michael@0:             len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
michael@0:             uprv_strncat(correctedPOSIXLocale, p, q-p);
michael@0:             correctedPOSIXLocale[len] = 0;
michael@0:         }
michael@0:         else {
michael@0:             /* Anything following the @ sign */
michael@0:             uprv_strcat(correctedPOSIXLocale, p);
michael@0:         }
michael@0: 
michael@0:         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
michael@0:          * How about 'russian' -> 'ru'?
michael@0:          * Many of the other locales using ISO codes will be handled by the
michael@0:          * canonicalization functions in uloc_getDefault.
michael@0:          */
michael@0:     }
michael@0: 
michael@0:     /* Was a correction made? */
michael@0:     if (correctedPOSIXLocale == NULL) {
michael@0:         /* copy it, just in case the original pointer goes away.  See j2395 */
michael@0:         correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
michael@0:         /* Exit on memory allocation error. */
michael@0:         if (correctedPOSIXLocale == NULL) {
michael@0:             return NULL;
michael@0:         }
michael@0:         uprv_strcpy(correctedPOSIXLocale, posixID);
michael@0:     }
michael@0: 
michael@0:     if (gCorrectedPOSIXLocale == NULL) {
michael@0:         /* Hand ownership of the buffer to the global. */
michael@0:         gCorrectedPOSIXLocale = correctedPOSIXLocale;
michael@0:         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
michael@0:         correctedPOSIXLocale = NULL;
michael@0:     }
michael@0: 
michael@0:     if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
michael@0:         uprv_free(correctedPOSIXLocale);
michael@0:     }
michael@0: 
michael@0:     /* Return the global rather than the local pointer: if the global was
michael@0:      * already set, the local buffer has just been freed.
michael@0:      */
michael@0:     return gCorrectedPOSIXLocale;
michael@0: 
michael@0: #elif U_PLATFORM_USES_ONLY_WIN32_API
michael@0: #define POSIX_LOCALE_CAPACITY 64
michael@0:     UErrorCode status = U_ZERO_ERROR;
michael@0:     char *correctedPOSIXLocale = 0;
michael@0: 
michael@0:     if (gCorrectedPOSIXLocale != NULL) {
michael@0:         return gCorrectedPOSIXLocale;
michael@0:     }
michael@0: 
michael@0:     LCID id = GetThreadLocale();
michael@0:     correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
michael@0:     if (correctedPOSIXLocale) {
michael@0:         int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
michael@0:         if (U_SUCCESS(status)) {
michael@0:             *(correctedPOSIXLocale + posixLen) = 0;
michael@0:             gCorrectedPOSIXLocale = correctedPOSIXLocale;
michael@0:             ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
michael@0:         } else {
michael@0:             uprv_free(correctedPOSIXLocale);
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     if (gCorrectedPOSIXLocale == NULL) {
michael@0:         return "en_US";
michael@0:     }
michael@0:     return gCorrectedPOSIXLocale;
michael@0: 
michael@0: #elif U_PLATFORM == U_PF_CLASSIC_MACOS
michael@0:     int32_t script = MAC_LC_INIT_NUMBER;
michael@0:     /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
michael@0:     int32_t region = MAC_LC_INIT_NUMBER;
michael@0:     /* = GetScriptManagerVariable(smRegionCode);*/
michael@0:     int32_t lang = MAC_LC_INIT_NUMBER;
michael@0:     /* = GetScriptManagerVariable(smScriptLang);*/
michael@0:     int32_t date_region = MAC_LC_INIT_NUMBER;
michael@0:     const char* posixID = 0;
michael@0:     int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
michael@0:     int32_t i;
michael@0:     Intl1Hndl ih;
michael@0: 
michael@0:     ih = (Intl1Hndl) GetIntlResource(1);
michael@0:     if (ih)
michael@0:         date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
michael@0: 
michael@0:     /* First record whose fields each match or are wildcards wins. */
michael@0:     for (i = 0; i < count; i++) {
michael@0:         if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
michael@0:              || (mac_lc_recs[i].script == script))
michael@0:             && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
michael@0:              || (mac_lc_recs[i].region == region))
michael@0:             && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
michael@0:              || (mac_lc_recs[i].lang == lang))
michael@0:             && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
michael@0:              || (mac_lc_recs[i].date_region == date_region))
michael@0:             )
michael@0:         {
michael@0:             posixID = mac_lc_recs[i].posixID;
michael@0:             break;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     return posixID;
michael@0: 
michael@0: #elif U_PLATFORM == U_PF_OS400
michael@0:     /* locales are process scoped and are by definition thread safe */
michael@0:     static char correctedLocale[64];
michael@0:     const  char *localeID = getenv("LC_ALL");
michael@0:            char *p;
michael@0: 
michael@0:     if (localeID == NULL)
michael@0:         localeID = getenv("LANG");
michael@0:     if (localeID == NULL)
michael@0:         localeID = setlocale(LC_ALL, NULL);
michael@0:     /* Make sure we have something... */
michael@0:     if (localeID == NULL)
michael@0:         return "en_US_POSIX";
michael@0: 
michael@0:     /* Extract the locale name from the path. */
michael@0:     if((p = uprv_strrchr(localeID, '/')) != NULL)
michael@0:     {
michael@0:         /* Increment p to start of locale name. */
michael@0:         p++;
michael@0:         localeID = p;
michael@0:     }
michael@0: 
michael@0:     /* Copy to work location. The copy is bounded so an over-long
michael@0:      * environment value cannot overrun the static buffer; 4 bytes are
michael@0:      * held back for the "URO" suffix appended below plus the terminator.
michael@0:      */
michael@0:     uprv_strncpy(correctedLocale, localeID, sizeof(correctedLocale) - 4);
michael@0:     correctedLocale[sizeof(correctedLocale) - 4] = 0;
michael@0: 
michael@0:     /* Strip off the '.locale' extension. */
michael@0:     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
michael@0:         *p = 0;
michael@0:     }
michael@0: 
michael@0:     /* Upper case the locale name. */
michael@0:     T_CString_toUpperCase(correctedLocale);
michael@0: 
michael@0:     /* See if we are using the POSIX locale.  Any of the
michael@0:     * following are equivalent and use the same QLGPGCMA
michael@0:     * (POSIX) locale.
michael@0:     * QLGPGCMA2 means UCS2
michael@0:     * QLGPGCMA_4 means UTF-32
michael@0:     * QLGPGCMA_8 means UTF-8
michael@0:     */
michael@0:     if ((uprv_strcmp("C", correctedLocale) == 0) ||
michael@0:         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
michael@0:         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
michael@0:     {
michael@0:         uprv_strcpy(correctedLocale, "en_US_POSIX");
michael@0:     }
michael@0:     else
michael@0:     {
michael@0:         int16_t LocaleLen;
michael@0: 
michael@0:         /* Lower case the lang portion. */
michael@0:         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
michael@0:         {
michael@0:             *p = uprv_tolower(*p);
michael@0:         }
michael@0: 
michael@0:         /* Adjust for Euro.  After '_E' add 'URO'.
michael@0:          * The LocaleLen >= 2 guards keep the [LocaleLen - 2] reads inside
michael@0:          * the buffer for empty or one-character names.
michael@0:          */
michael@0:         LocaleLen = uprv_strlen(correctedLocale);
michael@0:         if (LocaleLen >= 2 &&
michael@0:             correctedLocale[LocaleLen - 2] == '_' &&
michael@0:             correctedLocale[LocaleLen - 1] == 'E')
michael@0:         {
michael@0:             uprv_strcat(correctedLocale, "URO");
michael@0:         }
michael@0: 
michael@0:         /* If using Lotus-based locale then convert to
michael@0:          * equivalent non Lotus.
michael@0:          */
michael@0:         else if (LocaleLen >= 2 &&
michael@0:             correctedLocale[LocaleLen - 2] == '_' &&
michael@0:             correctedLocale[LocaleLen - 1] == 'L')
michael@0:         {
michael@0:             correctedLocale[LocaleLen - 2] = 0;
michael@0:         }
michael@0: 
michael@0:         /* There are separate simplified and traditional
michael@0:          * locales called zh_HK_S and zh_HK_T.
michael@0:          */
michael@0:         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
michael@0:         {
michael@0:             uprv_strcpy(correctedLocale, "zh_HK");
michael@0:         }
michael@0: 
michael@0:         /* A special zh_CN_GBK locale...
michael@0:         */
michael@0:         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
michael@0:         {
michael@0:             uprv_strcpy(correctedLocale, "zh_CN");
michael@0:         }
michael@0: 
michael@0:     }
michael@0: 
michael@0:     return correctedLocale;
michael@0: #endif
michael@0: 
michael@0: }
michael@0: 
michael@0: #if !U_CHARSET_IS_UTF8
michael@0: #if U_POSIX_LOCALE
michael@0: /*
michael@0: Due to various platform differences, one platform may specify a charset,
michael@0: when they really mean a different charset. Remap the names so that they are
michael@0: compatible with ICU. Only conflicting/ambiguous aliases should be resolved
michael@0: here. Before adding anything to this function, please consider adding unique
michael@0: names to the ICU alias table in the data directory.
michael@0: */
michael@0: static const char*
michael@0: remapPlatformDependentCodepage(const char *locale, const char *name) {
michael@0:     if (locale != NULL && *locale == 0) {
michael@0:         /* Make sure that an empty locale is handled the same way. */
michael@0:         locale = NULL;
michael@0:     }
michael@0:     if (name == NULL) {
michael@0:         return NULL;
michael@0:     }
michael@0: #if U_PLATFORM == U_PF_AIX
michael@0:     if (uprv_strcmp(name, "IBM-943") == 0) {
michael@0:         /* Use the ASCII compatible ibm-943 */
michael@0:         name = "Shift-JIS";
michael@0:     }
michael@0:     else if (uprv_strcmp(name, "IBM-1252") == 0) {
michael@0:         /* Use the windows-1252 that contains the Euro */
michael@0:         name = "IBM-5348";
michael@0:     }
michael@0: #elif U_PLATFORM == U_PF_SOLARIS
michael@0:     if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
michael@0:         /* Solaris underspecifies the "EUC" name. */
michael@0:         if (uprv_strcmp(locale, "zh_CN") == 0) {
michael@0:             name = "EUC-CN";
michael@0:         }
michael@0:         else if (uprv_strcmp(locale, "zh_TW") == 0) {
michael@0:             name = "EUC-TW";
michael@0:         }
michael@0:         else if (uprv_strcmp(locale, "ko_KR") == 0) {
michael@0:             name = "EUC-KR";
michael@0:         }
michael@0:     }
michael@0:     else if (uprv_strcmp(name, "eucJP") == 0) {
michael@0:         /*
michael@0:         ibm-954 is the best match.
michael@0:         ibm-33722 is the default for eucJP (similar to Windows).
michael@0:         */
michael@0:         name = "eucjis";
michael@0:     }
michael@0:     else if (uprv_strcmp(name, "646") == 0) {
michael@0:         /*
michael@0:          * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
michael@0:          * ISO-8859-1 instead of US-ASCII(646).
michael@0:          */
michael@0:         name = "ISO-8859-1";
michael@0:     }
michael@0: #elif U_PLATFORM_IS_DARWIN_BASED
michael@0:     if (locale == NULL && *name == 0) {
michael@0:         /*
michael@0:         No locale was specified, and an empty name was passed in.
michael@0:         This usually indicates that nl_langinfo didn't return valid information.
michael@0:         Mac OS X uses UTF-8 by default (especially the locale data and console).
michael@0:         */
michael@0:         name = "UTF-8";
michael@0:     }
michael@0:     else if (uprv_strcmp(name, "CP949") == 0) {
michael@0:         /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
michael@0:         name = "EUC-KR";
michael@0:     }
michael@0:     else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
michael@0:         /*
michael@0:          * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
michael@0:          */
michael@0:         name = "UTF-8";
michael@0:     }
michael@0: #elif U_PLATFORM == U_PF_BSD
michael@0:     if (uprv_strcmp(name, "CP949") == 0) {
michael@0:         /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
michael@0:         name = "EUC-KR";
michael@0:     }
michael@0: #elif U_PLATFORM == U_PF_HPUX
michael@0:     if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
michael@0:         /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
michael@0:         /* zh_TW.big5 is not the same charset as zh_HK.big5! */
michael@0:         name = "hkbig5";
michael@0:     }
michael@0:     else if (uprv_strcmp(name, "eucJP") == 0) {
michael@0:         /*
michael@0:         ibm-1350 is the best match, but unavailable.
michael@0:         ibm-954 is mostly a superset of ibm-1350.
michael@0:         ibm-33722 is the default for eucJP (similar to Windows).
michael@0:         */
michael@0:         name = "eucjis";
michael@0:     }
michael@0: #elif U_PLATFORM == U_PF_LINUX
michael@0:     if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
michael@0:         /* Linux underspecifies the "EUC" name. */
michael@0:         if (uprv_strcmp(locale, "korean") == 0) {
michael@0:             name = "EUC-KR";
michael@0:         }
michael@0:         else if (uprv_strcmp(locale, "japanese") == 0) {
michael@0:             /* See comment below about eucJP */
michael@0:             name = "eucjis";
michael@0:         }
michael@0:     }
michael@0:     else if (uprv_strcmp(name, "eucjp") == 0) {
michael@0:         /*
michael@0:         ibm-1350 is the best match, but unavailable.
michael@0:         ibm-954 is mostly a superset of ibm-1350.
michael@0:         ibm-33722 is the default for eucJP (similar to Windows).
michael@0:         */
michael@0:         name = "eucjis";
michael@0:     }
michael@0:     else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
michael@0:             (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
michael@0:         /*
michael@0:          * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
michael@0:          */
michael@0:         name = "UTF-8";
michael@0:     }
michael@0:     /*
michael@0:      * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
michael@0:      * it by falling back to 'US-ASCII' when NULL is returned from this
michael@0:      * function. So, we don't have to worry about it here.
michael@0:      */
michael@0: #endif
michael@0:     /* return NULL when "" is passed in */
michael@0:     if (*name == 0) {
michael@0:         name = NULL;
michael@0:     }
michael@0:     return name;
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Extracts the codepage part of a POSIX locale ID ("ll_CC.codepage@variant").
michael@0:  *
michael@0:  * The text after the first '.' is copied into buffer (at most buffCapacity
michael@0:  * bytes, always NUL-terminated), cut at the first '@', and then passed
michael@0:  * through remapPlatformDependentCodepage together with the part of the ID
michael@0:  * before the '.'.
michael@0:  *
michael@0:  * Returns NULL if localeName is NULL or contains no '.'; otherwise returns
michael@0:  * whatever remapPlatformDependentCodepage returns.
michael@0:  */
michael@0: static const char*
michael@0: getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
michael@0: {
michael@0:     char localeBuf[100];
michael@0:     const char *dot;
michael@0:     char *at;
michael@0: 
michael@0:     if (localeName == NULL) {
michael@0:         return NULL;
michael@0:     }
michael@0:     dot = uprv_strchr(localeName, '.');
michael@0:     if (dot == NULL) {
michael@0:         return NULL;
michael@0:     }
michael@0: 
michael@0:     /* Locale part: everything before the '.', truncated to fit localeBuf. */
michael@0:     size_t localeCapacity = uprv_min(sizeof(localeBuf), (dot-localeName)+1);
michael@0:     uprv_strncpy(localeBuf, localeName, localeCapacity);
michael@0:     localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
michael@0: 
michael@0:     /* Codepage part: everything after the '.', up to an optional '@'. */
michael@0:     uprv_strncpy(buffer, dot+1, buffCapacity);
michael@0:     buffer[buffCapacity-1] = 0; /* ensure NULL termination */
michael@0:     at = const_cast<char *>(uprv_strchr(buffer, '@'));
michael@0:     if (at != NULL) {
michael@0:         *at = 0;
michael@0:     }
michael@0: 
michael@0:     return remapPlatformDependentCodepage(localeBuf, buffer);
michael@0: }
michael@0: #endif
michael@0: /*
michael@0:  * Determines the name of the platform's default codepage. Exactly one of the
michael@0:  * preprocessor branches below is compiled:
michael@0:  *   - OS/400:        "ibm-<ccsid>", with the CCSID taken from QUSRJOBI
michael@0:  *                    (37 when no usable value is reported).
michael@0:  *   - OS/390:        nl_langinfo(CODESET) followed by
michael@0:  *                    UCNV_SWAP_LFNL_OPTION_STRING.
michael@0:  *   - Classic MacOS: the literal "macintosh".
michael@0:  *   - Win32:         "windows-<n>" where n is GetACP().
michael@0:  *   - POSIX:         nl_langinfo (when available), then the codepage part
michael@0:  *                    of the LC_CTYPE POSIX ID, then "US-ASCII".
michael@0:  *   - otherwise:     the literal "US-ASCII".
michael@0:  * The returned pointer refers either to a string literal or to a
michael@0:  * function-static buffer, so the caller must not free it.
michael@0:  */
michael@0: static const char*
michael@0: int_getDefaultCodepage()
michael@0: {
michael@0: #if U_PLATFORM == U_PF_OS400
michael@0:     uint32_t ccsid = 37; /* Default to ibm-37 */
michael@0:     static char codepage[64];
michael@0:     Qwc_JOBI0400_t jobinfo;
michael@0:     Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
michael@0: 
michael@0:     EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
michael@0:         "*                         ", "                ", &error);
michael@0: 
michael@0:     /* NOTE(review): presumably Bytes_Available == 0 means QUSRJOBI reported
michael@0:      * no error - confirm against the API documentation. */
michael@0:     if (error.Bytes_Available == 0) {
michael@0:         /* 0xFFFF is skipped in favor of the next candidate value. */
michael@0:         if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
michael@0:             ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
michael@0:         }
michael@0:         else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
michael@0:             ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
michael@0:         }
michael@0:         /* else use the default */
michael@0:     }
michael@0:     sprintf(codepage,"ibm-%d", ccsid);
michael@0:     return codepage;
michael@0: 
michael@0: #elif U_PLATFORM == U_PF_OS390
michael@0:     static char codepage[64];
michael@0: 
michael@0:     /* The copy length leaves room for the option string appended next. */
michael@0:     strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
michael@0:     strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
michael@0:     codepage[63] = 0; /* NULL terminate */
michael@0: 
michael@0:     return codepage;
michael@0: 
michael@0: #elif U_PLATFORM == U_PF_CLASSIC_MACOS
michael@0:     return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */
michael@0: 
michael@0: #elif U_PLATFORM_USES_ONLY_WIN32_API
michael@0:     static char codepage[64];
michael@0:     sprintf(codepage, "windows-%d", GetACP());
michael@0:     return codepage;
michael@0: 
michael@0: #elif U_POSIX_LOCALE
michael@0:     static char codesetName[100];
michael@0:     const char *localeName = NULL;
michael@0:     const char *name = NULL;
michael@0: 
michael@0:     /* The LC_CTYPE POSIX ID is fetched before nl_langinfo is called. */
michael@0:     localeName = uprv_getPOSIXIDForDefaultCodepage();
michael@0:     uprv_memset(codesetName, 0, sizeof(codesetName));
michael@0: #if U_HAVE_NL_LANGINFO_CODESET
michael@0:     /* When available, check nl_langinfo first because it usually gives more
michael@0:        useful names. It depends on LC_CTYPE.
michael@0:        nl_langinfo may use the same buffer as setlocale. */
michael@0:     {
michael@0:         const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
michael@0: #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
michael@0:         /*
michael@0:          * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
michael@0:          * instead of ASCII. The locale is only passed to the remapping when
michael@0:          * it is not "en_US_POSIX"; otherwise the NULL-locale call below runs.
michael@0:          */
michael@0:         if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
michael@0:             codeset = remapPlatformDependentCodepage(localeName, codeset);
michael@0:         } else
michael@0: #endif
michael@0:         {
michael@0:             codeset = remapPlatformDependentCodepage(NULL, codeset);
michael@0:         }
michael@0: 
michael@0:         /* A NULL result falls through to the POSIX-ID based lookup below. */
michael@0:         if (codeset != NULL) {
michael@0:             uprv_strncpy(codesetName, codeset, sizeof(codesetName));
michael@0:             codesetName[sizeof(codesetName)-1] = 0;
michael@0:             return codesetName;
michael@0:         }
michael@0:     }
michael@0: #endif
michael@0: 
michael@0:     /* Use setlocale in a nice way, and then check some environment variables.
michael@0:        Maybe the application used setlocale already.
michael@0:        getCodepageFromPOSIXID writes the extracted name into codesetName. */
michael@0:     uprv_memset(codesetName, 0, sizeof(codesetName));
michael@0:     name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
michael@0:     if (name) {
michael@0:         /* if we can find the codeset name from setlocale, return that. */
michael@0:         return name;
michael@0:     }
michael@0: 
michael@0:     if (*codesetName == 0)
michael@0:     {
michael@0:         /* Everything failed. Return US ASCII (ISO 646). */
michael@0:         (void)uprv_strcpy(codesetName, "US-ASCII");
michael@0:     }
michael@0:     return codesetName;
michael@0: #else
michael@0:     return "US-ASCII";
michael@0: #endif
michael@0: }
michael@0: 
michael@0: 
michael@0: /*
michael@0:  * Returns the name of the default codepage. The name is computed by
michael@0:  * int_getDefaultCodepage() on the first call and remembered afterwards; the
michael@0:  * check and the assignment happen while holding the global mutex.
michael@0:  */
michael@0: U_CAPI const char*  U_EXPORT2
michael@0: uprv_getDefaultCodepage()
michael@0: {
michael@0:     static char const  *cachedName = NULL;
michael@0: 
michael@0:     umtx_lock(NULL);
michael@0:     if (cachedName == NULL) {
michael@0:         cachedName = int_getDefaultCodepage();
michael@0:     }
michael@0:     umtx_unlock(NULL);
michael@0: 
michael@0:     return cachedName;
michael@0: }
michael@0: #endif  /* !U_CHARSET_IS_UTF8 */
michael@0: 
michael@0: 
michael@0: /* end of platform-specific implementation -------------- */
michael@0: 
michael@0: /* version handling --------------------------------------------------------- */
michael@0: 
michael@0: /*
michael@0:  * Parses a delimiter-separated decimal version string into versionArray.
michael@0:  *
michael@0:  * Up to U_MAX_VERSION_LENGTH fields are read; each parsed number is stored
michael@0:  * after a cast to uint8_t. Parsing stops at the first field that has no
michael@0:  * digits or is not followed by U_VERSION_DELIMITER. All remaining fields are
michael@0:  * set to 0. A NULL versionString yields all zeros; a NULL versionArray makes
michael@0:  * the call a no-op.
michael@0:  */
michael@0: U_CAPI void U_EXPORT2
michael@0: u_versionFromString(UVersionInfo versionArray, const char *versionString) {
michael@0:     char *stop;
michael@0:     uint16_t filled = 0;
michael@0: 
michael@0:     if(versionArray==NULL) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     if(versionString!=NULL) {
michael@0:         const char *cursor = versionString;
michael@0:         for(;;) {
michael@0:             versionArray[filled] = (uint8_t)uprv_strtoul(cursor, &stop, 10);
michael@0:             if(stop==cursor) {
michael@0:                 /* no digits consumed: this field does not count */
michael@0:                 break;
michael@0:             }
michael@0:             ++filled;
michael@0:             if(filled==U_MAX_VERSION_LENGTH || *stop!=U_VERSION_DELIMITER) {
michael@0:                 break;
michael@0:             }
michael@0:             cursor = stop+1;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /* zero-fill whatever was not parsed */
michael@0:     for(; filled<U_MAX_VERSION_LENGTH; ++filled) {
michael@0:         versionArray[filled] = 0;
michael@0:     }
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * UChar variant of u_versionFromString: converts at most
michael@0:  * U_MAX_VERSION_STRING_LENGTH characters of versionString to chars and
michael@0:  * parses the result. Does nothing if either argument is NULL.
michael@0:  */
michael@0: U_CAPI void U_EXPORT2
michael@0: u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
michael@0:     char narrow[U_MAX_VERSION_STRING_LENGTH+1];
michael@0:     int32_t length;
michael@0: 
michael@0:     if(versionArray==NULL || versionString==NULL) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     length = u_strlen(versionString);
michael@0:     length = (length>U_MAX_VERSION_STRING_LENGTH) ? U_MAX_VERSION_STRING_LENGTH : length;
michael@0: 
michael@0:     u_UCharsToChars(versionString, narrow, length);
michael@0:     narrow[length] = 0;
michael@0:     u_versionFromString(versionArray, narrow);
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Writes versionArray as a NUL-terminated, delimiter-separated decimal string
michael@0:  * into versionString.
michael@0:  *
michael@0:  * Trailing zero fields are omitted, but at least two fields are always
michael@0:  * written (e.g. "1.0"). If versionArray is NULL an empty string is written;
michael@0:  * if versionString is NULL the call is a no-op. The caller provides the
michael@0:  * output buffer; this function does not check its size.
michael@0:  *
michael@0:  * Each field is written without leading zeros. When a hundreds digit is
michael@0:  * written the tens digit is always written too, so that 105 becomes "105"
michael@0:  * and not "15".
michael@0:  */
michael@0: U_CAPI void U_EXPORT2
michael@0: u_versionToString(const UVersionInfo versionArray, char *versionString) {
michael@0:     uint16_t count, part;
michael@0:     uint8_t field;
michael@0: 
michael@0:     if(versionString==NULL) {
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     if(versionArray==NULL) {
michael@0:         versionString[0]=0;
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     /* count how many fields need to be written */
michael@0:     for(count=4; count>0 && versionArray[count-1]==0; --count) {
michael@0:     }
michael@0: 
michael@0:     if(count <= 1) {
michael@0:         count = 2;
michael@0:     }
michael@0: 
michael@0:     for(part=0; part<count; ++part) {
michael@0:         /* every field but the first is preceded by the delimiter */
michael@0:         if(part!=0) {
michael@0:             *versionString++=U_VERSION_DELIMITER;
michael@0:         }
michael@0: 
michael@0:         /* write the decimal field value */
michael@0:         field=versionArray[part];
michael@0:         if(field>=100) {
michael@0:             *versionString++=(char)('0'+field/100);
michael@0:             field%=100;
michael@0:             /* the tens digit is mandatory after a hundreds digit */
michael@0:             *versionString++=(char)('0'+field/10);
michael@0:             field%=10;
michael@0:         } else if(field>=10) {
michael@0:             *versionString++=(char)('0'+field/10);
michael@0:             field%=10;
michael@0:         }
michael@0:         *versionString++=(char)('0'+field);
michael@0:     }
michael@0: 
michael@0:     /* NUL-terminate */
michael@0:     *versionString=0;
michael@0: }
michael@0: 
michael@0: /* Fills versionArray with the version parsed from the U_ICU_VERSION string. */
michael@0: U_CAPI void U_EXPORT2
michael@0: u_getVersion(UVersionInfo versionArray)
michael@0: {
michael@0:     u_versionFromString(versionArray, U_ICU_VERSION);
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * icucfg.h dependent code 
michael@0:  */
michael@0: 
michael@0: #if U_ENABLE_DYLOAD
michael@0:  
michael@0: #if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
michael@0: 
michael@0: #if HAVE_DLFCN_H
michael@0: 
michael@0: #ifdef __MVS__
michael@0: #ifndef __SUSV3
michael@0: #define __SUSV3 1
michael@0: #endif
michael@0: #endif
michael@0: #include <dlfcn.h>
michael@0: #endif
michael@0: 
michael@0: /*
michael@0:  * Loads the shared library libName with dlopen(RTLD_NOW|RTLD_GLOBAL).
michael@0:  * Returns the library handle, or NULL if *status already indicates failure
michael@0:  * or the library cannot be opened; in the latter case *status is set to
michael@0:  * U_MISSING_RESOURCE_ERROR.
michael@0:  */
michael@0: U_INTERNAL void * U_EXPORT2
michael@0: uprv_dl_open(const char *libName, UErrorCode *status) {
michael@0:   void *handle;
michael@0: 
michael@0:   if(U_FAILURE(*status)) {
michael@0:     return NULL;
michael@0:   }
michael@0: 
michael@0:   handle = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
michael@0:   if(handle!=NULL) {
michael@0:     return handle;
michael@0:   }
michael@0: 
michael@0: #ifdef U_TRACE_DYLOAD
michael@0:   printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
michael@0: #endif
michael@0:   *status = U_MISSING_RESOURCE_ERROR;
michael@0:   return NULL;
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Closes a library handle with dlclose unless *status already indicates
michael@0:  * failure. The result of dlclose is not inspected.
michael@0:  */
michael@0: U_INTERNAL void U_EXPORT2
michael@0: uprv_dl_close(void *lib, UErrorCode *status) {
michael@0:   if(U_SUCCESS(*status)) {
michael@0:     dlclose(lib);
michael@0:   }
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Looks up the symbol sym in the library lib with dlsym and returns it as a
michael@0:  * function pointer. Returns NULL if *status already indicates failure or the
michael@0:  * symbol is not found; in the latter case *status is set to
michael@0:  * U_MISSING_RESOURCE_ERROR.
michael@0:  */
michael@0: U_INTERNAL UVoidFunction* U_EXPORT2
michael@0: uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
michael@0:   /* dlsym yields a data pointer; the union turns it into a function pointer
michael@0:      without a direct cast. */
michael@0:   union {
michael@0:       UVoidFunction *asFunction;
michael@0:       void *asObject;
michael@0:   } result;
michael@0: 
michael@0:   result.asFunction = NULL;
michael@0:   if(U_SUCCESS(*status)) {
michael@0:     result.asObject = dlsym(lib, sym);
michael@0:     if(result.asObject == NULL) {
michael@0: #ifdef U_TRACE_DYLOAD
michael@0:       printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
michael@0: #endif
michael@0:       *status = U_MISSING_RESOURCE_ERROR;
michael@0:     }
michael@0:   }
michael@0:   return result.asFunction;
michael@0: }
michael@0: 
michael@0: #else
michael@0: 
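michael@0: /*
michael@0:  * Null (nonexistent) implementation: U_ENABLE_DYLOAD is set, but the
michael@0:  * dlopen()-based branch above was not selected. These stubs only report
michael@0:  * U_UNSUPPORTED_ERROR.
michael@0:  */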
michael@0: 
michael@0: /**
michael@0:  * Stub: never opens anything.
michael@0:  *
michael@0:  * @param libName ignored
michael@0:  * @param status  in/out error code; set to U_UNSUPPORTED_ERROR unless it
michael@0:  *                already holds a failure
michael@0:  * @return always NULL
michael@0:  */
michael@0: U_INTERNAL void * U_EXPORT2
michael@0: uprv_dl_open(const char *libName, UErrorCode *status) {
michael@0:   if(U_SUCCESS(*status)) {
michael@0:     *status = U_UNSUPPORTED_ERROR;
michael@0:   }
michael@0:   return NULL;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Stub: closes nothing.
michael@0:  *
michael@0:  * @param lib    ignored
michael@0:  * @param status in/out error code; set to U_UNSUPPORTED_ERROR unless it
michael@0:  *               already holds a failure
michael@0:  */
michael@0: U_INTERNAL void U_EXPORT2
michael@0: uprv_dl_close(void *lib, UErrorCode *status) {
michael@0:   if(U_SUCCESS(*status)) {
michael@0:     *status = U_UNSUPPORTED_ERROR;
michael@0:   }
michael@0: }
michael@0: 
michael@0: 
michael@0: /**
michael@0:  * Stub: resolves nothing.
michael@0:  *
michael@0:  * @param lib    ignored
michael@0:  * @param sym    ignored
michael@0:  * @param status in/out error code; set to U_UNSUPPORTED_ERROR unless it
michael@0:  *               already holds a failure
michael@0:  * @return always NULL
michael@0:  */
michael@0: U_INTERNAL UVoidFunction* U_EXPORT2
michael@0: uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
michael@0:   UVoidFunction *none = NULL;
michael@0: 
michael@0:   if(U_FAILURE(*status)) {
michael@0:     return none;
michael@0:   }
michael@0:   *status = U_UNSUPPORTED_ERROR;
michael@0:   return none;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: #endif
michael@0: 
michael@0: #elif U_PLATFORM_USES_ONLY_WIN32_API
michael@0: 
michael@0: /**
michael@0:  * Loads a library through LoadLibraryA().
michael@0:  *
michael@0:  * @param libName passed to LoadLibraryA() unchanged
michael@0:  * @param status  in/out error code. If it already holds a failure nothing
michael@0:  *                is attempted; it is set to U_MISSING_RESOURCE_ERROR when
michael@0:  *                LoadLibraryA() returns NULL.
michael@0:  * @return the module handle cast to void*, or NULL on failure
michael@0:  */
michael@0: U_INTERNAL void * U_EXPORT2
michael@0: uprv_dl_open(const char *libName, UErrorCode *status) {
michael@0:   HMODULE module;
michael@0: 
michael@0:   if(U_FAILURE(*status)) {
michael@0:     return NULL;
michael@0:   }
michael@0: 
michael@0:   module = LoadLibraryA(libName);
michael@0:   if(module == NULL) {
michael@0:     *status = U_MISSING_RESOURCE_ERROR;
michael@0:   }
michael@0:   return (void*)module;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Hands lib (cast to HMODULE) to FreeLibrary(). Does nothing when status
michael@0:  * already holds a failure. The result of FreeLibrary() is not examined and
michael@0:  * status is never modified.
michael@0:  *
michael@0:  * @param lib    handle passed to FreeLibrary()
michael@0:  * @param status in error code; only read
michael@0:  */
michael@0: U_INTERNAL void U_EXPORT2
michael@0: uprv_dl_close(void *lib, UErrorCode *status) {
michael@0:   if(U_SUCCESS(*status)) {
michael@0:     FreeLibrary((HMODULE)lib);
michael@0:   }
michael@0: }
michael@0: 
michael@0: 
michael@0: /**
michael@0:  * Looks up the exported symbol sym in the module lib via GetProcAddress().
michael@0:  *
michael@0:  * @param lib    module handle (used as an HMODULE)
michael@0:  * @param sym    symbol name passed to GetProcAddress()
michael@0:  * @param status in/out error code. If it already holds a failure no lookup
michael@0:  *               is made. Set to U_MISSING_RESOURCE_ERROR when lib is NULL
michael@0:  *               or GetLastError() reports ERROR_PROC_NOT_FOUND, and to
michael@0:  *               U_UNSUPPORTED_ERROR for any other lookup failure.
michael@0:  * @return the function pointer, or NULL on failure
michael@0:  */
michael@0: U_INTERNAL UVoidFunction* U_EXPORT2
michael@0: uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
michael@0:   HMODULE handle = (HMODULE)lib;
michael@0:   UVoidFunction* addr = NULL;
michael@0:   
michael@0:   if(U_FAILURE(*status)) return NULL;
michael@0:   
michael@0:   if(lib==NULL) {
michael@0:     /*
michael@0:      * No module to search. Report it through status so that a NULL result
michael@0:      * is never paired with a success code.
michael@0:      */
michael@0:     *status = U_MISSING_RESOURCE_ERROR;
michael@0:     return NULL;
michael@0:   }
michael@0:   
michael@0:   addr = (UVoidFunction*)GetProcAddress(handle, sym);
michael@0:   
michael@0:   if(addr==NULL) {
michael@0:     DWORD lastError = GetLastError();
michael@0:     if(lastError == ERROR_PROC_NOT_FOUND) {
michael@0:       *status = U_MISSING_RESOURCE_ERROR;
michael@0:     } else {
michael@0:       *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
michael@0:     }
michael@0:   }
michael@0:   
michael@0:   return addr;
michael@0: }
michael@0: 
michael@0: 
michael@0: #else
michael@0: 
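michael@0: /*
michael@0:  * No dynamic loading: U_ENABLE_DYLOAD is off and this is not a Win32-only
michael@0:  * platform. These stubs only report U_UNSUPPORTED_ERROR.
michael@0:  */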
michael@0: 
michael@0: /**
michael@0:  * Stub: never opens anything.
michael@0:  *
michael@0:  * @param libName ignored
michael@0:  * @param status  in/out error code; set to U_UNSUPPORTED_ERROR unless it
michael@0:  *                already holds a failure
michael@0:  * @return always NULL
michael@0:  */
michael@0: U_INTERNAL void * U_EXPORT2
michael@0: uprv_dl_open(const char *libName, UErrorCode *status) {
michael@0:     if(U_SUCCESS(*status)) {
michael@0:         *status = U_UNSUPPORTED_ERROR;
michael@0:     }
michael@0:     return NULL;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Stub: closes nothing.
michael@0:  *
michael@0:  * @param lib    ignored
michael@0:  * @param status in/out error code; set to U_UNSUPPORTED_ERROR unless it
michael@0:  *               already holds a failure
michael@0:  */
michael@0: U_INTERNAL void U_EXPORT2
michael@0: uprv_dl_close(void *lib, UErrorCode *status) {
michael@0:     if(U_SUCCESS(*status)) {
michael@0:         *status = U_UNSUPPORTED_ERROR;
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: /**
michael@0:  * Stub: resolves nothing.
michael@0:  *
michael@0:  * @param lib    ignored
michael@0:  * @param sym    ignored
michael@0:  * @param status in/out error code; set to U_UNSUPPORTED_ERROR unless it
michael@0:  *               already holds a failure
michael@0:  * @return always NULL
michael@0:  */
michael@0: U_INTERNAL UVoidFunction* U_EXPORT2
michael@0: uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
michael@0:   UVoidFunction *none = NULL;
michael@0: 
michael@0:   if(U_FAILURE(*status)) {
michael@0:     return none;
michael@0:   }
michael@0:   *status = U_UNSUPPORTED_ERROR;
michael@0:   return none;
michael@0: }
michael@0: 
michael@0: #endif /* U_ENABLE_DYLOAD */
michael@0: 
michael@0: /*
michael@0:  * Hey, Emacs, please set the following:
michael@0:  *
michael@0:  * Local Variables:
michael@0:  * indent-tabs-mode: nil
michael@0:  * End:
michael@0:  *
michael@0:  */