intl/icu/source/common/cstring.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/cstring.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,339 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 1997-2011, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +******************************************************************************
    1.11 +*
    1.12 +* File CSTRING.C
    1.13 +*
    1.14 +* @author       Helena Shih
    1.15 +*
    1.16 +* Modification History:
    1.17 +*
    1.18 +*   Date        Name        Description
    1.19 +*   6/18/98     hshih       Created
    1.20 +*   09/08/98    stephen     Added include for ctype, for Mac Port
    1.21 +*   11/15/99    helena      Integrated S/390 IEEE changes. 
    1.22 +******************************************************************************
    1.23 +*/
    1.24 +
    1.25 +
    1.26 +
    1.27 +#include <stdlib.h>
    1.28 +#include <stdio.h>
    1.29 +#include "unicode/utypes.h"
    1.30 +#include "cmemory.h"
    1.31 +#include "cstring.h"
    1.32 +#include "uassert.h"
    1.33 +
    1.34 +/*
    1.35 + * We hardcode case conversion for invariant characters to match our expectation
    1.36 + * and the compiler execution charset.
    1.37 + * This prevents problems on systems
    1.38 + * - with non-default casing behavior, like Turkish system locales where
    1.39 + *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
    1.40 + * - where there are no lowercase Latin characters at all, or using different
    1.41 + *   codes (some old EBCDIC codepages)
    1.42 + *
    1.43 + * This works because the compiler usually runs on a platform where the execution
    1.44 + * charset includes all of the invariant characters at their expected
    1.45 + * code positions, so that the char * string literals in ICU code match
    1.46 + * the char literals here.
    1.47 + *
    1.48 + * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
    1.49 + * and the set of uppercase Latin letters is discontiguous as well.
    1.50 + */
    1.51 +
    1.52 +U_CAPI UBool U_EXPORT2
    1.53 +uprv_isASCIILetter(char c) {
    1.54 +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    1.55 +    return
    1.56 +        ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
    1.57 +        ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
    1.58 +#else
    1.59 +    return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
    1.60 +#endif
    1.61 +}
    1.62 +
    1.63 +U_CAPI char U_EXPORT2
    1.64 +uprv_toupper(char c) {
    1.65 +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    1.66 +    if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
    1.67 +        c=(char)(c+('A'-'a'));
    1.68 +    }
    1.69 +#else
    1.70 +    if('a'<=c && c<='z') {
    1.71 +        c=(char)(c+('A'-'a'));
    1.72 +    }
    1.73 +#endif
    1.74 +    return c;
    1.75 +}
    1.76 +
    1.77 +
    1.78 +#if 0
    1.79 +/*
    1.80 + * Commented out because cstring.h defines uprv_tolower() to be
    1.81 + * the same as either uprv_asciitolower() or uprv_ebcdictolower()
    1.82 + * to reduce the amount of code to cover with tests.
    1.83 + *
    1.84 + * Note that this uprv_tolower() definition is likely to work for most
    1.85 + * charset families, not just ASCII and EBCDIC, because its #else branch
    1.86 + * is written generically.
    1.87 + */
    1.88 +U_CAPI char U_EXPORT2
    1.89 +uprv_tolower(char c) {
    1.90 +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    1.91 +    if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
    1.92 +        c=(char)(c+('a'-'A'));
    1.93 +    }
    1.94 +#else
    1.95 +    if('A'<=c && c<='Z') {
    1.96 +        c=(char)(c+('a'-'A'));
    1.97 +    }
    1.98 +#endif
    1.99 +    return c;
   1.100 +}
   1.101 +#endif
   1.102 +
   1.103 +U_CAPI char U_EXPORT2
   1.104 +uprv_asciitolower(char c) {
   1.105 +    if(0x41<=c && c<=0x5a) {
   1.106 +        c=(char)(c+0x20);
   1.107 +    }
   1.108 +    return c;
   1.109 +}
   1.110 +
   1.111 +U_CAPI char U_EXPORT2
   1.112 +uprv_ebcdictolower(char c) {
   1.113 +    if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
   1.114 +        (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
   1.115 +        (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
   1.116 +    ) {
   1.117 +        c=(char)(c-0x40);
   1.118 +    }
   1.119 +    return c;
   1.120 +}
   1.121 +
   1.122 +
   1.123 +U_CAPI char* U_EXPORT2
   1.124 +T_CString_toLowerCase(char* str)
   1.125 +{
   1.126 +    char* origPtr = str;
   1.127 +
   1.128 +    if (str) {
   1.129 +        do
   1.130 +            *str = (char)uprv_tolower(*str);
   1.131 +        while (*(str++));
   1.132 +    }
   1.133 +
   1.134 +    return origPtr;
   1.135 +}
   1.136 +
   1.137 +U_CAPI char* U_EXPORT2
   1.138 +T_CString_toUpperCase(char* str)
   1.139 +{
   1.140 +    char* origPtr = str;
   1.141 +
   1.142 +    if (str) {
   1.143 +        do
   1.144 +            *str = (char)uprv_toupper(*str);
   1.145 +        while (*(str++));
   1.146 +    }
   1.147 +
   1.148 +    return origPtr;
   1.149 +}
   1.150 +
   1.151 +/*
   1.152 + * Takes a int32_t and fills in  a char* string with that number "radix"-based.
   1.153 + * Does not handle negative values (makes an empty string for them).
   1.154 + * Writes at most 12 chars ("-2147483647" plus NUL).
   1.155 + * Returns the length of the string (not including the NUL).
   1.156 + */
   1.157 +U_CAPI int32_t U_EXPORT2
   1.158 +T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
   1.159 +{
   1.160 +    char      tbuf[30];
   1.161 +    int32_t   tbx    = sizeof(tbuf);
   1.162 +    uint8_t   digit;
   1.163 +    int32_t   length = 0;
   1.164 +    uint32_t  uval;
   1.165 +    
   1.166 +    U_ASSERT(radix>=2 && radix<=16);
   1.167 +    uval = (uint32_t) v;
   1.168 +    if(v<0 && radix == 10) {
   1.169 +        /* Only in base 10 do we conside numbers to be signed. */
   1.170 +        uval = (uint32_t)(-v); 
   1.171 +        buffer[length++] = '-';
   1.172 +    }
   1.173 +    
   1.174 +    tbx = sizeof(tbuf)-1;
   1.175 +    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
   1.176 +    do {
   1.177 +        digit = (uint8_t)(uval % radix);
   1.178 +        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
   1.179 +        uval  = uval / radix;
   1.180 +    } while (uval != 0);
   1.181 +    
   1.182 +    /* copy converted number into user buffer  */
   1.183 +    uprv_strcpy(buffer+length, tbuf+tbx);
   1.184 +    length += sizeof(tbuf) - tbx -1;
   1.185 +    return length;
   1.186 +}
   1.187 +
   1.188 +
   1.189 +
   1.190 +/*
   1.191 + * Takes a int64_t and fills in  a char* string with that number "radix"-based.
   1.192 + * Writes at most 21: chars ("-9223372036854775807" plus NUL).
   1.193 + * Returns the length of the string, not including the terminating NULL.
   1.194 + */
   1.195 +U_CAPI int32_t U_EXPORT2
   1.196 +T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
   1.197 +{
   1.198 +    char      tbuf[30];
   1.199 +    int32_t   tbx    = sizeof(tbuf);
   1.200 +    uint8_t   digit;
   1.201 +    int32_t   length = 0;
   1.202 +    uint64_t  uval;
   1.203 +    
   1.204 +    U_ASSERT(radix>=2 && radix<=16);
   1.205 +    uval = (uint64_t) v;
   1.206 +    if(v<0 && radix == 10) {
   1.207 +        /* Only in base 10 do we conside numbers to be signed. */
   1.208 +        uval = (uint64_t)(-v); 
   1.209 +        buffer[length++] = '-';
   1.210 +    }
   1.211 +    
   1.212 +    tbx = sizeof(tbuf)-1;
   1.213 +    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
   1.214 +    do {
   1.215 +        digit = (uint8_t)(uval % radix);
   1.216 +        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
   1.217 +        uval  = uval / radix;
   1.218 +    } while (uval != 0);
   1.219 +    
   1.220 +    /* copy converted number into user buffer  */
   1.221 +    uprv_strcpy(buffer+length, tbuf+tbx);
   1.222 +    length += sizeof(tbuf) - tbx -1;
   1.223 +    return length;
   1.224 +}
   1.225 +
   1.226 +
   1.227 +U_CAPI int32_t U_EXPORT2
   1.228 +T_CString_stringToInteger(const char *integerString, int32_t radix)
   1.229 +{
   1.230 +    char *end;
   1.231 +    return uprv_strtoul(integerString, &end, radix);
   1.232 +
   1.233 +}
   1.234 +
   1.235 +U_CAPI int U_EXPORT2
   1.236 +uprv_stricmp(const char *str1, const char *str2) {
   1.237 +    if(str1==NULL) {
   1.238 +        if(str2==NULL) {
   1.239 +            return 0;
   1.240 +        } else {
   1.241 +            return -1;
   1.242 +        }
   1.243 +    } else if(str2==NULL) {
   1.244 +        return 1;
   1.245 +    } else {
   1.246 +        /* compare non-NULL strings lexically with lowercase */
   1.247 +        int rc;
   1.248 +        unsigned char c1, c2;
   1.249 +
   1.250 +        for(;;) {
   1.251 +            c1=(unsigned char)*str1;
   1.252 +            c2=(unsigned char)*str2;
   1.253 +            if(c1==0) {
   1.254 +                if(c2==0) {
   1.255 +                    return 0;
   1.256 +                } else {
   1.257 +                    return -1;
   1.258 +                }
   1.259 +            } else if(c2==0) {
   1.260 +                return 1;
   1.261 +            } else {
   1.262 +                /* compare non-zero characters with lowercase */
   1.263 +                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
   1.264 +                if(rc!=0) {
   1.265 +                    return rc;
   1.266 +                }
   1.267 +            }
   1.268 +            ++str1;
   1.269 +            ++str2;
   1.270 +        }
   1.271 +    }
   1.272 +}
   1.273 +
   1.274 +U_CAPI int U_EXPORT2
   1.275 +uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
   1.276 +    if(str1==NULL) {
   1.277 +        if(str2==NULL) {
   1.278 +            return 0;
   1.279 +        } else {
   1.280 +            return -1;
   1.281 +        }
   1.282 +    } else if(str2==NULL) {
   1.283 +        return 1;
   1.284 +    } else {
   1.285 +        /* compare non-NULL strings lexically with lowercase */
   1.286 +        int rc;
   1.287 +        unsigned char c1, c2;
   1.288 +
   1.289 +        for(; n--;) {
   1.290 +            c1=(unsigned char)*str1;
   1.291 +            c2=(unsigned char)*str2;
   1.292 +            if(c1==0) {
   1.293 +                if(c2==0) {
   1.294 +                    return 0;
   1.295 +                } else {
   1.296 +                    return -1;
   1.297 +                }
   1.298 +            } else if(c2==0) {
   1.299 +                return 1;
   1.300 +            } else {
   1.301 +                /* compare non-zero characters with lowercase */
   1.302 +                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
   1.303 +                if(rc!=0) {
   1.304 +                    return rc;
   1.305 +                }
   1.306 +            }
   1.307 +            ++str1;
   1.308 +            ++str2;
   1.309 +        }
   1.310 +    }
   1.311 +
   1.312 +    return 0;
   1.313 +}
   1.314 +
   1.315 +U_CAPI char* U_EXPORT2
   1.316 +uprv_strdup(const char *src) {
   1.317 +    size_t len = uprv_strlen(src) + 1;
   1.318 +    char *dup = (char *) uprv_malloc(len);
   1.319 +
   1.320 +    if (dup) {
   1.321 +        uprv_memcpy(dup, src, len);
   1.322 +    }
   1.323 +
   1.324 +    return dup;
   1.325 +}
   1.326 +
   1.327 +U_CAPI char* U_EXPORT2
   1.328 +uprv_strndup(const char *src, int32_t n) {
   1.329 +    char *dup;
   1.330 +
   1.331 +    if(n < 0) {
   1.332 +        dup = uprv_strdup(src);
   1.333 +    } else {
   1.334 +        dup = (char*)uprv_malloc(n+1);
   1.335 +        if (dup) { 
   1.336 +            uprv_memcpy(dup, src, n);
   1.337 +            dup[n] = 0;
   1.338 +        }
   1.339 +    }
   1.340 +
   1.341 +    return dup;
   1.342 +}

mercurial