intl/icu/source/common/cstring.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 ******************************************************************************
     3 *
     4 *   Copyright (C) 1997-2011, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 ******************************************************************************
     8 *
     9 * File CSTRING.C
    10 *
    11 * @author       Helena Shih
    12 *
    13 * Modification History:
    14 *
    15 *   Date        Name        Description
    16 *   6/18/98     hshih       Created
    17 *   09/08/98    stephen     Added include for ctype, for Mac Port
    18 *   11/15/99    helena      Integrated S/390 IEEE changes. 
    19 ******************************************************************************
    20 */
    24 #include <stdlib.h>
    25 #include <stdio.h>
    26 #include "unicode/utypes.h"
    27 #include "cmemory.h"
    28 #include "cstring.h"
    29 #include "uassert.h"
    31 /*
    32  * We hardcode case conversion for invariant characters to match our expectation
    33  * and the compiler execution charset.
    34  * This prevents problems on systems
    35  * - with non-default casing behavior, like Turkish system locales where
    36  *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
    37  * - where there are no lowercase Latin characters at all, or using different
    38  *   codes (some old EBCDIC codepages)
    39  *
    40  * This works because the compiler usually runs on a platform where the execution
    41  * charset includes all of the invariant characters at their expected
    42  * code positions, so that the char * string literals in ICU code match
    43  * the char literals here.
    44  *
    45  * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
    46  * and the set of uppercase Latin letters is discontiguous as well.
    47  */
    49 U_CAPI UBool U_EXPORT2
    50 uprv_isASCIILetter(char c) {
    51 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    52     return
    53         ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
    54         ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
    55 #else
    56     return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
    57 #endif
    58 }
    60 U_CAPI char U_EXPORT2
    61 uprv_toupper(char c) {
    62 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    63     if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
    64         c=(char)(c+('A'-'a'));
    65     }
    66 #else
    67     if('a'<=c && c<='z') {
    68         c=(char)(c+('A'-'a'));
    69     }
    70 #endif
    71     return c;
    72 }
    75 #if 0
    76 /*
    77  * Commented out because cstring.h defines uprv_tolower() to be
    78  * the same as either uprv_asciitolower() or uprv_ebcdictolower()
    79  * to reduce the amount of code to cover with tests.
    80  *
    81  * Note that this uprv_tolower() definition is likely to work for most
    82  * charset families, not just ASCII and EBCDIC, because its #else branch
    83  * is written generically.
    84  */
    85 U_CAPI char U_EXPORT2
    86 uprv_tolower(char c) {
    87 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    88     if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
    89         c=(char)(c+('a'-'A'));
    90     }
    91 #else
    92     if('A'<=c && c<='Z') {
    93         c=(char)(c+('a'-'A'));
    94     }
    95 #endif
    96     return c;
    97 }
    98 #endif
   100 U_CAPI char U_EXPORT2
   101 uprv_asciitolower(char c) {
   102     if(0x41<=c && c<=0x5a) {
   103         c=(char)(c+0x20);
   104     }
   105     return c;
   106 }
   108 U_CAPI char U_EXPORT2
   109 uprv_ebcdictolower(char c) {
   110     if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
   111         (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
   112         (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
   113     ) {
   114         c=(char)(c-0x40);
   115     }
   116     return c;
   117 }
   120 U_CAPI char* U_EXPORT2
   121 T_CString_toLowerCase(char* str)
   122 {
   123     char* origPtr = str;
   125     if (str) {
   126         do
   127             *str = (char)uprv_tolower(*str);
   128         while (*(str++));
   129     }
   131     return origPtr;
   132 }
   134 U_CAPI char* U_EXPORT2
   135 T_CString_toUpperCase(char* str)
   136 {
   137     char* origPtr = str;
   139     if (str) {
   140         do
   141             *str = (char)uprv_toupper(*str);
   142         while (*(str++));
   143     }
   145     return origPtr;
   146 }
   148 /*
   149  * Takes a int32_t and fills in  a char* string with that number "radix"-based.
   150  * Does not handle negative values (makes an empty string for them).
   151  * Writes at most 12 chars ("-2147483647" plus NUL).
   152  * Returns the length of the string (not including the NUL).
   153  */
   154 U_CAPI int32_t U_EXPORT2
   155 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
   156 {
   157     char      tbuf[30];
   158     int32_t   tbx    = sizeof(tbuf);
   159     uint8_t   digit;
   160     int32_t   length = 0;
   161     uint32_t  uval;
   163     U_ASSERT(radix>=2 && radix<=16);
   164     uval = (uint32_t) v;
   165     if(v<0 && radix == 10) {
   166         /* Only in base 10 do we conside numbers to be signed. */
   167         uval = (uint32_t)(-v); 
   168         buffer[length++] = '-';
   169     }
   171     tbx = sizeof(tbuf)-1;
   172     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
   173     do {
   174         digit = (uint8_t)(uval % radix);
   175         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
   176         uval  = uval / radix;
   177     } while (uval != 0);
   179     /* copy converted number into user buffer  */
   180     uprv_strcpy(buffer+length, tbuf+tbx);
   181     length += sizeof(tbuf) - tbx -1;
   182     return length;
   183 }
   187 /*
   188  * Takes a int64_t and fills in  a char* string with that number "radix"-based.
   189  * Writes at most 21: chars ("-9223372036854775807" plus NUL).
   190  * Returns the length of the string, not including the terminating NULL.
   191  */
   192 U_CAPI int32_t U_EXPORT2
   193 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
   194 {
   195     char      tbuf[30];
   196     int32_t   tbx    = sizeof(tbuf);
   197     uint8_t   digit;
   198     int32_t   length = 0;
   199     uint64_t  uval;
   201     U_ASSERT(radix>=2 && radix<=16);
   202     uval = (uint64_t) v;
   203     if(v<0 && radix == 10) {
   204         /* Only in base 10 do we conside numbers to be signed. */
   205         uval = (uint64_t)(-v); 
   206         buffer[length++] = '-';
   207     }
   209     tbx = sizeof(tbuf)-1;
   210     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
   211     do {
   212         digit = (uint8_t)(uval % radix);
   213         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
   214         uval  = uval / radix;
   215     } while (uval != 0);
   217     /* copy converted number into user buffer  */
   218     uprv_strcpy(buffer+length, tbuf+tbx);
   219     length += sizeof(tbuf) - tbx -1;
   220     return length;
   221 }
   224 U_CAPI int32_t U_EXPORT2
   225 T_CString_stringToInteger(const char *integerString, int32_t radix)
   226 {
   227     char *end;
   228     return uprv_strtoul(integerString, &end, radix);
   230 }
   232 U_CAPI int U_EXPORT2
   233 uprv_stricmp(const char *str1, const char *str2) {
   234     if(str1==NULL) {
   235         if(str2==NULL) {
   236             return 0;
   237         } else {
   238             return -1;
   239         }
   240     } else if(str2==NULL) {
   241         return 1;
   242     } else {
   243         /* compare non-NULL strings lexically with lowercase */
   244         int rc;
   245         unsigned char c1, c2;
   247         for(;;) {
   248             c1=(unsigned char)*str1;
   249             c2=(unsigned char)*str2;
   250             if(c1==0) {
   251                 if(c2==0) {
   252                     return 0;
   253                 } else {
   254                     return -1;
   255                 }
   256             } else if(c2==0) {
   257                 return 1;
   258             } else {
   259                 /* compare non-zero characters with lowercase */
   260                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
   261                 if(rc!=0) {
   262                     return rc;
   263                 }
   264             }
   265             ++str1;
   266             ++str2;
   267         }
   268     }
   269 }
   271 U_CAPI int U_EXPORT2
   272 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
   273     if(str1==NULL) {
   274         if(str2==NULL) {
   275             return 0;
   276         } else {
   277             return -1;
   278         }
   279     } else if(str2==NULL) {
   280         return 1;
   281     } else {
   282         /* compare non-NULL strings lexically with lowercase */
   283         int rc;
   284         unsigned char c1, c2;
   286         for(; n--;) {
   287             c1=(unsigned char)*str1;
   288             c2=(unsigned char)*str2;
   289             if(c1==0) {
   290                 if(c2==0) {
   291                     return 0;
   292                 } else {
   293                     return -1;
   294                 }
   295             } else if(c2==0) {
   296                 return 1;
   297             } else {
   298                 /* compare non-zero characters with lowercase */
   299                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
   300                 if(rc!=0) {
   301                     return rc;
   302                 }
   303             }
   304             ++str1;
   305             ++str2;
   306         }
   307     }
   309     return 0;
   310 }
   312 U_CAPI char* U_EXPORT2
   313 uprv_strdup(const char *src) {
   314     size_t len = uprv_strlen(src) + 1;
   315     char *dup = (char *) uprv_malloc(len);
   317     if (dup) {
   318         uprv_memcpy(dup, src, len);
   319     }
   321     return dup;
   322 }
   324 U_CAPI char* U_EXPORT2
   325 uprv_strndup(const char *src, int32_t n) {
   326     char *dup;
   328     if(n < 0) {
   329         dup = uprv_strdup(src);
   330     } else {
   331         dup = (char*)uprv_malloc(n+1);
   332         if (dup) { 
   333             uprv_memcpy(dup, src, n);
   334             dup[n] = 0;
   335         }
   336     }
   338     return dup;
   339 }

mercurial