michael@0: /*
michael@0: ******************************************************************************
michael@0: *
michael@0: *   Copyright (C) 1997-2011, International Business Machines
michael@0: *   Corporation and others.  All Rights Reserved.
michael@0: *
michael@0: ******************************************************************************
michael@0: *
michael@0: * File CSTRING.C
michael@0: *
michael@0: * @author       Helena Shih
michael@0: *
michael@0: * Modification History:
michael@0: *
michael@0: *   Date        Name        Description
michael@0: *   6/18/98     hshih       Created
michael@0: *   09/08/98    stephen     Added include for ctype, for Mac Port
michael@0: *   11/15/99    helena      Integrated S/390 IEEE changes. 
michael@0: ******************************************************************************
michael@0: */
michael@0: 
michael@0: 
michael@0: 
michael@0: #include <stdlib.h>
michael@0: #include <stdio.h>
michael@0: #include "unicode/utypes.h"
michael@0: #include "cmemory.h"
michael@0: #include "cstring.h"
michael@0: #include "uassert.h"
michael@0: 
michael@0: /*
michael@0:  * We hardcode case conversion for invariant characters to match our expectation
michael@0:  * and the compiler execution charset.
michael@0:  * This prevents problems on systems
michael@0:  * - with non-default casing behavior, like Turkish system locales where
michael@0:  *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
michael@0:  * - where there are no lowercase Latin characters at all, or using different
michael@0:  *   codes (some old EBCDIC codepages)
michael@0:  *
michael@0:  * This works because the compiler usually runs on a platform where the execution
michael@0:  * charset includes all of the invariant characters at their expected
michael@0:  * code positions, so that the char * string literals in ICU code match
michael@0:  * the char literals here.
michael@0:  *
michael@0:  * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
michael@0:  * and the set of uppercase Latin letters is discontiguous as well.
michael@0:  */
michael@0: 
michael@0: U_CAPI UBool U_EXPORT2
michael@0: uprv_isASCIILetter(char c) {
michael@0: #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
michael@0:     return
michael@0:         ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
michael@0:         ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
michael@0: #else
michael@0:     return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
michael@0: #endif
michael@0: }
michael@0: 
michael@0: U_CAPI char U_EXPORT2
michael@0: uprv_toupper(char c) {
michael@0: #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
michael@0:     if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
michael@0:         c=(char)(c+('A'-'a'));
michael@0:     }
michael@0: #else
michael@0:     if('a'<=c && c<='z') {
michael@0:         c=(char)(c+('A'-'a'));
michael@0:     }
michael@0: #endif
michael@0:     return c;
michael@0: }
michael@0: 
michael@0: 
#if 0
/*
 * Disabled on purpose: cstring.h maps uprv_tolower() onto either
 * uprv_asciitolower() or uprv_ebcdictolower(), which keeps the amount of
 * code that needs test coverage small.
 *
 * This definition would probably work for most charset families, not only
 * ASCII and EBCDIC, because its #else branch is written generically in
 * terms of character literals.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* the uppercase letters form three separate runs in EBCDIC */
    const UBool isUpper =
        (UBool)((c>='A' && c<='I') || (c>='J' && c<='R') || (c>='S' && c<='Z'));
#else
    const UBool isUpper = (UBool)(c>='A' && c<='Z');
#endif

    return isUpper ? (char)(c+('a'-'A')) : c;
}
#endif
michael@0: 
michael@0: U_CAPI char U_EXPORT2
michael@0: uprv_asciitolower(char c) {
michael@0:     if(0x41<=c && c<=0x5a) {
michael@0:         c=(char)(c+0x20);
michael@0:     }
michael@0:     return c;
michael@0: }
michael@0: 
michael@0: U_CAPI char U_EXPORT2
michael@0: uprv_ebcdictolower(char c) {
michael@0:     if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
michael@0:         (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
michael@0:         (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
michael@0:     ) {
michael@0:         c=(char)(c-0x40);
michael@0:     }
michael@0:     return c;
michael@0: }
michael@0: 
michael@0: 
michael@0: U_CAPI char* U_EXPORT2
michael@0: T_CString_toLowerCase(char* str)
michael@0: {
michael@0:     char* origPtr = str;
michael@0: 
michael@0:     if (str) {
michael@0:         do
michael@0:             *str = (char)uprv_tolower(*str);
michael@0:         while (*(str++));
michael@0:     }
michael@0: 
michael@0:     return origPtr;
michael@0: }
michael@0: 
michael@0: U_CAPI char* U_EXPORT2
michael@0: T_CString_toUpperCase(char* str)
michael@0: {
michael@0:     char* origPtr = str;
michael@0: 
michael@0:     if (str) {
michael@0:         do
michael@0:             *str = (char)uprv_toupper(*str);
michael@0:         while (*(str++));
michael@0:     }
michael@0: 
michael@0:     return origPtr;
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Takes a int32_t and fills in  a char* string with that number "radix"-based.
michael@0:  * Does not handle negative values (makes an empty string for them).
michael@0:  * Writes at most 12 chars ("-2147483647" plus NUL).
michael@0:  * Returns the length of the string (not including the NUL).
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
michael@0: {
michael@0:     char      tbuf[30];
michael@0:     int32_t   tbx    = sizeof(tbuf);
michael@0:     uint8_t   digit;
michael@0:     int32_t   length = 0;
michael@0:     uint32_t  uval;
michael@0:     
michael@0:     U_ASSERT(radix>=2 && radix<=16);
michael@0:     uval = (uint32_t) v;
michael@0:     if(v<0 && radix == 10) {
michael@0:         /* Only in base 10 do we conside numbers to be signed. */
michael@0:         uval = (uint32_t)(-v); 
michael@0:         buffer[length++] = '-';
michael@0:     }
michael@0:     
michael@0:     tbx = sizeof(tbuf)-1;
michael@0:     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
michael@0:     do {
michael@0:         digit = (uint8_t)(uval % radix);
michael@0:         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
michael@0:         uval  = uval / radix;
michael@0:     } while (uval != 0);
michael@0:     
michael@0:     /* copy converted number into user buffer  */
michael@0:     uprv_strcpy(buffer+length, tbuf+tbx);
michael@0:     length += sizeof(tbuf) - tbx -1;
michael@0:     return length;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: /*
michael@0:  * Takes a int64_t and fills in  a char* string with that number "radix"-based.
michael@0:  * Writes at most 21: chars ("-9223372036854775807" plus NUL).
michael@0:  * Returns the length of the string, not including the terminating NULL.
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
michael@0: {
michael@0:     char      tbuf[30];
michael@0:     int32_t   tbx    = sizeof(tbuf);
michael@0:     uint8_t   digit;
michael@0:     int32_t   length = 0;
michael@0:     uint64_t  uval;
michael@0:     
michael@0:     U_ASSERT(radix>=2 && radix<=16);
michael@0:     uval = (uint64_t) v;
michael@0:     if(v<0 && radix == 10) {
michael@0:         /* Only in base 10 do we conside numbers to be signed. */
michael@0:         uval = (uint64_t)(-v); 
michael@0:         buffer[length++] = '-';
michael@0:     }
michael@0:     
michael@0:     tbx = sizeof(tbuf)-1;
michael@0:     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
michael@0:     do {
michael@0:         digit = (uint8_t)(uval % radix);
michael@0:         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
michael@0:         uval  = uval / radix;
michael@0:     } while (uval != 0);
michael@0:     
michael@0:     /* copy converted number into user buffer  */
michael@0:     uprv_strcpy(buffer+length, tbuf+tbx);
michael@0:     length += sizeof(tbuf) - tbx -1;
michael@0:     return length;
michael@0: }
michael@0: 
michael@0: 
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: T_CString_stringToInteger(const char *integerString, int32_t radix)
michael@0: {
michael@0:     char *end;
michael@0:     return uprv_strtoul(integerString, &end, radix);
michael@0: 
michael@0: }
michael@0: 
michael@0: U_CAPI int U_EXPORT2
michael@0: uprv_stricmp(const char *str1, const char *str2) {
michael@0:     if(str1==NULL) {
michael@0:         if(str2==NULL) {
michael@0:             return 0;
michael@0:         } else {
michael@0:             return -1;
michael@0:         }
michael@0:     } else if(str2==NULL) {
michael@0:         return 1;
michael@0:     } else {
michael@0:         /* compare non-NULL strings lexically with lowercase */
michael@0:         int rc;
michael@0:         unsigned char c1, c2;
michael@0: 
michael@0:         for(;;) {
michael@0:             c1=(unsigned char)*str1;
michael@0:             c2=(unsigned char)*str2;
michael@0:             if(c1==0) {
michael@0:                 if(c2==0) {
michael@0:                     return 0;
michael@0:                 } else {
michael@0:                     return -1;
michael@0:                 }
michael@0:             } else if(c2==0) {
michael@0:                 return 1;
michael@0:             } else {
michael@0:                 /* compare non-zero characters with lowercase */
michael@0:                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
michael@0:                 if(rc!=0) {
michael@0:                     return rc;
michael@0:                 }
michael@0:             }
michael@0:             ++str1;
michael@0:             ++str2;
michael@0:         }
michael@0:     }
michael@0: }
michael@0: 
michael@0: U_CAPI int U_EXPORT2
michael@0: uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
michael@0:     if(str1==NULL) {
michael@0:         if(str2==NULL) {
michael@0:             return 0;
michael@0:         } else {
michael@0:             return -1;
michael@0:         }
michael@0:     } else if(str2==NULL) {
michael@0:         return 1;
michael@0:     } else {
michael@0:         /* compare non-NULL strings lexically with lowercase */
michael@0:         int rc;
michael@0:         unsigned char c1, c2;
michael@0: 
michael@0:         for(; n--;) {
michael@0:             c1=(unsigned char)*str1;
michael@0:             c2=(unsigned char)*str2;
michael@0:             if(c1==0) {
michael@0:                 if(c2==0) {
michael@0:                     return 0;
michael@0:                 } else {
michael@0:                     return -1;
michael@0:                 }
michael@0:             } else if(c2==0) {
michael@0:                 return 1;
michael@0:             } else {
michael@0:                 /* compare non-zero characters with lowercase */
michael@0:                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
michael@0:                 if(rc!=0) {
michael@0:                     return rc;
michael@0:                 }
michael@0:             }
michael@0:             ++str1;
michael@0:             ++str2;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     return 0;
michael@0: }
michael@0: 
michael@0: U_CAPI char* U_EXPORT2
michael@0: uprv_strdup(const char *src) {
michael@0:     size_t len = uprv_strlen(src) + 1;
michael@0:     char *dup = (char *) uprv_malloc(len);
michael@0: 
michael@0:     if (dup) {
michael@0:         uprv_memcpy(dup, src, len);
michael@0:     }
michael@0: 
michael@0:     return dup;
michael@0: }
michael@0: 
michael@0: U_CAPI char* U_EXPORT2
michael@0: uprv_strndup(const char *src, int32_t n) {
michael@0:     char *dup;
michael@0: 
michael@0:     if(n < 0) {
michael@0:         dup = uprv_strdup(src);
michael@0:     } else {
michael@0:         dup = (char*)uprv_malloc(n+1);
michael@0:         if (dup) { 
michael@0:             uprv_memcpy(dup, src, n);
michael@0:             dup[n] = 0;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     return dup;
michael@0: }