michael@0: /* michael@0: ****************************************************************************** michael@0: * michael@0: * Copyright (C) 1997-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ****************************************************************************** michael@0: * michael@0: * File CSTRING.C michael@0: * michael@0: * @author Helena Shih michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 6/18/98 hshih Created michael@0: * 09/08/98 stephen Added include for ctype, for Mac Port michael@0: * 11/15/99 helena Integrated S/390 IEEE changes. michael@0: ****************************************************************************** michael@0: */ michael@0: michael@0: michael@0: michael@0: #include michael@0: #include michael@0: #include "unicode/utypes.h" michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "uassert.h" michael@0: michael@0: /* michael@0: * We hardcode case conversion for invariant characters to match our expectation michael@0: * and the compiler execution charset. michael@0: * This prevents problems on systems michael@0: * - with non-default casing behavior, like Turkish system locales where michael@0: * tolower('I') maps to dotless i and toupper('i') maps to dotted I michael@0: * - where there are no lowercase Latin characters at all, or using different michael@0: * codes (some old EBCDIC codepages) michael@0: * michael@0: * This works because the compiler usually runs on a platform where the execution michael@0: * charset includes all of the invariant characters at their expected michael@0: * code positions, so that the char * string literals in ICU code match michael@0: * the char literals here. michael@0: * michael@0: * Note that the set of lowercase Latin letters is discontiguous in EBCDIC michael@0: * and the set of uppercase Latin letters is discontiguous as well. michael@0: */ michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: uprv_isASCIILetter(char c) { michael@0: #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY michael@0: return michael@0: ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') || michael@0: ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z'); michael@0: #else michael@0: return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); michael@0: #endif michael@0: } michael@0: michael@0: U_CAPI char U_EXPORT2 michael@0: uprv_toupper(char c) { michael@0: #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY michael@0: if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { michael@0: c=(char)(c+('A'-'a')); michael@0: } michael@0: #else michael@0: if('a'<=c && c<='z') { michael@0: c=(char)(c+('A'-'a')); michael@0: } michael@0: #endif michael@0: return c; michael@0: } michael@0: michael@0: michael@0: #if 0 michael@0: /* michael@0: * Commented out because cstring.h defines uprv_tolower() to be michael@0: * the same as either uprv_asciitolower() or uprv_ebcdictolower() michael@0: * to reduce the amount of code to cover with tests. michael@0: * michael@0: * Note that this uprv_tolower() definition is likely to work for most michael@0: * charset families, not just ASCII and EBCDIC, because its #else branch michael@0: * is written generically. michael@0: */ michael@0: U_CAPI char U_EXPORT2 michael@0: uprv_tolower(char c) { michael@0: #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY michael@0: if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { michael@0: c=(char)(c+('a'-'A')); michael@0: } michael@0: #else michael@0: if('A'<=c && c<='Z') { michael@0: c=(char)(c+('a'-'A')); michael@0: } michael@0: #endif michael@0: return c; michael@0: } michael@0: #endif michael@0: michael@0: U_CAPI char U_EXPORT2 michael@0: uprv_asciitolower(char c) { michael@0: if(0x41<=c && c<=0x5a) { michael@0: c=(char)(c+0x20); michael@0: } michael@0: return c; michael@0: } michael@0: michael@0: U_CAPI char U_EXPORT2 michael@0: uprv_ebcdictolower(char c) { michael@0: if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || michael@0: (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || michael@0: (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) michael@0: ) { michael@0: c=(char)(c-0x40); michael@0: } michael@0: return c; michael@0: } michael@0: michael@0: michael@0: U_CAPI char* U_EXPORT2 michael@0: T_CString_toLowerCase(char* str) michael@0: { michael@0: char* origPtr = str; michael@0: michael@0: if (str) { michael@0: do michael@0: *str = (char)uprv_tolower(*str); michael@0: while (*(str++)); michael@0: } michael@0: michael@0: return origPtr; michael@0: } michael@0: michael@0: U_CAPI char* U_EXPORT2 michael@0: T_CString_toUpperCase(char* str) michael@0: { michael@0: char* origPtr = str; michael@0: michael@0: if (str) { michael@0: do michael@0: *str = (char)uprv_toupper(*str); michael@0: while (*(str++)); michael@0: } michael@0: michael@0: return origPtr; michael@0: } michael@0: michael@0: /* michael@0: * Takes a int32_t and fills in a char* string with that number "radix"-based. michael@0: * Does not handle negative values (makes an empty string for them). michael@0: * Writes at most 12 chars ("-2147483647" plus NUL). michael@0: * Returns the length of the string (not including the NUL). michael@0: */ michael@0: U_CAPI int32_t U_EXPORT2 michael@0: T_CString_integerToString(char* buffer, int32_t v, int32_t radix) michael@0: { michael@0: char tbuf[30]; michael@0: int32_t tbx = sizeof(tbuf); michael@0: uint8_t digit; michael@0: int32_t length = 0; michael@0: uint32_t uval; michael@0: michael@0: U_ASSERT(radix>=2 && radix<=16); michael@0: uval = (uint32_t) v; michael@0: if(v<0 && radix == 10) { michael@0: /* Only in base 10 do we conside numbers to be signed. */ michael@0: uval = (uint32_t)(-v); michael@0: buffer[length++] = '-'; michael@0: } michael@0: michael@0: tbx = sizeof(tbuf)-1; michael@0: tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ michael@0: do { michael@0: digit = (uint8_t)(uval % radix); michael@0: tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); michael@0: uval = uval / radix; michael@0: } while (uval != 0); michael@0: michael@0: /* copy converted number into user buffer */ michael@0: uprv_strcpy(buffer+length, tbuf+tbx); michael@0: length += sizeof(tbuf) - tbx -1; michael@0: return length; michael@0: } michael@0: michael@0: michael@0: michael@0: /* michael@0: * Takes a int64_t and fills in a char* string with that number "radix"-based. michael@0: * Writes at most 21: chars ("-9223372036854775807" plus NUL). michael@0: * Returns the length of the string, not including the terminating NULL. michael@0: */ michael@0: U_CAPI int32_t U_EXPORT2 michael@0: T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) michael@0: { michael@0: char tbuf[30]; michael@0: int32_t tbx = sizeof(tbuf); michael@0: uint8_t digit; michael@0: int32_t length = 0; michael@0: uint64_t uval; michael@0: michael@0: U_ASSERT(radix>=2 && radix<=16); michael@0: uval = (uint64_t) v; michael@0: if(v<0 && radix == 10) { michael@0: /* Only in base 10 do we conside numbers to be signed. */ michael@0: uval = (uint64_t)(-v); michael@0: buffer[length++] = '-'; michael@0: } michael@0: michael@0: tbx = sizeof(tbuf)-1; michael@0: tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ michael@0: do { michael@0: digit = (uint8_t)(uval % radix); michael@0: tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); michael@0: uval = uval / radix; michael@0: } while (uval != 0); michael@0: michael@0: /* copy converted number into user buffer */ michael@0: uprv_strcpy(buffer+length, tbuf+tbx); michael@0: length += sizeof(tbuf) - tbx -1; michael@0: return length; michael@0: } michael@0: michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: T_CString_stringToInteger(const char *integerString, int32_t radix) michael@0: { michael@0: char *end; michael@0: return uprv_strtoul(integerString, &end, radix); michael@0: michael@0: } michael@0: michael@0: U_CAPI int U_EXPORT2 michael@0: uprv_stricmp(const char *str1, const char *str2) { michael@0: if(str1==NULL) { michael@0: if(str2==NULL) { michael@0: return 0; michael@0: } else { michael@0: return -1; michael@0: } michael@0: } else if(str2==NULL) { michael@0: return 1; michael@0: } else { michael@0: /* compare non-NULL strings lexically with lowercase */ michael@0: int rc; michael@0: unsigned char c1, c2; michael@0: michael@0: for(;;) { michael@0: c1=(unsigned char)*str1; michael@0: c2=(unsigned char)*str2; michael@0: if(c1==0) { michael@0: if(c2==0) { michael@0: return 0; michael@0: } else { michael@0: return -1; michael@0: } michael@0: } else if(c2==0) { michael@0: return 1; michael@0: } else { michael@0: /* compare non-zero characters with lowercase */ michael@0: rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); michael@0: if(rc!=0) { michael@0: return rc; michael@0: } michael@0: } michael@0: ++str1; michael@0: ++str2; michael@0: } michael@0: } michael@0: } michael@0: michael@0: U_CAPI int U_EXPORT2 michael@0: uprv_strnicmp(const char *str1, const char *str2, uint32_t n) { michael@0: if(str1==NULL) { michael@0: if(str2==NULL) { michael@0: return 0; michael@0: } else { michael@0: return -1; michael@0: } michael@0: } else if(str2==NULL) { michael@0: return 1; michael@0: } else { michael@0: /* compare non-NULL strings lexically with lowercase */ michael@0: int rc; michael@0: unsigned char c1, c2; michael@0: michael@0: for(; n--;) { michael@0: c1=(unsigned char)*str1; michael@0: c2=(unsigned char)*str2; michael@0: if(c1==0) { michael@0: if(c2==0) { michael@0: return 0; michael@0: } else { michael@0: return -1; michael@0: } michael@0: } else if(c2==0) { michael@0: return 1; michael@0: } else { michael@0: /* compare non-zero characters with lowercase */ michael@0: rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); michael@0: if(rc!=0) { michael@0: return rc; michael@0: } michael@0: } michael@0: ++str1; michael@0: ++str2; michael@0: } michael@0: } michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: U_CAPI char* U_EXPORT2 michael@0: uprv_strdup(const char *src) { michael@0: size_t len = uprv_strlen(src) + 1; michael@0: char *dup = (char *) uprv_malloc(len); michael@0: michael@0: if (dup) { michael@0: uprv_memcpy(dup, src, len); michael@0: } michael@0: michael@0: return dup; michael@0: } michael@0: michael@0: U_CAPI char* U_EXPORT2 michael@0: uprv_strndup(const char *src, int32_t n) { michael@0: char *dup; michael@0: michael@0: if(n < 0) { michael@0: dup = uprv_strdup(src); michael@0: } else { michael@0: dup = (char*)uprv_malloc(n+1); michael@0: if (dup) { michael@0: uprv_memcpy(dup, src, n); michael@0: dup[n] = 0; michael@0: } michael@0: } michael@0: michael@0: return dup; michael@0: }