1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/cstring.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,339 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* 1.7 +* Copyright (C) 1997-2011, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +****************************************************************************** 1.11 +* 1.12 +* File CSTRING.C 1.13 +* 1.14 +* @author Helena Shih 1.15 +* 1.16 +* Modification History: 1.17 +* 1.18 +* Date Name Description 1.19 +* 6/18/98 hshih Created 1.20 +* 09/08/98 stephen Added include for ctype, for Mac Port 1.21 +* 11/15/99 helena Integrated S/390 IEEE changes. 1.22 +****************************************************************************** 1.23 +*/ 1.24 + 1.25 + 1.26 + 1.27 +#include <stdlib.h> 1.28 +#include <stdio.h> 1.29 +#include "unicode/utypes.h" 1.30 +#include "cmemory.h" 1.31 +#include "cstring.h" 1.32 +#include "uassert.h" 1.33 + 1.34 +/* 1.35 + * We hardcode case conversion for invariant characters to match our expectation 1.36 + * and the compiler execution charset. 1.37 + * This prevents problems on systems 1.38 + * - with non-default casing behavior, like Turkish system locales where 1.39 + * tolower('I') maps to dotless i and toupper('i') maps to dotted I 1.40 + * - where there are no lowercase Latin characters at all, or using different 1.41 + * codes (some old EBCDIC codepages) 1.42 + * 1.43 + * This works because the compiler usually runs on a platform where the execution 1.44 + * charset includes all of the invariant characters at their expected 1.45 + * code positions, so that the char * string literals in ICU code match 1.46 + * the char literals here. 1.47 + * 1.48 + * Note that the set of lowercase Latin letters is discontiguous in EBCDIC 1.49 + * and the set of uppercase Latin letters is discontiguous as well. 1.50 + */ 1.51 + 1.52 +U_CAPI UBool U_EXPORT2 1.53 +uprv_isASCIILetter(char c) { 1.54 +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 1.55 + return 1.56 + ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') || 1.57 + ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z'); 1.58 +#else 1.59 + return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); 1.60 +#endif 1.61 +} 1.62 + 1.63 +U_CAPI char U_EXPORT2 1.64 +uprv_toupper(char c) { 1.65 +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 1.66 + if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { 1.67 + c=(char)(c+('A'-'a')); 1.68 + } 1.69 +#else 1.70 + if('a'<=c && c<='z') { 1.71 + c=(char)(c+('A'-'a')); 1.72 + } 1.73 +#endif 1.74 + return c; 1.75 +} 1.76 + 1.77 + 1.78 +#if 0 1.79 +/* 1.80 + * Commented out because cstring.h defines uprv_tolower() to be 1.81 + * the same as either uprv_asciitolower() or uprv_ebcdictolower() 1.82 + * to reduce the amount of code to cover with tests. 1.83 + * 1.84 + * Note that this uprv_tolower() definition is likely to work for most 1.85 + * charset families, not just ASCII and EBCDIC, because its #else branch 1.86 + * is written generically. 1.87 + */ 1.88 +U_CAPI char U_EXPORT2 1.89 +uprv_tolower(char c) { 1.90 +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 1.91 + if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { 1.92 + c=(char)(c+('a'-'A')); 1.93 + } 1.94 +#else 1.95 + if('A'<=c && c<='Z') { 1.96 + c=(char)(c+('a'-'A')); 1.97 + } 1.98 +#endif 1.99 + return c; 1.100 +} 1.101 +#endif 1.102 + 1.103 +U_CAPI char U_EXPORT2 1.104 +uprv_asciitolower(char c) { 1.105 + if(0x41<=c && c<=0x5a) { 1.106 + c=(char)(c+0x20); 1.107 + } 1.108 + return c; 1.109 +} 1.110 + 1.111 +U_CAPI char U_EXPORT2 1.112 +uprv_ebcdictolower(char c) { 1.113 + if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || 1.114 + (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || 1.115 + (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) 1.116 + ) { 1.117 + c=(char)(c-0x40); 1.118 + } 1.119 + return c; 1.120 +} 1.121 + 1.122 + 1.123 +U_CAPI char* U_EXPORT2 1.124 +T_CString_toLowerCase(char* str) 1.125 +{ 1.126 + char* origPtr = str; 1.127 + 1.128 + if (str) { 1.129 + do 1.130 + *str = (char)uprv_tolower(*str); 1.131 + while (*(str++)); 1.132 + } 1.133 + 1.134 + return origPtr; 1.135 +} 1.136 + 1.137 +U_CAPI char* U_EXPORT2 1.138 +T_CString_toUpperCase(char* str) 1.139 +{ 1.140 + char* origPtr = str; 1.141 + 1.142 + if (str) { 1.143 + do 1.144 + *str = (char)uprv_toupper(*str); 1.145 + while (*(str++)); 1.146 + } 1.147 + 1.148 + return origPtr; 1.149 +} 1.150 + 1.151 +/* 1.152 + * Takes a int32_t and fills in a char* string with that number "radix"-based. 1.153 + * Does not handle negative values (makes an empty string for them). 1.154 + * Writes at most 12 chars ("-2147483647" plus NUL). 1.155 + * Returns the length of the string (not including the NUL). 1.156 + */ 1.157 +U_CAPI int32_t U_EXPORT2 1.158 +T_CString_integerToString(char* buffer, int32_t v, int32_t radix) 1.159 +{ 1.160 + char tbuf[30]; 1.161 + int32_t tbx = sizeof(tbuf); 1.162 + uint8_t digit; 1.163 + int32_t length = 0; 1.164 + uint32_t uval; 1.165 + 1.166 + U_ASSERT(radix>=2 && radix<=16); 1.167 + uval = (uint32_t) v; 1.168 + if(v<0 && radix == 10) { 1.169 + /* Only in base 10 do we conside numbers to be signed. */ 1.170 + uval = (uint32_t)(-v); 1.171 + buffer[length++] = '-'; 1.172 + } 1.173 + 1.174 + tbx = sizeof(tbuf)-1; 1.175 + tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 1.176 + do { 1.177 + digit = (uint8_t)(uval % radix); 1.178 + tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 1.179 + uval = uval / radix; 1.180 + } while (uval != 0); 1.181 + 1.182 + /* copy converted number into user buffer */ 1.183 + uprv_strcpy(buffer+length, tbuf+tbx); 1.184 + length += sizeof(tbuf) - tbx -1; 1.185 + return length; 1.186 +} 1.187 + 1.188 + 1.189 + 1.190 +/* 1.191 + * Takes a int64_t and fills in a char* string with that number "radix"-based. 1.192 + * Writes at most 21: chars ("-9223372036854775807" plus NUL). 1.193 + * Returns the length of the string, not including the terminating NULL. 1.194 + */ 1.195 +U_CAPI int32_t U_EXPORT2 1.196 +T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) 1.197 +{ 1.198 + char tbuf[30]; 1.199 + int32_t tbx = sizeof(tbuf); 1.200 + uint8_t digit; 1.201 + int32_t length = 0; 1.202 + uint64_t uval; 1.203 + 1.204 + U_ASSERT(radix>=2 && radix<=16); 1.205 + uval = (uint64_t) v; 1.206 + if(v<0 && radix == 10) { 1.207 + /* Only in base 10 do we conside numbers to be signed. */ 1.208 + uval = (uint64_t)(-v); 1.209 + buffer[length++] = '-'; 1.210 + } 1.211 + 1.212 + tbx = sizeof(tbuf)-1; 1.213 + tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 1.214 + do { 1.215 + digit = (uint8_t)(uval % radix); 1.216 + tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 1.217 + uval = uval / radix; 1.218 + } while (uval != 0); 1.219 + 1.220 + /* copy converted number into user buffer */ 1.221 + uprv_strcpy(buffer+length, tbuf+tbx); 1.222 + length += sizeof(tbuf) - tbx -1; 1.223 + return length; 1.224 +} 1.225 + 1.226 + 1.227 +U_CAPI int32_t U_EXPORT2 1.228 +T_CString_stringToInteger(const char *integerString, int32_t radix) 1.229 +{ 1.230 + char *end; 1.231 + return uprv_strtoul(integerString, &end, radix); 1.232 + 1.233 +} 1.234 + 1.235 +U_CAPI int U_EXPORT2 1.236 +uprv_stricmp(const char *str1, const char *str2) { 1.237 + if(str1==NULL) { 1.238 + if(str2==NULL) { 1.239 + return 0; 1.240 + } else { 1.241 + return -1; 1.242 + } 1.243 + } else if(str2==NULL) { 1.244 + return 1; 1.245 + } else { 1.246 + /* compare non-NULL strings lexically with lowercase */ 1.247 + int rc; 1.248 + unsigned char c1, c2; 1.249 + 1.250 + for(;;) { 1.251 + c1=(unsigned char)*str1; 1.252 + c2=(unsigned char)*str2; 1.253 + if(c1==0) { 1.254 + if(c2==0) { 1.255 + return 0; 1.256 + } else { 1.257 + return -1; 1.258 + } 1.259 + } else if(c2==0) { 1.260 + return 1; 1.261 + } else { 1.262 + /* compare non-zero characters with lowercase */ 1.263 + rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 1.264 + if(rc!=0) { 1.265 + return rc; 1.266 + } 1.267 + } 1.268 + ++str1; 1.269 + ++str2; 1.270 + } 1.271 + } 1.272 +} 1.273 + 1.274 +U_CAPI int U_EXPORT2 1.275 +uprv_strnicmp(const char *str1, const char *str2, uint32_t n) { 1.276 + if(str1==NULL) { 1.277 + if(str2==NULL) { 1.278 + return 0; 1.279 + } else { 1.280 + return -1; 1.281 + } 1.282 + } else if(str2==NULL) { 1.283 + return 1; 1.284 + } else { 1.285 + /* compare non-NULL strings lexically with lowercase */ 1.286 + int rc; 1.287 + unsigned char c1, c2; 1.288 + 1.289 + for(; n--;) { 1.290 + c1=(unsigned char)*str1; 1.291 + c2=(unsigned char)*str2; 1.292 + if(c1==0) { 1.293 + if(c2==0) { 1.294 + return 0; 1.295 + } else { 1.296 + return -1; 1.297 + } 1.298 + } else if(c2==0) { 1.299 + return 1; 1.300 + } else { 1.301 + /* compare non-zero characters with lowercase */ 1.302 + rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 1.303 + if(rc!=0) { 1.304 + return rc; 1.305 + } 1.306 + } 1.307 + ++str1; 1.308 + ++str2; 1.309 + } 1.310 + } 1.311 + 1.312 + return 0; 1.313 +} 1.314 + 1.315 +U_CAPI char* U_EXPORT2 1.316 +uprv_strdup(const char *src) { 1.317 + size_t len = uprv_strlen(src) + 1; 1.318 + char *dup = (char *) uprv_malloc(len); 1.319 + 1.320 + if (dup) { 1.321 + uprv_memcpy(dup, src, len); 1.322 + } 1.323 + 1.324 + return dup; 1.325 +} 1.326 + 1.327 +U_CAPI char* U_EXPORT2 1.328 +uprv_strndup(const char *src, int32_t n) { 1.329 + char *dup; 1.330 + 1.331 + if(n < 0) { 1.332 + dup = uprv_strdup(src); 1.333 + } else { 1.334 + dup = (char*)uprv_malloc(n+1); 1.335 + if (dup) { 1.336 + uprv_memcpy(dup, src, n); 1.337 + dup[n] = 0; 1.338 + } 1.339 + } 1.340 + 1.341 + return dup; 1.342 +}