intl/icu/source/common/cstring.c

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1997-2011, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ******************************************************************************
michael@0 8 *
michael@0 9 * File CSTRING.C
michael@0 10 *
michael@0 11 * @author Helena Shih
michael@0 12 *
michael@0 13 * Modification History:
michael@0 14 *
michael@0 15 * Date Name Description
michael@0 16 * 6/18/98 hshih Created
michael@0 17 * 09/08/98 stephen Added include for ctype, for Mac Port
michael@0 18 * 11/15/99 helena Integrated S/390 IEEE changes.
michael@0 19 ******************************************************************************
michael@0 20 */
michael@0 21
michael@0 22
michael@0 23
michael@0 24 #include <stdlib.h>
michael@0 25 #include <stdio.h>
michael@0 26 #include "unicode/utypes.h"
michael@0 27 #include "cmemory.h"
michael@0 28 #include "cstring.h"
michael@0 29 #include "uassert.h"
michael@0 30
michael@0 31 /*
michael@0 32 * We hardcode case conversion for invariant characters to match our expectation
michael@0 33 * and the compiler execution charset.
michael@0 34 * This prevents problems on systems
michael@0 35 * - with non-default casing behavior, like Turkish system locales where
michael@0 36 * tolower('I') maps to dotless i and toupper('i') maps to dotted I
michael@0 37 * - where there are no lowercase Latin characters at all, or using different
michael@0 38 * codes (some old EBCDIC codepages)
michael@0 39 *
michael@0 40 * This works because the compiler usually runs on a platform where the execution
michael@0 41 * charset includes all of the invariant characters at their expected
michael@0 42 * code positions, so that the char * string literals in ICU code match
michael@0 43 * the char literals here.
michael@0 44 *
michael@0 45 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
michael@0 46 * and the set of uppercase Latin letters is discontiguous as well.
michael@0 47 */
michael@0 48
michael@0 49 U_CAPI UBool U_EXPORT2
michael@0 50 uprv_isASCIILetter(char c) {
michael@0 51 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
michael@0 52 return
michael@0 53 ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
michael@0 54 ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
michael@0 55 #else
michael@0 56 return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
michael@0 57 #endif
michael@0 58 }
michael@0 59
michael@0 60 U_CAPI char U_EXPORT2
michael@0 61 uprv_toupper(char c) {
michael@0 62 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
michael@0 63 if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
michael@0 64 c=(char)(c+('A'-'a'));
michael@0 65 }
michael@0 66 #else
michael@0 67 if('a'<=c && c<='z') {
michael@0 68 c=(char)(c+('A'-'a'));
michael@0 69 }
michael@0 70 #endif
michael@0 71 return c;
michael@0 72 }
michael@0 73
michael@0 74
#if 0
/*
 * Disabled: cstring.h defines uprv_tolower() as either uprv_asciitolower()
 * or uprv_ebcdictolower(), which reduces the amount of code that tests
 * have to cover.
 *
 * This definition is kept for reference. It is likely to work for most
 * charset families, not just ASCII and EBCDIC, because its #else branch
 * is written generically.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    const int isUppercase =
        ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
#else
    const int isUppercase = ('A'<=c && c<='Z');
#endif
    return isUppercase ? (char)(c+('a'-'A')) : c;
}
#endif
michael@0 99
michael@0 100 U_CAPI char U_EXPORT2
michael@0 101 uprv_asciitolower(char c) {
michael@0 102 if(0x41<=c && c<=0x5a) {
michael@0 103 c=(char)(c+0x20);
michael@0 104 }
michael@0 105 return c;
michael@0 106 }
michael@0 107
michael@0 108 U_CAPI char U_EXPORT2
michael@0 109 uprv_ebcdictolower(char c) {
michael@0 110 if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
michael@0 111 (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
michael@0 112 (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
michael@0 113 ) {
michael@0 114 c=(char)(c-0x40);
michael@0 115 }
michael@0 116 return c;
michael@0 117 }
michael@0 118
michael@0 119
michael@0 120 U_CAPI char* U_EXPORT2
michael@0 121 T_CString_toLowerCase(char* str)
michael@0 122 {
michael@0 123 char* origPtr = str;
michael@0 124
michael@0 125 if (str) {
michael@0 126 do
michael@0 127 *str = (char)uprv_tolower(*str);
michael@0 128 while (*(str++));
michael@0 129 }
michael@0 130
michael@0 131 return origPtr;
michael@0 132 }
michael@0 133
michael@0 134 U_CAPI char* U_EXPORT2
michael@0 135 T_CString_toUpperCase(char* str)
michael@0 136 {
michael@0 137 char* origPtr = str;
michael@0 138
michael@0 139 if (str) {
michael@0 140 do
michael@0 141 *str = (char)uprv_toupper(*str);
michael@0 142 while (*(str++));
michael@0 143 }
michael@0 144
michael@0 145 return origPtr;
michael@0 146 }
michael@0 147
michael@0 148 /*
michael@0 149 * Takes a int32_t and fills in a char* string with that number "radix"-based.
michael@0 150 * Does not handle negative values (makes an empty string for them).
michael@0 151 * Writes at most 12 chars ("-2147483647" plus NUL).
michael@0 152 * Returns the length of the string (not including the NUL).
michael@0 153 */
michael@0 154 U_CAPI int32_t U_EXPORT2
michael@0 155 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
michael@0 156 {
michael@0 157 char tbuf[30];
michael@0 158 int32_t tbx = sizeof(tbuf);
michael@0 159 uint8_t digit;
michael@0 160 int32_t length = 0;
michael@0 161 uint32_t uval;
michael@0 162
michael@0 163 U_ASSERT(radix>=2 && radix<=16);
michael@0 164 uval = (uint32_t) v;
michael@0 165 if(v<0 && radix == 10) {
michael@0 166 /* Only in base 10 do we conside numbers to be signed. */
michael@0 167 uval = (uint32_t)(-v);
michael@0 168 buffer[length++] = '-';
michael@0 169 }
michael@0 170
michael@0 171 tbx = sizeof(tbuf)-1;
michael@0 172 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
michael@0 173 do {
michael@0 174 digit = (uint8_t)(uval % radix);
michael@0 175 tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
michael@0 176 uval = uval / radix;
michael@0 177 } while (uval != 0);
michael@0 178
michael@0 179 /* copy converted number into user buffer */
michael@0 180 uprv_strcpy(buffer+length, tbuf+tbx);
michael@0 181 length += sizeof(tbuf) - tbx -1;
michael@0 182 return length;
michael@0 183 }
michael@0 184
michael@0 185
michael@0 186
michael@0 187 /*
michael@0 188 * Takes a int64_t and fills in a char* string with that number "radix"-based.
michael@0 189 * Writes at most 21: chars ("-9223372036854775807" plus NUL).
michael@0 190 * Returns the length of the string, not including the terminating NULL.
michael@0 191 */
michael@0 192 U_CAPI int32_t U_EXPORT2
michael@0 193 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
michael@0 194 {
michael@0 195 char tbuf[30];
michael@0 196 int32_t tbx = sizeof(tbuf);
michael@0 197 uint8_t digit;
michael@0 198 int32_t length = 0;
michael@0 199 uint64_t uval;
michael@0 200
michael@0 201 U_ASSERT(radix>=2 && radix<=16);
michael@0 202 uval = (uint64_t) v;
michael@0 203 if(v<0 && radix == 10) {
michael@0 204 /* Only in base 10 do we conside numbers to be signed. */
michael@0 205 uval = (uint64_t)(-v);
michael@0 206 buffer[length++] = '-';
michael@0 207 }
michael@0 208
michael@0 209 tbx = sizeof(tbuf)-1;
michael@0 210 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
michael@0 211 do {
michael@0 212 digit = (uint8_t)(uval % radix);
michael@0 213 tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
michael@0 214 uval = uval / radix;
michael@0 215 } while (uval != 0);
michael@0 216
michael@0 217 /* copy converted number into user buffer */
michael@0 218 uprv_strcpy(buffer+length, tbuf+tbx);
michael@0 219 length += sizeof(tbuf) - tbx -1;
michael@0 220 return length;
michael@0 221 }
michael@0 222
michael@0 223
michael@0 224 U_CAPI int32_t U_EXPORT2
michael@0 225 T_CString_stringToInteger(const char *integerString, int32_t radix)
michael@0 226 {
michael@0 227 char *end;
michael@0 228 return uprv_strtoul(integerString, &end, radix);
michael@0 229
michael@0 230 }
michael@0 231
michael@0 232 U_CAPI int U_EXPORT2
michael@0 233 uprv_stricmp(const char *str1, const char *str2) {
michael@0 234 if(str1==NULL) {
michael@0 235 if(str2==NULL) {
michael@0 236 return 0;
michael@0 237 } else {
michael@0 238 return -1;
michael@0 239 }
michael@0 240 } else if(str2==NULL) {
michael@0 241 return 1;
michael@0 242 } else {
michael@0 243 /* compare non-NULL strings lexically with lowercase */
michael@0 244 int rc;
michael@0 245 unsigned char c1, c2;
michael@0 246
michael@0 247 for(;;) {
michael@0 248 c1=(unsigned char)*str1;
michael@0 249 c2=(unsigned char)*str2;
michael@0 250 if(c1==0) {
michael@0 251 if(c2==0) {
michael@0 252 return 0;
michael@0 253 } else {
michael@0 254 return -1;
michael@0 255 }
michael@0 256 } else if(c2==0) {
michael@0 257 return 1;
michael@0 258 } else {
michael@0 259 /* compare non-zero characters with lowercase */
michael@0 260 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
michael@0 261 if(rc!=0) {
michael@0 262 return rc;
michael@0 263 }
michael@0 264 }
michael@0 265 ++str1;
michael@0 266 ++str2;
michael@0 267 }
michael@0 268 }
michael@0 269 }
michael@0 270
michael@0 271 U_CAPI int U_EXPORT2
michael@0 272 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
michael@0 273 if(str1==NULL) {
michael@0 274 if(str2==NULL) {
michael@0 275 return 0;
michael@0 276 } else {
michael@0 277 return -1;
michael@0 278 }
michael@0 279 } else if(str2==NULL) {
michael@0 280 return 1;
michael@0 281 } else {
michael@0 282 /* compare non-NULL strings lexically with lowercase */
michael@0 283 int rc;
michael@0 284 unsigned char c1, c2;
michael@0 285
michael@0 286 for(; n--;) {
michael@0 287 c1=(unsigned char)*str1;
michael@0 288 c2=(unsigned char)*str2;
michael@0 289 if(c1==0) {
michael@0 290 if(c2==0) {
michael@0 291 return 0;
michael@0 292 } else {
michael@0 293 return -1;
michael@0 294 }
michael@0 295 } else if(c2==0) {
michael@0 296 return 1;
michael@0 297 } else {
michael@0 298 /* compare non-zero characters with lowercase */
michael@0 299 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
michael@0 300 if(rc!=0) {
michael@0 301 return rc;
michael@0 302 }
michael@0 303 }
michael@0 304 ++str1;
michael@0 305 ++str2;
michael@0 306 }
michael@0 307 }
michael@0 308
michael@0 309 return 0;
michael@0 310 }
michael@0 311
michael@0 312 U_CAPI char* U_EXPORT2
michael@0 313 uprv_strdup(const char *src) {
michael@0 314 size_t len = uprv_strlen(src) + 1;
michael@0 315 char *dup = (char *) uprv_malloc(len);
michael@0 316
michael@0 317 if (dup) {
michael@0 318 uprv_memcpy(dup, src, len);
michael@0 319 }
michael@0 320
michael@0 321 return dup;
michael@0 322 }
michael@0 323
michael@0 324 U_CAPI char* U_EXPORT2
michael@0 325 uprv_strndup(const char *src, int32_t n) {
michael@0 326 char *dup;
michael@0 327
michael@0 328 if(n < 0) {
michael@0 329 dup = uprv_strdup(src);
michael@0 330 } else {
michael@0 331 dup = (char*)uprv_malloc(n+1);
michael@0 332 if (dup) {
michael@0 333 uprv_memcpy(dup, src, n);
michael@0 334 dup[n] = 0;
michael@0 335 }
michael@0 336 }
michael@0 337
michael@0 338 return dup;
michael@0 339 }

mercurial