intl/icu/source/common/uinvchar.c

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1999-2010, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: uinvchar.c
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:2
michael@0 12 *
michael@0 13 * created on: 2004sep14
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * Functions for handling invariant characters, moved here from putil.c
michael@0 17 * for better modularization.
michael@0 18 */
michael@0 19
michael@0 20 #include "unicode/utypes.h"
michael@0 21 #include "unicode/ustring.h"
michael@0 22 #include "udataswp.h"
michael@0 23 #include "cstring.h"
michael@0 24 #include "cmemory.h"
michael@0 25 #include "uassert.h"
michael@0 26 #include "uinvchar.h"
michael@0 27
michael@0 28 /* invariant-character handling --------------------------------------------- */
michael@0 29
/*
 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
 * appropriately for most EBCDIC codepages.
 *
 * They currently also map most other ASCII graphic characters,
 * appropriately for codepages 37 and 1047.
 * Exceptions: The characters for []^ have different codes in 37 & 1047.
 * Both versions are mapped to ASCII.
 *
 *    ASCII     37      1047
 *    [ 5B      BA      AD
 *    ] 5D      BB      BD
 *    ^ 5E      B0      5F
 *
 * There are no mappings for variant characters from Unicode to EBCDIC.
 *
 * Currently, C0 control codes are also included in these maps.
 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
 * but there is no mapping for ASCII LF back to EBCDIC.
 *
 *    ASCII     EBCDIC  S/390-OE
 *    LF  0A    25      15
 *    NEL 85    15      25
 *
 * The maps below explicitly exclude the variant
 * control and graphical characters that are in ASCII-based
 * codepages at 0x80 and above.
 * "No mapping" is expressed by mapping to a 00 byte.
 *
 * These tables do not establish a converter or a codepage.
 */

/* Indexed by EBCDIC byte value; yields the ASCII code, or 0x00 for "no mapping". */
static const uint8_t asciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
michael@0 84
/*
 * Indexed by ASCII code; yields the EBCDIC byte, or 0x00 for "no mapping".
 * Variant ASCII characters and everything at 0x80 and above map to 0x00.
 */
static const uint8_t ebcdicFromAscii[256]={
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
    0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,

    0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
    0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
    0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
michael@0 106
/*
 * Same as asciiFromEbcdic[] except maps all letters to lowercase.
 * Only the uppercase-letter ranges (EBCDIC C1..C9, D1..D9, E2..E9) differ from
 * asciiFromEbcdic[]; non-letters such as EBCDIC E0 (backslash, ASCII 5C) keep
 * their asciiFromEbcdic[] value.
 */
static const uint8_t lowercaseAsciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x5c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
michael@0 129
/*
 * Bit sets indicating which characters of the ASCII repertoire
 * (by ASCII/Unicode code) are "invariant".
 * See utypes.h for more details.
 *
 * As invariant are considered the characters of the ASCII repertoire except
 * for the following:
 * 21  '!' <exclamation mark>
 * 23  '#' <number sign>
 * 24  '$' <dollar sign>
 *
 * 40  '@' <commercial at>
 *
 * 5b  '[' <left bracket>
 * 5c  '\' <backslash>
 * 5d  ']' <right bracket>
 * 5e  '^' <circumflex>
 *
 * 60  '`' <grave accent>
 *
 * 7b  '{' <left brace>
 * 7c  '|' <vertical line>
 * 7d  '}' <right brace>
 * 7e  '~' <tilde>
 *
 * Layout: word (c>>5) holds codes 32*(c>>5)..32*(c>>5)+31, bit (c&0x1f) is
 * set when code c is invariant.
 */
static const uint32_t invariantChars[4]={
    0xfffffbff, /* 00..1f but not 0a */
    0xffffffe5, /* 20..3f but not 21 23 24 */
    0x87fffffe, /* 40..5f but not 40 5b..5e */
    0x87fffffe  /* 60..7f but not 60 7b..7e */
};
michael@0 161
/*
 * test unsigned types (or values known to be non-negative) for invariant characters,
 * tests ASCII-family character values
 *
 * Note: the argument is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)

/* test signed types for invariant characters, adds test for positive values */
#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))

/*
 * Convert between the platform charset and ASCII/Unicode code values.
 * On ASCII platforms these are identity macros; on EBCDIC platforms they
 * index the conversion tables. Expansions and arguments are parenthesized
 * so that compound argument expressions expand safely.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) (c)
#define UCHAR_TO_CHAR(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR(u) asciiFromEbcdic[(u)]
#define UCHAR_TO_CHAR(u) ebcdicFromAscii[(u)]
#else
#   error U_CHARSET_FAMILY is not valid
#endif
michael@0 180
michael@0 181
michael@0 182 U_CAPI void U_EXPORT2
michael@0 183 u_charsToUChars(const char *cs, UChar *us, int32_t length) {
michael@0 184 UChar u;
michael@0 185 uint8_t c;
michael@0 186
michael@0 187 /*
michael@0 188 * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
michael@0 189 * For EBCDIC systems, this works for characters with codes from
michael@0 190 * codepages 37 and 1047 or compatible.
michael@0 191 */
michael@0 192 while(length>0) {
michael@0 193 c=(uint8_t)(*cs++);
michael@0 194 u=(UChar)CHAR_TO_UCHAR(c);
michael@0 195 U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */
michael@0 196 *us++=u;
michael@0 197 --length;
michael@0 198 }
michael@0 199 }
michael@0 200
michael@0 201 U_CAPI void U_EXPORT2
michael@0 202 u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
michael@0 203 UChar u;
michael@0 204
michael@0 205 while(length>0) {
michael@0 206 u=*us++;
michael@0 207 if(!UCHAR_IS_INVARIANT(u)) {
michael@0 208 U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
michael@0 209 u=0;
michael@0 210 }
michael@0 211 *cs++=(char)UCHAR_TO_CHAR(u);
michael@0 212 --length;
michael@0 213 }
michael@0 214 }
michael@0 215
michael@0 216 U_CAPI UBool U_EXPORT2
michael@0 217 uprv_isInvariantString(const char *s, int32_t length) {
michael@0 218 uint8_t c;
michael@0 219
michael@0 220 for(;;) {
michael@0 221 if(length<0) {
michael@0 222 /* NUL-terminated */
michael@0 223 c=(uint8_t)*s++;
michael@0 224 if(c==0) {
michael@0 225 break;
michael@0 226 }
michael@0 227 } else {
michael@0 228 /* count length */
michael@0 229 if(length==0) {
michael@0 230 break;
michael@0 231 }
michael@0 232 --length;
michael@0 233 c=(uint8_t)*s++;
michael@0 234 if(c==0) {
michael@0 235 continue; /* NUL is invariant */
michael@0 236 }
michael@0 237 }
michael@0 238 /* c!=0 now, one branch below checks c==0 for variant characters */
michael@0 239
michael@0 240 /*
michael@0 241 * no assertions here because these functions are legitimately called
michael@0 242 * for strings with variant characters
michael@0 243 */
michael@0 244 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
michael@0 245 if(!UCHAR_IS_INVARIANT(c)) {
michael@0 246 return FALSE; /* found a variant char */
michael@0 247 }
michael@0 248 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
michael@0 249 c=CHAR_TO_UCHAR(c);
michael@0 250 if(c==0 || !UCHAR_IS_INVARIANT(c)) {
michael@0 251 return FALSE; /* found a variant char */
michael@0 252 }
michael@0 253 #else
michael@0 254 # error U_CHARSET_FAMILY is not valid
michael@0 255 #endif
michael@0 256 }
michael@0 257 return TRUE;
michael@0 258 }
michael@0 259
michael@0 260 U_CAPI UBool U_EXPORT2
michael@0 261 uprv_isInvariantUString(const UChar *s, int32_t length) {
michael@0 262 UChar c;
michael@0 263
michael@0 264 for(;;) {
michael@0 265 if(length<0) {
michael@0 266 /* NUL-terminated */
michael@0 267 c=*s++;
michael@0 268 if(c==0) {
michael@0 269 break;
michael@0 270 }
michael@0 271 } else {
michael@0 272 /* count length */
michael@0 273 if(length==0) {
michael@0 274 break;
michael@0 275 }
michael@0 276 --length;
michael@0 277 c=*s++;
michael@0 278 }
michael@0 279
michael@0 280 /*
michael@0 281 * no assertions here because these functions are legitimately called
michael@0 282 * for strings with variant characters
michael@0 283 */
michael@0 284 if(!UCHAR_IS_INVARIANT(c)) {
michael@0 285 return FALSE; /* found a variant char */
michael@0 286 }
michael@0 287 }
michael@0 288 return TRUE;
michael@0 289 }
michael@0 290
michael@0 291 /* UDataSwapFn implementations used in udataswp.c ------- */
michael@0 292
michael@0 293 /* convert ASCII to EBCDIC and verify that all characters are invariant */
michael@0 294 U_CAPI int32_t U_EXPORT2
michael@0 295 uprv_ebcdicFromAscii(const UDataSwapper *ds,
michael@0 296 const void *inData, int32_t length, void *outData,
michael@0 297 UErrorCode *pErrorCode) {
michael@0 298 const uint8_t *s;
michael@0 299 uint8_t *t;
michael@0 300 uint8_t c;
michael@0 301
michael@0 302 int32_t count;
michael@0 303
michael@0 304 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 305 return 0;
michael@0 306 }
michael@0 307 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
michael@0 308 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 309 return 0;
michael@0 310 }
michael@0 311
michael@0 312 /* setup and swapping */
michael@0 313 s=(const uint8_t *)inData;
michael@0 314 t=(uint8_t *)outData;
michael@0 315 count=length;
michael@0 316 while(count>0) {
michael@0 317 c=*s++;
michael@0 318 if(!UCHAR_IS_INVARIANT(c)) {
michael@0 319 udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
michael@0 320 length, length-count);
michael@0 321 *pErrorCode=U_INVALID_CHAR_FOUND;
michael@0 322 return 0;
michael@0 323 }
michael@0 324 *t++=ebcdicFromAscii[c];
michael@0 325 --count;
michael@0 326 }
michael@0 327
michael@0 328 return length;
michael@0 329 }
michael@0 330
michael@0 331 /* this function only checks and copies ASCII strings without conversion */
michael@0 332 U_CFUNC int32_t
michael@0 333 uprv_copyAscii(const UDataSwapper *ds,
michael@0 334 const void *inData, int32_t length, void *outData,
michael@0 335 UErrorCode *pErrorCode) {
michael@0 336 const uint8_t *s;
michael@0 337 uint8_t c;
michael@0 338
michael@0 339 int32_t count;
michael@0 340
michael@0 341 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 342 return 0;
michael@0 343 }
michael@0 344 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
michael@0 345 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 346 return 0;
michael@0 347 }
michael@0 348
michael@0 349 /* setup and checking */
michael@0 350 s=(const uint8_t *)inData;
michael@0 351 count=length;
michael@0 352 while(count>0) {
michael@0 353 c=*s++;
michael@0 354 if(!UCHAR_IS_INVARIANT(c)) {
michael@0 355 udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
michael@0 356 length, length-count);
michael@0 357 *pErrorCode=U_INVALID_CHAR_FOUND;
michael@0 358 return 0;
michael@0 359 }
michael@0 360 --count;
michael@0 361 }
michael@0 362
michael@0 363 if(length>0 && inData!=outData) {
michael@0 364 uprv_memcpy(outData, inData, length);
michael@0 365 }
michael@0 366
michael@0 367 return length;
michael@0 368 }
michael@0 369
michael@0 370 /* convert EBCDIC to ASCII and verify that all characters are invariant */
michael@0 371 U_CFUNC int32_t
michael@0 372 uprv_asciiFromEbcdic(const UDataSwapper *ds,
michael@0 373 const void *inData, int32_t length, void *outData,
michael@0 374 UErrorCode *pErrorCode) {
michael@0 375 const uint8_t *s;
michael@0 376 uint8_t *t;
michael@0 377 uint8_t c;
michael@0 378
michael@0 379 int32_t count;
michael@0 380
michael@0 381 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 382 return 0;
michael@0 383 }
michael@0 384 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
michael@0 385 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 386 return 0;
michael@0 387 }
michael@0 388
michael@0 389 /* setup and swapping */
michael@0 390 s=(const uint8_t *)inData;
michael@0 391 t=(uint8_t *)outData;
michael@0 392 count=length;
michael@0 393 while(count>0) {
michael@0 394 c=*s++;
michael@0 395 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
michael@0 396 udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
michael@0 397 length, length-count);
michael@0 398 *pErrorCode=U_INVALID_CHAR_FOUND;
michael@0 399 return 0;
michael@0 400 }
michael@0 401 *t++=c;
michael@0 402 --count;
michael@0 403 }
michael@0 404
michael@0 405 return length;
michael@0 406 }
michael@0 407
michael@0 408 /* this function only checks and copies EBCDIC strings without conversion */
michael@0 409 U_CFUNC int32_t
michael@0 410 uprv_copyEbcdic(const UDataSwapper *ds,
michael@0 411 const void *inData, int32_t length, void *outData,
michael@0 412 UErrorCode *pErrorCode) {
michael@0 413 const uint8_t *s;
michael@0 414 uint8_t c;
michael@0 415
michael@0 416 int32_t count;
michael@0 417
michael@0 418 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 419 return 0;
michael@0 420 }
michael@0 421 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
michael@0 422 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 423 return 0;
michael@0 424 }
michael@0 425
michael@0 426 /* setup and checking */
michael@0 427 s=(const uint8_t *)inData;
michael@0 428 count=length;
michael@0 429 while(count>0) {
michael@0 430 c=*s++;
michael@0 431 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
michael@0 432 udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
michael@0 433 length, length-count);
michael@0 434 *pErrorCode=U_INVALID_CHAR_FOUND;
michael@0 435 return 0;
michael@0 436 }
michael@0 437 --count;
michael@0 438 }
michael@0 439
michael@0 440 if(length>0 && inData!=outData) {
michael@0 441 uprv_memcpy(outData, inData, length);
michael@0 442 }
michael@0 443
michael@0 444 return length;
michael@0 445 }
michael@0 446
michael@0 447 /* compare invariant strings; variant characters compare less than others and unlike each other */
michael@0 448 U_CFUNC int32_t
michael@0 449 uprv_compareInvAscii(const UDataSwapper *ds,
michael@0 450 const char *outString, int32_t outLength,
michael@0 451 const UChar *localString, int32_t localLength) {
michael@0 452 int32_t minLength;
michael@0 453 UChar32 c1, c2;
michael@0 454 uint8_t c;
michael@0 455
michael@0 456 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
michael@0 457 return 0;
michael@0 458 }
michael@0 459
michael@0 460 if(outLength<0) {
michael@0 461 outLength=(int32_t)uprv_strlen(outString);
michael@0 462 }
michael@0 463 if(localLength<0) {
michael@0 464 localLength=u_strlen(localString);
michael@0 465 }
michael@0 466
michael@0 467 minLength= outLength<localLength ? outLength : localLength;
michael@0 468
michael@0 469 while(minLength>0) {
michael@0 470 c=(uint8_t)*outString++;
michael@0 471 if(UCHAR_IS_INVARIANT(c)) {
michael@0 472 c1=c;
michael@0 473 } else {
michael@0 474 c1=-1;
michael@0 475 }
michael@0 476
michael@0 477 c2=*localString++;
michael@0 478 if(!UCHAR_IS_INVARIANT(c2)) {
michael@0 479 c2=-2;
michael@0 480 }
michael@0 481
michael@0 482 if((c1-=c2)!=0) {
michael@0 483 return c1;
michael@0 484 }
michael@0 485
michael@0 486 --minLength;
michael@0 487 }
michael@0 488
michael@0 489 /* strings start with same prefix, compare lengths */
michael@0 490 return outLength-localLength;
michael@0 491 }
michael@0 492
michael@0 493 U_CFUNC int32_t
michael@0 494 uprv_compareInvEbcdic(const UDataSwapper *ds,
michael@0 495 const char *outString, int32_t outLength,
michael@0 496 const UChar *localString, int32_t localLength) {
michael@0 497 int32_t minLength;
michael@0 498 UChar32 c1, c2;
michael@0 499 uint8_t c;
michael@0 500
michael@0 501 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
michael@0 502 return 0;
michael@0 503 }
michael@0 504
michael@0 505 if(outLength<0) {
michael@0 506 outLength=(int32_t)uprv_strlen(outString);
michael@0 507 }
michael@0 508 if(localLength<0) {
michael@0 509 localLength=u_strlen(localString);
michael@0 510 }
michael@0 511
michael@0 512 minLength= outLength<localLength ? outLength : localLength;
michael@0 513
michael@0 514 while(minLength>0) {
michael@0 515 c=(uint8_t)*outString++;
michael@0 516 if(c==0) {
michael@0 517 c1=0;
michael@0 518 } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
michael@0 519 /* c1 is set */
michael@0 520 } else {
michael@0 521 c1=-1;
michael@0 522 }
michael@0 523
michael@0 524 c2=*localString++;
michael@0 525 if(!UCHAR_IS_INVARIANT(c2)) {
michael@0 526 c2=-2;
michael@0 527 }
michael@0 528
michael@0 529 if((c1-=c2)!=0) {
michael@0 530 return c1;
michael@0 531 }
michael@0 532
michael@0 533 --minLength;
michael@0 534 }
michael@0 535
michael@0 536 /* strings start with same prefix, compare lengths */
michael@0 537 return outLength-localLength;
michael@0 538 }
michael@0 539
michael@0 540 U_CAPI int32_t U_EXPORT2
michael@0 541 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
michael@0 542 int32_t c1, c2;
michael@0 543
michael@0 544 for(;; ++s1, ++s2) {
michael@0 545 c1=(uint8_t)*s1;
michael@0 546 c2=(uint8_t)*s2;
michael@0 547 if(c1!=c2) {
michael@0 548 if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
michael@0 549 c1=-(int32_t)(uint8_t)*s1;
michael@0 550 }
michael@0 551 if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
michael@0 552 c2=-(int32_t)(uint8_t)*s2;
michael@0 553 }
michael@0 554 return c1-c2;
michael@0 555 } else if(c1==0) {
michael@0 556 return 0;
michael@0 557 }
michael@0 558 }
michael@0 559 }
michael@0 560
michael@0 561 U_CAPI char U_EXPORT2
michael@0 562 uprv_ebcdicToLowercaseAscii(char c) {
michael@0 563 return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
michael@0 564 }
michael@0 565
michael@0 566 U_INTERNAL uint8_t* U_EXPORT2
michael@0 567 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
michael@0 568 {
michael@0 569 uint8_t *orig_dst = dst;
michael@0 570
michael@0 571 if(n==-1) {
michael@0 572 n = uprv_strlen((const char*)src)+1; /* copy NUL */
michael@0 573 }
michael@0 574 /* copy non-null */
michael@0 575 while(*src && n>0) {
michael@0 576 *(dst++) = asciiFromEbcdic[*(src++)];
michael@0 577 n--;
michael@0 578 }
michael@0 579 /* pad */
michael@0 580 while(n>0) {
michael@0 581 *(dst++) = 0;
michael@0 582 n--;
michael@0 583 }
michael@0 584 return orig_dst;
michael@0 585 }
michael@0 586
michael@0 587 U_INTERNAL uint8_t* U_EXPORT2
michael@0 588 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
michael@0 589 {
michael@0 590 uint8_t *orig_dst = dst;
michael@0 591
michael@0 592 if(n==-1) {
michael@0 593 n = uprv_strlen((const char*)src)+1; /* copy NUL */
michael@0 594 }
michael@0 595 /* copy non-null */
michael@0 596 while(*src && n>0) {
michael@0 597 char ch = ebcdicFromAscii[*(src++)];
michael@0 598 if(ch == 0) {
michael@0 599 ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
michael@0 600 }
michael@0 601 *(dst++) = ch;
michael@0 602 n--;
michael@0 603 }
michael@0 604 /* pad */
michael@0 605 while(n>0) {
michael@0 606 *(dst++) = 0;
michael@0 607 n--;
michael@0 608 }
michael@0 609 return orig_dst;
michael@0 610 }
michael@0 611

mercurial