Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 1999-2010, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: uinvchar.c |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:2 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2004sep14 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | * |
michael@0 | 16 | * Functions for handling invariant characters, moved here from putil.c |
michael@0 | 17 | * for better modularization. |
michael@0 | 18 | */ |
michael@0 | 19 | |
michael@0 | 20 | #include "unicode/utypes.h" |
michael@0 | 21 | #include "unicode/ustring.h" |
michael@0 | 22 | #include "udataswp.h" |
michael@0 | 23 | #include "cstring.h" |
michael@0 | 24 | #include "cmemory.h" |
michael@0 | 25 | #include "uassert.h" |
michael@0 | 26 | #include "uinvchar.h" |
michael@0 | 27 | |
michael@0 | 28 | /* invariant-character handling --------------------------------------------- */ |
michael@0 | 29 | |
michael@0 | 30 | /* |
michael@0 | 31 | * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h) |
michael@0 | 32 | * appropriately for most EBCDIC codepages. |
michael@0 | 33 | * |
michael@0 | 34 | * They currently also map most other ASCII graphic characters, |
michael@0 | 35 | * appropriately for codepages 37 and 1047. |
michael@0 | 36 | * Exceptions: The characters for []^ have different codes in 37 & 1047. |
michael@0 | 37 | * Both versions are mapped to ASCII. |
michael@0 | 38 | * |
michael@0 | 39 | * ASCII 37 1047 |
michael@0 | 40 | * [ 5B BA AD |
michael@0 | 41 | * ] 5D BB BD |
michael@0 | 42 | * ^ 5E B0 5F |
michael@0 | 43 | * |
michael@0 | 44 | * There are no mappings for variant characters from Unicode to EBCDIC. |
michael@0 | 45 | * |
michael@0 | 46 | * Currently, C0 control codes are also included in these maps. |
michael@0 | 47 | * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other |
michael@0 | 48 | * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A), |
michael@0 | 49 | * but there is no mapping for ASCII LF back to EBCDIC. |
michael@0 | 50 | * |
michael@0 | 51 | * ASCII EBCDIC S/390-OE |
michael@0 | 52 | * LF 0A 25 15 |
michael@0 | 53 | * NEL 85 15 25 |
michael@0 | 54 | * |
michael@0 | 55 | * The maps below explicitly exclude the variant |
michael@0 | 56 | * control and graphical characters that are in ASCII-based |
michael@0 | 57 | * codepages at 0x80 and above. |
michael@0 | 58 | * "No mapping" is expressed by mapping to a 00 byte. |
michael@0 | 59 | * |
michael@0 | 60 | * These tables do not establish a converter or a codepage. |
michael@0 | 61 | */ |
michael@0 | 62 | |
/*
 * EBCDIC-to-ASCII byte map, indexed by the EBCDIC code.
 * A 00 entry at a non-zero index expresses "no mapping".
 * One row per 16 codes; the leading comment is the index of the row's first entry.
 */
static const uint8_t asciiFromEbcdic[256]={
    /* 00 */ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 10 */ 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 20 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    /* 30 */ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    /* 40 */ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    /* 50 */ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    /* 60 */ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    /* 70 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    /* 80 */ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 90 */ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* a0 */ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    /* b0 */ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    /* c0 */ 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* d0 */ 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* e0 */ 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* f0 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
michael@0 | 84 | |
/*
 * ASCII-to-EBCDIC byte map, indexed by the ASCII code.
 * A 00 entry at a non-zero index expresses "no mapping"; every index
 * from 80 upward is unmapped.
 * One row per 16 codes; the leading comment is the index of the row's first entry.
 */
static const uint8_t ebcdicFromAscii[256]={
    /* 00 */ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 10 */ 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 20 */ 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
    /* 30 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,

    /* 40 */ 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
    /* 50 */ 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
    /* 60 */ 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    /* 70 */ 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,

    /* 80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* a0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* b0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

    /* c0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* d0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* e0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* f0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
michael@0 | 106 | |
/*
 * Same as asciiFromEbcdic[] except that the EBCDIC uppercase letters
 * (rows c0, d0 and e0) map to ASCII lowercase letters.
 * All non-letter entries, including e0 (backslash, 5c), are identical
 * to asciiFromEbcdic[].
 * One row per 16 codes; the leading comment is the index of the row's first entry.
 */
static const uint8_t lowercaseAsciiFromEbcdic[256]={
    /* 00 */ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 10 */ 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 20 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    /* 30 */ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    /* 40 */ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    /* 50 */ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    /* 60 */ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    /* 70 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    /* 80 */ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 90 */ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* a0 */ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    /* b0 */ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    /* c0 */ 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* d0 */ 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* e0: first entry is backslash, not a letter, so it must stay 5c (was 7c) */
    /* e0 */ 0x5c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* f0 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
michael@0 | 129 | |
michael@0 | 130 | /* |
michael@0 | 131 | * Bit sets indicating which characters of the ASCII repertoire |
michael@0 | 132 | * (by ASCII/Unicode code) are "invariant". |
michael@0 | 133 | * See utypes.h for more details. |
michael@0 | 134 | * |
michael@0 | 135 | * As invariant are considered the characters of the ASCII repertoire except |
michael@0 | 136 | * for the following: |
michael@0 | 137 | * 21 '!' <exclamation mark> |
michael@0 | 138 | * 23 '#' <number sign> |
michael@0 | 139 | * 24 '$' <dollar sign> |
michael@0 | 140 | * |
michael@0 | 141 | * 40 '@' <commercial at> |
michael@0 | 142 | * |
michael@0 | 143 | * 5b '[' <left bracket> |
michael@0 | 144 | * 5c '\' <backslash> |
michael@0 | 145 | * 5d ']' <right bracket> |
michael@0 | 146 | * 5e '^' <circumflex> |
michael@0 | 147 | * |
michael@0 | 148 | * 60 '`' <grave accent> |
michael@0 | 149 | * |
michael@0 | 150 | * 7b '{' <left brace> |
michael@0 | 151 | * 7c '|' <vertical line> |
michael@0 | 152 | * 7d '}' <right brace> |
michael@0 | 153 | * 7e '~' <tilde> |
michael@0 | 154 | */ |
/* One 32-bit word per block of 32 ASCII codes; bit (c&0x1f) of word (c>>5) is set when c is invariant. */
static const uint32_t invariantChars[4]={
    /* 00..1f but not 0a */
    0xfffffbff,
    /* 20..3f but not 21 23 24 */
    0xffffffe5,
    /* 40..5f but not 40 5b..5e */
    0x87fffffe,
    /* 60..7f but not 60 7b..7e */
    0x87fffffe
};
michael@0 | 161 | |
/*
 * Test an ASCII-family character value for being an invariant character.
 * For unsigned types, or values known to be non-negative.
 * The argument is evaluated more than once: pass only side-effect-free expressions.
 */
#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)

/* Same test for signed types: additionally requires a non-negative value. */
#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))

/*
 * Conversion between the platform charset and ASCII/Unicode code values.
 * On ASCII platforms these are the identity; on EBCDIC platforms they go
 * through the mapping tables. The expansions are fully parenthesized so that
 * they bind as a single operand inside larger expressions.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) (c)
#define UCHAR_TO_CHAR(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR(u) (asciiFromEbcdic[u])
#define UCHAR_TO_CHAR(u) (ebcdicFromAscii[u])
#else
#   error U_CHARSET_FAMILY is not valid
#endif
michael@0 | 180 | |
michael@0 | 181 | |
michael@0 | 182 | U_CAPI void U_EXPORT2 |
michael@0 | 183 | u_charsToUChars(const char *cs, UChar *us, int32_t length) { |
michael@0 | 184 | UChar u; |
michael@0 | 185 | uint8_t c; |
michael@0 | 186 | |
michael@0 | 187 | /* |
michael@0 | 188 | * Allow the entire ASCII repertoire to be mapped _to_ Unicode. |
michael@0 | 189 | * For EBCDIC systems, this works for characters with codes from |
michael@0 | 190 | * codepages 37 and 1047 or compatible. |
michael@0 | 191 | */ |
michael@0 | 192 | while(length>0) { |
michael@0 | 193 | c=(uint8_t)(*cs++); |
michael@0 | 194 | u=(UChar)CHAR_TO_UCHAR(c); |
michael@0 | 195 | U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */ |
michael@0 | 196 | *us++=u; |
michael@0 | 197 | --length; |
michael@0 | 198 | } |
michael@0 | 199 | } |
michael@0 | 200 | |
michael@0 | 201 | U_CAPI void U_EXPORT2 |
michael@0 | 202 | u_UCharsToChars(const UChar *us, char *cs, int32_t length) { |
michael@0 | 203 | UChar u; |
michael@0 | 204 | |
michael@0 | 205 | while(length>0) { |
michael@0 | 206 | u=*us++; |
michael@0 | 207 | if(!UCHAR_IS_INVARIANT(u)) { |
michael@0 | 208 | U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */ |
michael@0 | 209 | u=0; |
michael@0 | 210 | } |
michael@0 | 211 | *cs++=(char)UCHAR_TO_CHAR(u); |
michael@0 | 212 | --length; |
michael@0 | 213 | } |
michael@0 | 214 | } |
michael@0 | 215 | |
michael@0 | 216 | U_CAPI UBool U_EXPORT2 |
michael@0 | 217 | uprv_isInvariantString(const char *s, int32_t length) { |
michael@0 | 218 | uint8_t c; |
michael@0 | 219 | |
michael@0 | 220 | for(;;) { |
michael@0 | 221 | if(length<0) { |
michael@0 | 222 | /* NUL-terminated */ |
michael@0 | 223 | c=(uint8_t)*s++; |
michael@0 | 224 | if(c==0) { |
michael@0 | 225 | break; |
michael@0 | 226 | } |
michael@0 | 227 | } else { |
michael@0 | 228 | /* count length */ |
michael@0 | 229 | if(length==0) { |
michael@0 | 230 | break; |
michael@0 | 231 | } |
michael@0 | 232 | --length; |
michael@0 | 233 | c=(uint8_t)*s++; |
michael@0 | 234 | if(c==0) { |
michael@0 | 235 | continue; /* NUL is invariant */ |
michael@0 | 236 | } |
michael@0 | 237 | } |
michael@0 | 238 | /* c!=0 now, one branch below checks c==0 for variant characters */ |
michael@0 | 239 | |
michael@0 | 240 | /* |
michael@0 | 241 | * no assertions here because these functions are legitimately called |
michael@0 | 242 | * for strings with variant characters |
michael@0 | 243 | */ |
michael@0 | 244 | #if U_CHARSET_FAMILY==U_ASCII_FAMILY |
michael@0 | 245 | if(!UCHAR_IS_INVARIANT(c)) { |
michael@0 | 246 | return FALSE; /* found a variant char */ |
michael@0 | 247 | } |
michael@0 | 248 | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY |
michael@0 | 249 | c=CHAR_TO_UCHAR(c); |
michael@0 | 250 | if(c==0 || !UCHAR_IS_INVARIANT(c)) { |
michael@0 | 251 | return FALSE; /* found a variant char */ |
michael@0 | 252 | } |
michael@0 | 253 | #else |
michael@0 | 254 | # error U_CHARSET_FAMILY is not valid |
michael@0 | 255 | #endif |
michael@0 | 256 | } |
michael@0 | 257 | return TRUE; |
michael@0 | 258 | } |
michael@0 | 259 | |
michael@0 | 260 | U_CAPI UBool U_EXPORT2 |
michael@0 | 261 | uprv_isInvariantUString(const UChar *s, int32_t length) { |
michael@0 | 262 | UChar c; |
michael@0 | 263 | |
michael@0 | 264 | for(;;) { |
michael@0 | 265 | if(length<0) { |
michael@0 | 266 | /* NUL-terminated */ |
michael@0 | 267 | c=*s++; |
michael@0 | 268 | if(c==0) { |
michael@0 | 269 | break; |
michael@0 | 270 | } |
michael@0 | 271 | } else { |
michael@0 | 272 | /* count length */ |
michael@0 | 273 | if(length==0) { |
michael@0 | 274 | break; |
michael@0 | 275 | } |
michael@0 | 276 | --length; |
michael@0 | 277 | c=*s++; |
michael@0 | 278 | } |
michael@0 | 279 | |
michael@0 | 280 | /* |
michael@0 | 281 | * no assertions here because these functions are legitimately called |
michael@0 | 282 | * for strings with variant characters |
michael@0 | 283 | */ |
michael@0 | 284 | if(!UCHAR_IS_INVARIANT(c)) { |
michael@0 | 285 | return FALSE; /* found a variant char */ |
michael@0 | 286 | } |
michael@0 | 287 | } |
michael@0 | 288 | return TRUE; |
michael@0 | 289 | } |
michael@0 | 290 | |
michael@0 | 291 | /* UDataSwapFn implementations used in udataswp.c ------- */ |
michael@0 | 292 | |
michael@0 | 293 | /* convert ASCII to EBCDIC and verify that all characters are invariant */ |
michael@0 | 294 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 295 | uprv_ebcdicFromAscii(const UDataSwapper *ds, |
michael@0 | 296 | const void *inData, int32_t length, void *outData, |
michael@0 | 297 | UErrorCode *pErrorCode) { |
michael@0 | 298 | const uint8_t *s; |
michael@0 | 299 | uint8_t *t; |
michael@0 | 300 | uint8_t c; |
michael@0 | 301 | |
michael@0 | 302 | int32_t count; |
michael@0 | 303 | |
michael@0 | 304 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 305 | return 0; |
michael@0 | 306 | } |
michael@0 | 307 | if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { |
michael@0 | 308 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 309 | return 0; |
michael@0 | 310 | } |
michael@0 | 311 | |
michael@0 | 312 | /* setup and swapping */ |
michael@0 | 313 | s=(const uint8_t *)inData; |
michael@0 | 314 | t=(uint8_t *)outData; |
michael@0 | 315 | count=length; |
michael@0 | 316 | while(count>0) { |
michael@0 | 317 | c=*s++; |
michael@0 | 318 | if(!UCHAR_IS_INVARIANT(c)) { |
michael@0 | 319 | udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n", |
michael@0 | 320 | length, length-count); |
michael@0 | 321 | *pErrorCode=U_INVALID_CHAR_FOUND; |
michael@0 | 322 | return 0; |
michael@0 | 323 | } |
michael@0 | 324 | *t++=ebcdicFromAscii[c]; |
michael@0 | 325 | --count; |
michael@0 | 326 | } |
michael@0 | 327 | |
michael@0 | 328 | return length; |
michael@0 | 329 | } |
michael@0 | 330 | |
michael@0 | 331 | /* this function only checks and copies ASCII strings without conversion */ |
michael@0 | 332 | U_CFUNC int32_t |
michael@0 | 333 | uprv_copyAscii(const UDataSwapper *ds, |
michael@0 | 334 | const void *inData, int32_t length, void *outData, |
michael@0 | 335 | UErrorCode *pErrorCode) { |
michael@0 | 336 | const uint8_t *s; |
michael@0 | 337 | uint8_t c; |
michael@0 | 338 | |
michael@0 | 339 | int32_t count; |
michael@0 | 340 | |
michael@0 | 341 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 342 | return 0; |
michael@0 | 343 | } |
michael@0 | 344 | if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { |
michael@0 | 345 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 346 | return 0; |
michael@0 | 347 | } |
michael@0 | 348 | |
michael@0 | 349 | /* setup and checking */ |
michael@0 | 350 | s=(const uint8_t *)inData; |
michael@0 | 351 | count=length; |
michael@0 | 352 | while(count>0) { |
michael@0 | 353 | c=*s++; |
michael@0 | 354 | if(!UCHAR_IS_INVARIANT(c)) { |
michael@0 | 355 | udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n", |
michael@0 | 356 | length, length-count); |
michael@0 | 357 | *pErrorCode=U_INVALID_CHAR_FOUND; |
michael@0 | 358 | return 0; |
michael@0 | 359 | } |
michael@0 | 360 | --count; |
michael@0 | 361 | } |
michael@0 | 362 | |
michael@0 | 363 | if(length>0 && inData!=outData) { |
michael@0 | 364 | uprv_memcpy(outData, inData, length); |
michael@0 | 365 | } |
michael@0 | 366 | |
michael@0 | 367 | return length; |
michael@0 | 368 | } |
michael@0 | 369 | |
michael@0 | 370 | /* convert EBCDIC to ASCII and verify that all characters are invariant */ |
michael@0 | 371 | U_CFUNC int32_t |
michael@0 | 372 | uprv_asciiFromEbcdic(const UDataSwapper *ds, |
michael@0 | 373 | const void *inData, int32_t length, void *outData, |
michael@0 | 374 | UErrorCode *pErrorCode) { |
michael@0 | 375 | const uint8_t *s; |
michael@0 | 376 | uint8_t *t; |
michael@0 | 377 | uint8_t c; |
michael@0 | 378 | |
michael@0 | 379 | int32_t count; |
michael@0 | 380 | |
michael@0 | 381 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 382 | return 0; |
michael@0 | 383 | } |
michael@0 | 384 | if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { |
michael@0 | 385 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 386 | return 0; |
michael@0 | 387 | } |
michael@0 | 388 | |
michael@0 | 389 | /* setup and swapping */ |
michael@0 | 390 | s=(const uint8_t *)inData; |
michael@0 | 391 | t=(uint8_t *)outData; |
michael@0 | 392 | count=length; |
michael@0 | 393 | while(count>0) { |
michael@0 | 394 | c=*s++; |
michael@0 | 395 | if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { |
michael@0 | 396 | udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n", |
michael@0 | 397 | length, length-count); |
michael@0 | 398 | *pErrorCode=U_INVALID_CHAR_FOUND; |
michael@0 | 399 | return 0; |
michael@0 | 400 | } |
michael@0 | 401 | *t++=c; |
michael@0 | 402 | --count; |
michael@0 | 403 | } |
michael@0 | 404 | |
michael@0 | 405 | return length; |
michael@0 | 406 | } |
michael@0 | 407 | |
michael@0 | 408 | /* this function only checks and copies EBCDIC strings without conversion */ |
michael@0 | 409 | U_CFUNC int32_t |
michael@0 | 410 | uprv_copyEbcdic(const UDataSwapper *ds, |
michael@0 | 411 | const void *inData, int32_t length, void *outData, |
michael@0 | 412 | UErrorCode *pErrorCode) { |
michael@0 | 413 | const uint8_t *s; |
michael@0 | 414 | uint8_t c; |
michael@0 | 415 | |
michael@0 | 416 | int32_t count; |
michael@0 | 417 | |
michael@0 | 418 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 419 | return 0; |
michael@0 | 420 | } |
michael@0 | 421 | if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { |
michael@0 | 422 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 423 | return 0; |
michael@0 | 424 | } |
michael@0 | 425 | |
michael@0 | 426 | /* setup and checking */ |
michael@0 | 427 | s=(const uint8_t *)inData; |
michael@0 | 428 | count=length; |
michael@0 | 429 | while(count>0) { |
michael@0 | 430 | c=*s++; |
michael@0 | 431 | if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { |
michael@0 | 432 | udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n", |
michael@0 | 433 | length, length-count); |
michael@0 | 434 | *pErrorCode=U_INVALID_CHAR_FOUND; |
michael@0 | 435 | return 0; |
michael@0 | 436 | } |
michael@0 | 437 | --count; |
michael@0 | 438 | } |
michael@0 | 439 | |
michael@0 | 440 | if(length>0 && inData!=outData) { |
michael@0 | 441 | uprv_memcpy(outData, inData, length); |
michael@0 | 442 | } |
michael@0 | 443 | |
michael@0 | 444 | return length; |
michael@0 | 445 | } |
michael@0 | 446 | |
michael@0 | 447 | /* compare invariant strings; variant characters compare less than others and unlike each other */ |
michael@0 | 448 | U_CFUNC int32_t |
michael@0 | 449 | uprv_compareInvAscii(const UDataSwapper *ds, |
michael@0 | 450 | const char *outString, int32_t outLength, |
michael@0 | 451 | const UChar *localString, int32_t localLength) { |
michael@0 | 452 | int32_t minLength; |
michael@0 | 453 | UChar32 c1, c2; |
michael@0 | 454 | uint8_t c; |
michael@0 | 455 | |
michael@0 | 456 | if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { |
michael@0 | 457 | return 0; |
michael@0 | 458 | } |
michael@0 | 459 | |
michael@0 | 460 | if(outLength<0) { |
michael@0 | 461 | outLength=(int32_t)uprv_strlen(outString); |
michael@0 | 462 | } |
michael@0 | 463 | if(localLength<0) { |
michael@0 | 464 | localLength=u_strlen(localString); |
michael@0 | 465 | } |
michael@0 | 466 | |
michael@0 | 467 | minLength= outLength<localLength ? outLength : localLength; |
michael@0 | 468 | |
michael@0 | 469 | while(minLength>0) { |
michael@0 | 470 | c=(uint8_t)*outString++; |
michael@0 | 471 | if(UCHAR_IS_INVARIANT(c)) { |
michael@0 | 472 | c1=c; |
michael@0 | 473 | } else { |
michael@0 | 474 | c1=-1; |
michael@0 | 475 | } |
michael@0 | 476 | |
michael@0 | 477 | c2=*localString++; |
michael@0 | 478 | if(!UCHAR_IS_INVARIANT(c2)) { |
michael@0 | 479 | c2=-2; |
michael@0 | 480 | } |
michael@0 | 481 | |
michael@0 | 482 | if((c1-=c2)!=0) { |
michael@0 | 483 | return c1; |
michael@0 | 484 | } |
michael@0 | 485 | |
michael@0 | 486 | --minLength; |
michael@0 | 487 | } |
michael@0 | 488 | |
michael@0 | 489 | /* strings start with same prefix, compare lengths */ |
michael@0 | 490 | return outLength-localLength; |
michael@0 | 491 | } |
michael@0 | 492 | |
michael@0 | 493 | U_CFUNC int32_t |
michael@0 | 494 | uprv_compareInvEbcdic(const UDataSwapper *ds, |
michael@0 | 495 | const char *outString, int32_t outLength, |
michael@0 | 496 | const UChar *localString, int32_t localLength) { |
michael@0 | 497 | int32_t minLength; |
michael@0 | 498 | UChar32 c1, c2; |
michael@0 | 499 | uint8_t c; |
michael@0 | 500 | |
michael@0 | 501 | if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { |
michael@0 | 502 | return 0; |
michael@0 | 503 | } |
michael@0 | 504 | |
michael@0 | 505 | if(outLength<0) { |
michael@0 | 506 | outLength=(int32_t)uprv_strlen(outString); |
michael@0 | 507 | } |
michael@0 | 508 | if(localLength<0) { |
michael@0 | 509 | localLength=u_strlen(localString); |
michael@0 | 510 | } |
michael@0 | 511 | |
michael@0 | 512 | minLength= outLength<localLength ? outLength : localLength; |
michael@0 | 513 | |
michael@0 | 514 | while(minLength>0) { |
michael@0 | 515 | c=(uint8_t)*outString++; |
michael@0 | 516 | if(c==0) { |
michael@0 | 517 | c1=0; |
michael@0 | 518 | } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) { |
michael@0 | 519 | /* c1 is set */ |
michael@0 | 520 | } else { |
michael@0 | 521 | c1=-1; |
michael@0 | 522 | } |
michael@0 | 523 | |
michael@0 | 524 | c2=*localString++; |
michael@0 | 525 | if(!UCHAR_IS_INVARIANT(c2)) { |
michael@0 | 526 | c2=-2; |
michael@0 | 527 | } |
michael@0 | 528 | |
michael@0 | 529 | if((c1-=c2)!=0) { |
michael@0 | 530 | return c1; |
michael@0 | 531 | } |
michael@0 | 532 | |
michael@0 | 533 | --minLength; |
michael@0 | 534 | } |
michael@0 | 535 | |
michael@0 | 536 | /* strings start with same prefix, compare lengths */ |
michael@0 | 537 | return outLength-localLength; |
michael@0 | 538 | } |
michael@0 | 539 | |
michael@0 | 540 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 541 | uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { |
michael@0 | 542 | int32_t c1, c2; |
michael@0 | 543 | |
michael@0 | 544 | for(;; ++s1, ++s2) { |
michael@0 | 545 | c1=(uint8_t)*s1; |
michael@0 | 546 | c2=(uint8_t)*s2; |
michael@0 | 547 | if(c1!=c2) { |
michael@0 | 548 | if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) { |
michael@0 | 549 | c1=-(int32_t)(uint8_t)*s1; |
michael@0 | 550 | } |
michael@0 | 551 | if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) { |
michael@0 | 552 | c2=-(int32_t)(uint8_t)*s2; |
michael@0 | 553 | } |
michael@0 | 554 | return c1-c2; |
michael@0 | 555 | } else if(c1==0) { |
michael@0 | 556 | return 0; |
michael@0 | 557 | } |
michael@0 | 558 | } |
michael@0 | 559 | } |
michael@0 | 560 | |
michael@0 | 561 | U_CAPI char U_EXPORT2 |
michael@0 | 562 | uprv_ebcdicToLowercaseAscii(char c) { |
michael@0 | 563 | return (char)lowercaseAsciiFromEbcdic[(uint8_t)c]; |
michael@0 | 564 | } |
michael@0 | 565 | |
michael@0 | 566 | U_INTERNAL uint8_t* U_EXPORT2 |
michael@0 | 567 | uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) |
michael@0 | 568 | { |
michael@0 | 569 | uint8_t *orig_dst = dst; |
michael@0 | 570 | |
michael@0 | 571 | if(n==-1) { |
michael@0 | 572 | n = uprv_strlen((const char*)src)+1; /* copy NUL */ |
michael@0 | 573 | } |
michael@0 | 574 | /* copy non-null */ |
michael@0 | 575 | while(*src && n>0) { |
michael@0 | 576 | *(dst++) = asciiFromEbcdic[*(src++)]; |
michael@0 | 577 | n--; |
michael@0 | 578 | } |
michael@0 | 579 | /* pad */ |
michael@0 | 580 | while(n>0) { |
michael@0 | 581 | *(dst++) = 0; |
michael@0 | 582 | n--; |
michael@0 | 583 | } |
michael@0 | 584 | return orig_dst; |
michael@0 | 585 | } |
michael@0 | 586 | |
michael@0 | 587 | U_INTERNAL uint8_t* U_EXPORT2 |
michael@0 | 588 | uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) |
michael@0 | 589 | { |
michael@0 | 590 | uint8_t *orig_dst = dst; |
michael@0 | 591 | |
michael@0 | 592 | if(n==-1) { |
michael@0 | 593 | n = uprv_strlen((const char*)src)+1; /* copy NUL */ |
michael@0 | 594 | } |
michael@0 | 595 | /* copy non-null */ |
michael@0 | 596 | while(*src && n>0) { |
michael@0 | 597 | char ch = ebcdicFromAscii[*(src++)]; |
michael@0 | 598 | if(ch == 0) { |
michael@0 | 599 | ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */ |
michael@0 | 600 | } |
michael@0 | 601 | *(dst++) = ch; |
michael@0 | 602 | n--; |
michael@0 | 603 | } |
michael@0 | 604 | /* pad */ |
michael@0 | 605 | while(n>0) { |
michael@0 | 606 | *(dst++) = 0; |
michael@0 | 607 | n--; |
michael@0 | 608 | } |
michael@0 | 609 | return orig_dst; |
michael@0 | 610 | } |
michael@0 | 611 |