1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/uinvchar.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,611 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 1999-2010, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: uinvchar.c 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:2 1.15 +* 1.16 +* created on: 2004sep14 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* Functions for handling invariant characters, moved here from putil.c 1.20 +* for better modularization. 1.21 +*/ 1.22 + 1.23 +#include "unicode/utypes.h" 1.24 +#include "unicode/ustring.h" 1.25 +#include "udataswp.h" 1.26 +#include "cstring.h" 1.27 +#include "cmemory.h" 1.28 +#include "uassert.h" 1.29 +#include "uinvchar.h" 1.30 + 1.31 +/* invariant-character handling --------------------------------------------- */ 1.32 + 1.33 +/* 1.34 + * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h) 1.35 + * appropriately for most EBCDIC codepages. 1.36 + * 1.37 + * They currently also map most other ASCII graphic characters, 1.38 + * appropriately for codepages 37 and 1047. 1.39 + * Exceptions: The characters for []^ have different codes in 37 & 1047. 1.40 + * Both versions are mapped to ASCII. 1.41 + * 1.42 + * ASCII 37 1047 1.43 + * [ 5B BA AD 1.44 + * ] 5D BB BD 1.45 + * ^ 5E B0 5F 1.46 + * 1.47 + * There are no mappings for variant characters from Unicode to EBCDIC. 1.48 + * 1.49 + * Currently, C0 control codes are also included in these maps. 1.50 + * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other 1.51 + * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A), 1.52 + * but there is no mapping for ASCII LF back to EBCDIC. 
*
 *      ASCII     EBCDIC   S/390-OE
 * LF    0A        25        15
 * NEL   85        15        25
 *
 * The maps below explicitly exclude the variant
 * control and graphical characters that are in ASCII-based
 * codepages at 0x80 and above.
 * "No mapping" is expressed by mapping to a 00 byte.
 *
 * These tables do not establish a converter or a codepage.
 */

/* Indexed by EBCDIC byte value; each entry is the ASCII code, or 0x00 for "no mapping". */
static const uint8_t asciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 00..0f */
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, /* 10..1f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, /* 20..2f */
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, /* 30..3f */

    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, /* 40..4f */
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, /* 50..5f */
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, /* 60..6f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, /* 70..7f */

    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 80..8f */
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 90..9f */
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, /* a0..af */
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, /* b0..bf */

    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c0..cf */
    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d0..df */
    0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* e0..ef */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* f0..ff */
};

/*
 * Indexed by ASCII code; each entry is the EBCDIC byte, or 0x00 for "no mapping".
 * Entries 80..ff are all zero, and so are the entries for the variant characters
 * and for LF (0a), in line with the notes above.
 */
static const uint8_t ebcdicFromAscii[256]={
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 00..0f */
    0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, /* 10..1f */
    0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, /* 20..2f */
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, /* 30..3f */

    0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, /* 40..4f */
    0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d, /* 50..5f */
    0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, /* 60..6f */
    0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07, /* 70..7f */

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80..8f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90..9f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a0..af */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* b0..bf */

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* c0..cf */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d0..df */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* e0..ef */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /* f0..ff */
};

/* Same as asciiFromEbcdic[] except maps all letters to lowercase.
*/
/*
 * Indexed by EBCDIC byte value; each entry is an ASCII code, or 0x00 for "no mapping".
 * The EBCDIC uppercase letters (c1..c9, d1..d9, e2..e9) yield ASCII a..z (61..7a),
 * exactly like the EBCDIC lowercase letters (81..89, 91..99, a2..a9).
 * Non-letters are not case-folded: in particular e0 (backslash) maps to 5c,
 * not to 7c, which is the vertical line already produced by EBCDIC 4f.
 */
static const uint8_t lowercaseAsciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 00..0f */
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, /* 10..1f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, /* 20..2f */
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, /* 30..3f */

    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, /* 40..4f */
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, /* 50..5f */
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, /* 60..6f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, /* 70..7f */

    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 80..8f */
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 90..9f */
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, /* a0..af */
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, /* b0..bf */

    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c0..cf */
    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d0..df */
    0x5c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* e0..ef */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* f0..ff */
};

/*
 * Bit sets indicating which characters of the ASCII repertoire
 * (by ASCII/Unicode code) are "invariant".
 * See utypes.h for more details.
1.137 + * 1.138 + * As invariant are considered the characters of the ASCII repertoire except 1.139 + * for the following: 1.140 + * 21 '!' <exclamation mark> 1.141 + * 23 '#' <number sign> 1.142 + * 24 '$' <dollar sign> 1.143 + * 1.144 + * 40 '@' <commercial at> 1.145 + * 1.146 + * 5b '[' <left bracket> 1.147 + * 5c '\' <backslash> 1.148 + * 5d ']' <right bracket> 1.149 + * 5e '^' <circumflex> 1.150 + * 1.151 + * 60 '`' <grave accent> 1.152 + * 1.153 + * 7b '{' <left brace> 1.154 + * 7c '|' <vertical line> 1.155 + * 7d '}' <right brace> 1.156 + * 7e '~' <tilde> 1.157 + */ 1.158 +static const uint32_t invariantChars[4]={ 1.159 + 0xfffffbff, /* 00..1f but not 0a */ 1.160 + 0xffffffe5, /* 20..3f but not 21 23 24 */ 1.161 + 0x87fffffe, /* 40..5f but not 40 5b..5e */ 1.162 + 0x87fffffe /* 60..7f but not 60 7b..7e */ 1.163 +}; 1.164 + 1.165 +/* 1.166 + * test unsigned types (or values known to be non-negative) for invariant characters, 1.167 + * tests ASCII-family character values 1.168 + */ 1.169 +#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0) 1.170 + 1.171 +/* test signed types for invariant characters, adds test for positive values */ 1.172 +#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c)) 1.173 + 1.174 +#if U_CHARSET_FAMILY==U_ASCII_FAMILY 1.175 +#define CHAR_TO_UCHAR(c) c 1.176 +#define UCHAR_TO_CHAR(c) c 1.177 +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 1.178 +#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u] 1.179 +#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u] 1.180 +#else 1.181 +# error U_CHARSET_FAMILY is not valid 1.182 +#endif 1.183 + 1.184 + 1.185 +U_CAPI void U_EXPORT2 1.186 +u_charsToUChars(const char *cs, UChar *us, int32_t length) { 1.187 + UChar u; 1.188 + uint8_t c; 1.189 + 1.190 + /* 1.191 + * Allow the entire ASCII repertoire to be mapped _to_ Unicode. 1.192 + * For EBCDIC systems, this works for characters with codes from 1.193 + * codepages 37 and 1047 or compatible. 
1.194 + */ 1.195 + while(length>0) { 1.196 + c=(uint8_t)(*cs++); 1.197 + u=(UChar)CHAR_TO_UCHAR(c); 1.198 + U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */ 1.199 + *us++=u; 1.200 + --length; 1.201 + } 1.202 +} 1.203 + 1.204 +U_CAPI void U_EXPORT2 1.205 +u_UCharsToChars(const UChar *us, char *cs, int32_t length) { 1.206 + UChar u; 1.207 + 1.208 + while(length>0) { 1.209 + u=*us++; 1.210 + if(!UCHAR_IS_INVARIANT(u)) { 1.211 + U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */ 1.212 + u=0; 1.213 + } 1.214 + *cs++=(char)UCHAR_TO_CHAR(u); 1.215 + --length; 1.216 + } 1.217 +} 1.218 + 1.219 +U_CAPI UBool U_EXPORT2 1.220 +uprv_isInvariantString(const char *s, int32_t length) { 1.221 + uint8_t c; 1.222 + 1.223 + for(;;) { 1.224 + if(length<0) { 1.225 + /* NUL-terminated */ 1.226 + c=(uint8_t)*s++; 1.227 + if(c==0) { 1.228 + break; 1.229 + } 1.230 + } else { 1.231 + /* count length */ 1.232 + if(length==0) { 1.233 + break; 1.234 + } 1.235 + --length; 1.236 + c=(uint8_t)*s++; 1.237 + if(c==0) { 1.238 + continue; /* NUL is invariant */ 1.239 + } 1.240 + } 1.241 + /* c!=0 now, one branch below checks c==0 for variant characters */ 1.242 + 1.243 + /* 1.244 + * no assertions here because these functions are legitimately called 1.245 + * for strings with variant characters 1.246 + */ 1.247 +#if U_CHARSET_FAMILY==U_ASCII_FAMILY 1.248 + if(!UCHAR_IS_INVARIANT(c)) { 1.249 + return FALSE; /* found a variant char */ 1.250 + } 1.251 +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 1.252 + c=CHAR_TO_UCHAR(c); 1.253 + if(c==0 || !UCHAR_IS_INVARIANT(c)) { 1.254 + return FALSE; /* found a variant char */ 1.255 + } 1.256 +#else 1.257 +# error U_CHARSET_FAMILY is not valid 1.258 +#endif 1.259 + } 1.260 + return TRUE; 1.261 +} 1.262 + 1.263 +U_CAPI UBool U_EXPORT2 1.264 +uprv_isInvariantUString(const UChar *s, int32_t length) { 1.265 + UChar c; 1.266 + 1.267 + for(;;) { 1.268 + if(length<0) { 1.269 + /* NUL-terminated */ 1.270 + c=*s++; 1.271 + if(c==0) 
{ 1.272 + break; 1.273 + } 1.274 + } else { 1.275 + /* count length */ 1.276 + if(length==0) { 1.277 + break; 1.278 + } 1.279 + --length; 1.280 + c=*s++; 1.281 + } 1.282 + 1.283 + /* 1.284 + * no assertions here because these functions are legitimately called 1.285 + * for strings with variant characters 1.286 + */ 1.287 + if(!UCHAR_IS_INVARIANT(c)) { 1.288 + return FALSE; /* found a variant char */ 1.289 + } 1.290 + } 1.291 + return TRUE; 1.292 +} 1.293 + 1.294 +/* UDataSwapFn implementations used in udataswp.c ------- */ 1.295 + 1.296 +/* convert ASCII to EBCDIC and verify that all characters are invariant */ 1.297 +U_CAPI int32_t U_EXPORT2 1.298 +uprv_ebcdicFromAscii(const UDataSwapper *ds, 1.299 + const void *inData, int32_t length, void *outData, 1.300 + UErrorCode *pErrorCode) { 1.301 + const uint8_t *s; 1.302 + uint8_t *t; 1.303 + uint8_t c; 1.304 + 1.305 + int32_t count; 1.306 + 1.307 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.308 + return 0; 1.309 + } 1.310 + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 1.311 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.312 + return 0; 1.313 + } 1.314 + 1.315 + /* setup and swapping */ 1.316 + s=(const uint8_t *)inData; 1.317 + t=(uint8_t *)outData; 1.318 + count=length; 1.319 + while(count>0) { 1.320 + c=*s++; 1.321 + if(!UCHAR_IS_INVARIANT(c)) { 1.322 + udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n", 1.323 + length, length-count); 1.324 + *pErrorCode=U_INVALID_CHAR_FOUND; 1.325 + return 0; 1.326 + } 1.327 + *t++=ebcdicFromAscii[c]; 1.328 + --count; 1.329 + } 1.330 + 1.331 + return length; 1.332 +} 1.333 + 1.334 +/* this function only checks and copies ASCII strings without conversion */ 1.335 +U_CFUNC int32_t 1.336 +uprv_copyAscii(const UDataSwapper *ds, 1.337 + const void *inData, int32_t length, void *outData, 1.338 + UErrorCode *pErrorCode) { 1.339 + const uint8_t *s; 1.340 + uint8_t c; 1.341 + 1.342 + int32_t count; 1.343 + 
1.344 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.345 + return 0; 1.346 + } 1.347 + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 1.348 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.349 + return 0; 1.350 + } 1.351 + 1.352 + /* setup and checking */ 1.353 + s=(const uint8_t *)inData; 1.354 + count=length; 1.355 + while(count>0) { 1.356 + c=*s++; 1.357 + if(!UCHAR_IS_INVARIANT(c)) { 1.358 + udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n", 1.359 + length, length-count); 1.360 + *pErrorCode=U_INVALID_CHAR_FOUND; 1.361 + return 0; 1.362 + } 1.363 + --count; 1.364 + } 1.365 + 1.366 + if(length>0 && inData!=outData) { 1.367 + uprv_memcpy(outData, inData, length); 1.368 + } 1.369 + 1.370 + return length; 1.371 +} 1.372 + 1.373 +/* convert EBCDIC to ASCII and verify that all characters are invariant */ 1.374 +U_CFUNC int32_t 1.375 +uprv_asciiFromEbcdic(const UDataSwapper *ds, 1.376 + const void *inData, int32_t length, void *outData, 1.377 + UErrorCode *pErrorCode) { 1.378 + const uint8_t *s; 1.379 + uint8_t *t; 1.380 + uint8_t c; 1.381 + 1.382 + int32_t count; 1.383 + 1.384 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.385 + return 0; 1.386 + } 1.387 + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 1.388 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.389 + return 0; 1.390 + } 1.391 + 1.392 + /* setup and swapping */ 1.393 + s=(const uint8_t *)inData; 1.394 + t=(uint8_t *)outData; 1.395 + count=length; 1.396 + while(count>0) { 1.397 + c=*s++; 1.398 + if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { 1.399 + udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n", 1.400 + length, length-count); 1.401 + *pErrorCode=U_INVALID_CHAR_FOUND; 1.402 + return 0; 1.403 + } 1.404 + *t++=c; 1.405 + --count; 1.406 + } 1.407 + 1.408 + return length; 1.409 +} 1.410 + 1.411 +/* this function only checks and copies 
EBCDIC strings without conversion */ 1.412 +U_CFUNC int32_t 1.413 +uprv_copyEbcdic(const UDataSwapper *ds, 1.414 + const void *inData, int32_t length, void *outData, 1.415 + UErrorCode *pErrorCode) { 1.416 + const uint8_t *s; 1.417 + uint8_t c; 1.418 + 1.419 + int32_t count; 1.420 + 1.421 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.422 + return 0; 1.423 + } 1.424 + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { 1.425 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.426 + return 0; 1.427 + } 1.428 + 1.429 + /* setup and checking */ 1.430 + s=(const uint8_t *)inData; 1.431 + count=length; 1.432 + while(count>0) { 1.433 + c=*s++; 1.434 + if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { 1.435 + udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n", 1.436 + length, length-count); 1.437 + *pErrorCode=U_INVALID_CHAR_FOUND; 1.438 + return 0; 1.439 + } 1.440 + --count; 1.441 + } 1.442 + 1.443 + if(length>0 && inData!=outData) { 1.444 + uprv_memcpy(outData, inData, length); 1.445 + } 1.446 + 1.447 + return length; 1.448 +} 1.449 + 1.450 +/* compare invariant strings; variant characters compare less than others and unlike each other */ 1.451 +U_CFUNC int32_t 1.452 +uprv_compareInvAscii(const UDataSwapper *ds, 1.453 + const char *outString, int32_t outLength, 1.454 + const UChar *localString, int32_t localLength) { 1.455 + int32_t minLength; 1.456 + UChar32 c1, c2; 1.457 + uint8_t c; 1.458 + 1.459 + if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { 1.460 + return 0; 1.461 + } 1.462 + 1.463 + if(outLength<0) { 1.464 + outLength=(int32_t)uprv_strlen(outString); 1.465 + } 1.466 + if(localLength<0) { 1.467 + localLength=u_strlen(localString); 1.468 + } 1.469 + 1.470 + minLength= outLength<localLength ? 
outLength : localLength; 1.471 + 1.472 + while(minLength>0) { 1.473 + c=(uint8_t)*outString++; 1.474 + if(UCHAR_IS_INVARIANT(c)) { 1.475 + c1=c; 1.476 + } else { 1.477 + c1=-1; 1.478 + } 1.479 + 1.480 + c2=*localString++; 1.481 + if(!UCHAR_IS_INVARIANT(c2)) { 1.482 + c2=-2; 1.483 + } 1.484 + 1.485 + if((c1-=c2)!=0) { 1.486 + return c1; 1.487 + } 1.488 + 1.489 + --minLength; 1.490 + } 1.491 + 1.492 + /* strings start with same prefix, compare lengths */ 1.493 + return outLength-localLength; 1.494 +} 1.495 + 1.496 +U_CFUNC int32_t 1.497 +uprv_compareInvEbcdic(const UDataSwapper *ds, 1.498 + const char *outString, int32_t outLength, 1.499 + const UChar *localString, int32_t localLength) { 1.500 + int32_t minLength; 1.501 + UChar32 c1, c2; 1.502 + uint8_t c; 1.503 + 1.504 + if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { 1.505 + return 0; 1.506 + } 1.507 + 1.508 + if(outLength<0) { 1.509 + outLength=(int32_t)uprv_strlen(outString); 1.510 + } 1.511 + if(localLength<0) { 1.512 + localLength=u_strlen(localString); 1.513 + } 1.514 + 1.515 + minLength= outLength<localLength ? 
outLength : localLength; 1.516 + 1.517 + while(minLength>0) { 1.518 + c=(uint8_t)*outString++; 1.519 + if(c==0) { 1.520 + c1=0; 1.521 + } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) { 1.522 + /* c1 is set */ 1.523 + } else { 1.524 + c1=-1; 1.525 + } 1.526 + 1.527 + c2=*localString++; 1.528 + if(!UCHAR_IS_INVARIANT(c2)) { 1.529 + c2=-2; 1.530 + } 1.531 + 1.532 + if((c1-=c2)!=0) { 1.533 + return c1; 1.534 + } 1.535 + 1.536 + --minLength; 1.537 + } 1.538 + 1.539 + /* strings start with same prefix, compare lengths */ 1.540 + return outLength-localLength; 1.541 +} 1.542 + 1.543 +U_CAPI int32_t U_EXPORT2 1.544 +uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { 1.545 + int32_t c1, c2; 1.546 + 1.547 + for(;; ++s1, ++s2) { 1.548 + c1=(uint8_t)*s1; 1.549 + c2=(uint8_t)*s2; 1.550 + if(c1!=c2) { 1.551 + if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) { 1.552 + c1=-(int32_t)(uint8_t)*s1; 1.553 + } 1.554 + if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) { 1.555 + c2=-(int32_t)(uint8_t)*s2; 1.556 + } 1.557 + return c1-c2; 1.558 + } else if(c1==0) { 1.559 + return 0; 1.560 + } 1.561 + } 1.562 +} 1.563 + 1.564 +U_CAPI char U_EXPORT2 1.565 +uprv_ebcdicToLowercaseAscii(char c) { 1.566 + return (char)lowercaseAsciiFromEbcdic[(uint8_t)c]; 1.567 +} 1.568 + 1.569 +U_INTERNAL uint8_t* U_EXPORT2 1.570 +uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) 1.571 +{ 1.572 + uint8_t *orig_dst = dst; 1.573 + 1.574 + if(n==-1) { 1.575 + n = uprv_strlen((const char*)src)+1; /* copy NUL */ 1.576 + } 1.577 + /* copy non-null */ 1.578 + while(*src && n>0) { 1.579 + *(dst++) = asciiFromEbcdic[*(src++)]; 1.580 + n--; 1.581 + } 1.582 + /* pad */ 1.583 + while(n>0) { 1.584 + *(dst++) = 0; 1.585 + n--; 1.586 + } 1.587 + return orig_dst; 1.588 +} 1.589 + 1.590 +U_INTERNAL uint8_t* U_EXPORT2 1.591 +uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) 1.592 +{ 1.593 + uint8_t *orig_dst = dst; 1.594 + 1.595 + if(n==-1) 
{ 1.596 + n = uprv_strlen((const char*)src)+1; /* copy NUL */ 1.597 + } 1.598 + /* copy non-null */ 1.599 + while(*src && n>0) { 1.600 + char ch = ebcdicFromAscii[*(src++)]; 1.601 + if(ch == 0) { 1.602 + ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */ 1.603 + } 1.604 + *(dst++) = ch; 1.605 + n--; 1.606 + } 1.607 + /* pad */ 1.608 + while(n>0) { 1.609 + *(dst++) = 0; 1.610 + n--; 1.611 + } 1.612 + return orig_dst; 1.613 +} 1.614 +