intl/icu/source/io/uscanf_p.c

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1998-2011, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 *
michael@0 9 * File uscanf_p.c
michael@0 10 *
michael@0 11 * Modification History:
michael@0 12 *
michael@0 13 * Date Name Description
michael@0 14 * 12/02/98 stephen Creation.
michael@0 15 * 03/13/99 stephen Modified for new C API.
michael@0 16 *******************************************************************************
michael@0 17 */
michael@0 18
michael@0 19 #include "unicode/utypes.h"
michael@0 20
michael@0 21 #if !UCONFIG_NO_FORMATTING
michael@0 22
michael@0 23 #include "unicode/uchar.h"
michael@0 24 #include "unicode/ustring.h"
michael@0 25 #include "unicode/unum.h"
michael@0 26 #include "unicode/udat.h"
michael@0 27 #include "unicode/uset.h"
michael@0 28 #include "uscanf.h"
michael@0 29 #include "ufmt_cmn.h"
michael@0 30 #include "ufile.h"
michael@0 31 #include "locbund.h"
michael@0 32
michael@0 33 #include "cmemory.h"
michael@0 34 #include "ustr_cnv.h"
michael@0 35
/* flag characters for u_scanf */
#define FLAG_ASTERISK   0x002A  /* '*' : parse the field but do not store it */
#define FLAG_PAREN      0x0028  /* '(' : introduces an explicit pad character */

/*
 * TRUE if s is one of the u_scanf flag characters.
 * The whole expansion is parenthesized so the macro can be negated or
 * combined with && / || without the caller's operators binding into it.
 */
#define ISFLAG(s)       ((s) == FLAG_ASTERISK || \
                         (s) == FLAG_PAREN)

/* special characters for u_scanf */
#define SPEC_DOLLARSIGN 0x0024  /* '$' : terminates a positional-argument index */
michael@0 45
/* unicode digits */
#define DIGIT_ZERO      0x0030
#define DIGIT_ONE       0x0031
#define DIGIT_TWO       0x0032
#define DIGIT_THREE     0x0033
#define DIGIT_FOUR      0x0034
#define DIGIT_FIVE      0x0035
#define DIGIT_SIX       0x0036
#define DIGIT_SEVEN     0x0037
#define DIGIT_EIGHT     0x0038
#define DIGIT_NINE      0x0039

/*
 * TRUE if s is one of the ASCII digits U+0030..U+0039.
 * The ten digit code units are contiguous, so a range check is equivalent
 * to comparing against each one.  The expansion is fully parenthesized so
 * that !ISDIGIT(x) and ISDIGIT(x) && y parse as intended.
 */
#define ISDIGIT(s)      ((s) >= DIGIT_ZERO && (s) <= DIGIT_NINE)
michael@0 68
/* u_scanf modifiers */
#define MOD_H           0x0068  /* 'h' : short */
#define MOD_LOWERL      0x006C  /* 'l' : long, or long long when doubled */
#define MOD_L           0x004C  /* 'L' : long double */

/*
 * TRUE if s is one of the size-modifier characters.
 * The expansion is parenthesized so the macro composes safely with
 * surrounding operators.
 */
#define ISMOD(s)        ((s) == MOD_H || \
                         (s) == MOD_LOWERL || \
                         (s) == MOD_L)
michael@0 77
/**
 * Struct encapsulating a single uscanf format specification.
 * It is filled in by u_scanf_parse_spec() and handed to the conversion
 * handlers, which read (and in a few cases adjust) its fields.
 */
typedef struct u_scanf_spec_info {
    int32_t fWidth;         /* Width; -1 when the spec does not give one */

    UChar   fSpec;          /* Format specification (the conversion character) */

    UChar   fPadChar;       /* Padding character; defaults to U+0020 (space) */

    UBool   fSkipArg;       /* TRUE if arg should be skipped ('*' flag) */
    UBool   fIsLongDouble;  /* L flag */
    UBool   fIsShort;       /* h flag */
    UBool   fIsLong;        /* l flag */
    UBool   fIsLongLong;    /* ll flag */
    UBool   fIsString;      /* TRUE if this is a NULL-terminated string;
                               defaults to TRUE, cleared by the char handler */
} u_scanf_spec_info;
michael@0 95
michael@0 96
/**
 * Struct encapsulating a single u_scanf format specification:
 * the parsed attributes plus the optional positional-argument index.
 */
typedef struct u_scanf_spec {
    u_scanf_spec_info   fInfo;      /* Information on this spec */
    int32_t             fArgPos;    /* Position of data in arg list;
                                       -1 when no "n$" position was given */
} u_scanf_spec;
michael@0 104
michael@0 105 /**
michael@0 106 * Parse a single u_scanf format specifier in Unicode.
michael@0 107 * @param fmt A pointer to a '%' character in a u_scanf format specification.
michael@0 108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
michael@0 109 * format specifier.
michael@0 110 * @return The number of characters contained in this specifier.
michael@0 111 */
michael@0 112 static int32_t
michael@0 113 u_scanf_parse_spec (const UChar *fmt,
michael@0 114 u_scanf_spec *spec)
michael@0 115 {
michael@0 116 const UChar *s = fmt;
michael@0 117 const UChar *backup;
michael@0 118 u_scanf_spec_info *info = &(spec->fInfo);
michael@0 119
michael@0 120 /* initialize spec to default values */
michael@0 121 spec->fArgPos = -1;
michael@0 122
michael@0 123 info->fWidth = -1;
michael@0 124 info->fSpec = 0x0000;
michael@0 125 info->fPadChar = 0x0020;
michael@0 126 info->fSkipArg = FALSE;
michael@0 127 info->fIsLongDouble = FALSE;
michael@0 128 info->fIsShort = FALSE;
michael@0 129 info->fIsLong = FALSE;
michael@0 130 info->fIsLongLong = FALSE;
michael@0 131 info->fIsString = TRUE;
michael@0 132
michael@0 133
michael@0 134 /* skip over the initial '%' */
michael@0 135 s++;
michael@0 136
michael@0 137 /* Check for positional argument */
michael@0 138 if(ISDIGIT(*s)) {
michael@0 139
michael@0 140 /* Save the current position */
michael@0 141 backup = s;
michael@0 142
michael@0 143 /* handle positional parameters */
michael@0 144 if(ISDIGIT(*s)) {
michael@0 145 spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
michael@0 146
michael@0 147 while(ISDIGIT(*s)) {
michael@0 148 spec->fArgPos *= 10;
michael@0 149 spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
michael@0 150 }
michael@0 151 }
michael@0 152
michael@0 153 /* if there is no '$', don't read anything */
michael@0 154 if(*s != SPEC_DOLLARSIGN) {
michael@0 155 spec->fArgPos = -1;
michael@0 156 s = backup;
michael@0 157 }
michael@0 158 /* munge the '$' */
michael@0 159 else
michael@0 160 s++;
michael@0 161 }
michael@0 162
michael@0 163 /* Get any format flags */
michael@0 164 while(ISFLAG(*s)) {
michael@0 165 switch(*s++) {
michael@0 166
michael@0 167 /* skip argument */
michael@0 168 case FLAG_ASTERISK:
michael@0 169 info->fSkipArg = TRUE;
michael@0 170 break;
michael@0 171
michael@0 172 /* pad character specified */
michael@0 173 case FLAG_PAREN:
michael@0 174
michael@0 175 /* first four characters are hex values for pad char */
michael@0 176 info->fPadChar = (UChar)ufmt_digitvalue(*s++);
michael@0 177 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
michael@0 178 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
michael@0 179 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
michael@0 180
michael@0 181 /* final character is ignored */
michael@0 182 s++;
michael@0 183
michael@0 184 break;
michael@0 185 }
michael@0 186 }
michael@0 187
michael@0 188 /* Get the width */
michael@0 189 if(ISDIGIT(*s)){
michael@0 190 info->fWidth = (int) (*s++ - DIGIT_ZERO);
michael@0 191
michael@0 192 while(ISDIGIT(*s)) {
michael@0 193 info->fWidth *= 10;
michael@0 194 info->fWidth += (int) (*s++ - DIGIT_ZERO);
michael@0 195 }
michael@0 196 }
michael@0 197
michael@0 198 /* Get any modifiers */
michael@0 199 if(ISMOD(*s)) {
michael@0 200 switch(*s++) {
michael@0 201
michael@0 202 /* short */
michael@0 203 case MOD_H:
michael@0 204 info->fIsShort = TRUE;
michael@0 205 break;
michael@0 206
michael@0 207 /* long or long long */
michael@0 208 case MOD_LOWERL:
michael@0 209 if(*s == MOD_LOWERL) {
michael@0 210 info->fIsLongLong = TRUE;
michael@0 211 /* skip over the next 'l' */
michael@0 212 s++;
michael@0 213 }
michael@0 214 else
michael@0 215 info->fIsLong = TRUE;
michael@0 216 break;
michael@0 217
michael@0 218 /* long double */
michael@0 219 case MOD_L:
michael@0 220 info->fIsLongDouble = TRUE;
michael@0 221 break;
michael@0 222 }
michael@0 223 }
michael@0 224
michael@0 225 /* finally, get the specifier letter */
michael@0 226 info->fSpec = *s++;
michael@0 227
michael@0 228 /* return # of characters in this specifier */
michael@0 229 return (int32_t)(s - fmt);
michael@0 230 }
michael@0 231
/* U+0025 PERCENT SIGN */
#define UP_PERCENT 0x0025


/*
 * Each UFMT_* macro below expands to a brace initializer that pairs a
 * ufmt type tag with the u_scanf handler function for that conversion.
 * NOTE(review): presumably these initialize u_scanf_info entries in a
 * handler table defined outside this section -- confirm against the table.
 */

/* ANSI style formatting */
/* Use US-ASCII characters only for formatting */

/* % */
#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
/* s */
#define UFMT_STRING {ufmt_string, u_scanf_string_handler}
/* c */
#define UFMT_CHAR {ufmt_string, u_scanf_char_handler}
/* d, i */
#define UFMT_INT {ufmt_int, u_scanf_integer_handler}
/* u */
#define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler}
/* o */
#define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler}
/* x, X */
#define UFMT_HEX {ufmt_int, u_scanf_hex_handler}
/* f */
#define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler}
/* e, E */
#define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler}
/* g, G */
#define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler}
/* n */
#define UFMT_COUNT {ufmt_count, u_scanf_count_handler}
/* [ */
#define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler}

/* non-ANSI extensions */
/* Use US-ASCII characters only for formatting */

/* p */
#define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler}
/* V */
#define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler}
/* P */
#define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler}
/* C K is old format */
#define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler}
/* S U is old format */
#define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler}


/* Entry with no handler (NULL) */
#define UFMT_EMPTY {ufmt_empty, NULL}
michael@0 279
/**
 * A u_scanf handler function.
 * A u_scanf handler is responsible for handling a single u_scanf
 * format specification, for example 'd' or 's'.
 * @param stream The UFILE from which input is read.
 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
 * information on the format specification.
 * @param args A pointer to the argument data
 * @param fmt A pointer to the first character in the format string
 * following the spec.
 * @param fmtConsumed On output, set to the number of characters consumed
 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
 * @param argConverted The number of arguments converted and assigned, or -1 if an
 * error occurred.
 * @return The number of code points consumed during reading.
 */
typedef int32_t (*u_scanf_handler) (UFILE *stream,
                                    u_scanf_spec_info *info,
                                    ufmt_args *args,
                                    const UChar *fmt,
                                    int32_t *fmtConsumed,
                                    int32_t *argConverted);

/* Pairs the argument type of a conversion with the handler that reads it. */
typedef struct u_scanf_info {
    ufmt_type_info info;        /* type tag (ufmt_int, ufmt_string, ...) */
    u_scanf_handler handler;    /* handler for the conversion; may be NULL (see UFMT_EMPTY) */
} u_scanf_info;
michael@0 307
/* NOTE(review): presumably the number of entries in the handler table -- confirm */
#define USCANF_NUM_FMT_HANDLERS 108
/* Capacity, in UChars, of the local buffers that receive a number-format symbol */
#define USCANF_SYMBOL_BUFFER_SIZE 8

/* We do not use handlers for 0-0x1f */
#define USCANF_BASE_FMT_HANDLERS 0x20
michael@0 313
michael@0 314
michael@0 315 static int32_t
michael@0 316 u_scanf_skip_leading_ws(UFILE *input,
michael@0 317 UChar pad)
michael@0 318 {
michael@0 319 UChar c;
michael@0 320 int32_t count = 0;
michael@0 321 UBool isNotEOF;
michael@0 322
michael@0 323 /* skip all leading ws in the input */
michael@0 324 while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) )
michael@0 325 {
michael@0 326 count++;
michael@0 327 }
michael@0 328
michael@0 329 /* put the final character back on the input */
michael@0 330 if(isNotEOF)
michael@0 331 u_fungetc(c, input);
michael@0 332
michael@0 333 return count;
michael@0 334 }
michael@0 335
michael@0 336 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
michael@0 337 static int32_t
michael@0 338 u_scanf_skip_leading_positive_sign(UFILE *input,
michael@0 339 UNumberFormat *format,
michael@0 340 UErrorCode *status)
michael@0 341 {
michael@0 342 UChar c;
michael@0 343 int32_t count = 0;
michael@0 344 UBool isNotEOF;
michael@0 345 UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
michael@0 346 int32_t symbolLen;
michael@0 347 UErrorCode localStatus = U_ZERO_ERROR;
michael@0 348
michael@0 349 if (U_SUCCESS(*status)) {
michael@0 350 symbolLen = unum_getSymbol(format,
michael@0 351 UNUM_PLUS_SIGN_SYMBOL,
michael@0 352 plusSymbol,
michael@0 353 sizeof(plusSymbol)/sizeof(*plusSymbol),
michael@0 354 &localStatus);
michael@0 355
michael@0 356 if (U_SUCCESS(localStatus)) {
michael@0 357 /* skip all leading ws in the input */
michael@0 358 while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) )
michael@0 359 {
michael@0 360 count++;
michael@0 361 }
michael@0 362
michael@0 363 /* put the final character back on the input */
michael@0 364 if(isNotEOF) {
michael@0 365 u_fungetc(c, input);
michael@0 366 }
michael@0 367 }
michael@0 368 }
michael@0 369
michael@0 370 return count;
michael@0 371 }
michael@0 372
michael@0 373 static int32_t
michael@0 374 u_scanf_simple_percent_handler(UFILE *input,
michael@0 375 u_scanf_spec_info *info,
michael@0 376 ufmt_args *args,
michael@0 377 const UChar *fmt,
michael@0 378 int32_t *fmtConsumed,
michael@0 379 int32_t *argConverted)
michael@0 380 {
michael@0 381 /* make sure the next character in the input is a percent */
michael@0 382 *argConverted = 0;
michael@0 383 if(u_fgetc(input) != 0x0025) {
michael@0 384 *argConverted = -1;
michael@0 385 }
michael@0 386 return 1;
michael@0 387 }
michael@0 388
michael@0 389 static int32_t
michael@0 390 u_scanf_count_handler(UFILE *input,
michael@0 391 u_scanf_spec_info *info,
michael@0 392 ufmt_args *args,
michael@0 393 const UChar *fmt,
michael@0 394 int32_t *fmtConsumed,
michael@0 395 int32_t *argConverted)
michael@0 396 {
michael@0 397 /* in the special case of count, the u_scanf_spec_info's width */
michael@0 398 /* will contain the # of items converted thus far */
michael@0 399 if (!info->fSkipArg) {
michael@0 400 if (info->fIsShort)
michael@0 401 *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
michael@0 402 else if (info->fIsLongLong)
michael@0 403 *(int64_t*)(args[0].ptrValue) = info->fWidth;
michael@0 404 else
michael@0 405 *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
michael@0 406 }
michael@0 407 *argConverted = 0;
michael@0 408
michael@0 409 /* we converted 0 args */
michael@0 410 return 0;
michael@0 411 }
michael@0 412
michael@0 413 static int32_t
michael@0 414 u_scanf_double_handler(UFILE *input,
michael@0 415 u_scanf_spec_info *info,
michael@0 416 ufmt_args *args,
michael@0 417 const UChar *fmt,
michael@0 418 int32_t *fmtConsumed,
michael@0 419 int32_t *argConverted)
michael@0 420 {
michael@0 421 int32_t len;
michael@0 422 double num;
michael@0 423 UNumberFormat *format;
michael@0 424 int32_t parsePos = 0;
michael@0 425 int32_t skipped;
michael@0 426 UErrorCode status = U_ZERO_ERROR;
michael@0 427
michael@0 428
michael@0 429 /* skip all ws in the input */
michael@0 430 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 431
michael@0 432 /* fill the input's internal buffer */
michael@0 433 ufile_fill_uchar_buffer(input);
michael@0 434
michael@0 435 /* determine the size of the input's buffer */
michael@0 436 len = (int32_t)(input->str.fLimit - input->str.fPos);
michael@0 437
michael@0 438 /* truncate to the width, if specified */
michael@0 439 if(info->fWidth != -1)
michael@0 440 len = ufmt_min(len, info->fWidth);
michael@0 441
michael@0 442 /* get the formatter */
michael@0 443 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
michael@0 444
michael@0 445 /* handle error */
michael@0 446 if(format == 0)
michael@0 447 return 0;
michael@0 448
michael@0 449 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
michael@0 450 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
michael@0 451
michael@0 452 /* parse the number */
michael@0 453 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
michael@0 454
michael@0 455 if (!info->fSkipArg) {
michael@0 456 if (info->fIsLong)
michael@0 457 *(double*)(args[0].ptrValue) = num;
michael@0 458 else if (info->fIsLongDouble)
michael@0 459 *(long double*)(args[0].ptrValue) = num;
michael@0 460 else
michael@0 461 *(float*)(args[0].ptrValue) = (float)num;
michael@0 462 }
michael@0 463
michael@0 464 /* mask off any necessary bits */
michael@0 465 /* if(! info->fIsLong_double)
michael@0 466 num &= DBL_MAX;*/
michael@0 467
michael@0 468 /* update the input's position to reflect consumed data */
michael@0 469 input->str.fPos += parsePos;
michael@0 470
michael@0 471 /* we converted 1 arg */
michael@0 472 *argConverted = !info->fSkipArg;
michael@0 473 return parsePos + skipped;
michael@0 474 }
michael@0 475
michael@0 476 #define UPRINTF_SYMBOL_BUFFER_SIZE 8
michael@0 477
michael@0 478 static int32_t
michael@0 479 u_scanf_scientific_handler(UFILE *input,
michael@0 480 u_scanf_spec_info *info,
michael@0 481 ufmt_args *args,
michael@0 482 const UChar *fmt,
michael@0 483 int32_t *fmtConsumed,
michael@0 484 int32_t *argConverted)
michael@0 485 {
michael@0 486 int32_t len;
michael@0 487 double num;
michael@0 488 UNumberFormat *format;
michael@0 489 int32_t parsePos = 0;
michael@0 490 int32_t skipped;
michael@0 491 UErrorCode status = U_ZERO_ERROR;
michael@0 492 UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
michael@0 493 int32_t srcLen, expLen;
michael@0 494 UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
michael@0 495
michael@0 496
michael@0 497 /* skip all ws in the input */
michael@0 498 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 499
michael@0 500 /* fill the input's internal buffer */
michael@0 501 ufile_fill_uchar_buffer(input);
michael@0 502
michael@0 503 /* determine the size of the input's buffer */
michael@0 504 len = (int32_t)(input->str.fLimit - input->str.fPos);
michael@0 505
michael@0 506 /* truncate to the width, if specified */
michael@0 507 if(info->fWidth != -1)
michael@0 508 len = ufmt_min(len, info->fWidth);
michael@0 509
michael@0 510 /* get the formatter */
michael@0 511 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
michael@0 512
michael@0 513 /* handle error */
michael@0 514 if(format == 0)
michael@0 515 return 0;
michael@0 516
michael@0 517 /* set the appropriate flags on the formatter */
michael@0 518
michael@0 519 srcLen = unum_getSymbol(format,
michael@0 520 UNUM_EXPONENTIAL_SYMBOL,
michael@0 521 srcExpBuf,
michael@0 522 sizeof(srcExpBuf),
michael@0 523 &status);
michael@0 524
michael@0 525 /* Upper/lower case the e */
michael@0 526 if (info->fSpec == (UChar)0x65 /* e */) {
michael@0 527 expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf),
michael@0 528 srcExpBuf, srcLen,
michael@0 529 input->str.fBundle.fLocale,
michael@0 530 &status);
michael@0 531 }
michael@0 532 else {
michael@0 533 expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf),
michael@0 534 srcExpBuf, srcLen,
michael@0 535 input->str.fBundle.fLocale,
michael@0 536 &status);
michael@0 537 }
michael@0 538
michael@0 539 unum_setSymbol(format,
michael@0 540 UNUM_EXPONENTIAL_SYMBOL,
michael@0 541 expBuf,
michael@0 542 expLen,
michael@0 543 &status);
michael@0 544
michael@0 545
michael@0 546
michael@0 547
michael@0 548 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
michael@0 549 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
michael@0 550
michael@0 551 /* parse the number */
michael@0 552 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
michael@0 553
michael@0 554 if (!info->fSkipArg) {
michael@0 555 if (info->fIsLong)
michael@0 556 *(double*)(args[0].ptrValue) = num;
michael@0 557 else if (info->fIsLongDouble)
michael@0 558 *(long double*)(args[0].ptrValue) = num;
michael@0 559 else
michael@0 560 *(float*)(args[0].ptrValue) = (float)num;
michael@0 561 }
michael@0 562
michael@0 563 /* mask off any necessary bits */
michael@0 564 /* if(! info->fIsLong_double)
michael@0 565 num &= DBL_MAX;*/
michael@0 566
michael@0 567 /* update the input's position to reflect consumed data */
michael@0 568 input->str.fPos += parsePos;
michael@0 569
michael@0 570 /* we converted 1 arg */
michael@0 571 *argConverted = !info->fSkipArg;
michael@0 572 return parsePos + skipped;
michael@0 573 }
michael@0 574
michael@0 575 static int32_t
michael@0 576 u_scanf_scidbl_handler(UFILE *input,
michael@0 577 u_scanf_spec_info *info,
michael@0 578 ufmt_args *args,
michael@0 579 const UChar *fmt,
michael@0 580 int32_t *fmtConsumed,
michael@0 581 int32_t *argConverted)
michael@0 582 {
michael@0 583 int32_t len;
michael@0 584 double num;
michael@0 585 UNumberFormat *scientificFormat, *genericFormat;
michael@0 586 /*int32_t scientificResult, genericResult;*/
michael@0 587 double scientificResult, genericResult;
michael@0 588 int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
michael@0 589 int32_t skipped;
michael@0 590 UErrorCode scientificStatus = U_ZERO_ERROR;
michael@0 591 UErrorCode genericStatus = U_ZERO_ERROR;
michael@0 592
michael@0 593
michael@0 594 /* since we can't determine by scanning the characters whether */
michael@0 595 /* a number was formatted in the 'f' or 'g' styles, parse the */
michael@0 596 /* string with both formatters, and assume whichever one */
michael@0 597 /* parsed the most is the correct formatter to use */
michael@0 598
michael@0 599
michael@0 600 /* skip all ws in the input */
michael@0 601 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 602
michael@0 603 /* fill the input's internal buffer */
michael@0 604 ufile_fill_uchar_buffer(input);
michael@0 605
michael@0 606 /* determine the size of the input's buffer */
michael@0 607 len = (int32_t)(input->str.fLimit - input->str.fPos);
michael@0 608
michael@0 609 /* truncate to the width, if specified */
michael@0 610 if(info->fWidth != -1)
michael@0 611 len = ufmt_min(len, info->fWidth);
michael@0 612
michael@0 613 /* get the formatters */
michael@0 614 scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
michael@0 615 genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
michael@0 616
michael@0 617 /* handle error */
michael@0 618 if(scientificFormat == 0 || genericFormat == 0)
michael@0 619 return 0;
michael@0 620
michael@0 621 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
michael@0 622 skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
michael@0 623
michael@0 624 /* parse the number using each format*/
michael@0 625
michael@0 626 scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
michael@0 627 &scientificParsePos, &scientificStatus);
michael@0 628
michael@0 629 genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
michael@0 630 &genericParsePos, &genericStatus);
michael@0 631
michael@0 632 /* determine which parse made it farther */
michael@0 633 if(scientificParsePos > genericParsePos) {
michael@0 634 /* stash the result in num */
michael@0 635 num = scientificResult;
michael@0 636 /* update the input's position to reflect consumed data */
michael@0 637 parsePos += scientificParsePos;
michael@0 638 }
michael@0 639 else {
michael@0 640 /* stash the result in num */
michael@0 641 num = genericResult;
michael@0 642 /* update the input's position to reflect consumed data */
michael@0 643 parsePos += genericParsePos;
michael@0 644 }
michael@0 645 input->str.fPos += parsePos;
michael@0 646
michael@0 647 if (!info->fSkipArg) {
michael@0 648 if (info->fIsLong)
michael@0 649 *(double*)(args[0].ptrValue) = num;
michael@0 650 else if (info->fIsLongDouble)
michael@0 651 *(long double*)(args[0].ptrValue) = num;
michael@0 652 else
michael@0 653 *(float*)(args[0].ptrValue) = (float)num;
michael@0 654 }
michael@0 655
michael@0 656 /* mask off any necessary bits */
michael@0 657 /* if(! info->fIsLong_double)
michael@0 658 num &= DBL_MAX;*/
michael@0 659
michael@0 660 /* we converted 1 arg */
michael@0 661 *argConverted = !info->fSkipArg;
michael@0 662 return parsePos + skipped;
michael@0 663 }
michael@0 664
michael@0 665 static int32_t
michael@0 666 u_scanf_integer_handler(UFILE *input,
michael@0 667 u_scanf_spec_info *info,
michael@0 668 ufmt_args *args,
michael@0 669 const UChar *fmt,
michael@0 670 int32_t *fmtConsumed,
michael@0 671 int32_t *argConverted)
michael@0 672 {
michael@0 673 int32_t len;
michael@0 674 void *num = (void*) (args[0].ptrValue);
michael@0 675 UNumberFormat *format;
michael@0 676 int32_t parsePos = 0;
michael@0 677 int32_t skipped;
michael@0 678 UErrorCode status = U_ZERO_ERROR;
michael@0 679 int64_t result;
michael@0 680
michael@0 681
michael@0 682 /* skip all ws in the input */
michael@0 683 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 684
michael@0 685 /* fill the input's internal buffer */
michael@0 686 ufile_fill_uchar_buffer(input);
michael@0 687
michael@0 688 /* determine the size of the input's buffer */
michael@0 689 len = (int32_t)(input->str.fLimit - input->str.fPos);
michael@0 690
michael@0 691 /* truncate to the width, if specified */
michael@0 692 if(info->fWidth != -1)
michael@0 693 len = ufmt_min(len, info->fWidth);
michael@0 694
michael@0 695 /* get the formatter */
michael@0 696 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
michael@0 697
michael@0 698 /* handle error */
michael@0 699 if(format == 0)
michael@0 700 return 0;
michael@0 701
michael@0 702 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
michael@0 703 skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
michael@0 704
michael@0 705 /* parse the number */
michael@0 706 result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
michael@0 707
michael@0 708 /* mask off any necessary bits */
michael@0 709 if (!info->fSkipArg) {
michael@0 710 if (info->fIsShort)
michael@0 711 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
michael@0 712 else if (info->fIsLongLong)
michael@0 713 *(int64_t*)num = result;
michael@0 714 else
michael@0 715 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
michael@0 716 }
michael@0 717
michael@0 718 /* update the input's position to reflect consumed data */
michael@0 719 input->str.fPos += parsePos;
michael@0 720
michael@0 721 /* we converted 1 arg */
michael@0 722 *argConverted = !info->fSkipArg;
michael@0 723 return parsePos + skipped;
michael@0 724 }
michael@0 725
michael@0 726 static int32_t
michael@0 727 u_scanf_uinteger_handler(UFILE *input,
michael@0 728 u_scanf_spec_info *info,
michael@0 729 ufmt_args *args,
michael@0 730 const UChar *fmt,
michael@0 731 int32_t *fmtConsumed,
michael@0 732 int32_t *argConverted)
michael@0 733 {
michael@0 734 /* TODO Fix this when Numberformat handles uint64_t */
michael@0 735 return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
michael@0 736 }
michael@0 737
michael@0 738 static int32_t
michael@0 739 u_scanf_percent_handler(UFILE *input,
michael@0 740 u_scanf_spec_info *info,
michael@0 741 ufmt_args *args,
michael@0 742 const UChar *fmt,
michael@0 743 int32_t *fmtConsumed,
michael@0 744 int32_t *argConverted)
michael@0 745 {
michael@0 746 int32_t len;
michael@0 747 double num;
michael@0 748 UNumberFormat *format;
michael@0 749 int32_t parsePos = 0;
michael@0 750 UErrorCode status = U_ZERO_ERROR;
michael@0 751
michael@0 752
michael@0 753 /* skip all ws in the input */
michael@0 754 u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 755
michael@0 756 /* fill the input's internal buffer */
michael@0 757 ufile_fill_uchar_buffer(input);
michael@0 758
michael@0 759 /* determine the size of the input's buffer */
michael@0 760 len = (int32_t)(input->str.fLimit - input->str.fPos);
michael@0 761
michael@0 762 /* truncate to the width, if specified */
michael@0 763 if(info->fWidth != -1)
michael@0 764 len = ufmt_min(len, info->fWidth);
michael@0 765
michael@0 766 /* get the formatter */
michael@0 767 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
michael@0 768
michael@0 769 /* handle error */
michael@0 770 if(format == 0)
michael@0 771 return 0;
michael@0 772
michael@0 773 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
michael@0 774 u_scanf_skip_leading_positive_sign(input, format, &status);
michael@0 775
michael@0 776 /* parse the number */
michael@0 777 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
michael@0 778
michael@0 779 if (!info->fSkipArg) {
michael@0 780 *(double*)(args[0].ptrValue) = num;
michael@0 781 }
michael@0 782
michael@0 783 /* mask off any necessary bits */
michael@0 784 /* if(! info->fIsLong_double)
michael@0 785 num &= DBL_MAX;*/
michael@0 786
michael@0 787 /* update the input's position to reflect consumed data */
michael@0 788 input->str.fPos += parsePos;
michael@0 789
michael@0 790 /* we converted 1 arg */
michael@0 791 *argConverted = !info->fSkipArg;
michael@0 792 return parsePos;
michael@0 793 }
michael@0 794
michael@0 795 static int32_t
michael@0 796 u_scanf_string_handler(UFILE *input,
michael@0 797 u_scanf_spec_info *info,
michael@0 798 ufmt_args *args,
michael@0 799 const UChar *fmt,
michael@0 800 int32_t *fmtConsumed,
michael@0 801 int32_t *argConverted)
michael@0 802 {
michael@0 803 const UChar *source;
michael@0 804 UConverter *conv;
michael@0 805 char *arg = (char*)(args[0].ptrValue);
michael@0 806 char *alias = arg;
michael@0 807 char *limit;
michael@0 808 UErrorCode status = U_ZERO_ERROR;
michael@0 809 int32_t count;
michael@0 810 int32_t skipped = 0;
michael@0 811 UChar c;
michael@0 812 UBool isNotEOF = FALSE;
michael@0 813
michael@0 814 /* skip all ws in the input */
michael@0 815 if (info->fIsString) {
michael@0 816 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 817 }
michael@0 818
michael@0 819 /* get the string one character at a time, truncating to the width */
michael@0 820 count = 0;
michael@0 821
michael@0 822 /* open the default converter */
michael@0 823 conv = u_getDefaultConverter(&status);
michael@0 824
michael@0 825 if(U_FAILURE(status))
michael@0 826 return -1;
michael@0 827
michael@0 828 while( (info->fWidth == -1 || count < info->fWidth)
michael@0 829 && (isNotEOF = ufile_getch(input, &c))
michael@0 830 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
michael@0 831 {
michael@0 832
michael@0 833 if (!info->fSkipArg) {
michael@0 834 /* put the character from the input onto the target */
michael@0 835 source = &c;
michael@0 836 /* Since we do this one character at a time, do it this way. */
michael@0 837 if (info->fWidth > 0) {
michael@0 838 limit = alias + info->fWidth - count;
michael@0 839 }
michael@0 840 else {
michael@0 841 limit = alias + ucnv_getMaxCharSize(conv);
michael@0 842 }
michael@0 843
michael@0 844 /* convert the character to the default codepage */
michael@0 845 ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
michael@0 846 NULL, TRUE, &status);
michael@0 847
michael@0 848 if(U_FAILURE(status)) {
michael@0 849 /* clean up */
michael@0 850 u_releaseDefaultConverter(conv);
michael@0 851 return -1;
michael@0 852 }
michael@0 853 }
michael@0 854
michael@0 855 /* increment the count */
michael@0 856 ++count;
michael@0 857 }
michael@0 858
michael@0 859 /* put the final character we read back on the input */
michael@0 860 if (!info->fSkipArg) {
michael@0 861 if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
michael@0 862 u_fungetc(c, input);
michael@0 863
michael@0 864 /* add the terminator */
michael@0 865 if (info->fIsString) {
michael@0 866 *alias = 0x00;
michael@0 867 }
michael@0 868 }
michael@0 869
michael@0 870 /* clean up */
michael@0 871 u_releaseDefaultConverter(conv);
michael@0 872
michael@0 873 /* we converted 1 arg */
michael@0 874 *argConverted = !info->fSkipArg;
michael@0 875 return count + skipped;
michael@0 876 }
michael@0 877
michael@0 878 static int32_t
michael@0 879 u_scanf_char_handler(UFILE *input,
michael@0 880 u_scanf_spec_info *info,
michael@0 881 ufmt_args *args,
michael@0 882 const UChar *fmt,
michael@0 883 int32_t *fmtConsumed,
michael@0 884 int32_t *argConverted)
michael@0 885 {
michael@0 886 if (info->fWidth < 0) {
michael@0 887 info->fWidth = 1;
michael@0 888 }
michael@0 889 info->fIsString = FALSE;
michael@0 890 return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
michael@0 891 }
michael@0 892
michael@0 893 static int32_t
michael@0 894 u_scanf_ustring_handler(UFILE *input,
michael@0 895 u_scanf_spec_info *info,
michael@0 896 ufmt_args *args,
michael@0 897 const UChar *fmt,
michael@0 898 int32_t *fmtConsumed,
michael@0 899 int32_t *argConverted)
michael@0 900 {
michael@0 901 UChar *arg = (UChar*)(args[0].ptrValue);
michael@0 902 UChar *alias = arg;
michael@0 903 int32_t count;
michael@0 904 int32_t skipped = 0;
michael@0 905 UChar c;
michael@0 906 UBool isNotEOF = FALSE;
michael@0 907
michael@0 908 /* skip all ws in the input */
michael@0 909 if (info->fIsString) {
michael@0 910 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 911 }
michael@0 912
michael@0 913 /* get the string one character at a time, truncating to the width */
michael@0 914 count = 0;
michael@0 915
michael@0 916 while( (info->fWidth == -1 || count < info->fWidth)
michael@0 917 && (isNotEOF = ufile_getch(input, &c))
michael@0 918 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
michael@0 919 {
michael@0 920
michael@0 921 /* put the character from the input onto the target */
michael@0 922 if (!info->fSkipArg) {
michael@0 923 *alias++ = c;
michael@0 924 }
michael@0 925
michael@0 926 /* increment the count */
michael@0 927 ++count;
michael@0 928 }
michael@0 929
michael@0 930 /* put the final character we read back on the input */
michael@0 931 if (!info->fSkipArg) {
michael@0 932 if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
michael@0 933 u_fungetc(c, input);
michael@0 934 }
michael@0 935
michael@0 936 /* add the terminator */
michael@0 937 if (info->fIsString) {
michael@0 938 *alias = 0x0000;
michael@0 939 }
michael@0 940 }
michael@0 941
michael@0 942 /* we converted 1 arg */
michael@0 943 *argConverted = !info->fSkipArg;
michael@0 944 return count + skipped;
michael@0 945 }
michael@0 946
michael@0 947 static int32_t
michael@0 948 u_scanf_uchar_handler(UFILE *input,
michael@0 949 u_scanf_spec_info *info,
michael@0 950 ufmt_args *args,
michael@0 951 const UChar *fmt,
michael@0 952 int32_t *fmtConsumed,
michael@0 953 int32_t *argConverted)
michael@0 954 {
michael@0 955 if (info->fWidth < 0) {
michael@0 956 info->fWidth = 1;
michael@0 957 }
michael@0 958 info->fIsString = FALSE;
michael@0 959 return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
michael@0 960 }
michael@0 961
michael@0 962 static int32_t
michael@0 963 u_scanf_spellout_handler(UFILE *input,
michael@0 964 u_scanf_spec_info *info,
michael@0 965 ufmt_args *args,
michael@0 966 const UChar *fmt,
michael@0 967 int32_t *fmtConsumed,
michael@0 968 int32_t *argConverted)
michael@0 969 {
michael@0 970 int32_t len;
michael@0 971 double num;
michael@0 972 UNumberFormat *format;
michael@0 973 int32_t parsePos = 0;
michael@0 974 int32_t skipped;
michael@0 975 UErrorCode status = U_ZERO_ERROR;
michael@0 976
michael@0 977
michael@0 978 /* skip all ws in the input */
michael@0 979 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 980
michael@0 981 /* fill the input's internal buffer */
michael@0 982 ufile_fill_uchar_buffer(input);
michael@0 983
michael@0 984 /* determine the size of the input's buffer */
michael@0 985 len = (int32_t)(input->str.fLimit - input->str.fPos);
michael@0 986
michael@0 987 /* truncate to the width, if specified */
michael@0 988 if(info->fWidth != -1)
michael@0 989 len = ufmt_min(len, info->fWidth);
michael@0 990
michael@0 991 /* get the formatter */
michael@0 992 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
michael@0 993
michael@0 994 /* handle error */
michael@0 995 if(format == 0)
michael@0 996 return 0;
michael@0 997
michael@0 998 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
michael@0 999 /* This is not applicable to RBNF. */
michael@0 1000 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
michael@0 1001
michael@0 1002 /* parse the number */
michael@0 1003 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
michael@0 1004
michael@0 1005 if (!info->fSkipArg) {
michael@0 1006 *(double*)(args[0].ptrValue) = num;
michael@0 1007 }
michael@0 1008
michael@0 1009 /* mask off any necessary bits */
michael@0 1010 /* if(! info->fIsLong_double)
michael@0 1011 num &= DBL_MAX;*/
michael@0 1012
michael@0 1013 /* update the input's position to reflect consumed data */
michael@0 1014 input->str.fPos += parsePos;
michael@0 1015
michael@0 1016 /* we converted 1 arg */
michael@0 1017 *argConverted = !info->fSkipArg;
michael@0 1018 return parsePos + skipped;
michael@0 1019 }
michael@0 1020
michael@0 1021 static int32_t
michael@0 1022 u_scanf_hex_handler(UFILE *input,
michael@0 1023 u_scanf_spec_info *info,
michael@0 1024 ufmt_args *args,
michael@0 1025 const UChar *fmt,
michael@0 1026 int32_t *fmtConsumed,
michael@0 1027 int32_t *argConverted)
michael@0 1028 {
michael@0 1029 int32_t len;
michael@0 1030 int32_t skipped;
michael@0 1031 void *num = (void*) (args[0].ptrValue);
michael@0 1032 int64_t result;
michael@0 1033
michael@0 1034 /* skip all ws in the input */
michael@0 1035 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 1036
michael@0 1037 /* fill the input's internal buffer */
michael@0 1038 ufile_fill_uchar_buffer(input);
michael@0 1039
michael@0 1040 /* determine the size of the input's buffer */
michael@0 1041 len = (int32_t)(input->str.fLimit - input->str.fPos);
michael@0 1042
michael@0 1043 /* truncate to the width, if specified */
michael@0 1044 if(info->fWidth != -1)
michael@0 1045 len = ufmt_min(len, info->fWidth);
michael@0 1046
michael@0 1047 /* check for alternate form */
michael@0 1048 if( *(input->str.fPos) == 0x0030 &&
michael@0 1049 (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
michael@0 1050
michael@0 1051 /* skip the '0' and 'x' or 'X' if present */
michael@0 1052 input->str.fPos += 2;
michael@0 1053 len -= 2;
michael@0 1054 }
michael@0 1055
michael@0 1056 /* parse the number */
michael@0 1057 result = ufmt_uto64(input->str.fPos, &len, 16);
michael@0 1058
michael@0 1059 /* update the input's position to reflect consumed data */
michael@0 1060 input->str.fPos += len;
michael@0 1061
michael@0 1062 /* mask off any necessary bits */
michael@0 1063 if (!info->fSkipArg) {
michael@0 1064 if (info->fIsShort)
michael@0 1065 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
michael@0 1066 else if (info->fIsLongLong)
michael@0 1067 *(int64_t*)num = result;
michael@0 1068 else
michael@0 1069 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
michael@0 1070 }
michael@0 1071
michael@0 1072 /* we converted 1 arg */
michael@0 1073 *argConverted = !info->fSkipArg;
michael@0 1074 return len + skipped;
michael@0 1075 }
michael@0 1076
michael@0 1077 static int32_t
michael@0 1078 u_scanf_octal_handler(UFILE *input,
michael@0 1079 u_scanf_spec_info *info,
michael@0 1080 ufmt_args *args,
michael@0 1081 const UChar *fmt,
michael@0 1082 int32_t *fmtConsumed,
michael@0 1083 int32_t *argConverted)
michael@0 1084 {
michael@0 1085 int32_t len;
michael@0 1086 int32_t skipped;
michael@0 1087 void *num = (void*) (args[0].ptrValue);
michael@0 1088 int64_t result;
michael@0 1089
michael@0 1090 /* skip all ws in the input */
michael@0 1091 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 1092
michael@0 1093 /* fill the input's internal buffer */
michael@0 1094 ufile_fill_uchar_buffer(input);
michael@0 1095
michael@0 1096 /* determine the size of the input's buffer */
michael@0 1097 len = (int32_t)(input->str.fLimit - input->str.fPos);
michael@0 1098
michael@0 1099 /* truncate to the width, if specified */
michael@0 1100 if(info->fWidth != -1)
michael@0 1101 len = ufmt_min(len, info->fWidth);
michael@0 1102
michael@0 1103 /* parse the number */
michael@0 1104 result = ufmt_uto64(input->str.fPos, &len, 8);
michael@0 1105
michael@0 1106 /* update the input's position to reflect consumed data */
michael@0 1107 input->str.fPos += len;
michael@0 1108
michael@0 1109 /* mask off any necessary bits */
michael@0 1110 if (!info->fSkipArg) {
michael@0 1111 if (info->fIsShort)
michael@0 1112 *(int16_t*)num = (int16_t)(UINT16_MAX & result);
michael@0 1113 else if (info->fIsLongLong)
michael@0 1114 *(int64_t*)num = result;
michael@0 1115 else
michael@0 1116 *(int32_t*)num = (int32_t)(UINT32_MAX & result);
michael@0 1117 }
michael@0 1118
michael@0 1119 /* we converted 1 arg */
michael@0 1120 *argConverted = !info->fSkipArg;
michael@0 1121 return len + skipped;
michael@0 1122 }
michael@0 1123
michael@0 1124 static int32_t
michael@0 1125 u_scanf_pointer_handler(UFILE *input,
michael@0 1126 u_scanf_spec_info *info,
michael@0 1127 ufmt_args *args,
michael@0 1128 const UChar *fmt,
michael@0 1129 int32_t *fmtConsumed,
michael@0 1130 int32_t *argConverted)
michael@0 1131 {
michael@0 1132 int32_t len;
michael@0 1133 int32_t skipped;
michael@0 1134 void *result;
michael@0 1135 void **p = (void**)(args[0].ptrValue);
michael@0 1136
michael@0 1137
michael@0 1138 /* skip all ws in the input */
michael@0 1139 skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
michael@0 1140
michael@0 1141 /* fill the input's internal buffer */
michael@0 1142 ufile_fill_uchar_buffer(input);
michael@0 1143
michael@0 1144 /* determine the size of the input's buffer */
michael@0 1145 len = (int32_t)(input->str.fLimit - input->str.fPos);
michael@0 1146
michael@0 1147 /* truncate to the width, if specified */
michael@0 1148 if(info->fWidth != -1) {
michael@0 1149 len = ufmt_min(len, info->fWidth);
michael@0 1150 }
michael@0 1151
michael@0 1152 /* Make sure that we don't consume too much */
michael@0 1153 if (len > (int32_t)(sizeof(void*)*2)) {
michael@0 1154 len = (int32_t)(sizeof(void*)*2);
michael@0 1155 }
michael@0 1156
michael@0 1157 /* parse the pointer - assign to temporary value */
michael@0 1158 result = ufmt_utop(input->str.fPos, &len);
michael@0 1159
michael@0 1160 if (!info->fSkipArg) {
michael@0 1161 *p = result;
michael@0 1162 }
michael@0 1163
michael@0 1164 /* update the input's position to reflect consumed data */
michael@0 1165 input->str.fPos += len;
michael@0 1166
michael@0 1167 /* we converted 1 arg */
michael@0 1168 *argConverted = !info->fSkipArg;
michael@0 1169 return len + skipped;
michael@0 1170 }
michael@0 1171
michael@0 1172 static int32_t
michael@0 1173 u_scanf_scanset_handler(UFILE *input,
michael@0 1174 u_scanf_spec_info *info,
michael@0 1175 ufmt_args *args,
michael@0 1176 const UChar *fmt,
michael@0 1177 int32_t *fmtConsumed,
michael@0 1178 int32_t *argConverted)
michael@0 1179 {
michael@0 1180 USet *scanset;
michael@0 1181 UErrorCode status = U_ZERO_ERROR;
michael@0 1182 int32_t chLeft = INT32_MAX;
michael@0 1183 UChar32 c;
michael@0 1184 UChar *alias = (UChar*) (args[0].ptrValue);
michael@0 1185 UBool isNotEOF = FALSE;
michael@0 1186 UBool readCharacter = FALSE;
michael@0 1187
michael@0 1188 /* Create an empty set */
michael@0 1189 scanset = uset_open(0, -1);
michael@0 1190
michael@0 1191 /* Back up one to get the [ */
michael@0 1192 fmt--;
michael@0 1193
michael@0 1194 /* truncate to the width, if specified and alias the target */
michael@0 1195 if(info->fWidth >= 0) {
michael@0 1196 chLeft = info->fWidth;
michael@0 1197 }
michael@0 1198
michael@0 1199 /* parse the scanset from the fmt string */
michael@0 1200 *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
michael@0 1201
michael@0 1202 /* verify that the parse was successful */
michael@0 1203 if (U_SUCCESS(status)) {
michael@0 1204 c=0;
michael@0 1205
michael@0 1206 /* grab characters one at a time and make sure they are in the scanset */
michael@0 1207 while(chLeft > 0) {
michael@0 1208 if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
michael@0 1209 readCharacter = TRUE;
michael@0 1210 if (!info->fSkipArg) {
michael@0 1211 int32_t idx = 0;
michael@0 1212 UBool isError = FALSE;
michael@0 1213
michael@0 1214 U16_APPEND(alias, idx, chLeft, c, isError);
michael@0 1215 if (isError) {
michael@0 1216 break;
michael@0 1217 }
michael@0 1218 alias += idx;
michael@0 1219 }
michael@0 1220 chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
michael@0 1221 }
michael@0 1222 else {
michael@0 1223 /* if the character's not in the scanset, break out */
michael@0 1224 break;
michael@0 1225 }
michael@0 1226 }
michael@0 1227
michael@0 1228 /* put the final character we read back on the input */
michael@0 1229 if(isNotEOF && chLeft > 0) {
michael@0 1230 u_fungetc(c, input);
michael@0 1231 }
michael@0 1232 }
michael@0 1233
michael@0 1234 uset_close(scanset);
michael@0 1235
michael@0 1236 /* if we didn't match at least 1 character, fail */
michael@0 1237 if(!readCharacter)
michael@0 1238 return -1;
michael@0 1239 /* otherwise, add the terminator */
michael@0 1240 else if (!info->fSkipArg) {
michael@0 1241 *alias = 0x00;
michael@0 1242 }
michael@0 1243
michael@0 1244 /* we converted 1 arg */
michael@0 1245 *argConverted = !info->fSkipArg;
michael@0 1246 return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
michael@0 1247 }
michael@0 1248
michael@0 1249 /* Use US-ASCII characters only for formatting. Most codepages have
michael@0 1250 characters 20-7F from Unicode. Using any other codepage specific
michael@0 1251 characters will make it very difficult to format the string on
michael@0 1252 non-Unicode machines */
michael@0 1253 static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
michael@0 1254 /* 0x20 */
michael@0 1255 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1256 UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY,
michael@0 1257 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1258 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1259
michael@0 1260 /* 0x30 */
michael@0 1261 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1262 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1263 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1264 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1265
michael@0 1266 /* 0x40 */
michael@0 1267 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR,
michael@0 1268 UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL,
michael@0 1269 #ifdef U_USE_OBSOLETE_IO_FORMATTING
michael@0 1270 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/,
michael@0 1271 #else
michael@0 1272 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1273 #endif
michael@0 1274 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1275
michael@0 1276 /* 0x50 */
michael@0 1277 UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING,
michael@0 1278 #ifdef U_USE_OBSOLETE_IO_FORMATTING
michael@0 1279 UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY,
michael@0 1280 #else
michael@0 1281 UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY,
michael@0 1282 #endif
michael@0 1283 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET,
michael@0 1284 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1285
michael@0 1286 /* 0x60 */
michael@0 1287 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR,
michael@0 1288 UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL,
michael@0 1289 UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1290 UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL,
michael@0 1291
michael@0 1292 /* 0x70 */
michael@0 1293 UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING,
michael@0 1294 UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1295 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1296 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
michael@0 1297 };
michael@0 1298
michael@0 1299 U_CFUNC int32_t
michael@0 1300 u_scanf_parse(UFILE *f,
michael@0 1301 const UChar *patternSpecification,
michael@0 1302 va_list ap)
michael@0 1303 {
michael@0 1304 const UChar *alias;
michael@0 1305 int32_t count, converted, argConsumed, cpConsumed;
michael@0 1306 uint16_t handlerNum;
michael@0 1307
michael@0 1308 ufmt_args args;
michael@0 1309 u_scanf_spec spec;
michael@0 1310 ufmt_type_info info;
michael@0 1311 u_scanf_handler handler;
michael@0 1312
michael@0 1313 /* alias the pattern */
michael@0 1314 alias = patternSpecification;
michael@0 1315
michael@0 1316 /* haven't converted anything yet */
michael@0 1317 argConsumed = 0;
michael@0 1318 converted = 0;
michael@0 1319 cpConsumed = 0;
michael@0 1320
michael@0 1321 /* iterate through the pattern */
michael@0 1322 for(;;) {
michael@0 1323
michael@0 1324 /* match any characters up to the next '%' */
michael@0 1325 while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
michael@0 1326 alias++;
michael@0 1327 }
michael@0 1328
michael@0 1329 /* if we aren't at a '%', or if we're at end of string, break*/
michael@0 1330 if(*alias != UP_PERCENT || *alias == 0x0000)
michael@0 1331 break;
michael@0 1332
michael@0 1333 /* parse the specifier */
michael@0 1334 count = u_scanf_parse_spec(alias, &spec);
michael@0 1335
michael@0 1336 /* update the pointer in pattern */
michael@0 1337 alias += count;
michael@0 1338
michael@0 1339 handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
michael@0 1340 if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
michael@0 1341 /* skip the argument, if necessary */
michael@0 1342 /* query the info function for argument information */
michael@0 1343 info = g_u_scanf_infos[ handlerNum ].info;
michael@0 1344 if (info != ufmt_count && u_feof(f)) {
michael@0 1345 break;
michael@0 1346 }
michael@0 1347 else if(spec.fInfo.fSkipArg) {
michael@0 1348 args.ptrValue = NULL;
michael@0 1349 }
michael@0 1350 else {
michael@0 1351 switch(info) {
michael@0 1352 case ufmt_count:
michael@0 1353 /* set the spec's width to the # of items converted */
michael@0 1354 spec.fInfo.fWidth = cpConsumed;
michael@0 1355 /* fall through to next case */
michael@0 1356 case ufmt_char:
michael@0 1357 case ufmt_uchar:
michael@0 1358 case ufmt_int:
michael@0 1359 case ufmt_string:
michael@0 1360 case ufmt_ustring:
michael@0 1361 case ufmt_pointer:
michael@0 1362 case ufmt_float:
michael@0 1363 case ufmt_double:
michael@0 1364 args.ptrValue = va_arg(ap, void*);
michael@0 1365 break;
michael@0 1366
michael@0 1367 default:
michael@0 1368 /* else args is ignored */
michael@0 1369 args.ptrValue = NULL;
michael@0 1370 break;
michael@0 1371 }
michael@0 1372 }
michael@0 1373
michael@0 1374 /* call the handler function */
michael@0 1375 handler = g_u_scanf_infos[ handlerNum ].handler;
michael@0 1376 if(handler != 0) {
michael@0 1377
michael@0 1378 /* reset count to 1 so that += for alias works. */
michael@0 1379 count = 1;
michael@0 1380
michael@0 1381 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
michael@0 1382
michael@0 1383 /* if the handler encountered an error condition, break */
michael@0 1384 if(argConsumed < 0) {
michael@0 1385 converted = -1;
michael@0 1386 break;
michael@0 1387 }
michael@0 1388
michael@0 1389 /* add to the # of items converted */
michael@0 1390 converted += argConsumed;
michael@0 1391
michael@0 1392 /* update the pointer in pattern */
michael@0 1393 alias += count-1;
michael@0 1394 }
michael@0 1395 /* else do nothing */
michael@0 1396 }
michael@0 1397 /* else do nothing */
michael@0 1398
michael@0 1399 /* just ignore unknown tags */
michael@0 1400 }
michael@0 1401
michael@0 1402 /* return # of items converted */
michael@0 1403 return converted;
michael@0 1404 }
michael@0 1405
michael@0 1406 #endif /* #if !UCONFIG_NO_FORMATTING */

mercurial