intl/icu/source/io/uscanf_p.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/io/uscanf_p.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1406 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 1998-2011, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*
    1.12 +* File uscanf_p.c
    1.13 +*
    1.14 +* Modification History:
    1.15 +*
    1.16 +*   Date        Name        Description
    1.17 +*   12/02/98    stephen        Creation.
    1.18 +*   03/13/99    stephen     Modified for new C API.
    1.19 +*******************************************************************************
    1.20 +*/
    1.21 +
    1.22 +#include "unicode/utypes.h"
    1.23 +
    1.24 +#if !UCONFIG_NO_FORMATTING
    1.25 +
    1.26 +#include "unicode/uchar.h"
    1.27 +#include "unicode/ustring.h"
    1.28 +#include "unicode/unum.h"
    1.29 +#include "unicode/udat.h"
    1.30 +#include "unicode/uset.h"
    1.31 +#include "uscanf.h"
    1.32 +#include "ufmt_cmn.h"
    1.33 +#include "ufile.h"
    1.34 +#include "locbund.h"
    1.35 +
    1.36 +#include "cmemory.h"
    1.37 +#include "ustr_cnv.h"
    1.38 +
    1.39 +/* flag characters for u_scanf */
    1.40 +#define FLAG_ASTERISK 0x002A
    1.41 +#define FLAG_PAREN 0x0028
    1.42 +
    1.43 +#define ISFLAG(s)    (s) == FLAG_ASTERISK || \
    1.44 +            (s) == FLAG_PAREN
    1.45 +
    1.46 +/* special characters for u_scanf */
    1.47 +#define SPEC_DOLLARSIGN 0x0024
    1.48 +
    1.49 +/* unicode digits */
    1.50 +#define DIGIT_ZERO 0x0030
    1.51 +#define DIGIT_ONE 0x0031
    1.52 +#define DIGIT_TWO 0x0032
    1.53 +#define DIGIT_THREE 0x0033
    1.54 +#define DIGIT_FOUR 0x0034
    1.55 +#define DIGIT_FIVE 0x0035
    1.56 +#define DIGIT_SIX 0x0036
    1.57 +#define DIGIT_SEVEN 0x0037
    1.58 +#define DIGIT_EIGHT 0x0038
    1.59 +#define DIGIT_NINE 0x0039
    1.60 +
    1.61 +#define ISDIGIT(s)    (s) == DIGIT_ZERO || \
    1.62 +            (s) == DIGIT_ONE || \
    1.63 +            (s) == DIGIT_TWO || \
    1.64 +            (s) == DIGIT_THREE || \
    1.65 +            (s) == DIGIT_FOUR || \
    1.66 +            (s) == DIGIT_FIVE || \
    1.67 +            (s) == DIGIT_SIX || \
    1.68 +            (s) == DIGIT_SEVEN || \
    1.69 +            (s) == DIGIT_EIGHT || \
    1.70 +            (s) == DIGIT_NINE
    1.71 +
    1.72 +/* u_scanf modifiers */
    1.73 +#define MOD_H 0x0068
    1.74 +#define MOD_LOWERL 0x006C
    1.75 +#define MOD_L 0x004C
    1.76 +
    1.77 +#define ISMOD(s)    (s) == MOD_H || \
    1.78 +            (s) == MOD_LOWERL || \
    1.79 +            (s) == MOD_L
    1.80 +
    1.81 +/**
    1.82 + * Struct encapsulating a single uscanf format specification.
    1.83 + */
    1.84 +typedef struct u_scanf_spec_info {
    1.85 +    int32_t fWidth;         /* Width  */
    1.86 +
    1.87 +    UChar   fSpec;          /* Format specification  */
    1.88 +
    1.89 +    UChar   fPadChar;       /* Padding character  */
    1.90 +
    1.91 +    UBool   fSkipArg;       /* TRUE if arg should be skipped */
    1.92 +    UBool   fIsLongDouble;  /* L flag  */
    1.93 +    UBool   fIsShort;       /* h flag  */
    1.94 +    UBool   fIsLong;        /* l flag  */
    1.95 +    UBool   fIsLongLong;    /* ll flag  */
    1.96 +    UBool   fIsString;      /* TRUE if this is a NULL-terminated string. */
    1.97 +} u_scanf_spec_info;
    1.98 +
    1.99 +
   1.100 +/**
   1.101 + * Struct encapsulating a single u_scanf format specification.
   1.102 + */
   1.103 +typedef struct u_scanf_spec {
   1.104 +    u_scanf_spec_info    fInfo;        /* Information on this spec */
   1.105 +    int32_t        fArgPos;    /* Position of data in arg list */
   1.106 +} u_scanf_spec;
   1.107 +
   1.108 +/**
   1.109 + * Parse a single u_scanf format specifier in Unicode.
   1.110 + * @param fmt A pointer to a '%' character in a u_scanf format specification.
   1.111 + * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
   1.112 + * format specifier.
   1.113 + * @return The number of characters contained in this specifier.
   1.114 + */
   1.115 +static int32_t
   1.116 +u_scanf_parse_spec (const UChar     *fmt,
   1.117 +            u_scanf_spec    *spec)
   1.118 +{
   1.119 +    const UChar *s = fmt;
   1.120 +    const UChar *backup;
   1.121 +    u_scanf_spec_info *info = &(spec->fInfo);
   1.122 +
   1.123 +    /* initialize spec to default values */
   1.124 +    spec->fArgPos             = -1;
   1.125 +
   1.126 +    info->fWidth        = -1;
   1.127 +    info->fSpec         = 0x0000;
   1.128 +    info->fPadChar      = 0x0020;
   1.129 +    info->fSkipArg      = FALSE;
   1.130 +    info->fIsLongDouble = FALSE;
   1.131 +    info->fIsShort      = FALSE;
   1.132 +    info->fIsLong       = FALSE;
   1.133 +    info->fIsLongLong   = FALSE;
   1.134 +    info->fIsString     = TRUE;
   1.135 +
   1.136 +
   1.137 +    /* skip over the initial '%' */
   1.138 +    s++;
   1.139 +
   1.140 +    /* Check for positional argument */
   1.141 +    if(ISDIGIT(*s)) {
   1.142 +
   1.143 +        /* Save the current position */
   1.144 +        backup = s;
   1.145 +
   1.146 +        /* handle positional parameters */
   1.147 +        if(ISDIGIT(*s)) {
   1.148 +            spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
   1.149 +
   1.150 +            while(ISDIGIT(*s)) {
   1.151 +                spec->fArgPos *= 10;
   1.152 +                spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
   1.153 +            }
   1.154 +        }
   1.155 +
   1.156 +        /* if there is no '$', don't read anything */
   1.157 +        if(*s != SPEC_DOLLARSIGN) {
   1.158 +            spec->fArgPos = -1;
   1.159 +            s = backup;
   1.160 +        }
   1.161 +        /* munge the '$' */
   1.162 +        else
   1.163 +            s++;
   1.164 +    }
   1.165 +
   1.166 +    /* Get any format flags */
   1.167 +    while(ISFLAG(*s)) {
   1.168 +        switch(*s++) {
   1.169 +
   1.170 +            /* skip argument */
   1.171 +        case FLAG_ASTERISK:
   1.172 +            info->fSkipArg = TRUE;
   1.173 +            break;
   1.174 +
   1.175 +            /* pad character specified */
   1.176 +        case FLAG_PAREN:
   1.177 +
   1.178 +            /* first four characters are hex values for pad char */
   1.179 +            info->fPadChar = (UChar)ufmt_digitvalue(*s++);
   1.180 +            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
   1.181 +            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
   1.182 +            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
   1.183 +
   1.184 +            /* final character is ignored */
   1.185 +            s++;
   1.186 +
   1.187 +            break;
   1.188 +        }
   1.189 +    }
   1.190 +
   1.191 +    /* Get the width */
   1.192 +    if(ISDIGIT(*s)){
   1.193 +        info->fWidth = (int) (*s++ - DIGIT_ZERO);
   1.194 +
   1.195 +        while(ISDIGIT(*s)) {
   1.196 +            info->fWidth *= 10;
   1.197 +            info->fWidth += (int) (*s++ - DIGIT_ZERO);
   1.198 +        }
   1.199 +    }
   1.200 +
   1.201 +    /* Get any modifiers */
   1.202 +    if(ISMOD(*s)) {
   1.203 +        switch(*s++) {
   1.204 +
   1.205 +            /* short */
   1.206 +        case MOD_H:
   1.207 +            info->fIsShort = TRUE;
   1.208 +            break;
   1.209 +
   1.210 +            /* long or long long */
   1.211 +        case MOD_LOWERL:
   1.212 +            if(*s == MOD_LOWERL) {
   1.213 +                info->fIsLongLong = TRUE;
   1.214 +                /* skip over the next 'l' */
   1.215 +                s++;
   1.216 +            }
   1.217 +            else
   1.218 +                info->fIsLong = TRUE;
   1.219 +            break;
   1.220 +
   1.221 +            /* long double */
   1.222 +        case MOD_L:
   1.223 +            info->fIsLongDouble = TRUE;
   1.224 +            break;
   1.225 +        }
   1.226 +    }
   1.227 +
   1.228 +    /* finally, get the specifier letter */
   1.229 +    info->fSpec = *s++;
   1.230 +
   1.231 +    /* return # of characters in this specifier */
   1.232 +    return (int32_t)(s - fmt);
   1.233 +}
   1.234 +
   1.235 +#define UP_PERCENT 0x0025
   1.236 +
   1.237 +
   1.238 +/* ANSI style formatting */
   1.239 +/* Use US-ASCII characters only for formatting */
   1.240 +
   1.241 +/* % */
   1.242 +#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
   1.243 +/* s */
   1.244 +#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}
   1.245 +/* c */
   1.246 +#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}
   1.247 +/* d, i */
   1.248 +#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}
   1.249 +/* u */
   1.250 +#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}
   1.251 +/* o */
   1.252 +#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}
   1.253 +/* x, X */
   1.254 +#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}
   1.255 +/* f */
   1.256 +#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}
   1.257 +/* e, E */
   1.258 +#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}
   1.259 +/* g, G */
   1.260 +#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}
   1.261 +/* n */
   1.262 +#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}
   1.263 +/* [ */
   1.264 +#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}
   1.265 +
   1.266 +/* non-ANSI extensions */
   1.267 +/* Use US-ASCII characters only for formatting */
   1.268 +
   1.269 +/* p */
   1.270 +#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}
   1.271 +/* V */
   1.272 +#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}
   1.273 +/* P */
   1.274 +#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}
   1.275 +/* C  K is old format */
   1.276 +#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}
   1.277 +/* S  U is old format */
   1.278 +#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}
   1.279 +
   1.280 +
   1.281 +#define UFMT_EMPTY {ufmt_empty, NULL}
   1.282 +
   1.283 +/**
   1.284 + * A u_scanf handler function.  
   1.285 + * A u_scanf handler is responsible for handling a single u_scanf 
   1.286 + * format specification, for example 'd' or 's'.
   1.287 + * @param stream The UFILE to which to write output.
   1.288 + * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
   1.289 + * information on the format specification.
   1.290 + * @param args A pointer to the argument data
   1.291 + * @param fmt A pointer to the first character in the format string
   1.292 + * following the spec.
   1.293 + * @param fmtConsumed On output, set to the number of characters consumed
   1.294 + * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
   1.295 + * @param argConverted The number of arguments converted and assigned, or -1 if an
   1.296 + * error occurred.
   1.297 + * @return The number of code points consumed during reading.
   1.298 + */
   1.299 +typedef int32_t (*u_scanf_handler) (UFILE   *stream,
   1.300 +                   u_scanf_spec_info  *info,
   1.301 +                   ufmt_args                *args,
   1.302 +                   const UChar              *fmt,
   1.303 +                   int32_t                  *fmtConsumed,
   1.304 +                   int32_t                  *argConverted);
   1.305 +
   1.306 +typedef struct u_scanf_info {
   1.307 +    ufmt_type_info info;
   1.308 +    u_scanf_handler handler;
   1.309 +} u_scanf_info;
   1.310 +
   1.311 +#define USCANF_NUM_FMT_HANDLERS 108
   1.312 +#define USCANF_SYMBOL_BUFFER_SIZE 8
   1.313 +
   1.314 +/* We do not use handlers for 0-0x1f */
   1.315 +#define USCANF_BASE_FMT_HANDLERS 0x20
   1.316 +
   1.317 +
   1.318 +static int32_t
   1.319 +u_scanf_skip_leading_ws(UFILE   *input,
   1.320 +                        UChar   pad)
   1.321 +{
   1.322 +    UChar   c;
   1.323 +    int32_t count = 0;
   1.324 +    UBool isNotEOF;
   1.325 +
   1.326 +    /* skip all leading ws in the input */
   1.327 +    while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) )
   1.328 +    {
   1.329 +        count++;
   1.330 +    }
   1.331 +
   1.332 +    /* put the final character back on the input */
   1.333 +    if(isNotEOF)
   1.334 +        u_fungetc(c, input);
   1.335 +
   1.336 +    return count;
   1.337 +}
   1.338 +
   1.339 +/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
   1.340 +static int32_t
   1.341 +u_scanf_skip_leading_positive_sign(UFILE   *input,
   1.342 +                                   UNumberFormat *format,
   1.343 +                                   UErrorCode *status)
   1.344 +{
   1.345 +    UChar   c;
   1.346 +    int32_t count = 0;
   1.347 +    UBool isNotEOF;
   1.348 +    UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
   1.349 +    int32_t symbolLen;
   1.350 +    UErrorCode localStatus = U_ZERO_ERROR;
   1.351 +
   1.352 +    if (U_SUCCESS(*status)) {
   1.353 +        symbolLen = unum_getSymbol(format,
   1.354 +            UNUM_PLUS_SIGN_SYMBOL,
   1.355 +            plusSymbol,
   1.356 +            sizeof(plusSymbol)/sizeof(*plusSymbol),
   1.357 +            &localStatus);
   1.358 +
   1.359 +        if (U_SUCCESS(localStatus)) {
   1.360 +            /* skip all leading ws in the input */
   1.361 +            while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) )
   1.362 +            {
   1.363 +                count++;
   1.364 +            }
   1.365 +
   1.366 +            /* put the final character back on the input */
   1.367 +            if(isNotEOF) {
   1.368 +                u_fungetc(c, input);
   1.369 +            }
   1.370 +        }
   1.371 +    }
   1.372 +
   1.373 +    return count;
   1.374 +}
   1.375 +
   1.376 +static int32_t 
   1.377 +u_scanf_simple_percent_handler(UFILE        *input,
   1.378 +                               u_scanf_spec_info *info,
   1.379 +                               ufmt_args    *args,
   1.380 +                               const UChar  *fmt,
   1.381 +                               int32_t      *fmtConsumed,
   1.382 +                               int32_t      *argConverted)
   1.383 +{
   1.384 +    /* make sure the next character in the input is a percent */
   1.385 +    *argConverted = 0;
   1.386 +    if(u_fgetc(input) != 0x0025) {
   1.387 +        *argConverted = -1;
   1.388 +    }
   1.389 +    return 1;
   1.390 +}
   1.391 +
   1.392 +static int32_t
   1.393 +u_scanf_count_handler(UFILE         *input,
   1.394 +                      u_scanf_spec_info *info,
   1.395 +                      ufmt_args     *args,
   1.396 +                      const UChar   *fmt,
   1.397 +                      int32_t       *fmtConsumed,
   1.398 +                      int32_t       *argConverted)
   1.399 +{
   1.400 +    /* in the special case of count, the u_scanf_spec_info's width */
   1.401 +    /* will contain the # of items converted thus far */
   1.402 +    if (!info->fSkipArg) {
   1.403 +        if (info->fIsShort)
   1.404 +            *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
   1.405 +        else if (info->fIsLongLong)
   1.406 +            *(int64_t*)(args[0].ptrValue) = info->fWidth;
   1.407 +        else
   1.408 +            *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
   1.409 +    }
   1.410 +    *argConverted = 0;
   1.411 +
   1.412 +    /* we converted 0 args */
   1.413 +    return 0;
   1.414 +}
   1.415 +
   1.416 +static int32_t
   1.417 +u_scanf_double_handler(UFILE        *input,
   1.418 +                       u_scanf_spec_info *info,
   1.419 +                       ufmt_args    *args,
   1.420 +                       const UChar  *fmt,
   1.421 +                       int32_t      *fmtConsumed,
   1.422 +                       int32_t      *argConverted)
   1.423 +{
   1.424 +    int32_t         len;
   1.425 +    double          num;
   1.426 +    UNumberFormat   *format;
   1.427 +    int32_t         parsePos    = 0;
   1.428 +    int32_t         skipped;
   1.429 +    UErrorCode      status      = U_ZERO_ERROR;
   1.430 +
   1.431 +
   1.432 +    /* skip all ws in the input */
   1.433 +    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1.434 +
   1.435 +    /* fill the input's internal buffer */
   1.436 +    ufile_fill_uchar_buffer(input);
   1.437 +
   1.438 +    /* determine the size of the input's buffer */
   1.439 +    len = (int32_t)(input->str.fLimit - input->str.fPos);
   1.440 +
   1.441 +    /* truncate to the width, if specified */
   1.442 +    if(info->fWidth != -1)
   1.443 +        len = ufmt_min(len, info->fWidth);
   1.444 +
   1.445 +    /* get the formatter */
   1.446 +    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
   1.447 +
   1.448 +    /* handle error */
   1.449 +    if(format == 0)
   1.450 +        return 0;
   1.451 +
   1.452 +    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
   1.453 +    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
   1.454 +
   1.455 +    /* parse the number */
   1.456 +    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
   1.457 +
   1.458 +    if (!info->fSkipArg) {
   1.459 +        if (info->fIsLong)
   1.460 +            *(double*)(args[0].ptrValue) = num;
   1.461 +        else if (info->fIsLongDouble)
   1.462 +            *(long double*)(args[0].ptrValue) = num;
   1.463 +        else
   1.464 +            *(float*)(args[0].ptrValue) = (float)num;
   1.465 +    }
   1.466 +
   1.467 +    /* mask off any necessary bits */
   1.468 +    /*  if(! info->fIsLong_double)
   1.469 +    num &= DBL_MAX;*/
   1.470 +
   1.471 +    /* update the input's position to reflect consumed data */
   1.472 +    input->str.fPos += parsePos;
   1.473 +
   1.474 +    /* we converted 1 arg */
   1.475 +    *argConverted = !info->fSkipArg;
   1.476 +    return parsePos + skipped;
   1.477 +}
   1.478 +
   1.479 +#define UPRINTF_SYMBOL_BUFFER_SIZE 8
   1.480 +
   1.481 +static int32_t
   1.482 +u_scanf_scientific_handler(UFILE        *input,
   1.483 +                           u_scanf_spec_info *info,
   1.484 +                           ufmt_args    *args,
   1.485 +                           const UChar  *fmt,
   1.486 +                           int32_t      *fmtConsumed,
   1.487 +                           int32_t      *argConverted)
   1.488 +{
   1.489 +    int32_t         len;
   1.490 +    double          num;
   1.491 +    UNumberFormat   *format;
   1.492 +    int32_t         parsePos    = 0;
   1.493 +    int32_t         skipped;
   1.494 +    UErrorCode      status      = U_ZERO_ERROR;
   1.495 +    UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
   1.496 +    int32_t srcLen, expLen;
   1.497 +    UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
   1.498 +
   1.499 +
   1.500 +    /* skip all ws in the input */
   1.501 +    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1.502 +
   1.503 +    /* fill the input's internal buffer */
   1.504 +    ufile_fill_uchar_buffer(input);
   1.505 +
   1.506 +    /* determine the size of the input's buffer */
   1.507 +    len = (int32_t)(input->str.fLimit - input->str.fPos);
   1.508 +
   1.509 +    /* truncate to the width, if specified */
   1.510 +    if(info->fWidth != -1)
   1.511 +        len = ufmt_min(len, info->fWidth);
   1.512 +
   1.513 +    /* get the formatter */
   1.514 +    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
   1.515 +
   1.516 +    /* handle error */
   1.517 +    if(format == 0)
   1.518 +        return 0;
   1.519 +
   1.520 +    /* set the appropriate flags on the formatter */
   1.521 +
   1.522 +    srcLen = unum_getSymbol(format,
   1.523 +        UNUM_EXPONENTIAL_SYMBOL,
   1.524 +        srcExpBuf,
   1.525 +        sizeof(srcExpBuf),
   1.526 +        &status);
   1.527 +
   1.528 +    /* Upper/lower case the e */
   1.529 +    if (info->fSpec == (UChar)0x65 /* e */) {
   1.530 +        expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf),
   1.531 +            srcExpBuf, srcLen,
   1.532 +            input->str.fBundle.fLocale,
   1.533 +            &status);
   1.534 +    }
   1.535 +    else {
   1.536 +        expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf),
   1.537 +            srcExpBuf, srcLen,
   1.538 +            input->str.fBundle.fLocale,
   1.539 +            &status);
   1.540 +    }
   1.541 +
   1.542 +    unum_setSymbol(format,
   1.543 +        UNUM_EXPONENTIAL_SYMBOL,
   1.544 +        expBuf,
   1.545 +        expLen,
   1.546 +        &status);
   1.547 +
   1.548 +
   1.549 +
   1.550 +
   1.551 +    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
   1.552 +    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
   1.553 +
   1.554 +    /* parse the number */
   1.555 +    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
   1.556 +
   1.557 +    if (!info->fSkipArg) {
   1.558 +        if (info->fIsLong)
   1.559 +            *(double*)(args[0].ptrValue) = num;
   1.560 +        else if (info->fIsLongDouble)
   1.561 +            *(long double*)(args[0].ptrValue) = num;
   1.562 +        else
   1.563 +            *(float*)(args[0].ptrValue) = (float)num;
   1.564 +    }
   1.565 +
   1.566 +    /* mask off any necessary bits */
   1.567 +    /*  if(! info->fIsLong_double)
   1.568 +    num &= DBL_MAX;*/
   1.569 +
   1.570 +    /* update the input's position to reflect consumed data */
   1.571 +    input->str.fPos += parsePos;
   1.572 +
   1.573 +    /* we converted 1 arg */
   1.574 +    *argConverted = !info->fSkipArg;
   1.575 +    return parsePos + skipped;
   1.576 +}
   1.577 +
   1.578 +static int32_t
   1.579 +u_scanf_scidbl_handler(UFILE        *input,
   1.580 +                       u_scanf_spec_info *info,
   1.581 +                       ufmt_args    *args,
   1.582 +                       const UChar  *fmt,
   1.583 +                       int32_t      *fmtConsumed,
   1.584 +                       int32_t      *argConverted)
   1.585 +{
   1.586 +    int32_t       len;
   1.587 +    double        num;
   1.588 +    UNumberFormat *scientificFormat, *genericFormat;
   1.589 +    /*int32_t       scientificResult, genericResult;*/
   1.590 +    double        scientificResult, genericResult;
   1.591 +    int32_t       scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
   1.592 +    int32_t       skipped;
   1.593 +    UErrorCode    scientificStatus = U_ZERO_ERROR;
   1.594 +    UErrorCode    genericStatus = U_ZERO_ERROR;
   1.595 +
   1.596 +
   1.597 +    /* since we can't determine by scanning the characters whether */
   1.598 +    /* a number was formatted in the 'f' or 'g' styles, parse the */
   1.599 +    /* string with both formatters, and assume whichever one */
   1.600 +    /* parsed the most is the correct formatter to use */
   1.601 +
   1.602 +
   1.603 +    /* skip all ws in the input */
   1.604 +    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1.605 +
   1.606 +    /* fill the input's internal buffer */
   1.607 +    ufile_fill_uchar_buffer(input);
   1.608 +
   1.609 +    /* determine the size of the input's buffer */
   1.610 +    len = (int32_t)(input->str.fLimit - input->str.fPos);
   1.611 +
   1.612 +    /* truncate to the width, if specified */
   1.613 +    if(info->fWidth != -1)
   1.614 +        len = ufmt_min(len, info->fWidth);
   1.615 +
   1.616 +    /* get the formatters */
   1.617 +    scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
   1.618 +    genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
   1.619 +
   1.620 +    /* handle error */
   1.621 +    if(scientificFormat == 0 || genericFormat == 0)
   1.622 +        return 0;
   1.623 +
   1.624 +    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
   1.625 +    skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
   1.626 +
   1.627 +    /* parse the number using each format*/
   1.628 +
   1.629 +    scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
   1.630 +        &scientificParsePos, &scientificStatus);
   1.631 +
   1.632 +    genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
   1.633 +        &genericParsePos, &genericStatus);
   1.634 +
   1.635 +    /* determine which parse made it farther */
   1.636 +    if(scientificParsePos > genericParsePos) {
   1.637 +        /* stash the result in num */
   1.638 +        num = scientificResult;
   1.639 +        /* update the input's position to reflect consumed data */
   1.640 +        parsePos += scientificParsePos;
   1.641 +    }
   1.642 +    else {
   1.643 +        /* stash the result in num */
   1.644 +        num = genericResult;
   1.645 +        /* update the input's position to reflect consumed data */
   1.646 +        parsePos += genericParsePos;
   1.647 +    }
   1.648 +    input->str.fPos += parsePos;
   1.649 +
   1.650 +    if (!info->fSkipArg) {
   1.651 +        if (info->fIsLong)
   1.652 +            *(double*)(args[0].ptrValue) = num;
   1.653 +        else if (info->fIsLongDouble)
   1.654 +            *(long double*)(args[0].ptrValue) = num;
   1.655 +        else
   1.656 +            *(float*)(args[0].ptrValue) = (float)num;
   1.657 +    }
   1.658 +
   1.659 +    /* mask off any necessary bits */
   1.660 +    /*  if(! info->fIsLong_double)
   1.661 +    num &= DBL_MAX;*/
   1.662 +
   1.663 +    /* we converted 1 arg */
   1.664 +    *argConverted = !info->fSkipArg;
   1.665 +    return parsePos + skipped;
   1.666 +}
   1.667 +
   1.668 +static int32_t
   1.669 +u_scanf_integer_handler(UFILE       *input,
   1.670 +                        u_scanf_spec_info *info,
   1.671 +                        ufmt_args   *args,
   1.672 +                        const UChar *fmt,
   1.673 +                        int32_t     *fmtConsumed,
   1.674 +                        int32_t     *argConverted)
   1.675 +{
   1.676 +    int32_t         len;
   1.677 +    void            *num        = (void*) (args[0].ptrValue);
   1.678 +    UNumberFormat   *format;
   1.679 +    int32_t         parsePos    = 0;
   1.680 +    int32_t         skipped;
   1.681 +    UErrorCode      status      = U_ZERO_ERROR;
   1.682 +    int64_t         result;
   1.683 +
   1.684 +
   1.685 +    /* skip all ws in the input */
   1.686 +    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1.687 +
   1.688 +    /* fill the input's internal buffer */
   1.689 +    ufile_fill_uchar_buffer(input);
   1.690 +
   1.691 +    /* determine the size of the input's buffer */
   1.692 +    len = (int32_t)(input->str.fLimit - input->str.fPos);
   1.693 +
   1.694 +    /* truncate to the width, if specified */
   1.695 +    if(info->fWidth != -1)
   1.696 +        len = ufmt_min(len, info->fWidth);
   1.697 +
   1.698 +    /* get the formatter */
   1.699 +    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
   1.700 +
   1.701 +    /* handle error */
   1.702 +    if(format == 0)
   1.703 +        return 0;
   1.704 +
   1.705 +    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
   1.706 +    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
   1.707 +
   1.708 +    /* parse the number */
   1.709 +    result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
   1.710 +
   1.711 +    /* mask off any necessary bits */
   1.712 +    if (!info->fSkipArg) {
   1.713 +        if (info->fIsShort)
   1.714 +            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
   1.715 +        else if (info->fIsLongLong)
   1.716 +            *(int64_t*)num = result;
   1.717 +        else
   1.718 +            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
   1.719 +    }
   1.720 +
   1.721 +    /* update the input's position to reflect consumed data */
   1.722 +    input->str.fPos += parsePos;
   1.723 +
   1.724 +    /* we converted 1 arg */
   1.725 +    *argConverted = !info->fSkipArg;
   1.726 +    return parsePos + skipped;
   1.727 +}
   1.728 +
   1.729 +static int32_t
   1.730 +u_scanf_uinteger_handler(UFILE          *input,
   1.731 +                         u_scanf_spec_info *info,
   1.732 +                         ufmt_args      *args,
   1.733 +                         const UChar    *fmt,
   1.734 +                         int32_t        *fmtConsumed,
   1.735 +                         int32_t        *argConverted)
   1.736 +{
   1.737 +    /* TODO Fix this when Numberformat handles uint64_t */
   1.738 +    return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
   1.739 +}
   1.740 +
   1.741 +static int32_t
   1.742 +u_scanf_percent_handler(UFILE       *input,
   1.743 +                        u_scanf_spec_info *info,
   1.744 +                        ufmt_args   *args,
   1.745 +                        const UChar *fmt,
   1.746 +                        int32_t     *fmtConsumed,
   1.747 +                        int32_t     *argConverted)
   1.748 +{
   1.749 +    int32_t         len;
   1.750 +    double          num;
   1.751 +    UNumberFormat   *format;
   1.752 +    int32_t         parsePos    = 0;
   1.753 +    UErrorCode      status      = U_ZERO_ERROR;
   1.754 +
   1.755 +
   1.756 +    /* skip all ws in the input */
   1.757 +    u_scanf_skip_leading_ws(input, info->fPadChar);
   1.758 +
   1.759 +    /* fill the input's internal buffer */
   1.760 +    ufile_fill_uchar_buffer(input);
   1.761 +
   1.762 +    /* determine the size of the input's buffer */
   1.763 +    len = (int32_t)(input->str.fLimit - input->str.fPos);
   1.764 +
   1.765 +    /* truncate to the width, if specified */
   1.766 +    if(info->fWidth != -1)
   1.767 +        len = ufmt_min(len, info->fWidth);
   1.768 +
   1.769 +    /* get the formatter */
   1.770 +    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
   1.771 +
   1.772 +    /* handle error */
   1.773 +    if(format == 0)
   1.774 +        return 0;
   1.775 +
   1.776 +    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
   1.777 +    u_scanf_skip_leading_positive_sign(input, format, &status);
   1.778 +
   1.779 +    /* parse the number */
   1.780 +    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
   1.781 +
   1.782 +    if (!info->fSkipArg) {
   1.783 +        *(double*)(args[0].ptrValue) = num;
   1.784 +    }
   1.785 +
   1.786 +    /* mask off any necessary bits */
   1.787 +    /*  if(! info->fIsLong_double)
   1.788 +    num &= DBL_MAX;*/
   1.789 +
   1.790 +    /* update the input's position to reflect consumed data */
   1.791 +    input->str.fPos += parsePos;
   1.792 +
   1.793 +    /* we converted 1 arg */
   1.794 +    *argConverted = !info->fSkipArg;
   1.795 +    return parsePos;
   1.796 +}
   1.797 +
   1.798 +static int32_t
   1.799 +u_scanf_string_handler(UFILE        *input,
   1.800 +                       u_scanf_spec_info *info,
   1.801 +                       ufmt_args    *args,
   1.802 +                       const UChar  *fmt,
   1.803 +                       int32_t      *fmtConsumed,
   1.804 +                       int32_t      *argConverted)
   1.805 +{
   1.806 +    const UChar *source;
   1.807 +    UConverter  *conv;
   1.808 +    char        *arg    = (char*)(args[0].ptrValue);
   1.809 +    char        *alias  = arg;
   1.810 +    char        *limit;
   1.811 +    UErrorCode  status  = U_ZERO_ERROR;
   1.812 +    int32_t     count;
   1.813 +    int32_t     skipped = 0;
   1.814 +    UChar       c;
   1.815 +    UBool       isNotEOF = FALSE;
   1.816 +
   1.817 +    /* skip all ws in the input */
   1.818 +    if (info->fIsString) {
   1.819 +        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1.820 +    }
   1.821 +
   1.822 +    /* get the string one character at a time, truncating to the width */
   1.823 +    count = 0;
   1.824 +
   1.825 +    /* open the default converter */
   1.826 +    conv = u_getDefaultConverter(&status);
   1.827 +
   1.828 +    if(U_FAILURE(status))
   1.829 +        return -1;
   1.830 +
   1.831 +    while( (info->fWidth == -1 || count < info->fWidth) 
   1.832 +        && (isNotEOF = ufile_getch(input, &c))
   1.833 +        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
   1.834 +    {
   1.835 +
   1.836 +        if (!info->fSkipArg) {
   1.837 +            /* put the character from the input onto the target */
   1.838 +            source = &c;
   1.839 +            /* Since we do this one character at a time, do it this way. */
   1.840 +            if (info->fWidth > 0) {
   1.841 +                limit = alias + info->fWidth - count;
   1.842 +            }
   1.843 +            else {
   1.844 +                limit = alias + ucnv_getMaxCharSize(conv);
   1.845 +            }
   1.846 +
   1.847 +            /* convert the character to the default codepage */
   1.848 +            ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
   1.849 +                NULL, TRUE, &status);
   1.850 +
   1.851 +            if(U_FAILURE(status)) {
   1.852 +                /* clean up */
   1.853 +                u_releaseDefaultConverter(conv);
   1.854 +                return -1;
   1.855 +            }
   1.856 +        }
   1.857 +
   1.858 +        /* increment the count */
   1.859 +        ++count;
   1.860 +    }
   1.861 +
   1.862 +    /* put the final character we read back on the input */
   1.863 +    if (!info->fSkipArg) {
   1.864 +        if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
   1.865 +            u_fungetc(c, input);
   1.866 +
   1.867 +        /* add the terminator */
   1.868 +        if (info->fIsString) {
   1.869 +            *alias = 0x00;
   1.870 +        }
   1.871 +    }
   1.872 +
   1.873 +    /* clean up */
   1.874 +    u_releaseDefaultConverter(conv);
   1.875 +
   1.876 +    /* we converted 1 arg */
   1.877 +    *argConverted = !info->fSkipArg;
   1.878 +    return count + skipped;
   1.879 +}
   1.880 +
   1.881 +static int32_t
   1.882 +u_scanf_char_handler(UFILE          *input,
   1.883 +                     u_scanf_spec_info *info,
   1.884 +                     ufmt_args      *args,
   1.885 +                     const UChar    *fmt,
   1.886 +                     int32_t        *fmtConsumed,
   1.887 +                     int32_t        *argConverted)
   1.888 +{
   1.889 +    if (info->fWidth < 0) {
   1.890 +        info->fWidth = 1;
   1.891 +    }
   1.892 +    info->fIsString = FALSE;
   1.893 +    return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
   1.894 +}
   1.895 +
   1.896 +static int32_t
   1.897 +u_scanf_ustring_handler(UFILE       *input,
   1.898 +                        u_scanf_spec_info *info,
   1.899 +                        ufmt_args   *args,
   1.900 +                        const UChar *fmt,
   1.901 +                        int32_t     *fmtConsumed,
   1.902 +                        int32_t     *argConverted)
   1.903 +{
   1.904 +    UChar   *arg     = (UChar*)(args[0].ptrValue);
   1.905 +    UChar   *alias     = arg;
   1.906 +    int32_t count;
   1.907 +    int32_t skipped = 0;
   1.908 +    UChar   c;
   1.909 +    UBool   isNotEOF = FALSE;
   1.910 +
   1.911 +    /* skip all ws in the input */
   1.912 +    if (info->fIsString) {
   1.913 +        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1.914 +    }
   1.915 +
   1.916 +    /* get the string one character at a time, truncating to the width */
   1.917 +    count = 0;
   1.918 +
   1.919 +    while( (info->fWidth == -1 || count < info->fWidth)
   1.920 +        && (isNotEOF = ufile_getch(input, &c))
   1.921 +        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
   1.922 +    {
   1.923 +
   1.924 +        /* put the character from the input onto the target */
   1.925 +        if (!info->fSkipArg) {
   1.926 +            *alias++ = c;
   1.927 +        }
   1.928 +
   1.929 +        /* increment the count */
   1.930 +        ++count;
   1.931 +    }
   1.932 +
   1.933 +    /* put the final character we read back on the input */
   1.934 +    if (!info->fSkipArg) {
   1.935 +        if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
   1.936 +            u_fungetc(c, input);
   1.937 +        }
   1.938 +
   1.939 +        /* add the terminator */
   1.940 +        if (info->fIsString) {
   1.941 +            *alias = 0x0000;
   1.942 +        }
   1.943 +    }
   1.944 +
   1.945 +    /* we converted 1 arg */
   1.946 +    *argConverted = !info->fSkipArg;
   1.947 +    return count + skipped;
   1.948 +}
   1.949 +
   1.950 +static int32_t
   1.951 +u_scanf_uchar_handler(UFILE         *input,
   1.952 +                      u_scanf_spec_info *info,
   1.953 +                      ufmt_args     *args,
   1.954 +                      const UChar   *fmt,
   1.955 +                      int32_t       *fmtConsumed,
   1.956 +                      int32_t       *argConverted)
   1.957 +{
   1.958 +    if (info->fWidth < 0) {
   1.959 +        info->fWidth = 1;
   1.960 +    }
   1.961 +    info->fIsString = FALSE;
   1.962 +    return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
   1.963 +}
   1.964 +
   1.965 +static int32_t
   1.966 +u_scanf_spellout_handler(UFILE          *input,
   1.967 +                         u_scanf_spec_info *info,
   1.968 +                         ufmt_args      *args,
   1.969 +                         const UChar    *fmt,
   1.970 +                         int32_t        *fmtConsumed,
   1.971 +                         int32_t        *argConverted)
   1.972 +{
   1.973 +    int32_t         len;
   1.974 +    double          num;
   1.975 +    UNumberFormat   *format;
   1.976 +    int32_t         parsePos    = 0;
   1.977 +    int32_t         skipped;
   1.978 +    UErrorCode      status      = U_ZERO_ERROR;
   1.979 +
   1.980 +
   1.981 +    /* skip all ws in the input */
   1.982 +    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
   1.983 +
   1.984 +    /* fill the input's internal buffer */
   1.985 +    ufile_fill_uchar_buffer(input);
   1.986 +
   1.987 +    /* determine the size of the input's buffer */
   1.988 +    len = (int32_t)(input->str.fLimit - input->str.fPos);
   1.989 +
   1.990 +    /* truncate to the width, if specified */
   1.991 +    if(info->fWidth != -1)
   1.992 +        len = ufmt_min(len, info->fWidth);
   1.993 +
   1.994 +    /* get the formatter */
   1.995 +    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
   1.996 +
   1.997 +    /* handle error */
   1.998 +    if(format == 0)
   1.999 +        return 0;
  1.1000 +
  1.1001 +    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
  1.1002 +    /* This is not applicable to RBNF. */
  1.1003 +    /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
  1.1004 +
  1.1005 +    /* parse the number */
  1.1006 +    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
  1.1007 +
  1.1008 +    if (!info->fSkipArg) {
  1.1009 +        *(double*)(args[0].ptrValue) = num;
  1.1010 +    }
  1.1011 +
  1.1012 +    /* mask off any necessary bits */
  1.1013 +    /*  if(! info->fIsLong_double)
  1.1014 +    num &= DBL_MAX;*/
  1.1015 +
  1.1016 +    /* update the input's position to reflect consumed data */
  1.1017 +    input->str.fPos += parsePos;
  1.1018 +
  1.1019 +    /* we converted 1 arg */
  1.1020 +    *argConverted = !info->fSkipArg;
  1.1021 +    return parsePos + skipped;
  1.1022 +}
  1.1023 +
  1.1024 +static int32_t
  1.1025 +u_scanf_hex_handler(UFILE       *input,
  1.1026 +                    u_scanf_spec_info *info,
  1.1027 +                    ufmt_args   *args,
  1.1028 +                    const UChar *fmt,
  1.1029 +                    int32_t     *fmtConsumed,
  1.1030 +                    int32_t     *argConverted)
  1.1031 +{
  1.1032 +    int32_t     len;
  1.1033 +    int32_t     skipped;
  1.1034 +    void        *num    = (void*) (args[0].ptrValue);
  1.1035 +    int64_t     result;
  1.1036 +
  1.1037 +    /* skip all ws in the input */
  1.1038 +    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
  1.1039 +
  1.1040 +    /* fill the input's internal buffer */
  1.1041 +    ufile_fill_uchar_buffer(input);
  1.1042 +
  1.1043 +    /* determine the size of the input's buffer */
  1.1044 +    len = (int32_t)(input->str.fLimit - input->str.fPos);
  1.1045 +
  1.1046 +    /* truncate to the width, if specified */
  1.1047 +    if(info->fWidth != -1)
  1.1048 +        len = ufmt_min(len, info->fWidth);
  1.1049 +
  1.1050 +    /* check for alternate form */
  1.1051 +    if( *(input->str.fPos) == 0x0030 &&
  1.1052 +        (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
  1.1053 +
  1.1054 +        /* skip the '0' and 'x' or 'X' if present */
  1.1055 +        input->str.fPos += 2;
  1.1056 +        len -= 2;
  1.1057 +    }
  1.1058 +
  1.1059 +    /* parse the number */
  1.1060 +    result = ufmt_uto64(input->str.fPos, &len, 16);
  1.1061 +
  1.1062 +    /* update the input's position to reflect consumed data */
  1.1063 +    input->str.fPos += len;
  1.1064 +
  1.1065 +    /* mask off any necessary bits */
  1.1066 +    if (!info->fSkipArg) {
  1.1067 +        if (info->fIsShort)
  1.1068 +            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
  1.1069 +        else if (info->fIsLongLong)
  1.1070 +            *(int64_t*)num = result;
  1.1071 +        else
  1.1072 +            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
  1.1073 +    }
  1.1074 +
  1.1075 +    /* we converted 1 arg */
  1.1076 +    *argConverted = !info->fSkipArg;
  1.1077 +    return len + skipped;
  1.1078 +}
  1.1079 +
  1.1080 +static int32_t
  1.1081 +u_scanf_octal_handler(UFILE         *input,
  1.1082 +                      u_scanf_spec_info *info,
  1.1083 +                      ufmt_args     *args,
  1.1084 +                      const UChar   *fmt,
  1.1085 +                      int32_t       *fmtConsumed,
  1.1086 +                      int32_t       *argConverted)
  1.1087 +{
  1.1088 +    int32_t     len;
  1.1089 +    int32_t     skipped;
  1.1090 +    void        *num         = (void*) (args[0].ptrValue);
  1.1091 +    int64_t     result;
  1.1092 +
  1.1093 +    /* skip all ws in the input */
  1.1094 +    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
  1.1095 +
  1.1096 +    /* fill the input's internal buffer */
  1.1097 +    ufile_fill_uchar_buffer(input);
  1.1098 +
  1.1099 +    /* determine the size of the input's buffer */
  1.1100 +    len = (int32_t)(input->str.fLimit - input->str.fPos);
  1.1101 +
  1.1102 +    /* truncate to the width, if specified */
  1.1103 +    if(info->fWidth != -1)
  1.1104 +        len = ufmt_min(len, info->fWidth);
  1.1105 +
  1.1106 +    /* parse the number */
  1.1107 +    result = ufmt_uto64(input->str.fPos, &len, 8);
  1.1108 +
  1.1109 +    /* update the input's position to reflect consumed data */
  1.1110 +    input->str.fPos += len;
  1.1111 +
  1.1112 +    /* mask off any necessary bits */
  1.1113 +    if (!info->fSkipArg) {
  1.1114 +        if (info->fIsShort)
  1.1115 +            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
  1.1116 +        else if (info->fIsLongLong)
  1.1117 +            *(int64_t*)num = result;
  1.1118 +        else
  1.1119 +            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
  1.1120 +    }
  1.1121 +
  1.1122 +    /* we converted 1 arg */
  1.1123 +    *argConverted = !info->fSkipArg;
  1.1124 +    return len + skipped;
  1.1125 +}
  1.1126 +
  1.1127 +static int32_t
  1.1128 +u_scanf_pointer_handler(UFILE       *input,
  1.1129 +                        u_scanf_spec_info *info,
  1.1130 +                        ufmt_args   *args,
  1.1131 +                        const UChar *fmt,
  1.1132 +                        int32_t     *fmtConsumed,
  1.1133 +                        int32_t     *argConverted)
  1.1134 +{
  1.1135 +    int32_t len;
  1.1136 +    int32_t skipped;
  1.1137 +    void    *result;
  1.1138 +    void    **p     = (void**)(args[0].ptrValue);
  1.1139 +
  1.1140 +
  1.1141 +    /* skip all ws in the input */
  1.1142 +    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
  1.1143 +
  1.1144 +    /* fill the input's internal buffer */
  1.1145 +    ufile_fill_uchar_buffer(input);
  1.1146 +
  1.1147 +    /* determine the size of the input's buffer */
  1.1148 +    len = (int32_t)(input->str.fLimit - input->str.fPos);
  1.1149 +
  1.1150 +    /* truncate to the width, if specified */
  1.1151 +    if(info->fWidth != -1) {
  1.1152 +        len = ufmt_min(len, info->fWidth);
  1.1153 +    }
  1.1154 +
  1.1155 +    /* Make sure that we don't consume too much */
  1.1156 +    if (len > (int32_t)(sizeof(void*)*2)) {
  1.1157 +        len = (int32_t)(sizeof(void*)*2);
  1.1158 +    }
  1.1159 +
  1.1160 +    /* parse the pointer - assign to temporary value */
  1.1161 +    result = ufmt_utop(input->str.fPos, &len);
  1.1162 +
  1.1163 +    if (!info->fSkipArg) {
  1.1164 +        *p = result;
  1.1165 +    }
  1.1166 +
  1.1167 +    /* update the input's position to reflect consumed data */
  1.1168 +    input->str.fPos += len;
  1.1169 +
  1.1170 +    /* we converted 1 arg */
  1.1171 +    *argConverted = !info->fSkipArg;
  1.1172 +    return len + skipped;
  1.1173 +}
  1.1174 +
  1.1175 +static int32_t
  1.1176 +u_scanf_scanset_handler(UFILE       *input,
  1.1177 +                        u_scanf_spec_info *info,
  1.1178 +                        ufmt_args   *args,
  1.1179 +                        const UChar *fmt,
  1.1180 +                        int32_t     *fmtConsumed,
  1.1181 +                        int32_t     *argConverted)
  1.1182 +{
  1.1183 +    USet        *scanset;
  1.1184 +    UErrorCode  status = U_ZERO_ERROR;
  1.1185 +    int32_t     chLeft = INT32_MAX;
  1.1186 +    UChar32     c;
  1.1187 +    UChar       *alias = (UChar*) (args[0].ptrValue);
  1.1188 +    UBool       isNotEOF = FALSE;
  1.1189 +    UBool       readCharacter = FALSE;
  1.1190 +
  1.1191 +    /* Create an empty set */
  1.1192 +    scanset = uset_open(0, -1);
  1.1193 +
  1.1194 +    /* Back up one to get the [ */
  1.1195 +    fmt--;
  1.1196 +
  1.1197 +    /* truncate to the width, if specified and alias the target */
  1.1198 +    if(info->fWidth >= 0) {
  1.1199 +        chLeft = info->fWidth;
  1.1200 +    }
  1.1201 +
  1.1202 +    /* parse the scanset from the fmt string */
  1.1203 +    *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
  1.1204 +
  1.1205 +    /* verify that the parse was successful */
  1.1206 +    if (U_SUCCESS(status)) {
  1.1207 +        c=0;
  1.1208 +
  1.1209 +        /* grab characters one at a time and make sure they are in the scanset */
  1.1210 +        while(chLeft > 0) {
  1.1211 +            if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
  1.1212 +                readCharacter = TRUE;
  1.1213 +                if (!info->fSkipArg) {
  1.1214 +                    int32_t idx = 0;
  1.1215 +                    UBool isError = FALSE;
  1.1216 +
  1.1217 +                    U16_APPEND(alias, idx, chLeft, c, isError);
  1.1218 +                    if (isError) {
  1.1219 +                        break;
  1.1220 +                    }
  1.1221 +                    alias += idx;
  1.1222 +                }
  1.1223 +                chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
  1.1224 +            }
  1.1225 +            else {
  1.1226 +                /* if the character's not in the scanset, break out */
  1.1227 +                break;
  1.1228 +            }
  1.1229 +        }
  1.1230 +
  1.1231 +        /* put the final character we read back on the input */
  1.1232 +        if(isNotEOF && chLeft > 0) {
  1.1233 +            u_fungetc(c, input);
  1.1234 +        }
  1.1235 +    }
  1.1236 +
  1.1237 +    uset_close(scanset);
  1.1238 +
  1.1239 +    /* if we didn't match at least 1 character, fail */
  1.1240 +    if(!readCharacter)
  1.1241 +        return -1;
  1.1242 +    /* otherwise, add the terminator */
  1.1243 +    else if (!info->fSkipArg) {
  1.1244 +        *alias = 0x00;
  1.1245 +    }
  1.1246 +
  1.1247 +    /* we converted 1 arg */
  1.1248 +    *argConverted = !info->fSkipArg;
  1.1249 +    return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
  1.1250 +}
  1.1251 +
/* Dispatch table for conversion specifiers.
 *
 * u_scanf_parse indexes this table with
 * (specifier character - USCANF_BASE_FMT_HANDLERS) and uses the entry's
 * `info` member to decide whether an argument must be fetched and its
 * `handler` member to do the actual scanning. The entries are positional,
 * so their order must not be changed. The row labels give the code point of
 * the first entry of each 16-entry row (this presumes that
 * USCANF_BASE_FMT_HANDLERS is 0x20 - TODO confirm against its definition).
 *
 * Use US-ASCII characters only for formatting. Most codepages have
 * characters 20-7F from Unicode. Using any other codepage specific
 * characters will make it very difficult to format the string on
 * non-Unicode machines. */
static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
/* 0x20 : '%' (0x25) */
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
    UFMT_EMPTY,         UFMT_SIMPLE_PERCENT,UFMT_EMPTY,         UFMT_EMPTY,
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,

/* 0x30 : no specifiers in this row */
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,

/* 0x40 : 'C' (0x43), 'E' (0x45), 'G' (0x47); 'K' (0x4B) only with
   U_USE_OBSOLETE_IO_FORMATTING */
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR,
    UFMT_EMPTY,         UFMT_SCIENTIFIC,    UFMT_EMPTY,         UFMT_SCIDBL,
#ifdef U_USE_OBSOLETE_IO_FORMATTING
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR/*deprecated*/,
#else
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
#endif
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,

/* 0x50 : 'P' (0x50), 'S' (0x53), 'V' (0x56), 'X' (0x58), '[' (0x5B);
   'U' (0x55) only with U_USE_OBSOLETE_IO_FORMATTING */
    UFMT_PERCENT,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_USTRING,
#ifdef U_USE_OBSOLETE_IO_FORMATTING
    UFMT_EMPTY,         UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT,      UFMT_EMPTY,
#else
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SPELLOUT,      UFMT_EMPTY,
#endif
    UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SCANSET,
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,

/* 0x60 : 'c' (0x63), 'd' (0x64), 'e' (0x65), 'f' (0x66), 'g' (0x67),
   'i' (0x69), 'n' (0x6E), 'o' (0x6F) */
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_CHAR,
    UFMT_INT,           UFMT_SCIENTIFIC,    UFMT_DOUBLE,        UFMT_SCIDBL,
    UFMT_EMPTY,         UFMT_INT,           UFMT_EMPTY,         UFMT_EMPTY,
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_COUNT,         UFMT_OCTAL,

/* 0x70 : 'p' (0x70), 's' (0x73), 'u' (0x75), 'x' (0x78) */
    UFMT_POINTER,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_STRING,
    UFMT_EMPTY,         UFMT_UINT,          UFMT_EMPTY,         UFMT_EMPTY,
    UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
};
  1.1301 +
  1.1302 +U_CFUNC int32_t
  1.1303 +u_scanf_parse(UFILE     *f,
  1.1304 +            const UChar *patternSpecification,
  1.1305 +            va_list     ap)
  1.1306 +{
  1.1307 +    const UChar     *alias;
  1.1308 +    int32_t         count, converted, argConsumed, cpConsumed;
  1.1309 +    uint16_t        handlerNum;
  1.1310 +
  1.1311 +    ufmt_args       args;
  1.1312 +    u_scanf_spec    spec;
  1.1313 +    ufmt_type_info  info;
  1.1314 +    u_scanf_handler handler;
  1.1315 +
  1.1316 +    /* alias the pattern */
  1.1317 +    alias = patternSpecification;
  1.1318 +
  1.1319 +    /* haven't converted anything yet */
  1.1320 +    argConsumed = 0;
  1.1321 +    converted = 0;
  1.1322 +    cpConsumed = 0;
  1.1323 +
  1.1324 +    /* iterate through the pattern */
  1.1325 +    for(;;) {
  1.1326 +
  1.1327 +        /* match any characters up to the next '%' */
  1.1328 +        while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
  1.1329 +            alias++;
  1.1330 +        }
  1.1331 +
  1.1332 +        /* if we aren't at a '%', or if we're at end of string, break*/
  1.1333 +        if(*alias != UP_PERCENT || *alias == 0x0000)
  1.1334 +            break;
  1.1335 +
  1.1336 +        /* parse the specifier */
  1.1337 +        count = u_scanf_parse_spec(alias, &spec);
  1.1338 +
  1.1339 +        /* update the pointer in pattern */
  1.1340 +        alias += count;
  1.1341 +
  1.1342 +        handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
  1.1343 +        if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
  1.1344 +            /* skip the argument, if necessary */
  1.1345 +            /* query the info function for argument information */
  1.1346 +            info = g_u_scanf_infos[ handlerNum ].info;
  1.1347 +            if (info != ufmt_count && u_feof(f)) {
  1.1348 +                break;
  1.1349 +            }
  1.1350 +            else if(spec.fInfo.fSkipArg) {
  1.1351 +                args.ptrValue = NULL;
  1.1352 +            }
  1.1353 +            else {
  1.1354 +                switch(info) {
  1.1355 +                case ufmt_count:
  1.1356 +                    /* set the spec's width to the # of items converted */
  1.1357 +                    spec.fInfo.fWidth = cpConsumed;
  1.1358 +                    /* fall through to next case */
  1.1359 +                case ufmt_char:
  1.1360 +                case ufmt_uchar:
  1.1361 +                case ufmt_int:
  1.1362 +                case ufmt_string:
  1.1363 +                case ufmt_ustring:
  1.1364 +                case ufmt_pointer:
  1.1365 +                case ufmt_float:
  1.1366 +                case ufmt_double:
  1.1367 +                    args.ptrValue = va_arg(ap, void*);
  1.1368 +                    break;
  1.1369 +
  1.1370 +                default:
  1.1371 +                    /* else args is ignored */
  1.1372 +                    args.ptrValue = NULL;
  1.1373 +                    break;
  1.1374 +                }
  1.1375 +            }
  1.1376 +
  1.1377 +            /* call the handler function */
  1.1378 +            handler = g_u_scanf_infos[ handlerNum ].handler;
  1.1379 +            if(handler != 0) {
  1.1380 +
  1.1381 +                /* reset count to 1 so that += for alias works. */
  1.1382 +                count = 1;
  1.1383 +
  1.1384 +                cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
  1.1385 +
  1.1386 +                /* if the handler encountered an error condition, break */
  1.1387 +                if(argConsumed < 0) {
  1.1388 +                    converted = -1;
  1.1389 +                    break;
  1.1390 +                }
  1.1391 +
  1.1392 +                /* add to the # of items converted */
  1.1393 +                converted += argConsumed;
  1.1394 +
  1.1395 +                /* update the pointer in pattern */
  1.1396 +                alias += count-1;
  1.1397 +            }
  1.1398 +            /* else do nothing */
  1.1399 +        }
  1.1400 +        /* else do nothing */
  1.1401 +
  1.1402 +        /* just ignore unknown tags */
  1.1403 +    }
  1.1404 +
  1.1405 +    /* return # of items converted */
  1.1406 +    return converted;
  1.1407 +}
  1.1408 +
  1.1409 +#endif /* #if !UCONFIG_NO_FORMATTING */

mercurial