nsprpub/pr/src/io/prscanf.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 /*
     7  * Scan functions for NSPR types
     8  *
     9  * Author: Wan-Teh Chang
    10  *
    11  * Acknowledgment: The implementation is inspired by the source code
    12  * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992.
    13  */
    15 #include <limits.h>
    16 #include <ctype.h>
    17 #include <string.h>
    18 #include <stdlib.h>
    19 #include "prprf.h"
    20 #include "prdtoa.h"
    21 #include "prlog.h"
    22 #include "prerror.h"
    24 /*
    25  * A function that reads a character from 'stream'.
    26  * Returns the character read, or EOF if end of stream is reached.
    27  */
    28 typedef int (*_PRGetCharFN)(void *stream);
    30 /*
    31  * A function that pushes the character 'ch' back to 'stream'.
    32  */
    33 typedef void (*_PRUngetCharFN)(void *stream, int ch); 
    35 /*
    36  * The size specifier for the integer and floating point number
    37  * conversions in format control strings.
    38  */
    39 typedef enum {
    40     _PR_size_none,  /* No size specifier is given */
    41     _PR_size_h,     /* The 'h' specifier, suggesting "short" */
    42     _PR_size_l,     /* The 'l' specifier, suggesting "long" */
    43     _PR_size_L,     /* The 'L' specifier, meaning a 'long double' */
    44     _PR_size_ll     /* The 'll' specifier, suggesting "long long" */
    45 } _PRSizeSpec;
    47 /*
    48  * The collection of data that is passed between the scan function
    49  * and its subordinate functions.  The fields of this structure
    50  * serve as the input or output arguments for these functions.
    51  */
    52 typedef struct {
    53     _PRGetCharFN get;        /* get a character from input stream */
    54     _PRUngetCharFN unget;    /* unget (push back) a character */
    55     void *stream;            /* argument for get and unget */
    56     va_list ap;              /* the variable argument list */
    57     int nChar;               /* number of characters read from 'stream' */
    59     PRBool assign;           /* assign, or suppress assignment? */
    60     int width;               /* field width */
    61     _PRSizeSpec sizeSpec;    /* 'h', 'l', 'L', or 'll' */
    63     PRBool converted;        /* is the value actually converted? */
    64 } ScanfState;
    66 #define GET(state) ((state)->nChar++, (state)->get((state)->stream))
    67 #define UNGET(state, ch) \
    68         ((state)->nChar--, (state)->unget((state)->stream, ch))
    70 /*
    71  * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH,
    72  * are always used together.
    73  *
    74  * GET_IF_WITHIN_WIDTH calls the GET macro and assigns its return
    75  * value to 'ch' only if we have not exceeded the field width of
    76  * 'state'.  Therefore, after GET_IF_WITHIN_WIDTH, the value of
    77  * 'ch' is valid only if the macro WITHIN_WIDTH evaluates to true.
    78  */
    80 #define GET_IF_WITHIN_WIDTH(state, ch) \
    81         if (--(state)->width >= 0) { \
    82             (ch) = GET(state); \
    83         }
    84 #define WITHIN_WIDTH(state) ((state)->width >= 0)
    86 /*
    87  * _pr_strtoull:
    88  *     Convert a string to an unsigned 64-bit integer.  The string
    89  *     'str' is assumed to be a representation of the integer in
    90  *     base 'base'.
    91  *
    92  * Warning: 
    93  *     - Only handle base 8, 10, and 16.
    94  *     - No overflow checking.
    95  */
    97 static PRUint64
    98 _pr_strtoull(const char *str, char **endptr, int base)
    99 {
   100     static const int BASE_MAX = 16;
   101     static const char digits[] = "0123456789abcdef";
   102     char *digitPtr;
   103     PRUint64 x;    /* return value */
   104     PRInt64 base64;
   105     const char *cPtr;
   106     PRBool negative;
   107     const char *digitStart;
   109     PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16);
   110     if (base < 0 || base == 1 || base > BASE_MAX) {
   111         if (endptr) {
   112             *endptr = (char *) str;
   113             return LL_ZERO;
   114         }
   115     }
   117     cPtr = str;
   118     while (isspace(*cPtr)) {
   119         ++cPtr;
   120     }
   122     negative = PR_FALSE;
   123     if (*cPtr == '-') {
   124         negative = PR_TRUE;
   125         cPtr++;
   126     } else if (*cPtr == '+') {
   127         cPtr++;
   128     }
   130     if (base == 16) {
   131         if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) {
   132             cPtr += 2;
   133         }
   134     } else if (base == 0) {
   135         if (*cPtr != '0') {
   136             base = 10;
   137         } else if (cPtr[1] == 'x' || cPtr[1] == 'X') {
   138             base = 16;
   139             cPtr += 2;
   140         } else {
   141             base = 8;
   142         } 
   143     }
   144     PR_ASSERT(base != 0);
   145     LL_I2L(base64, base);
   146     digitStart = cPtr;
   148     /* Skip leading zeros */
   149     while (*cPtr == '0') {
   150         cPtr++;
   151     }
   153     LL_I2L(x, 0);
   154     while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) {
   155         PRUint64 d;
   157         LL_I2L(d, (digitPtr - digits));
   158         LL_MUL(x, x, base64);
   159         LL_ADD(x, x, d);
   160         cPtr++;
   161     }
   163     if (cPtr == digitStart) {
   164         if (endptr) {
   165             *endptr = (char *) str;
   166         }
   167         return LL_ZERO;
   168     }
   170     if (negative) {
   171 #ifdef HAVE_LONG_LONG
   172         /* The cast to a signed type is to avoid a compiler warning */
   173         x = -(PRInt64)x;
   174 #else
   175         LL_NEG(x, x);
   176 #endif
   177     }
   179     if (endptr) {
   180         *endptr = (char *) cPtr;
   181     }
   182     return x;
   183 }
   185 /*
   186  * The maximum field width (in number of characters) that is enough
   187  * (may be more than necessary) to represent a 64-bit integer or
   188  * floating point number.
   189  */
   190 #define FMAX 31
   191 #define DECIMAL_POINT '.'
   193 static PRStatus
   194 GetInt(ScanfState *state, int code)
   195 {
   196     char buf[FMAX + 1], *p;
   197     int ch;
   198     static const char digits[] = "0123456789abcdefABCDEF";
   199     PRBool seenDigit = PR_FALSE;
   200     int base;
   201     int dlen;
   203     switch (code) {
   204         case 'd': case 'u':
   205             base = 10;
   206             break;
   207         case 'i':
   208             base = 0;
   209             break;
   210         case 'x': case 'X': case 'p':
   211             base = 16;
   212             break;
   213         case 'o':
   214             base = 8;
   215             break;
   216         default:
   217             return PR_FAILURE;
   218     }
   219     if (state->width == 0 || state->width > FMAX) {
   220         state->width = FMAX;
   221     }
   222     p = buf;
   223     GET_IF_WITHIN_WIDTH(state, ch);
   224     if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
   225         *p++ = ch;
   226         GET_IF_WITHIN_WIDTH(state, ch);
   227     }
   228     if (WITHIN_WIDTH(state) && ch == '0') {
   229         seenDigit = PR_TRUE;
   230         *p++ = ch;
   231         GET_IF_WITHIN_WIDTH(state, ch);
   232         if (WITHIN_WIDTH(state)
   233                 && (ch == 'x' || ch == 'X')
   234                 && (base == 0 || base == 16)) {
   235             base = 16;
   236             *p++ = ch;
   237             GET_IF_WITHIN_WIDTH(state, ch);
   238         } else if (base == 0) {
   239             base = 8;
   240         }
   241     }
   242     if (base == 0 || base == 10) {
   243         dlen = 10;
   244     } else if (base == 8) {
   245         dlen = 8;
   246     } else {
   247         PR_ASSERT(base == 16);
   248         dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */
   249     }
   250     while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) {
   251         *p++ = ch;
   252         GET_IF_WITHIN_WIDTH(state, ch);
   253         seenDigit = PR_TRUE;
   254     }
   255     if (WITHIN_WIDTH(state)) {
   256         UNGET(state, ch);
   257     }
   258     if (!seenDigit) {
   259         return PR_FAILURE;
   260     }
   261     *p = '\0';
   262     if (state->assign) {
   263         if (code == 'd' || code == 'i') {
   264             if (state->sizeSpec == _PR_size_ll) {
   265                 PRInt64 llval = _pr_strtoull(buf, NULL, base);
   266                 *va_arg(state->ap, PRInt64 *) = llval;
   267             } else {
   268                 long lval = strtol(buf, NULL, base);
   270                 if (state->sizeSpec == _PR_size_none) {
   271                     *va_arg(state->ap, PRIntn *) = lval;
   272                 } else if (state->sizeSpec == _PR_size_h) {
   273                     *va_arg(state->ap, PRInt16 *) = (PRInt16)lval;
   274                 } else if (state->sizeSpec == _PR_size_l) {
   275                     *va_arg(state->ap, PRInt32 *) = lval;
   276                 } else {
   277                     return PR_FAILURE;
   278                 }
   279             }
   280         } else {
   281             if (state->sizeSpec == _PR_size_ll) {
   282                 PRUint64 llval = _pr_strtoull(buf, NULL, base);
   283                 *va_arg(state->ap, PRUint64 *) = llval;
   284             } else {
   285                 unsigned long lval = strtoul(buf, NULL, base);
   287                 if (state->sizeSpec == _PR_size_none) {
   288                     *va_arg(state->ap, PRUintn *) = lval;
   289                 } else if (state->sizeSpec == _PR_size_h) {
   290                     *va_arg(state->ap, PRUint16 *) = (PRUint16)lval;
   291                 } else if (state->sizeSpec == _PR_size_l) {
   292                     *va_arg(state->ap, PRUint32 *) = lval;
   293                 } else {
   294                     return PR_FAILURE;
   295                 }
   296             }
   297         }
   298         state->converted = PR_TRUE;
   299     }
   300     return PR_SUCCESS;
   301 }
   303 static PRStatus
   304 GetFloat(ScanfState *state)
   305 {
   306     char buf[FMAX + 1], *p;
   307     int ch;
   308     PRBool seenDigit = PR_FALSE;
   310     if (state->width == 0 || state->width > FMAX) {
   311         state->width = FMAX;
   312     }
   313     p = buf;
   314     GET_IF_WITHIN_WIDTH(state, ch);
   315     if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
   316         *p++ = ch;
   317         GET_IF_WITHIN_WIDTH(state, ch);
   318     }
   319     while (WITHIN_WIDTH(state) && isdigit(ch)) {
   320         *p++ = ch;
   321         GET_IF_WITHIN_WIDTH(state, ch);
   322         seenDigit = PR_TRUE;
   323     }
   324     if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) {
   325         *p++ = ch;
   326         GET_IF_WITHIN_WIDTH(state, ch);
   327         while (WITHIN_WIDTH(state) && isdigit(ch)) {
   328             *p++ = ch;
   329             GET_IF_WITHIN_WIDTH(state, ch);
   330             seenDigit = PR_TRUE;
   331         }
   332     }
   334     /*
   335      * This is not robust.  For example, "1.2e+" would confuse
   336      * the code below to read 'e' and '+', only to realize that
   337      * it should have stopped at "1.2".  But we can't push back
   338      * more than one character, so there is nothing I can do.
   339      */
   341     /* Parse exponent */
   342     if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) {
   343         *p++ = ch;
   344         GET_IF_WITHIN_WIDTH(state, ch);
   345         if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
   346             *p++ = ch;
   347             GET_IF_WITHIN_WIDTH(state, ch);
   348         }
   349         while (WITHIN_WIDTH(state) && isdigit(ch)) {
   350             *p++ = ch;
   351             GET_IF_WITHIN_WIDTH(state, ch);
   352         }
   353     }
   354     if (WITHIN_WIDTH(state)) {
   355         UNGET(state, ch);
   356     }
   357     if (!seenDigit) {
   358         return PR_FAILURE;
   359     }
   360     *p = '\0';
   361     if (state->assign) {
   362         PRFloat64 dval = PR_strtod(buf, NULL);
   364         state->converted = PR_TRUE;
   365         if (state->sizeSpec == _PR_size_l) {
   366             *va_arg(state->ap, PRFloat64 *) = dval;
   367         } else if (state->sizeSpec == _PR_size_L) {
   368 #if defined(OSF1) || defined(IRIX)
   369             *va_arg(state->ap, double *) = dval;
   370 #else
   371             *va_arg(state->ap, long double *) = dval;
   372 #endif
   373         } else {
   374             *va_arg(state->ap, float *) = (float) dval;
   375         }
   376     }
   377     return PR_SUCCESS;
   378 }
   380 /*
   381  * Convert, and return the end of the conversion spec.
   382  * Return NULL on error.
   383  */
   385 static const char *
   386 Convert(ScanfState *state, const char *fmt)
   387 {
   388     const char *cPtr;
   389     int ch;
   390     char *cArg = NULL;
   392     state->converted = PR_FALSE;
   393     cPtr = fmt;
   394     if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') {
   395         do {
   396             ch = GET(state);
   397         } while (isspace(ch));
   398         UNGET(state, ch);
   399     }
   400     switch (*cPtr) {
   401         case 'c':
   402             if (state->assign) {
   403                 cArg = va_arg(state->ap, char *);
   404             }
   405             if (state->width == 0) {
   406                 state->width = 1;
   407             }
   408             for (; state->width > 0; state->width--) {
   409                 ch = GET(state);
   410                 if (ch == EOF) {
   411                     return NULL;
   412                 } else if (state->assign) {
   413                     *cArg++ = ch;
   414                 }
   415             }
   416             if (state->assign) {
   417                 state->converted = PR_TRUE;
   418             }
   419             break;
   420         case 'p':
   421         case 'd': case 'i': case 'o':
   422         case 'u': case 'x': case 'X':
   423             if (GetInt(state, *cPtr) == PR_FAILURE) {
   424                 return NULL;
   425             }
   426             break;
   427         case 'e': case 'E': case 'f':
   428         case 'g': case 'G':
   429             if (GetFloat(state) == PR_FAILURE) {
   430                 return NULL;
   431             }
   432             break;
   433         case 'n':
   434             /* do not consume any input */
   435             if (state->assign) {
   436                 switch (state->sizeSpec) {
   437                     case _PR_size_none:
   438                         *va_arg(state->ap, PRIntn *) = state->nChar;
   439                         break;
   440                     case _PR_size_h:
   441                         *va_arg(state->ap, PRInt16 *) = state->nChar;
   442                         break;
   443                     case _PR_size_l:
   444                         *va_arg(state->ap, PRInt32 *) = state->nChar;
   445                         break;
   446                     case _PR_size_ll:
   447                         LL_I2L(*va_arg(state->ap, PRInt64 *), state->nChar);
   448                         break;
   449                     default:
   450                         PR_ASSERT(0);
   451                 }
   452             }
   453             break;
   454         case 's':
   455             if (state->width == 0) {
   456                 state->width = INT_MAX;
   457             }
   458             if (state->assign) {
   459                 cArg = va_arg(state->ap, char *);
   460             }
   461             for (; state->width > 0; state->width--) {
   462                 ch = GET(state);
   463                 if ((ch == EOF) || isspace(ch)) {
   464                     UNGET(state, ch);
   465                     break;
   466                 }
   467                 if (state->assign) {
   468                     *cArg++ = ch;
   469                 }
   470             }
   471             if (state->assign) {
   472                 *cArg = '\0';
   473                 state->converted = PR_TRUE;
   474             }
   475             break;
   476         case '%':
   477             ch = GET(state);
   478             if (ch != '%') {
   479                 UNGET(state, ch);
   480                 return NULL;
   481             }
   482             break;
   483         case '[':
   484             {
   485                 PRBool complement = PR_FALSE;
   486                 const char *closeBracket;
   487                 size_t n;
   489                 if (*++cPtr == '^') {
   490                     complement = PR_TRUE;
   491                     cPtr++;
   492                 }
   493                 closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']');
   494                 if (closeBracket == NULL) {
   495                     return NULL;
   496                 }
   497                 n = closeBracket - cPtr;
   498                 if (state->width == 0) {
   499                     state->width = INT_MAX;
   500                 }
   501                 if (state->assign) {
   502                     cArg = va_arg(state->ap, char *);
   503                 }
   504                 for (; state->width > 0; state->width--) {
   505                     ch = GET(state);
   506                     if ((ch == EOF) 
   507                             || (!complement && !memchr(cPtr, ch, n))
   508                             || (complement && memchr(cPtr, ch, n))) {
   509                         UNGET(state, ch);
   510                         break;
   511                     }
   512                     if (state->assign) {
   513                         *cArg++ = ch;
   514                     }
   515                 }
   516                 if (state->assign) {
   517                     *cArg = '\0';
   518                     state->converted = PR_TRUE;
   519                 }
   520                 cPtr = closeBracket;
   521             }
   522             break;
   523         default:
   524             return NULL;
   525     }
   526     return cPtr;
   527 }
   529 static PRInt32
   530 DoScanf(ScanfState *state, const char *fmt)
   531 {
   532     PRInt32 nConverted = 0;
   533     const char *cPtr;
   534     int ch;
   536     state->nChar = 0;
   537     cPtr = fmt;
   538     while (1) {
   539         if (isspace(*cPtr)) {
   540             /* white space: skip */
   541             do {
   542                 cPtr++;
   543             } while (isspace(*cPtr));
   544             do {
   545                 ch = GET(state);
   546             } while (isspace(ch));
   547             UNGET(state, ch);
   548         } else if (*cPtr == '%') {
   549             /* format spec: convert */
   550             cPtr++;
   551             state->assign = PR_TRUE;
   552             if (*cPtr == '*') {
   553                 cPtr++;
   554                 state->assign = PR_FALSE;
   555             }
   556             for (state->width = 0; isdigit(*cPtr); cPtr++) {
   557                 state->width = state->width * 10 + *cPtr - '0';
   558             }
   559             state->sizeSpec = _PR_size_none;
   560             if (*cPtr == 'h') {
   561                 cPtr++;
   562                 state->sizeSpec = _PR_size_h;
   563             } else if (*cPtr == 'l') {
   564                 cPtr++;
   565                 if (*cPtr == 'l') {
   566                     cPtr++;
   567                     state->sizeSpec = _PR_size_ll;
   568                 } else {
   569                     state->sizeSpec = _PR_size_l;
   570                 }
   571             } else if (*cPtr == 'L') {
   572                 cPtr++;
   573                 state->sizeSpec = _PR_size_L;
   574             }
   575             cPtr = Convert(state, cPtr);
   576             if (cPtr == NULL) {
   577                 return (nConverted > 0 ? nConverted : EOF);
   578             }
   579             if (state->converted) {
   580                 nConverted++;
   581             }
   582             cPtr++;
   583         } else {
   584             /* others: must match */
   585             if (*cPtr == '\0') {
   586                 return nConverted;
   587             }
   588             ch = GET(state);
   589             if (ch != *cPtr) {
   590                 UNGET(state, ch);
   591                 return nConverted;
   592             }
   593             cPtr++;
   594         }
   595     }
   596 }
   598 static int
   599 StringGetChar(void *stream)
   600 {
   601     char *cPtr = *((char **) stream);
   603     if (*cPtr == '\0') {
   604         return EOF;
   605     } else {
   606         *((char **) stream) = cPtr + 1;
   607         return (unsigned char) *cPtr;
   608     }
   609 }
   611 static void
   612 StringUngetChar(void *stream, int ch)
   613 {
   614     char *cPtr = *((char **) stream);
   616     if (ch != EOF) {
   617         *((char **) stream) = cPtr - 1;
   618     }
   619 }
   621 PR_IMPLEMENT(PRInt32)
   622 PR_sscanf(const char *buf, const char *fmt, ...)
   623 {
   624     PRInt32 rv;
   625     ScanfState state;
   627     state.get = &StringGetChar;
   628     state.unget = &StringUngetChar;
   629     state.stream = (void *) &buf;
   630     va_start(state.ap, fmt);
   631     rv = DoScanf(&state, fmt);
   632     va_end(state.ap);
   633     return rv;
   634 }

mercurial