michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* michael@0: * Scan functions for NSPR types michael@0: * michael@0: * Author: Wan-Teh Chang michael@0: * michael@0: * Acknowledgment: The implementation is inspired by the source code michael@0: * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992. michael@0: */ michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include "prprf.h" michael@0: #include "prdtoa.h" michael@0: #include "prlog.h" michael@0: #include "prerror.h" michael@0: michael@0: /* michael@0: * A function that reads a character from 'stream'. michael@0: * Returns the character read, or EOF if end of stream is reached. michael@0: */ michael@0: typedef int (*_PRGetCharFN)(void *stream); michael@0: michael@0: /* michael@0: * A function that pushes the character 'ch' back to 'stream'. michael@0: */ michael@0: typedef void (*_PRUngetCharFN)(void *stream, int ch); michael@0: michael@0: /* michael@0: * The size specifier for the integer and floating point number michael@0: * conversions in format control strings. michael@0: */ michael@0: typedef enum { michael@0: _PR_size_none, /* No size specifier is given */ michael@0: _PR_size_h, /* The 'h' specifier, suggesting "short" */ michael@0: _PR_size_l, /* The 'l' specifier, suggesting "long" */ michael@0: _PR_size_L, /* The 'L' specifier, meaning a 'long double' */ michael@0: _PR_size_ll /* The 'll' specifier, suggesting "long long" */ michael@0: } _PRSizeSpec; michael@0: michael@0: /* michael@0: * The collection of data that is passed between the scan function michael@0: * and its subordinate functions. The fields of this structure michael@0: * serve as the input or output arguments for these functions. michael@0: */ michael@0: typedef struct { michael@0: _PRGetCharFN get; /* get a character from input stream */ michael@0: _PRUngetCharFN unget; /* unget (push back) a character */ michael@0: void *stream; /* argument for get and unget */ michael@0: va_list ap; /* the variable argument list */ michael@0: int nChar; /* number of characters read from 'stream' */ michael@0: michael@0: PRBool assign; /* assign, or suppress assignment? */ michael@0: int width; /* field width */ michael@0: _PRSizeSpec sizeSpec; /* 'h', 'l', 'L', or 'll' */ michael@0: michael@0: PRBool converted; /* is the value actually converted? */ michael@0: } ScanfState; michael@0: michael@0: #define GET(state) ((state)->nChar++, (state)->get((state)->stream)) michael@0: #define UNGET(state, ch) \ michael@0: ((state)->nChar--, (state)->unget((state)->stream, ch)) michael@0: michael@0: /* michael@0: * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH, michael@0: * are always used together. michael@0: * michael@0: * GET_IF_WITHIN_WIDTH calls the GET macro and assigns its return michael@0: * value to 'ch' only if we have not exceeded the field width of michael@0: * 'state'. Therefore, after GET_IF_WITHIN_WIDTH, the value of michael@0: * 'ch' is valid only if the macro WITHIN_WIDTH evaluates to true. michael@0: */ michael@0: michael@0: #define GET_IF_WITHIN_WIDTH(state, ch) \ michael@0: if (--(state)->width >= 0) { \ michael@0: (ch) = GET(state); \ michael@0: } michael@0: #define WITHIN_WIDTH(state) ((state)->width >= 0) michael@0: michael@0: /* michael@0: * _pr_strtoull: michael@0: * Convert a string to an unsigned 64-bit integer. The string michael@0: * 'str' is assumed to be a representation of the integer in michael@0: * base 'base'. michael@0: * michael@0: * Warning: michael@0: * - Only handle base 8, 10, and 16. michael@0: * - No overflow checking. michael@0: */ michael@0: michael@0: static PRUint64 michael@0: _pr_strtoull(const char *str, char **endptr, int base) michael@0: { michael@0: static const int BASE_MAX = 16; michael@0: static const char digits[] = "0123456789abcdef"; michael@0: char *digitPtr; michael@0: PRUint64 x; /* return value */ michael@0: PRInt64 base64; michael@0: const char *cPtr; michael@0: PRBool negative; michael@0: const char *digitStart; michael@0: michael@0: PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16); michael@0: if (base < 0 || base == 1 || base > BASE_MAX) { michael@0: if (endptr) { michael@0: *endptr = (char *) str; michael@0: return LL_ZERO; michael@0: } michael@0: } michael@0: michael@0: cPtr = str; michael@0: while (isspace(*cPtr)) { michael@0: ++cPtr; michael@0: } michael@0: michael@0: negative = PR_FALSE; michael@0: if (*cPtr == '-') { michael@0: negative = PR_TRUE; michael@0: cPtr++; michael@0: } else if (*cPtr == '+') { michael@0: cPtr++; michael@0: } michael@0: michael@0: if (base == 16) { michael@0: if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) { michael@0: cPtr += 2; michael@0: } michael@0: } else if (base == 0) { michael@0: if (*cPtr != '0') { michael@0: base = 10; michael@0: } else if (cPtr[1] == 'x' || cPtr[1] == 'X') { michael@0: base = 16; michael@0: cPtr += 2; michael@0: } else { michael@0: base = 8; michael@0: } michael@0: } michael@0: PR_ASSERT(base != 0); michael@0: LL_I2L(base64, base); michael@0: digitStart = cPtr; michael@0: michael@0: /* Skip leading zeros */ michael@0: while (*cPtr == '0') { michael@0: cPtr++; michael@0: } michael@0: michael@0: LL_I2L(x, 0); michael@0: while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) { michael@0: PRUint64 d; michael@0: michael@0: LL_I2L(d, (digitPtr - digits)); michael@0: LL_MUL(x, x, base64); michael@0: LL_ADD(x, x, d); michael@0: cPtr++; michael@0: } michael@0: michael@0: if (cPtr == digitStart) { michael@0: if (endptr) { michael@0: *endptr = (char *) str; michael@0: } michael@0: return LL_ZERO; michael@0: } michael@0: michael@0: if (negative) { michael@0: #ifdef HAVE_LONG_LONG michael@0: /* The cast to a signed type is to avoid a compiler warning */ michael@0: x = -(PRInt64)x; michael@0: #else michael@0: LL_NEG(x, x); michael@0: #endif michael@0: } michael@0: michael@0: if (endptr) { michael@0: *endptr = (char *) cPtr; michael@0: } michael@0: return x; michael@0: } michael@0: michael@0: /* michael@0: * The maximum field width (in number of characters) that is enough michael@0: * (may be more than necessary) to represent a 64-bit integer or michael@0: * floating point number. michael@0: */ michael@0: #define FMAX 31 michael@0: #define DECIMAL_POINT '.' michael@0: michael@0: static PRStatus michael@0: GetInt(ScanfState *state, int code) michael@0: { michael@0: char buf[FMAX + 1], *p; michael@0: int ch; michael@0: static const char digits[] = "0123456789abcdefABCDEF"; michael@0: PRBool seenDigit = PR_FALSE; michael@0: int base; michael@0: int dlen; michael@0: michael@0: switch (code) { michael@0: case 'd': case 'u': michael@0: base = 10; michael@0: break; michael@0: case 'i': michael@0: base = 0; michael@0: break; michael@0: case 'x': case 'X': case 'p': michael@0: base = 16; michael@0: break; michael@0: case 'o': michael@0: base = 8; michael@0: break; michael@0: default: michael@0: return PR_FAILURE; michael@0: } michael@0: if (state->width == 0 || state->width > FMAX) { michael@0: state->width = FMAX; michael@0: } michael@0: p = buf; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: } michael@0: if (WITHIN_WIDTH(state) && ch == '0') { michael@0: seenDigit = PR_TRUE; michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: if (WITHIN_WIDTH(state) michael@0: && (ch == 'x' || ch == 'X') michael@0: && (base == 0 || base == 16)) { michael@0: base = 16; michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: } else if (base == 0) { michael@0: base = 8; michael@0: } michael@0: } michael@0: if (base == 0 || base == 10) { michael@0: dlen = 10; michael@0: } else if (base == 8) { michael@0: dlen = 8; michael@0: } else { michael@0: PR_ASSERT(base == 16); michael@0: dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */ michael@0: } michael@0: while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) { michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: seenDigit = PR_TRUE; michael@0: } michael@0: if (WITHIN_WIDTH(state)) { michael@0: UNGET(state, ch); michael@0: } michael@0: if (!seenDigit) { michael@0: return PR_FAILURE; michael@0: } michael@0: *p = '\0'; michael@0: if (state->assign) { michael@0: if (code == 'd' || code == 'i') { michael@0: if (state->sizeSpec == _PR_size_ll) { michael@0: PRInt64 llval = _pr_strtoull(buf, NULL, base); michael@0: *va_arg(state->ap, PRInt64 *) = llval; michael@0: } else { michael@0: long lval = strtol(buf, NULL, base); michael@0: michael@0: if (state->sizeSpec == _PR_size_none) { michael@0: *va_arg(state->ap, PRIntn *) = lval; michael@0: } else if (state->sizeSpec == _PR_size_h) { michael@0: *va_arg(state->ap, PRInt16 *) = (PRInt16)lval; michael@0: } else if (state->sizeSpec == _PR_size_l) { michael@0: *va_arg(state->ap, PRInt32 *) = lval; michael@0: } else { michael@0: return PR_FAILURE; michael@0: } michael@0: } michael@0: } else { michael@0: if (state->sizeSpec == _PR_size_ll) { michael@0: PRUint64 llval = _pr_strtoull(buf, NULL, base); michael@0: *va_arg(state->ap, PRUint64 *) = llval; michael@0: } else { michael@0: unsigned long lval = strtoul(buf, NULL, base); michael@0: michael@0: if (state->sizeSpec == _PR_size_none) { michael@0: *va_arg(state->ap, PRUintn *) = lval; michael@0: } else if (state->sizeSpec == _PR_size_h) { michael@0: *va_arg(state->ap, PRUint16 *) = (PRUint16)lval; michael@0: } else if (state->sizeSpec == _PR_size_l) { michael@0: *va_arg(state->ap, PRUint32 *) = lval; michael@0: } else { michael@0: return PR_FAILURE; michael@0: } michael@0: } michael@0: } michael@0: state->converted = PR_TRUE; michael@0: } michael@0: return PR_SUCCESS; michael@0: } michael@0: michael@0: static PRStatus michael@0: GetFloat(ScanfState *state) michael@0: { michael@0: char buf[FMAX + 1], *p; michael@0: int ch; michael@0: PRBool seenDigit = PR_FALSE; michael@0: michael@0: if (state->width == 0 || state->width > FMAX) { michael@0: state->width = FMAX; michael@0: } michael@0: p = buf; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: } michael@0: while (WITHIN_WIDTH(state) && isdigit(ch)) { michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: seenDigit = PR_TRUE; michael@0: } michael@0: if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) { michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: while (WITHIN_WIDTH(state) && isdigit(ch)) { michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: seenDigit = PR_TRUE; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * This is not robust. For example, "1.2e+" would confuse michael@0: * the code below to read 'e' and '+', only to realize that michael@0: * it should have stopped at "1.2". But we can't push back michael@0: * more than one character, so there is nothing I can do. michael@0: */ michael@0: michael@0: /* Parse exponent */ michael@0: if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) { michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: } michael@0: while (WITHIN_WIDTH(state) && isdigit(ch)) { michael@0: *p++ = ch; michael@0: GET_IF_WITHIN_WIDTH(state, ch); michael@0: } michael@0: } michael@0: if (WITHIN_WIDTH(state)) { michael@0: UNGET(state, ch); michael@0: } michael@0: if (!seenDigit) { michael@0: return PR_FAILURE; michael@0: } michael@0: *p = '\0'; michael@0: if (state->assign) { michael@0: PRFloat64 dval = PR_strtod(buf, NULL); michael@0: michael@0: state->converted = PR_TRUE; michael@0: if (state->sizeSpec == _PR_size_l) { michael@0: *va_arg(state->ap, PRFloat64 *) = dval; michael@0: } else if (state->sizeSpec == _PR_size_L) { michael@0: #if defined(OSF1) || defined(IRIX) michael@0: *va_arg(state->ap, double *) = dval; michael@0: #else michael@0: *va_arg(state->ap, long double *) = dval; michael@0: #endif michael@0: } else { michael@0: *va_arg(state->ap, float *) = (float) dval; michael@0: } michael@0: } michael@0: return PR_SUCCESS; michael@0: } michael@0: michael@0: /* michael@0: * Convert, and return the end of the conversion spec. michael@0: * Return NULL on error. michael@0: */ michael@0: michael@0: static const char * michael@0: Convert(ScanfState *state, const char *fmt) michael@0: { michael@0: const char *cPtr; michael@0: int ch; michael@0: char *cArg = NULL; michael@0: michael@0: state->converted = PR_FALSE; michael@0: cPtr = fmt; michael@0: if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') { michael@0: do { michael@0: ch = GET(state); michael@0: } while (isspace(ch)); michael@0: UNGET(state, ch); michael@0: } michael@0: switch (*cPtr) { michael@0: case 'c': michael@0: if (state->assign) { michael@0: cArg = va_arg(state->ap, char *); michael@0: } michael@0: if (state->width == 0) { michael@0: state->width = 1; michael@0: } michael@0: for (; state->width > 0; state->width--) { michael@0: ch = GET(state); michael@0: if (ch == EOF) { michael@0: return NULL; michael@0: } else if (state->assign) { michael@0: *cArg++ = ch; michael@0: } michael@0: } michael@0: if (state->assign) { michael@0: state->converted = PR_TRUE; michael@0: } michael@0: break; michael@0: case 'p': michael@0: case 'd': case 'i': case 'o': michael@0: case 'u': case 'x': case 'X': michael@0: if (GetInt(state, *cPtr) == PR_FAILURE) { michael@0: return NULL; michael@0: } michael@0: break; michael@0: case 'e': case 'E': case 'f': michael@0: case 'g': case 'G': michael@0: if (GetFloat(state) == PR_FAILURE) { michael@0: return NULL; michael@0: } michael@0: break; michael@0: case 'n': michael@0: /* do not consume any input */ michael@0: if (state->assign) { michael@0: switch (state->sizeSpec) { michael@0: case _PR_size_none: michael@0: *va_arg(state->ap, PRIntn *) = state->nChar; michael@0: break; michael@0: case _PR_size_h: michael@0: *va_arg(state->ap, PRInt16 *) = state->nChar; michael@0: break; michael@0: case _PR_size_l: michael@0: *va_arg(state->ap, PRInt32 *) = state->nChar; michael@0: break; michael@0: case _PR_size_ll: michael@0: LL_I2L(*va_arg(state->ap, PRInt64 *), state->nChar); michael@0: break; michael@0: default: michael@0: PR_ASSERT(0); michael@0: } michael@0: } michael@0: break; michael@0: case 's': michael@0: if (state->width == 0) { michael@0: state->width = INT_MAX; michael@0: } michael@0: if (state->assign) { michael@0: cArg = va_arg(state->ap, char *); michael@0: } michael@0: for (; state->width > 0; state->width--) { michael@0: ch = GET(state); michael@0: if ((ch == EOF) || isspace(ch)) { michael@0: UNGET(state, ch); michael@0: break; michael@0: } michael@0: if (state->assign) { michael@0: *cArg++ = ch; michael@0: } michael@0: } michael@0: if (state->assign) { michael@0: *cArg = '\0'; michael@0: state->converted = PR_TRUE; michael@0: } michael@0: break; michael@0: case '%': michael@0: ch = GET(state); michael@0: if (ch != '%') { michael@0: UNGET(state, ch); michael@0: return NULL; michael@0: } michael@0: break; michael@0: case '[': michael@0: { michael@0: PRBool complement = PR_FALSE; michael@0: const char *closeBracket; michael@0: size_t n; michael@0: michael@0: if (*++cPtr == '^') { michael@0: complement = PR_TRUE; michael@0: cPtr++; michael@0: } michael@0: closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']'); michael@0: if (closeBracket == NULL) { michael@0: return NULL; michael@0: } michael@0: n = closeBracket - cPtr; michael@0: if (state->width == 0) { michael@0: state->width = INT_MAX; michael@0: } michael@0: if (state->assign) { michael@0: cArg = va_arg(state->ap, char *); michael@0: } michael@0: for (; state->width > 0; state->width--) { michael@0: ch = GET(state); michael@0: if ((ch == EOF) michael@0: || (!complement && !memchr(cPtr, ch, n)) michael@0: || (complement && memchr(cPtr, ch, n))) { michael@0: UNGET(state, ch); michael@0: break; michael@0: } michael@0: if (state->assign) { michael@0: *cArg++ = ch; michael@0: } michael@0: } michael@0: if (state->assign) { michael@0: *cArg = '\0'; michael@0: state->converted = PR_TRUE; michael@0: } michael@0: cPtr = closeBracket; michael@0: } michael@0: break; michael@0: default: michael@0: return NULL; michael@0: } michael@0: return cPtr; michael@0: } michael@0: michael@0: static PRInt32 michael@0: DoScanf(ScanfState *state, const char *fmt) michael@0: { michael@0: PRInt32 nConverted = 0; michael@0: const char *cPtr; michael@0: int ch; michael@0: michael@0: state->nChar = 0; michael@0: cPtr = fmt; michael@0: while (1) { michael@0: if (isspace(*cPtr)) { michael@0: /* white space: skip */ michael@0: do { michael@0: cPtr++; michael@0: } while (isspace(*cPtr)); michael@0: do { michael@0: ch = GET(state); michael@0: } while (isspace(ch)); michael@0: UNGET(state, ch); michael@0: } else if (*cPtr == '%') { michael@0: /* format spec: convert */ michael@0: cPtr++; michael@0: state->assign = PR_TRUE; michael@0: if (*cPtr == '*') { michael@0: cPtr++; michael@0: state->assign = PR_FALSE; michael@0: } michael@0: for (state->width = 0; isdigit(*cPtr); cPtr++) { michael@0: state->width = state->width * 10 + *cPtr - '0'; michael@0: } michael@0: state->sizeSpec = _PR_size_none; michael@0: if (*cPtr == 'h') { michael@0: cPtr++; michael@0: state->sizeSpec = _PR_size_h; michael@0: } else if (*cPtr == 'l') { michael@0: cPtr++; michael@0: if (*cPtr == 'l') { michael@0: cPtr++; michael@0: state->sizeSpec = _PR_size_ll; michael@0: } else { michael@0: state->sizeSpec = _PR_size_l; michael@0: } michael@0: } else if (*cPtr == 'L') { michael@0: cPtr++; michael@0: state->sizeSpec = _PR_size_L; michael@0: } michael@0: cPtr = Convert(state, cPtr); michael@0: if (cPtr == NULL) { michael@0: return (nConverted > 0 ? nConverted : EOF); michael@0: } michael@0: if (state->converted) { michael@0: nConverted++; michael@0: } michael@0: cPtr++; michael@0: } else { michael@0: /* others: must match */ michael@0: if (*cPtr == '\0') { michael@0: return nConverted; michael@0: } michael@0: ch = GET(state); michael@0: if (ch != *cPtr) { michael@0: UNGET(state, ch); michael@0: return nConverted; michael@0: } michael@0: cPtr++; michael@0: } michael@0: } michael@0: } michael@0: michael@0: static int michael@0: StringGetChar(void *stream) michael@0: { michael@0: char *cPtr = *((char **) stream); michael@0: michael@0: if (*cPtr == '\0') { michael@0: return EOF; michael@0: } else { michael@0: *((char **) stream) = cPtr + 1; michael@0: return (unsigned char) *cPtr; michael@0: } michael@0: } michael@0: michael@0: static void michael@0: StringUngetChar(void *stream, int ch) michael@0: { michael@0: char *cPtr = *((char **) stream); michael@0: michael@0: if (ch != EOF) { michael@0: *((char **) stream) = cPtr - 1; michael@0: } michael@0: } michael@0: michael@0: PR_IMPLEMENT(PRInt32) michael@0: PR_sscanf(const char *buf, const char *fmt, ...) michael@0: { michael@0: PRInt32 rv; michael@0: ScanfState state; michael@0: michael@0: state.get = &StringGetChar; michael@0: state.unget = &StringUngetChar; michael@0: state.stream = (void *) &buf; michael@0: va_start(state.ap, fmt); michael@0: rv = DoScanf(&state, fmt); michael@0: va_end(state.ap); michael@0: return rv; michael@0: }