|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Scan functions for NSPR types
 *
 * Author: Wan-Teh Chang
 *
 * Acknowledgment: The implementation is inspired by the source code
 * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992.
 */
|
14 |
|
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "prprf.h"
#include "prdtoa.h"
#include "prlog.h"
#include "prerror.h"
|
23 |
|
/*
 * A function that reads a character from 'stream'.
 * Returns the character read, or EOF if end of stream is reached.
 */
typedef int (*_PRGetCharFN)(void *stream);

/*
 * A function that pushes the character 'ch' back to 'stream'.
 */
typedef void (*_PRUngetCharFN)(void *stream, int ch);
|
34 |
|
/*
 * The size specifier for the integer and floating point number
 * conversions in format control strings.
 */
typedef enum {
    _PR_size_none,  /* No size specifier is given */
    _PR_size_h,     /* The 'h' specifier, suggesting "short" */
    _PR_size_l,     /* The 'l' specifier, suggesting "long" */
    _PR_size_L,     /* The 'L' specifier, meaning a 'long double' */
    _PR_size_ll     /* The 'll' specifier, suggesting "long long" */
} _PRSizeSpec;
|
46 |
|
47 /* |
|
48 * The collection of data that is passed between the scan function |
|
49 * and its subordinate functions. The fields of this structure |
|
50 * serve as the input or output arguments for these functions. |
|
51 */ |
|
52 typedef struct { |
|
53 _PRGetCharFN get; /* get a character from input stream */ |
|
54 _PRUngetCharFN unget; /* unget (push back) a character */ |
|
55 void *stream; /* argument for get and unget */ |
|
56 va_list ap; /* the variable argument list */ |
|
57 int nChar; /* number of characters read from 'stream' */ |
|
58 |
|
59 PRBool assign; /* assign, or suppress assignment? */ |
|
60 int width; /* field width */ |
|
61 _PRSizeSpec sizeSpec; /* 'h', 'l', 'L', or 'll' */ |
|
62 |
|
63 PRBool converted; /* is the value actually converted? */ |
|
64 } ScanfState; |
|
65 |
|
/*
 * GET reads one character through the state's 'get' callback and
 * counts it in 'nChar'.  UNGET hands 'ch' to the 'unget' callback and
 * takes the count back.
 */
#define GET(state) ((state)->nChar++, (state)->get((state)->stream))
#define UNGET(state, ch) \
    ((state)->nChar--, (state)->unget((state)->stream, (ch)))

/*
 * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH,
 * are always used together.
 *
 * GET_IF_WITHIN_WIDTH calls the GET macro and assigns its return
 * value to 'ch' only if we have not exceeded the field width of
 * 'state'.  Therefore, after GET_IF_WITHIN_WIDTH, the value of
 * 'ch' is valid only if the macro WITHIN_WIDTH evaluates to true.
 *
 * GET_IF_WITHIN_WIDTH is a statement, not an expression.  It is
 * wrapped in do { } while (0) so that it behaves as a single
 * statement when followed by ';', even in front of an 'else'.
 */
#define GET_IF_WITHIN_WIDTH(state, ch) \
    do { \
        if (--(state)->width >= 0) { \
            (ch) = GET(state); \
        } \
    } while (0)
#define WITHIN_WIDTH(state) ((state)->width >= 0)
|
85 |
|
86 /* |
|
87 * _pr_strtoull: |
|
88 * Convert a string to an unsigned 64-bit integer. The string |
|
89 * 'str' is assumed to be a representation of the integer in |
|
90 * base 'base'. |
|
91 * |
|
92 * Warning: |
|
93 * - Only handle base 8, 10, and 16. |
|
94 * - No overflow checking. |
|
95 */ |
|
96 |
|
97 static PRUint64 |
|
98 _pr_strtoull(const char *str, char **endptr, int base) |
|
99 { |
|
100 static const int BASE_MAX = 16; |
|
101 static const char digits[] = "0123456789abcdef"; |
|
102 char *digitPtr; |
|
103 PRUint64 x; /* return value */ |
|
104 PRInt64 base64; |
|
105 const char *cPtr; |
|
106 PRBool negative; |
|
107 const char *digitStart; |
|
108 |
|
109 PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16); |
|
110 if (base < 0 || base == 1 || base > BASE_MAX) { |
|
111 if (endptr) { |
|
112 *endptr = (char *) str; |
|
113 return LL_ZERO; |
|
114 } |
|
115 } |
|
116 |
|
117 cPtr = str; |
|
118 while (isspace(*cPtr)) { |
|
119 ++cPtr; |
|
120 } |
|
121 |
|
122 negative = PR_FALSE; |
|
123 if (*cPtr == '-') { |
|
124 negative = PR_TRUE; |
|
125 cPtr++; |
|
126 } else if (*cPtr == '+') { |
|
127 cPtr++; |
|
128 } |
|
129 |
|
130 if (base == 16) { |
|
131 if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) { |
|
132 cPtr += 2; |
|
133 } |
|
134 } else if (base == 0) { |
|
135 if (*cPtr != '0') { |
|
136 base = 10; |
|
137 } else if (cPtr[1] == 'x' || cPtr[1] == 'X') { |
|
138 base = 16; |
|
139 cPtr += 2; |
|
140 } else { |
|
141 base = 8; |
|
142 } |
|
143 } |
|
144 PR_ASSERT(base != 0); |
|
145 LL_I2L(base64, base); |
|
146 digitStart = cPtr; |
|
147 |
|
148 /* Skip leading zeros */ |
|
149 while (*cPtr == '0') { |
|
150 cPtr++; |
|
151 } |
|
152 |
|
153 LL_I2L(x, 0); |
|
154 while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) { |
|
155 PRUint64 d; |
|
156 |
|
157 LL_I2L(d, (digitPtr - digits)); |
|
158 LL_MUL(x, x, base64); |
|
159 LL_ADD(x, x, d); |
|
160 cPtr++; |
|
161 } |
|
162 |
|
163 if (cPtr == digitStart) { |
|
164 if (endptr) { |
|
165 *endptr = (char *) str; |
|
166 } |
|
167 return LL_ZERO; |
|
168 } |
|
169 |
|
170 if (negative) { |
|
171 #ifdef HAVE_LONG_LONG |
|
172 /* The cast to a signed type is to avoid a compiler warning */ |
|
173 x = -(PRInt64)x; |
|
174 #else |
|
175 LL_NEG(x, x); |
|
176 #endif |
|
177 } |
|
178 |
|
179 if (endptr) { |
|
180 *endptr = (char *) cPtr; |
|
181 } |
|
182 return x; |
|
183 } |
|
184 |
|
/*
 * The maximum field width (in number of characters) that is enough
 * (may be more than necessary) to represent a 64-bit integer or
 * floating point number.
 */
#define FMAX 31
#define DECIMAL_POINT '.'
|
192 |
|
193 static PRStatus |
|
194 GetInt(ScanfState *state, int code) |
|
195 { |
|
196 char buf[FMAX + 1], *p; |
|
197 int ch; |
|
198 static const char digits[] = "0123456789abcdefABCDEF"; |
|
199 PRBool seenDigit = PR_FALSE; |
|
200 int base; |
|
201 int dlen; |
|
202 |
|
203 switch (code) { |
|
204 case 'd': case 'u': |
|
205 base = 10; |
|
206 break; |
|
207 case 'i': |
|
208 base = 0; |
|
209 break; |
|
210 case 'x': case 'X': case 'p': |
|
211 base = 16; |
|
212 break; |
|
213 case 'o': |
|
214 base = 8; |
|
215 break; |
|
216 default: |
|
217 return PR_FAILURE; |
|
218 } |
|
219 if (state->width == 0 || state->width > FMAX) { |
|
220 state->width = FMAX; |
|
221 } |
|
222 p = buf; |
|
223 GET_IF_WITHIN_WIDTH(state, ch); |
|
224 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { |
|
225 *p++ = ch; |
|
226 GET_IF_WITHIN_WIDTH(state, ch); |
|
227 } |
|
228 if (WITHIN_WIDTH(state) && ch == '0') { |
|
229 seenDigit = PR_TRUE; |
|
230 *p++ = ch; |
|
231 GET_IF_WITHIN_WIDTH(state, ch); |
|
232 if (WITHIN_WIDTH(state) |
|
233 && (ch == 'x' || ch == 'X') |
|
234 && (base == 0 || base == 16)) { |
|
235 base = 16; |
|
236 *p++ = ch; |
|
237 GET_IF_WITHIN_WIDTH(state, ch); |
|
238 } else if (base == 0) { |
|
239 base = 8; |
|
240 } |
|
241 } |
|
242 if (base == 0 || base == 10) { |
|
243 dlen = 10; |
|
244 } else if (base == 8) { |
|
245 dlen = 8; |
|
246 } else { |
|
247 PR_ASSERT(base == 16); |
|
248 dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */ |
|
249 } |
|
250 while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) { |
|
251 *p++ = ch; |
|
252 GET_IF_WITHIN_WIDTH(state, ch); |
|
253 seenDigit = PR_TRUE; |
|
254 } |
|
255 if (WITHIN_WIDTH(state)) { |
|
256 UNGET(state, ch); |
|
257 } |
|
258 if (!seenDigit) { |
|
259 return PR_FAILURE; |
|
260 } |
|
261 *p = '\0'; |
|
262 if (state->assign) { |
|
263 if (code == 'd' || code == 'i') { |
|
264 if (state->sizeSpec == _PR_size_ll) { |
|
265 PRInt64 llval = _pr_strtoull(buf, NULL, base); |
|
266 *va_arg(state->ap, PRInt64 *) = llval; |
|
267 } else { |
|
268 long lval = strtol(buf, NULL, base); |
|
269 |
|
270 if (state->sizeSpec == _PR_size_none) { |
|
271 *va_arg(state->ap, PRIntn *) = lval; |
|
272 } else if (state->sizeSpec == _PR_size_h) { |
|
273 *va_arg(state->ap, PRInt16 *) = (PRInt16)lval; |
|
274 } else if (state->sizeSpec == _PR_size_l) { |
|
275 *va_arg(state->ap, PRInt32 *) = lval; |
|
276 } else { |
|
277 return PR_FAILURE; |
|
278 } |
|
279 } |
|
280 } else { |
|
281 if (state->sizeSpec == _PR_size_ll) { |
|
282 PRUint64 llval = _pr_strtoull(buf, NULL, base); |
|
283 *va_arg(state->ap, PRUint64 *) = llval; |
|
284 } else { |
|
285 unsigned long lval = strtoul(buf, NULL, base); |
|
286 |
|
287 if (state->sizeSpec == _PR_size_none) { |
|
288 *va_arg(state->ap, PRUintn *) = lval; |
|
289 } else if (state->sizeSpec == _PR_size_h) { |
|
290 *va_arg(state->ap, PRUint16 *) = (PRUint16)lval; |
|
291 } else if (state->sizeSpec == _PR_size_l) { |
|
292 *va_arg(state->ap, PRUint32 *) = lval; |
|
293 } else { |
|
294 return PR_FAILURE; |
|
295 } |
|
296 } |
|
297 } |
|
298 state->converted = PR_TRUE; |
|
299 } |
|
300 return PR_SUCCESS; |
|
301 } |
|
302 |
|
303 static PRStatus |
|
304 GetFloat(ScanfState *state) |
|
305 { |
|
306 char buf[FMAX + 1], *p; |
|
307 int ch; |
|
308 PRBool seenDigit = PR_FALSE; |
|
309 |
|
310 if (state->width == 0 || state->width > FMAX) { |
|
311 state->width = FMAX; |
|
312 } |
|
313 p = buf; |
|
314 GET_IF_WITHIN_WIDTH(state, ch); |
|
315 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { |
|
316 *p++ = ch; |
|
317 GET_IF_WITHIN_WIDTH(state, ch); |
|
318 } |
|
319 while (WITHIN_WIDTH(state) && isdigit(ch)) { |
|
320 *p++ = ch; |
|
321 GET_IF_WITHIN_WIDTH(state, ch); |
|
322 seenDigit = PR_TRUE; |
|
323 } |
|
324 if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) { |
|
325 *p++ = ch; |
|
326 GET_IF_WITHIN_WIDTH(state, ch); |
|
327 while (WITHIN_WIDTH(state) && isdigit(ch)) { |
|
328 *p++ = ch; |
|
329 GET_IF_WITHIN_WIDTH(state, ch); |
|
330 seenDigit = PR_TRUE; |
|
331 } |
|
332 } |
|
333 |
|
334 /* |
|
335 * This is not robust. For example, "1.2e+" would confuse |
|
336 * the code below to read 'e' and '+', only to realize that |
|
337 * it should have stopped at "1.2". But we can't push back |
|
338 * more than one character, so there is nothing I can do. |
|
339 */ |
|
340 |
|
341 /* Parse exponent */ |
|
342 if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) { |
|
343 *p++ = ch; |
|
344 GET_IF_WITHIN_WIDTH(state, ch); |
|
345 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) { |
|
346 *p++ = ch; |
|
347 GET_IF_WITHIN_WIDTH(state, ch); |
|
348 } |
|
349 while (WITHIN_WIDTH(state) && isdigit(ch)) { |
|
350 *p++ = ch; |
|
351 GET_IF_WITHIN_WIDTH(state, ch); |
|
352 } |
|
353 } |
|
354 if (WITHIN_WIDTH(state)) { |
|
355 UNGET(state, ch); |
|
356 } |
|
357 if (!seenDigit) { |
|
358 return PR_FAILURE; |
|
359 } |
|
360 *p = '\0'; |
|
361 if (state->assign) { |
|
362 PRFloat64 dval = PR_strtod(buf, NULL); |
|
363 |
|
364 state->converted = PR_TRUE; |
|
365 if (state->sizeSpec == _PR_size_l) { |
|
366 *va_arg(state->ap, PRFloat64 *) = dval; |
|
367 } else if (state->sizeSpec == _PR_size_L) { |
|
368 #if defined(OSF1) || defined(IRIX) |
|
369 *va_arg(state->ap, double *) = dval; |
|
370 #else |
|
371 *va_arg(state->ap, long double *) = dval; |
|
372 #endif |
|
373 } else { |
|
374 *va_arg(state->ap, float *) = (float) dval; |
|
375 } |
|
376 } |
|
377 return PR_SUCCESS; |
|
378 } |
|
379 |
|
380 /* |
|
381 * Convert, and return the end of the conversion spec. |
|
382 * Return NULL on error. |
|
383 */ |
|
384 |
|
385 static const char * |
|
386 Convert(ScanfState *state, const char *fmt) |
|
387 { |
|
388 const char *cPtr; |
|
389 int ch; |
|
390 char *cArg = NULL; |
|
391 |
|
392 state->converted = PR_FALSE; |
|
393 cPtr = fmt; |
|
394 if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') { |
|
395 do { |
|
396 ch = GET(state); |
|
397 } while (isspace(ch)); |
|
398 UNGET(state, ch); |
|
399 } |
|
400 switch (*cPtr) { |
|
401 case 'c': |
|
402 if (state->assign) { |
|
403 cArg = va_arg(state->ap, char *); |
|
404 } |
|
405 if (state->width == 0) { |
|
406 state->width = 1; |
|
407 } |
|
408 for (; state->width > 0; state->width--) { |
|
409 ch = GET(state); |
|
410 if (ch == EOF) { |
|
411 return NULL; |
|
412 } else if (state->assign) { |
|
413 *cArg++ = ch; |
|
414 } |
|
415 } |
|
416 if (state->assign) { |
|
417 state->converted = PR_TRUE; |
|
418 } |
|
419 break; |
|
420 case 'p': |
|
421 case 'd': case 'i': case 'o': |
|
422 case 'u': case 'x': case 'X': |
|
423 if (GetInt(state, *cPtr) == PR_FAILURE) { |
|
424 return NULL; |
|
425 } |
|
426 break; |
|
427 case 'e': case 'E': case 'f': |
|
428 case 'g': case 'G': |
|
429 if (GetFloat(state) == PR_FAILURE) { |
|
430 return NULL; |
|
431 } |
|
432 break; |
|
433 case 'n': |
|
434 /* do not consume any input */ |
|
435 if (state->assign) { |
|
436 switch (state->sizeSpec) { |
|
437 case _PR_size_none: |
|
438 *va_arg(state->ap, PRIntn *) = state->nChar; |
|
439 break; |
|
440 case _PR_size_h: |
|
441 *va_arg(state->ap, PRInt16 *) = state->nChar; |
|
442 break; |
|
443 case _PR_size_l: |
|
444 *va_arg(state->ap, PRInt32 *) = state->nChar; |
|
445 break; |
|
446 case _PR_size_ll: |
|
447 LL_I2L(*va_arg(state->ap, PRInt64 *), state->nChar); |
|
448 break; |
|
449 default: |
|
450 PR_ASSERT(0); |
|
451 } |
|
452 } |
|
453 break; |
|
454 case 's': |
|
455 if (state->width == 0) { |
|
456 state->width = INT_MAX; |
|
457 } |
|
458 if (state->assign) { |
|
459 cArg = va_arg(state->ap, char *); |
|
460 } |
|
461 for (; state->width > 0; state->width--) { |
|
462 ch = GET(state); |
|
463 if ((ch == EOF) || isspace(ch)) { |
|
464 UNGET(state, ch); |
|
465 break; |
|
466 } |
|
467 if (state->assign) { |
|
468 *cArg++ = ch; |
|
469 } |
|
470 } |
|
471 if (state->assign) { |
|
472 *cArg = '\0'; |
|
473 state->converted = PR_TRUE; |
|
474 } |
|
475 break; |
|
476 case '%': |
|
477 ch = GET(state); |
|
478 if (ch != '%') { |
|
479 UNGET(state, ch); |
|
480 return NULL; |
|
481 } |
|
482 break; |
|
483 case '[': |
|
484 { |
|
485 PRBool complement = PR_FALSE; |
|
486 const char *closeBracket; |
|
487 size_t n; |
|
488 |
|
489 if (*++cPtr == '^') { |
|
490 complement = PR_TRUE; |
|
491 cPtr++; |
|
492 } |
|
493 closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']'); |
|
494 if (closeBracket == NULL) { |
|
495 return NULL; |
|
496 } |
|
497 n = closeBracket - cPtr; |
|
498 if (state->width == 0) { |
|
499 state->width = INT_MAX; |
|
500 } |
|
501 if (state->assign) { |
|
502 cArg = va_arg(state->ap, char *); |
|
503 } |
|
504 for (; state->width > 0; state->width--) { |
|
505 ch = GET(state); |
|
506 if ((ch == EOF) |
|
507 || (!complement && !memchr(cPtr, ch, n)) |
|
508 || (complement && memchr(cPtr, ch, n))) { |
|
509 UNGET(state, ch); |
|
510 break; |
|
511 } |
|
512 if (state->assign) { |
|
513 *cArg++ = ch; |
|
514 } |
|
515 } |
|
516 if (state->assign) { |
|
517 *cArg = '\0'; |
|
518 state->converted = PR_TRUE; |
|
519 } |
|
520 cPtr = closeBracket; |
|
521 } |
|
522 break; |
|
523 default: |
|
524 return NULL; |
|
525 } |
|
526 return cPtr; |
|
527 } |
|
528 |
|
529 static PRInt32 |
|
530 DoScanf(ScanfState *state, const char *fmt) |
|
531 { |
|
532 PRInt32 nConverted = 0; |
|
533 const char *cPtr; |
|
534 int ch; |
|
535 |
|
536 state->nChar = 0; |
|
537 cPtr = fmt; |
|
538 while (1) { |
|
539 if (isspace(*cPtr)) { |
|
540 /* white space: skip */ |
|
541 do { |
|
542 cPtr++; |
|
543 } while (isspace(*cPtr)); |
|
544 do { |
|
545 ch = GET(state); |
|
546 } while (isspace(ch)); |
|
547 UNGET(state, ch); |
|
548 } else if (*cPtr == '%') { |
|
549 /* format spec: convert */ |
|
550 cPtr++; |
|
551 state->assign = PR_TRUE; |
|
552 if (*cPtr == '*') { |
|
553 cPtr++; |
|
554 state->assign = PR_FALSE; |
|
555 } |
|
556 for (state->width = 0; isdigit(*cPtr); cPtr++) { |
|
557 state->width = state->width * 10 + *cPtr - '0'; |
|
558 } |
|
559 state->sizeSpec = _PR_size_none; |
|
560 if (*cPtr == 'h') { |
|
561 cPtr++; |
|
562 state->sizeSpec = _PR_size_h; |
|
563 } else if (*cPtr == 'l') { |
|
564 cPtr++; |
|
565 if (*cPtr == 'l') { |
|
566 cPtr++; |
|
567 state->sizeSpec = _PR_size_ll; |
|
568 } else { |
|
569 state->sizeSpec = _PR_size_l; |
|
570 } |
|
571 } else if (*cPtr == 'L') { |
|
572 cPtr++; |
|
573 state->sizeSpec = _PR_size_L; |
|
574 } |
|
575 cPtr = Convert(state, cPtr); |
|
576 if (cPtr == NULL) { |
|
577 return (nConverted > 0 ? nConverted : EOF); |
|
578 } |
|
579 if (state->converted) { |
|
580 nConverted++; |
|
581 } |
|
582 cPtr++; |
|
583 } else { |
|
584 /* others: must match */ |
|
585 if (*cPtr == '\0') { |
|
586 return nConverted; |
|
587 } |
|
588 ch = GET(state); |
|
589 if (ch != *cPtr) { |
|
590 UNGET(state, ch); |
|
591 return nConverted; |
|
592 } |
|
593 cPtr++; |
|
594 } |
|
595 } |
|
596 } |
|
597 |
|
/*
 * StringGetChar:
 *     The 'get' callback for scanning a NUL-terminated string.
 *     'stream' points to a character pointer that serves as the read
 *     cursor.  Returns the character under the cursor as an unsigned
 *     char value and advances the cursor, or returns EOF, leaving the
 *     cursor in place, when the cursor is at the terminating NUL.
 */
static int
StringGetChar(void *stream)
{
    const char **cursor = (const char **) stream;
    const char *cPtr = *cursor;

    if (*cPtr == '\0') {
        return EOF;
    }
    *cursor = cPtr + 1;
    return (unsigned char) *cPtr;
}
|
610 |
|
/*
 * StringUngetChar:
 *     The 'unget' callback for scanning a NUL-terminated string.
 *     'stream' points to the character pointer used as the read
 *     cursor.  The cursor is moved back by one character unless 'ch'
 *     is EOF; StringGetChar does not advance the cursor when it
 *     returns EOF, so there is nothing to undo in that case.  The
 *     value of 'ch' is otherwise unused.
 */
static void
StringUngetChar(void *stream, int ch)
{
    const char **cursor = (const char **) stream;

    if (ch != EOF) {
        *cursor = *cursor - 1;
    }
}
|
620 |
|
621 PR_IMPLEMENT(PRInt32) |
|
622 PR_sscanf(const char *buf, const char *fmt, ...) |
|
623 { |
|
624 PRInt32 rv; |
|
625 ScanfState state; |
|
626 |
|
627 state.get = &StringGetChar; |
|
628 state.unget = &StringUngetChar; |
|
629 state.stream = (void *) &buf; |
|
630 va_start(state.ap, fmt); |
|
631 rv = DoScanf(&state, fmt); |
|
632 va_end(state.ap); |
|
633 return rv; |
|
634 } |