js/src/jsonparser.cpp

branch
TOR_BUG_3246
changeset 7
129ffea94266
equal deleted inserted replaced
-1:000000000000 0:7105e0ddda7a
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "jsonparser.h"
8
9 #include "mozilla/RangedPtr.h"
10
11 #include <ctype.h>
12
13 #include "jsarray.h"
14 #include "jscompartment.h"
15 #include "jsnum.h"
16 #include "jsprf.h"
17
18 #include "vm/StringBuffer.h"
19
20 #include "jsobjinlines.h"
21
22 using namespace js;
23
24 using mozilla::RangedPtr;
25
26 JSONParser::~JSONParser()
27 {
28 for (size_t i = 0; i < stack.length(); i++) {
29 if (stack[i].state == FinishArrayElement)
30 js_delete(&stack[i].elements());
31 else
32 js_delete(&stack[i].properties());
33 }
34
35 for (size_t i = 0; i < freeElements.length(); i++)
36 js_delete(freeElements[i]);
37
38 for (size_t i = 0; i < freeProperties.length(); i++)
39 js_delete(freeProperties[i]);
40 }
41
42 void
43 JSONParser::trace(JSTracer *trc)
44 {
45 for (size_t i = 0; i < stack.length(); i++) {
46 if (stack[i].state == FinishArrayElement) {
47 ElementVector &elements = stack[i].elements();
48 for (size_t j = 0; j < elements.length(); j++)
49 gc::MarkValueRoot(trc, &elements[j], "JSONParser element");
50 } else {
51 PropertyVector &properties = stack[i].properties();
52 for (size_t j = 0; j < properties.length(); j++) {
53 gc::MarkValueRoot(trc, &properties[j].value, "JSONParser property value");
54 gc::MarkIdRoot(trc, &properties[j].id, "JSONParser property id");
55 }
56 }
57 }
58 }
59
60 void
61 JSONParser::getTextPosition(uint32_t *column, uint32_t *line)
62 {
63 ConstTwoByteChars ptr = begin;
64 uint32_t col = 1;
65 uint32_t row = 1;
66 for (; ptr < current; ptr++) {
67 if (*ptr == '\n' || *ptr == '\r') {
68 ++row;
69 col = 1;
70 // \r\n is treated as a single newline.
71 if (ptr + 1 < current && *ptr == '\r' && *(ptr + 1) == '\n')
72 ++ptr;
73 } else {
74 ++col;
75 }
76 }
77 *column = col;
78 *line = row;
79 }
80
81 void
82 JSONParser::error(const char *msg)
83 {
84 if (errorHandling == RaiseError) {
85 uint32_t column = 1, line = 1;
86 getTextPosition(&column, &line);
87
88 const size_t MaxWidth = sizeof("4294967295");
89 char columnNumber[MaxWidth];
90 JS_snprintf(columnNumber, sizeof columnNumber, "%lu", column);
91 char lineNumber[MaxWidth];
92 JS_snprintf(lineNumber, sizeof lineNumber, "%lu", line);
93
94 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_JSON_BAD_PARSE,
95 msg, lineNumber, columnNumber);
96 }
97 }
98
99 bool
100 JSONParser::errorReturn()
101 {
102 return errorHandling == NoError;
103 }
104
105 template<JSONParser::StringType ST>
106 JSONParser::Token
107 JSONParser::readString()
108 {
109 JS_ASSERT(current < end);
110 JS_ASSERT(*current == '"');
111
112 /*
113 * JSONString:
114 * /^"([^\u0000-\u001F"\\]|\\(["/\\bfnrt]|u[0-9a-fA-F]{4}))*"$/
115 */
116
117 if (++current == end) {
118 error("unterminated string literal");
119 return token(Error);
120 }
121
122 /*
123 * Optimization: if the source contains no escaped characters, create the
124 * string directly from the source text.
125 */
126 RangedPtr<const jschar> start = current;
127 for (; current < end; current++) {
128 if (*current == '"') {
129 size_t length = current - start;
130 current++;
131 JSFlatString *str = (ST == JSONParser::PropertyName)
132 ? AtomizeChars(cx, start.get(), length)
133 : js_NewStringCopyN<CanGC>(cx, start.get(), length);
134 if (!str)
135 return token(OOM);
136 return stringToken(str);
137 }
138
139 if (*current == '\\')
140 break;
141
142 if (*current <= 0x001F) {
143 error("bad control character in string literal");
144 return token(Error);
145 }
146 }
147
148 /*
149 * Slow case: string contains escaped characters. Copy a maximal sequence
150 * of unescaped characters into a temporary buffer, then an escaped
151 * character, and repeat until the entire string is consumed.
152 */
153 StringBuffer buffer(cx);
154 do {
155 if (start < current && !buffer.append(start.get(), current.get()))
156 return token(OOM);
157
158 if (current >= end)
159 break;
160
161 jschar c = *current++;
162 if (c == '"') {
163 JSFlatString *str = (ST == JSONParser::PropertyName)
164 ? buffer.finishAtom()
165 : buffer.finishString();
166 if (!str)
167 return token(OOM);
168 return stringToken(str);
169 }
170
171 if (c != '\\') {
172 --current;
173 error("bad character in string literal");
174 return token(Error);
175 }
176
177 if (current >= end)
178 break;
179
180 switch (*current++) {
181 case '"': c = '"'; break;
182 case '/': c = '/'; break;
183 case '\\': c = '\\'; break;
184 case 'b': c = '\b'; break;
185 case 'f': c = '\f'; break;
186 case 'n': c = '\n'; break;
187 case 'r': c = '\r'; break;
188 case 't': c = '\t'; break;
189
190 case 'u':
191 if (end - current < 4 ||
192 !(JS7_ISHEX(current[0]) &&
193 JS7_ISHEX(current[1]) &&
194 JS7_ISHEX(current[2]) &&
195 JS7_ISHEX(current[3])))
196 {
197 // Point to the first non-hexadecimal character (which may be
198 // missing).
199 if (current == end || !JS7_ISHEX(current[0]))
200 ; // already at correct location
201 else if (current + 1 == end || !JS7_ISHEX(current[1]))
202 current += 1;
203 else if (current + 2 == end || !JS7_ISHEX(current[2]))
204 current += 2;
205 else if (current + 3 == end || !JS7_ISHEX(current[3]))
206 current += 3;
207 else
208 MOZ_ASSUME_UNREACHABLE("logic error determining first erroneous character");
209
210 error("bad Unicode escape");
211 return token(Error);
212 }
213 c = (JS7_UNHEX(current[0]) << 12)
214 | (JS7_UNHEX(current[1]) << 8)
215 | (JS7_UNHEX(current[2]) << 4)
216 | (JS7_UNHEX(current[3]));
217 current += 4;
218 break;
219
220 default:
221 current--;
222 error("bad escaped character");
223 return token(Error);
224 }
225 if (!buffer.append(c))
226 return token(OOM);
227
228 start = current;
229 for (; current < end; current++) {
230 if (*current == '"' || *current == '\\' || *current <= 0x001F)
231 break;
232 }
233 } while (current < end);
234
235 error("unterminated string");
236 return token(Error);
237 }
238
239 JSONParser::Token
240 JSONParser::readNumber()
241 {
242 JS_ASSERT(current < end);
243 JS_ASSERT(JS7_ISDEC(*current) || *current == '-');
244
245 /*
246 * JSONNumber:
247 * /^-?(0|[1-9][0-9]+)(\.[0-9]+)?([eE][\+\-]?[0-9]+)?$/
248 */
249
250 bool negative = *current == '-';
251
252 /* -? */
253 if (negative && ++current == end) {
254 error("no number after minus sign");
255 return token(Error);
256 }
257
258 const RangedPtr<const jschar> digitStart = current;
259
260 /* 0|[1-9][0-9]+ */
261 if (!JS7_ISDEC(*current)) {
262 error("unexpected non-digit");
263 return token(Error);
264 }
265 if (*current++ != '0') {
266 for (; current < end; current++) {
267 if (!JS7_ISDEC(*current))
268 break;
269 }
270 }
271
272 /* Fast path: no fractional or exponent part. */
273 if (current == end || (*current != '.' && *current != 'e' && *current != 'E')) {
274 TwoByteChars chars(digitStart.get(), current - digitStart);
275 if (chars.length() < strlen("9007199254740992")) {
276 // If the decimal number is shorter than the length of 2**53, (the
277 // largest number a double can represent with integral precision),
278 // parse it using a decimal-only parser. This comparison is
279 // conservative but faster than a fully-precise check.
280 double d = ParseDecimalNumber(chars);
281 return numberToken(negative ? -d : d);
282 }
283
284 double d;
285 const jschar *dummy;
286 if (!GetPrefixInteger(cx, digitStart.get(), current.get(), 10, &dummy, &d))
287 return token(OOM);
288 JS_ASSERT(current == dummy);
289 return numberToken(negative ? -d : d);
290 }
291
292 /* (\.[0-9]+)? */
293 if (current < end && *current == '.') {
294 if (++current == end) {
295 error("missing digits after decimal point");
296 return token(Error);
297 }
298 if (!JS7_ISDEC(*current)) {
299 error("unterminated fractional number");
300 return token(Error);
301 }
302 while (++current < end) {
303 if (!JS7_ISDEC(*current))
304 break;
305 }
306 }
307
308 /* ([eE][\+\-]?[0-9]+)? */
309 if (current < end && (*current == 'e' || *current == 'E')) {
310 if (++current == end) {
311 error("missing digits after exponent indicator");
312 return token(Error);
313 }
314 if (*current == '+' || *current == '-') {
315 if (++current == end) {
316 error("missing digits after exponent sign");
317 return token(Error);
318 }
319 }
320 if (!JS7_ISDEC(*current)) {
321 error("exponent part is missing a number");
322 return token(Error);
323 }
324 while (++current < end) {
325 if (!JS7_ISDEC(*current))
326 break;
327 }
328 }
329
330 double d;
331 const jschar *finish;
332 if (!js_strtod(cx, digitStart.get(), current.get(), &finish, &d))
333 return token(OOM);
334 JS_ASSERT(current == finish);
335 return numberToken(negative ? -d : d);
336 }
337
338 static inline bool
339 IsJSONWhitespace(jschar c)
340 {
341 return c == '\t' || c == '\r' || c == '\n' || c == ' ';
342 }
343
344 JSONParser::Token
345 JSONParser::advance()
346 {
347 while (current < end && IsJSONWhitespace(*current))
348 current++;
349 if (current >= end) {
350 error("unexpected end of data");
351 return token(Error);
352 }
353
354 switch (*current) {
355 case '"':
356 return readString<LiteralValue>();
357
358 case '-':
359 case '0':
360 case '1':
361 case '2':
362 case '3':
363 case '4':
364 case '5':
365 case '6':
366 case '7':
367 case '8':
368 case '9':
369 return readNumber();
370
371 case 't':
372 if (end - current < 4 || current[1] != 'r' || current[2] != 'u' || current[3] != 'e') {
373 error("unexpected keyword");
374 return token(Error);
375 }
376 current += 4;
377 return token(True);
378
379 case 'f':
380 if (end - current < 5 ||
381 current[1] != 'a' || current[2] != 'l' || current[3] != 's' || current[4] != 'e')
382 {
383 error("unexpected keyword");
384 return token(Error);
385 }
386 current += 5;
387 return token(False);
388
389 case 'n':
390 if (end - current < 4 || current[1] != 'u' || current[2] != 'l' || current[3] != 'l') {
391 error("unexpected keyword");
392 return token(Error);
393 }
394 current += 4;
395 return token(Null);
396
397 case '[':
398 current++;
399 return token(ArrayOpen);
400 case ']':
401 current++;
402 return token(ArrayClose);
403
404 case '{':
405 current++;
406 return token(ObjectOpen);
407 case '}':
408 current++;
409 return token(ObjectClose);
410
411 case ',':
412 current++;
413 return token(Comma);
414
415 case ':':
416 current++;
417 return token(Colon);
418
419 default:
420 error("unexpected character");
421 return token(Error);
422 }
423 }
424
425 JSONParser::Token
426 JSONParser::advanceAfterObjectOpen()
427 {
428 JS_ASSERT(current[-1] == '{');
429
430 while (current < end && IsJSONWhitespace(*current))
431 current++;
432 if (current >= end) {
433 error("end of data while reading object contents");
434 return token(Error);
435 }
436
437 if (*current == '"')
438 return readString<PropertyName>();
439
440 if (*current == '}') {
441 current++;
442 return token(ObjectClose);
443 }
444
445 error("expected property name or '}'");
446 return token(Error);
447 }
448
449 static inline void
450 AssertPastValue(const RangedPtr<const jschar> current)
451 {
452 /*
453 * We're past an arbitrary JSON value, so the previous character is
454 * *somewhat* constrained, even if this assertion is pretty broad. Don't
455 * knock it till you tried it: this assertion *did* catch a bug once.
456 */
457 JS_ASSERT((current[-1] == 'l' &&
458 current[-2] == 'l' &&
459 current[-3] == 'u' &&
460 current[-4] == 'n') ||
461 (current[-1] == 'e' &&
462 current[-2] == 'u' &&
463 current[-3] == 'r' &&
464 current[-4] == 't') ||
465 (current[-1] == 'e' &&
466 current[-2] == 's' &&
467 current[-3] == 'l' &&
468 current[-4] == 'a' &&
469 current[-5] == 'f') ||
470 current[-1] == '}' ||
471 current[-1] == ']' ||
472 current[-1] == '"' ||
473 JS7_ISDEC(current[-1]));
474 }
475
476 JSONParser::Token
477 JSONParser::advanceAfterArrayElement()
478 {
479 AssertPastValue(current);
480
481 while (current < end && IsJSONWhitespace(*current))
482 current++;
483 if (current >= end) {
484 error("end of data when ',' or ']' was expected");
485 return token(Error);
486 }
487
488 if (*current == ',') {
489 current++;
490 return token(Comma);
491 }
492
493 if (*current == ']') {
494 current++;
495 return token(ArrayClose);
496 }
497
498 error("expected ',' or ']' after array element");
499 return token(Error);
500 }
501
502 JSONParser::Token
503 JSONParser::advancePropertyName()
504 {
505 JS_ASSERT(current[-1] == ',');
506
507 while (current < end && IsJSONWhitespace(*current))
508 current++;
509 if (current >= end) {
510 error("end of data when property name was expected");
511 return token(Error);
512 }
513
514 if (*current == '"')
515 return readString<PropertyName>();
516
517 error("expected double-quoted property name");
518 return token(Error);
519 }
520
521 JSONParser::Token
522 JSONParser::advancePropertyColon()
523 {
524 JS_ASSERT(current[-1] == '"');
525
526 while (current < end && IsJSONWhitespace(*current))
527 current++;
528 if (current >= end) {
529 error("end of data after property name when ':' was expected");
530 return token(Error);
531 }
532
533 if (*current == ':') {
534 current++;
535 return token(Colon);
536 }
537
538 error("expected ':' after property name in object");
539 return token(Error);
540 }
541
542 JSONParser::Token
543 JSONParser::advanceAfterProperty()
544 {
545 AssertPastValue(current);
546
547 while (current < end && IsJSONWhitespace(*current))
548 current++;
549 if (current >= end) {
550 error("end of data after property value in object");
551 return token(Error);
552 }
553
554 if (*current == ',') {
555 current++;
556 return token(Comma);
557 }
558
559 if (*current == '}') {
560 current++;
561 return token(ObjectClose);
562 }
563
564 error("expected ',' or '}' after property value in object");
565 return token(Error);
566 }
567
568 JSObject *
569 JSONParser::createFinishedObject(PropertyVector &properties)
570 {
571 /*
572 * Look for an existing cached type and shape for objects with this set of
573 * properties.
574 */
575 {
576 JSObject *obj = cx->compartment()->types.newTypedObject(cx, properties.begin(),
577 properties.length());
578 if (obj)
579 return obj;
580 }
581
582 /*
583 * Make a new object sized for the given number of properties and fill its
584 * shape in manually.
585 */
586 gc::AllocKind allocKind = gc::GetGCObjectKind(properties.length());
587 RootedObject obj(cx, NewBuiltinClassInstance(cx, &JSObject::class_, allocKind));
588 if (!obj)
589 return nullptr;
590
591 RootedId propid(cx);
592 RootedValue value(cx);
593
594 for (size_t i = 0; i < properties.length(); i++) {
595 propid = properties[i].id;
596 value = properties[i].value;
597 if (!DefineNativeProperty(cx, obj, propid, value, JS_PropertyStub, JS_StrictPropertyStub,
598 JSPROP_ENUMERATE)) {
599 return nullptr;
600 }
601 }
602
603 /*
604 * Try to assign a new type to the object with type information for its
605 * properties, and update the initializer type object cache with this
606 * object's final shape.
607 */
608 cx->compartment()->types.fixObjectType(cx, obj);
609
610 return obj;
611 }
612
613 inline bool
614 JSONParser::finishObject(MutableHandleValue vp, PropertyVector &properties)
615 {
616 JS_ASSERT(&properties == &stack.back().properties());
617
618 JSObject *obj = createFinishedObject(properties);
619 if (!obj)
620 return false;
621
622 vp.setObject(*obj);
623 if (!freeProperties.append(&properties))
624 return false;
625 stack.popBack();
626 return true;
627 }
628
629 inline bool
630 JSONParser::finishArray(MutableHandleValue vp, ElementVector &elements)
631 {
632 JS_ASSERT(&elements == &stack.back().elements());
633
634 JSObject *obj = NewDenseCopiedArray(cx, elements.length(), elements.begin());
635 if (!obj)
636 return false;
637
638 /* Try to assign a new type to the array according to its elements. */
639 cx->compartment()->types.fixArrayType(cx, obj);
640
641 vp.setObject(*obj);
642 if (!freeElements.append(&elements))
643 return false;
644 stack.popBack();
645 return true;
646 }
647
648 bool
649 JSONParser::parse(MutableHandleValue vp)
650 {
651 RootedValue value(cx);
652 JS_ASSERT(stack.empty());
653
654 vp.setUndefined();
655
656 Token token;
657 ParserState state = JSONValue;
658 while (true) {
659 switch (state) {
660 case FinishObjectMember: {
661 PropertyVector &properties = stack.back().properties();
662 properties.back().value = value;
663
664 token = advanceAfterProperty();
665 if (token == ObjectClose) {
666 if (!finishObject(&value, properties))
667 return false;
668 break;
669 }
670 if (token != Comma) {
671 if (token == OOM)
672 return false;
673 if (token != Error)
674 error("expected ',' or '}' after property-value pair in object literal");
675 return errorReturn();
676 }
677 token = advancePropertyName();
678 /* FALL THROUGH */
679 }
680
681 JSONMember:
682 if (token == String) {
683 jsid id = AtomToId(atomValue());
684 PropertyVector &properties = stack.back().properties();
685 if (!properties.append(IdValuePair(id)))
686 return false;
687 token = advancePropertyColon();
688 if (token != Colon) {
689 JS_ASSERT(token == Error);
690 return errorReturn();
691 }
692 goto JSONValue;
693 }
694 if (token == OOM)
695 return false;
696 if (token != Error)
697 error("property names must be double-quoted strings");
698 return errorReturn();
699
700 case FinishArrayElement: {
701 ElementVector &elements = stack.back().elements();
702 if (!elements.append(value.get()))
703 return false;
704 token = advanceAfterArrayElement();
705 if (token == Comma)
706 goto JSONValue;
707 if (token == ArrayClose) {
708 if (!finishArray(&value, elements))
709 return false;
710 break;
711 }
712 JS_ASSERT(token == Error);
713 return errorReturn();
714 }
715
716 JSONValue:
717 case JSONValue:
718 token = advance();
719 JSONValueSwitch:
720 switch (token) {
721 case String:
722 value = stringValue();
723 break;
724 case Number:
725 value = numberValue();
726 break;
727 case True:
728 value = BooleanValue(true);
729 break;
730 case False:
731 value = BooleanValue(false);
732 break;
733 case Null:
734 value = NullValue();
735 break;
736
737 case ArrayOpen: {
738 ElementVector *elements;
739 if (!freeElements.empty()) {
740 elements = freeElements.popCopy();
741 elements->clear();
742 } else {
743 elements = cx->new_<ElementVector>(cx);
744 if (!elements)
745 return false;
746 }
747 if (!stack.append(elements))
748 return false;
749
750 token = advance();
751 if (token == ArrayClose) {
752 if (!finishArray(&value, *elements))
753 return false;
754 break;
755 }
756 goto JSONValueSwitch;
757 }
758
759 case ObjectOpen: {
760 PropertyVector *properties;
761 if (!freeProperties.empty()) {
762 properties = freeProperties.popCopy();
763 properties->clear();
764 } else {
765 properties = cx->new_<PropertyVector>(cx);
766 if (!properties)
767 return false;
768 }
769 if (!stack.append(properties))
770 return false;
771
772 token = advanceAfterObjectOpen();
773 if (token == ObjectClose) {
774 if (!finishObject(&value, *properties))
775 return false;
776 break;
777 }
778 goto JSONMember;
779 }
780
781 case ArrayClose:
782 case ObjectClose:
783 case Colon:
784 case Comma:
785 // Move the current pointer backwards so that the position
786 // reported in the error message is correct.
787 --current;
788 error("unexpected character");
789 return errorReturn();
790
791 case OOM:
792 return false;
793
794 case Error:
795 return errorReturn();
796 }
797 break;
798 }
799
800 if (stack.empty())
801 break;
802 state = stack.back().state;
803 }
804
805 for (; current < end; current++) {
806 if (!IsJSONWhitespace(*current)) {
807 error("unexpected non-whitespace character after JSON data");
808 return errorReturn();
809 }
810 }
811
812 JS_ASSERT(end == current);
813 JS_ASSERT(stack.empty());
814
815 vp.set(value);
816 return true;
817 }

mercurial