|
1 """ |
|
2 Implementation of JSONDecoder |
|
3 """ |
|
4 import re |
|
5 import sys |
|
6 |
|
7 from simplejson.scanner import Scanner, pattern |
|
8 try: |
|
9 from simplejson._speedups import scanstring as c_scanstring |
|
10 except ImportError: |
|
11 pass |
|
12 |
|
# Flag set shared by the regular expressions compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
|
14 |
|
15 def _floatconstants(): |
|
16 import struct |
|
17 import sys |
|
18 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') |
|
19 if sys.byteorder != 'big': |
|
20 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] |
|
21 nan, inf = struct.unpack('dd', _BYTES) |
|
22 return nan, inf, -inf |
|
23 |
|
24 NaN, PosInf, NegInf = _floatconstants() |
|
25 |
|
26 |
|
def linecol(doc, pos):
    """Convert a character offset into a ``(line, column)`` pair.

    ``doc`` is the full document text and ``pos`` an index into it.
    Both the line number and the column number are 1-based, so offset 0
    is reported as line 1, column 1.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind yields -1 when no newline precedes ``pos``; subtracting it makes
    # the first line 1-based exactly like every later line.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
|
34 |
|
35 |
|
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` followed by the location of an error inside ``doc``.

    With only ``pos`` given, the message names a single line/column/char
    position.  When ``end`` is also given, the message names the span from
    ``pos`` to ``end``.  Line and column numbers come from ``linecol``.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    return '%s: line %d column %d (char %d)' % (
        msg, start_line, start_col, pos)
|
43 |
|
44 |
|
# Literal tokens and the Python values they decode to when the decoding
# context supplies no ``parse_constant`` callable.
_CONSTANTS = {
    'null': None,
    'true': True,
    'false': False,
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}


def JSONConstant(match, context, c=_CONSTANTS):
    """Decode one of the literal tokens matched by the pattern below.

    If ``context`` has a non-None ``parse_constant`` attribute, it is called
    with the matched text and its result is used; otherwise the value is
    looked up in ``c``.  Returns ``(value, None)``.
    """
    token = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    if hook is not None:
        return hook(token), None
    return c[token], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
|
63 |
|
64 |
|
def JSONNumber(match, context):
    """Decode a JSON number into an int or a float.

    The matched span is re-matched with ``JSONNumber.regex`` (presumably
    attached by ``pattern`` below -- confirm in simplejson.scanner) to obtain
    the integer, fraction and exponent groups.  A number with a fraction or
    exponent goes through ``context.parse_float`` (default ``float``); any
    other number goes through ``context.parse_int`` (default ``int``).
    Returns ``(value, None)``.
    """
    start, stop = match.span()
    parts = JSONNumber.regex.match(match.string, start, stop)
    integer, frac, exp = parts.groups()
    if not (frac or exp):
        convert = getattr(context, 'parse_int', None) or int
        return convert(integer), None
    convert = getattr(context, 'parse_float', None) or float
    return convert(integer + (frac or '') + (exp or '')), None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
|
76 |
|
77 |
|
# Group 1: shortest run of characters before a terminator.
# Group 2: the terminator itself -- a double quote, a backslash, or a
# control character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes (the character following a backslash) and the
# text each one decodes to.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding applied by py_scanstring when the caller passes None.
DEFAULT_ENCODING = "utf-8"
|
85 |
|
def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the body of a JSON string and return ``(text, end)``.

    ``s`` is the document and ``end`` the index just past the opening quote.
    The returned ``text`` is unicode and the returned ``end`` is the index
    just past the closing quote.

    ``encoding`` is used to decode chunks that are not already unicode;
    ``DEFAULT_ENCODING`` is used when it is None.  When ``strict`` is true a
    raw control character inside the string raises ValueError; otherwise
    the character is kept as-is.

    Raises ValueError for an unterminated string, an unknown backslash
    escape, a malformed ``\\uXXXX`` escape or an invalid surrogate pair.
    ``_b`` and ``_m`` are default-argument bindings of module constants.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated string" errors.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        elif terminator != '\\':
            # The terminator is a raw control character.
            if strict:
                # Interpolate the offending character; the message used to
                # be passed on with a literal, unformatted "%r" in it.
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                if len(esc) != 4:
                    raise ValueError
                uni = int(esc, 16)
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    # High surrogate: a second \uXXXX escape holding the low
                    # surrogate must follow immediately.
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if not s[end + 5:end + 7] == '\\u':
                        raise ValueError
                    esc2 = s[end + 7:end + 11]
                    if len(esc2) != 4:
                        raise ValueError
                    uni2 = int(esc2, 16)
                    # Reject a second half that is not a low surrogate;
                    # combining it would yield an unrelated code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
|
147 |
|
148 |
|
# Prefer the C implementation when it is available.  The name c_scanstring
# only exists if the optional import at the top of the module succeeded.
try:
    scanstring = c_scanstring
except NameError:
    scanstring = py_scanstring


def JSONString(match, context):
    """Decode the JSON string whose opening quote was just matched.

    ``encoding`` and ``strict`` are read from ``context`` (defaulting to
    None and True) and passed to ``scanstring`` together with the document
    and the index just past the opening quote.  Returns whatever
    ``scanstring`` returns.
    """
    return scanstring(
        match.string,
        match.end(),
        getattr(context, 'encoding', None),
        getattr(context, 'strict', True))
pattern(r'"')(JSONString)
|
160 |
|
161 |
|
WHITESPACE = re.compile(r'\s*', FLAGS)


def JSONObject(match, context, _w=WHITESPACE.match):
    """Decode a JSON object whose opening brace was just matched.

    Keys are read with ``scanstring`` and values with
    ``JSONScanner.iterscan``.  If ``context`` has a non-None ``object_hook``
    attribute, it is called with the finished dict -- including the empty
    dict produced by ``{}`` -- and its return value replaces the dict.

    Returns ``(value, end)`` where ``end`` is the index just past the
    closing brace.  Raises ValueError when a property name, a ``:`` or ``,``
    delimiter, or a value is missing.
    """
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    object_hook = getattr(context, 'object_hook', None)
    # Trivial empty object
    if nextchar == '}':
        # The hook must see empty objects too, not only populated ones.
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        # ``end`` points just past the opening quote of the key here.
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # ``end`` was already advanced; report the offending character.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)
|
206 |
|
207 |
|
def JSONArray(match, context, _w=WHITESPACE.match):
    """Decode a JSON array whose opening bracket was just matched.

    Elements are read one at a time with ``JSONScanner.iterscan``.
    Returns ``(values, end)`` where ``values`` is a list and ``end`` is the
    index just past the closing bracket.  Raises ValueError when an element
    or a ``,`` delimiter is missing.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # ``end`` was already advanced past the offending character, so
            # step back one to report its position (as JSONObject does).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)
|
233 |
|
234 |
|
# The value parsers handed to the scanner, in this order.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Module-level scanner that JSONObject and JSONArray use for nested values.
JSONScanner = Scanner(ANYTHING)
|
244 |
|
245 |
|
class JSONDecoder(object):
    """
    Decoder for JSON <http://json.org> documents.

    Unless customised through the constructor arguments, JSON values are
    converted to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    The tokens ``NaN``, ``Infinity`` and ``-Infinity`` are accepted as well
    and become the matching ``float`` values, even though they are not part
    of the JSON spec.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """
        ``encoding`` names the encoding used to interpret ``str`` input
        (utf-8 when None).  It is ignored for ``unicode`` input.  Only
        encodings that are a superset of ASCII currently work; text in any
        other encoding should be passed in as ``unicode``.

        ``object_hook``, when given, is called with each decoded JSON object
        and its return value is used in place of the ``dict``.  This allows
        custom deserialization (e.g. JSON-RPC class hinting).

        ``parse_float``, when given, is called with the text of each JSON
        float; the default is equivalent to ``float(num_str)``.  Use it to
        plug in another type or parser (e.g. decimal.Decimal).

        ``parse_int``, when given, is called with the text of each JSON
        integer; the default is equivalent to ``int(num_str)``.  Use it to
        plug in another type or parser (e.g. float).

        ``parse_constant``, when given, is called with one of the strings
        -Infinity, Infinity, NaN, null, true, false.  It can be used to
        raise an exception when invalid JSON numbers are encountered.

        ``strict`` is stored on the instance and read by the string parsers
        from the decoding context.
        """
        self.strict = strict
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_int = parse_int
        self.parse_float = parse_float
        self.parse_constant = parse_constant

    def decode(self, s, _w=WHITESPACE.match):
        """
        Decode ``s`` (a ``str`` or ``unicode`` instance holding one JSON
        document) and return the resulting Python object.

        Leading and trailing whitespace is skipped; anything else after the
        document raises ValueError ("Extra data").
        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, **kw):
        """
        Decode the JSON document that ``s`` (a ``str`` or ``unicode``)
        begins with and return a 2-tuple of the Python object and the index
        in ``s`` at which the document ended.

        Unlike ``decode``, trailing data after the document is allowed.
        Keyword arguments are forwarded to the scanner's ``iterscan``; this
        decoder is supplied as ``context`` unless the caller passes one.
        Raises ValueError if no document can be decoded.
        """
        if 'context' not in kw:
            kw['context'] = self
        try:
            obj, end = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        return obj, end


__all__ = ['JSONDecoder']