Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | """ |
michael@0 | 2 | Implementation of JSONDecoder |
michael@0 | 3 | """ |
michael@0 | 4 | import re |
michael@0 | 5 | import sys |
michael@0 | 6 | |
michael@0 | 7 | from simplejson.scanner import Scanner, pattern |
michael@0 | 8 | try: |
michael@0 | 9 | from simplejson._speedups import scanstring as c_scanstring |
michael@0 | 10 | except ImportError: |
michael@0 | 11 | pass |
michael@0 | 12 | |
# Flags shared by every regular expression compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
michael@0 | 14 | |
def _floatconstants():
    """Build the IEEE 754 double-precision special values.

    Returns a ``(nan, inf, -inf)`` tuple decoded from the big-endian bit
    patterns of a quiet NaN and of positive infinity.
    """
    import binascii
    import struct
    # 7FF8000000000000 is a quiet NaN, 7FF0000000000000 is +Infinity.
    # unhexlify() is used instead of str.decode('hex'), which only exists on
    # Python 2, and the explicit '>' (big-endian) format removes the need to
    # swap the bytes by hand on little-endian hosts.
    raw = binascii.unhexlify('7FF80000000000007FF0000000000000')
    nan, inf = struct.unpack('>dd', raw)
    return nan, inf, -inf


NaN, PosInf, NegInf = _floatconstants()
michael@0 | 25 | |
michael@0 | 26 | |
def linecol(doc, pos):
    """Translate a character offset into a ``(line, column)`` pair.

    ``doc`` is the document text and ``pos`` is a 0-based index into it.
    Both returned numbers are 1-based: the first character of every line,
    including the first one, is reported as column 1.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when no newline precedes ``pos``, so the first line
    # is numbered from 1 exactly like every following line (it used to be
    # the only line whose columns started at 0).
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
michael@0 | 34 | |
michael@0 | 35 | |
def errmsg(msg, doc, pos, end=None):
    """Return ``msg`` followed by the location of ``pos`` inside ``doc``.

    The line and column numbers are computed by ``linecol``.  When ``end``
    is given, the message describes the range from ``pos`` to ``end``
    instead of a single position.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        fields = (msg, start_line, start_col, stop_line, stop_col, pos, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % fields
    return '%s: line %d column %d (char %d)' % (msg, start_line, start_col, pos)
michael@0 | 43 | |
michael@0 | 44 | |
# Literal tokens mapped to the Python values they decode to.
_CONSTANTS = {
    # Literals defined by JSON itself.
    'null': None,
    'true': True,
    'false': False,
    # Floating point literals accepted in addition to the JSON ones.
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}
michael@0 | 53 | |
def JSONConstant(match, context, c=_CONSTANTS):
    """Decode a constant token (``true``, ``null``, ``NaN``, ...).

    If ``context`` provides a ``parse_constant`` callable it is called with
    the matched text; otherwise the value is looked up in ``c``.

    Returns ``(value, None)``.
    NOTE(review): the ``None`` presumably tells the scanner to resume at the
    end of the match -- confirm against simplejson.scanner.
    """
    token = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    if hook is not None:
        return hook(token), None
    return c[token], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
michael@0 | 63 | |
michael@0 | 64 | |
def JSONNumber(match, context):
    """Decode a JSON number token.

    A number with a fraction or an exponent is converted with the context's
    ``parse_float`` (default ``float``); any other number is converted with
    the context's ``parse_int`` (default ``int``).

    Returns ``(value, None)``.
    """
    # Re-match the same span with the number regex to obtain its groups.
    # NOTE(review): ``JSONNumber.regex`` is presumably attached by the
    # ``pattern`` decorator below -- confirm against simplejson.scanner.
    start, stop = match.span()
    parsed = JSONNumber.regex.match(match.string, start, stop)
    integer, frac, exp = parsed.groups()
    if not (frac or exp):
        to_int = getattr(context, 'parse_int', None) or int
        return to_int(integer), None
    to_float = getattr(context, 'parse_float', None) or float
    return to_float(integer + (frac or '') + (exp or '')), None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
michael@0 | 76 | |
michael@0 | 77 | |
# Matches a (possibly empty) run of string content followed by the character
# that ends the run: a double quote, a backslash or a control character.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes and the text each one stands for.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding assumed for byte strings when the caller does not supply one.
DEFAULT_ENCODING = "utf-8"
michael@0 | 85 | |
def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the JSON string in ``s`` whose body starts at index ``end``.

    ``end`` is the index of the first character after the opening quote.
    ``encoding`` is used to turn ``str`` content into ``unicode``
    (``DEFAULT_ENCODING`` when None).  When ``strict`` is true, a raw
    control character (code points 0x00-0x1f) inside the string is an
    error; otherwise it is kept as part of the result.

    Returns ``(text, end)`` where ``text`` is the decoded ``unicode`` string
    and ``end`` is the index just past the closing quote.

    Raises ValueError for an unterminated string, a control character in
    strict mode, an unknown backslash escape, or a malformed ``\\uXXXX``
    escape or surrogate pair.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used for "unterminated" error messages.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        # Content is zero or more unescaped string characters
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                # Interpolate the offending character; the placeholder used
                # to be passed through to the message unformatted.
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            # Simple one-character escape
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            # Unicode escape sequence: four hex digits after the 'u'
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                if len(esc) != 4:
                    raise ValueError
                uni = int(esc, 16)
                # On wide builds a high surrogate must be followed by a
                # second \uXXXX escape holding the low surrogate.
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if not s[end + 5:end + 7] == '\\u':
                        raise ValueError
                    esc2 = s[end + 7:end + 11]
                    if len(esc2) != 4:
                        raise ValueError
                    uni2 = int(esc2, 16)
                    # Without this check any second escape was combined
                    # into a bogus code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
michael@0 | 147 | |
michael@0 | 148 | |
# Prefer the C implementation when the optional import succeeded and bound
# ``c_scanstring``; otherwise fall back to the pure-Python scanner.
try:
    c_scanstring
except NameError:
    scanstring = py_scanstring
else:
    scanstring = c_scanstring
michael@0 | 154 | |
def JSONString(match, context):
    """Decode the JSON string whose opening quote was just matched.

    The scan starts at ``match.end()``, i.e. right after the opening quote,
    using the context's ``encoding`` (default None) and ``strict`` (default
    True) settings.  Returns whatever ``scanstring`` returns.
    """
    return scanstring(
        match.string,
        match.end(),
        getattr(context, 'encoding', None),
        getattr(context, 'strict', True))
pattern(r'"')(JSONString)
michael@0 | 160 | |
michael@0 | 161 | |
# Matches any (possibly empty) run of whitespace.
WHITESPACE = re.compile(r'\s*', FLAGS)

def JSONObject(match, context, _w=WHITESPACE.match):
    """Decode a JSON object whose opening brace was just matched.

    Keys are read with ``scanstring`` and values with ``JSONScanner``.  If
    ``context`` has an ``object_hook``, it is applied to the resulting dict
    of a non-empty object and its return value is used instead.

    Returns ``(obj, end)`` where ``end`` is the index just past the closing
    brace.  Raises ValueError when the text is not a well-formed object.
    """
    s = match.string
    members = {}
    end = _w(s, match.end()).end()
    first = s[end:end + 1]
    if first == '}':
        # Empty object: returned as a plain dict, object_hook is not called.
        return members, end + 1
    if first != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        # Key, then the colon separating it from the value.
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        members[key] = value
        # Either the closing brace or a comma must follow the value.
        sep_at = _w(s, end).end()
        separator = s[sep_at:sep_at + 1]
        end = sep_at + 1
        if separator == '}':
            break
        if separator != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, sep_at))
        # After a comma the next key's opening quote is required.
        quote_at = _w(s, end).end()
        end = quote_at + 1
        if s[quote_at:quote_at + 1] != '"':
            raise ValueError(errmsg("Expecting property name", s, quote_at))
    hook = getattr(context, 'object_hook', None)
    if hook is None:
        return members, end
    return hook(members), end
pattern(r'{')(JSONObject)
michael@0 | 206 | |
michael@0 | 207 | |
def JSONArray(match, context, _w=WHITESPACE.match):
    """Decode a JSON array whose opening bracket was just matched.

    Elements are read with ``JSONScanner``.

    Returns ``(values, end)`` where ``values`` is a list and ``end`` is the
    index just past the closing bracket.  Raises ValueError when an element
    or the ``,`` / ``]`` after it is missing.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    if s[end:end + 1] == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # ``end`` has already been moved past the offending character,
            # so report ``end - 1`` (the character itself).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)
michael@0 | 233 | |
michael@0 | 234 | |
# Handlers for every kind of JSON value, in the order given to the Scanner.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Module-level scanner used by JSONObject and JSONArray for nested values.
JSONScanner = Scanner(ANYTHING)
michael@0 | 244 | |
michael@0 | 245 | |
class JSONDecoder(object):
    """
    Simple JSON <http://json.org> decoder

    By default the decoder performs these translations:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and decoded
    to the corresponding ``float`` values, although they are outside the
    JSON spec.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """
        ``encoding`` is the encoding used to interpret ``str`` objects
        decoded by this instance (utf-8 by default).  It has no effect
        when decoding ``unicode`` objects.

        Only encodings that are a superset of ASCII currently work;
        strings in any other encoding should be passed in as ``unicode``.

        ``object_hook``, if given, is called with the result of every JSON
        object decoded, and its return value is used in place of the
        ``dict``.  This allows custom deserializations (e.g. JSON-RPC
        class hinting).

        ``parse_float``, if given, is called with the string of every JSON
        float to be decoded.  The default is equivalent to float(num_str).
        Another datatype or parser may be used (e.g. decimal.Decimal).

        ``parse_int``, if given, is called with the string of every JSON
        int to be decoded.  The default is equivalent to int(num_str).
        Another datatype or parser may be used (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN, null, true, false.  It can be used to
        raise an exception when invalid JSON numbers are encountered.

        ``strict`` is handed to the string scanner.  In the pure-Python
        scanner a false value allows control characters (code points
        below 0x20) to appear unescaped inside strings; by default they
        raise ValueError.
        """
        # Every option is stored as-is; the token handlers read them back
        # from the decoder, which is passed to the scanner as ``context``.
        self.strict = strict
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_int = parse_int
        self.parse_float = parse_float
        self.parse_constant = parse_constant

    def decode(self, s, _w=WHITESPACE.match):
        """
        Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document).

        Whitespace around the document is ignored; anything else after it
        raises ValueError.
        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, **kw):
        """
        Decode a JSON document from the start of ``s`` (a ``str`` or
        ``unicode``) and return a 2-tuple of the Python representation and
        the index in ``s`` where the document ended.

        Useful for decoding a JSON document from a string that may have
        extraneous data at the end.  Raises ValueError when no document
        can be decoded.
        """
        # The decoder itself is the default context for the token handlers.
        kw.setdefault('context', self)
        try:
            result, stop = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return result, stop
michael@0 | 342 | |
michael@0 | 343 | __all__ = ['JSONDecoder'] |