addon-sdk/source/python-lib/simplejson/decoder.py

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 """
michael@0 2 Implementation of JSONDecoder
michael@0 3 """
michael@0 4 import re
michael@0 5 import sys
michael@0 6
michael@0 7 from simplejson.scanner import Scanner, pattern
michael@0 8 try:
michael@0 9 from simplejson._speedups import scanstring as c_scanstring
michael@0 10 except ImportError:
michael@0 11 pass
michael@0 12
michael@0 13 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
michael@0 14
def _floatconstants():
    """
    Build the IEEE 754 special values backing the non-standard JSON
    constants ``NaN``, ``Infinity`` and ``-Infinity``.

    Returns a ``(nan, inf, -inf)`` tuple of floats.
    """
    import binascii
    import struct
    # Big-endian bit patterns of a quiet NaN followed by +Infinity.
    # unhexlify replaces the Python 2-only 'hex' str codec, and the explicit
    # '>' byte order lets struct handle little-endian hosts, so no manual
    # byte swapping is needed.
    _BYTES = binascii.unhexlify('7FF80000000000007FF0000000000000')
    nan, inf = struct.unpack('>dd', _BYTES)
    return nan, inf, -inf


NaN, PosInf, NegInf = _floatconstants()
michael@0 25
michael@0 26
def linecol(doc, pos):
    """
    Translate the character offset ``pos`` in ``doc`` into a
    ``(lineno, colno)`` pair, both counted from 1.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when no newline precedes pos, so the first line gets
    # the same 1-based column numbering as every later line.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
michael@0 34
michael@0 35
def errmsg(msg, doc, pos, end=None):
    """
    Format ``msg`` together with the location of ``pos`` in ``doc``.

    When ``end`` is given, the message describes the range from ``pos`` to
    ``end``; otherwise only the single position is reported.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        details = (msg, start_line, start_col, stop_line, stop_col, pos, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % details
    return '%s: line %d column %d (char %d)' % (msg, start_line, start_col, pos)
michael@0 43
michael@0 44
# Default Python values for the literal tokens matched by JSONConstant
_CONSTANTS = {
    'null': None,
    'true': True,
    'false': False,
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}

def JSONConstant(match, context, c=_CONSTANTS):
    """
    Convert a matched constant token into its Python value.

    If ``context`` provides a ``parse_constant`` callable (not ``None``) it
    is called with the matched text; otherwise the value is looked up in
    ``c``.  Returns ``(value, None)``.
    """
    literal = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    if hook is not None:
        return hook(literal), None
    return c[literal], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
michael@0 63
michael@0 64
def JSONNumber(match, context):
    """
    Convert a matched JSON number into a Python number.

    Numbers with a fraction or an exponent go through ``context.parse_float``
    (``float`` when unset); all others go through ``context.parse_int``
    (``int`` when unset).  Returns ``(value, None)``.
    """
    # Re-match over the same span to obtain the capture groups.
    # NOTE(review): presumably pattern() attaches the compiled expression as
    # JSONNumber.regex -- confirm against simplejson.scanner.
    start, stop = match.span()
    parts = JSONNumber.regex.match(match.string, start, stop)
    integer, frac, exp = parts.groups()
    if not (frac or exp):
        to_int = getattr(context, 'parse_int', None) or int
        return to_int(integer), None
    to_float = getattr(context, 'parse_float', None) or float
    return to_float(integer + (frac or '') + (exp or '')), None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
michael@0 76
michael@0 77
# Lazily matches literal text up to the next quote, backslash or control char
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes and the text they stand for
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

DEFAULT_ENCODING = "utf-8"

def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """
    Scan a JSON string literal in ``s`` whose opening quote has already
    been consumed.

    ``end`` is the index of the first character after the opening quote.
    ``encoding`` (utf-8 when ``None``) is used to decode chunks that are not
    already ``unicode``.  When ``strict`` is true, a raw control character
    inside the string is an error; otherwise it is kept verbatim.

    Returns ``(text, end)``: the decoded ``unicode`` string and the index
    just past the closing quote.  Raises ``ValueError`` for an unterminated
    string, an invalid escape, an invalid surrogate pair or (in strict mode)
    a control character.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, reported for unterminated strings
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                # Interpolate the offending character here: errmsg() inserts
                # its message through %s and would leave a bare %r as-is.
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        # A backslash was seen; s[end] is the escape character
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                if len(esc) != 4:
                    raise ValueError
                uni = int(esc, 16)
                # On wide-unicode builds a high surrogate is combined with
                # the following \uXXXX escape into one code point
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if not s[end + 5:end + 7] == '\\u':
                        raise ValueError
                    esc2 = s[end + 7:end + 11]
                    if len(esc2) != 4:
                        raise ValueError
                    uni2 = int(esc2, 16)
                    # The second escape must be a low surrogate; otherwise
                    # the arithmetic below would silently yield a wrong
                    # code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
michael@0 147
michael@0 148
# Prefer the C scanner when the name c_scanstring is defined; fall back to
# the pure-Python implementation otherwise
try:
    c_scanstring
except NameError:
    scanstring = py_scanstring
else:
    scanstring = c_scanstring

def JSONString(match, context):
    """
    Decode the JSON string whose opening quote was just matched.

    ``encoding`` and ``strict`` are read from ``context`` (defaulting to
    ``None`` and ``True``) and passed to ``scanstring``, whose result is
    returned unchanged.
    """
    text_encoding = getattr(context, 'encoding', None)
    strict_mode = getattr(context, 'strict', True)
    source, after_quote = match.string, match.end()
    return scanstring(source, after_quote, text_encoding, strict_mode)
pattern(r'"')(JSONString)
michael@0 160
michael@0 161
WHITESPACE = re.compile(r'\s*', FLAGS)

def JSONObject(match, context, _w=WHITESPACE.match):
    """
    Decode the JSON object whose opening brace was just matched.

    Keys are read with ``scanstring`` and values with
    ``JSONScanner.iterscan``.  If ``context`` has a non-``None``
    ``object_hook``, it is called with the resulting ``dict`` -- including
    the empty one -- and its return value is used instead.

    Returns ``(obj, end)`` where ``end`` is the index just past the closing
    brace.  Raises ``ValueError`` on malformed input.
    """
    pairs = {}
    s = match.string
    object_hook = getattr(context, 'object_hook', None)
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    # Trivial empty object; it still goes through object_hook so the hook
    # really sees every decoded object
    if nextchar == '}':
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        # end points just past the opening quote of the key
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        # Step past the delimiter; errors below report end - 1, its position
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)
michael@0 206
michael@0 207
def JSONArray(match, context, _w=WHITESPACE.match):
    """
    Decode the JSON array whose opening bracket was just matched.

    Elements are read with ``JSONScanner.iterscan``.  Returns
    ``(values, end)`` where ``values`` is a ``list`` and ``end`` is the
    index just past the closing bracket.  Raises ``ValueError`` on
    malformed input.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # end was already advanced past the offending character, so
            # report end - 1, as JSONObject does
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)
michael@0 233
michael@0 234
# Value parsers handed to the scanner, in this order
ANYTHING = [
    JSONObject,
    JSONArray,
    JSONString,
    JSONConstant,
    JSONNumber,
]

JSONScanner = Scanner(ANYTHING)


class JSONDecoder(object):
    """
    Simple JSON <http://json.org> decoder

    Default translations performed while decoding:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and mapped to
    the corresponding ``float`` values, although they are outside the JSON
    spec.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """
        ``encoding`` is the encoding used to interpret ``str`` input
        (utf-8 by default); it has no effect on ``unicode`` input.  Only
        encodings that are a superset of ASCII currently work; text in any
        other encoding should be passed in as ``unicode``.

        ``object_hook``, if given, is called with the result of every
        decoded JSON object, and its return value replaces the ``dict``.
        This allows custom deserialization (e.g. JSON-RPC class hinting).

        ``parse_float``, if given, is called with the string of every JSON
        float to be decoded.  The default is equivalent to float(num_str).
        Use it to plug in another datatype or parser (e.g.
        decimal.Decimal).

        ``parse_int``, if given, is called with the string of every JSON
        int to be decoded.  The default is equivalent to int(num_str).  Use
        it to plug in another datatype or parser (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN, null, true, false.  It can be used to
        raise an exception when invalid JSON numbers are encountered.

        ``strict`` is stored on the instance and read by the string parser
        through the decoding context.
        """
        self.strict = strict
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_int = parse_int
        self.parse_float = parse_float
        self.parse_constant = parse_constant

    def decode(self, s, _w=WHITESPACE.match):
        """
        Decode ``s`` (a ``str`` or ``unicode`` instance holding exactly one
        JSON document) and return its Python representation.

        Leading and trailing whitespace is ignored; anything else after the
        document raises ``ValueError``.
        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        total = len(s)
        if stop == total:
            return obj
        raise ValueError(errmsg("Extra data", s, stop, total))

    def raw_decode(self, s, **kw):
        """
        Decode the JSON document at the beginning of ``s`` (a ``str`` or
        ``unicode``) and return a 2-tuple of its Python representation and
        the index in ``s`` where the document ended.

        Useful for decoding a JSON document from a string that may carry
        extraneous data at the end.  Keyword arguments are forwarded to the
        scanner; ``context`` defaults to this decoder.
        """
        if 'context' not in kw:
            kw['context'] = self
        scan = self._scanner.iterscan
        try:
            first = scan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        obj, end = first
        return obj, end

__all__ = ['JSONDecoder']

mercurial