"""
Implementation of JSONDecoder
"""
import re
import sys

from simplejson.scanner import Scanner, pattern
try:
    from simplejson._speedups import scanstring as c_scanstring
except ImportError:
    # Deliberate best-effort: when the C speedups are unavailable the name
    # ``c_scanstring`` is simply left undefined, and the module later falls
    # back to the pure-Python scanner by catching the resulting NameError.
    pass

# Regex flags shared by the patterns compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL


def _floatconstants():
    """
    Build the special float values from their IEEE 754 bit patterns.

    Returns a ``(nan, inf, -inf)`` tuple.
    """
    import struct
    # Big-endian encodings of a quiet NaN followed by +Infinity (8 bytes each).
    _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
    if sys.byteorder != 'big':
        # struct's native 'd' format expects host byte order, so reverse each
        # 8-byte half independently on little-endian machines.
        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
    nan, inf = struct.unpack('dd', _BYTES)
    return nan, inf, -inf

NaN, PosInf, NegInf = _floatconstants()


def linecol(doc, pos):
    """
    Translate the character offset ``pos`` in ``doc`` into a
    ``(lineno, colno)`` pair.

    ``lineno`` is 1-based.  On the first line ``colno`` is the raw offset
    ``pos``; on later lines it is the distance from the preceding newline.
    """
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        colno = pos
    else:
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno


def errmsg(msg, doc, pos, end=None):
    """
    Format ``msg`` together with the location of an error in ``doc``.

    ``pos`` is the character offset where the problem starts.  When ``end``
    is given, the message describes the range ``pos`` - ``end`` instead of a
    single position.  Returns the formatted string; nothing is raised here.
    """
    lineno, colno = linecol(doc, pos)
    if end is None:
        return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
        msg, lineno, colno, endlineno, endcolno, pos, end)


# Default Python values for the literal tokens handled by JSONConstant.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
    'true': True,
    'false': False,
    'null': None,
}


def JSONConstant(match, context, c=_CONSTANTS):
    """
    Convert a matched constant token into its Python value.

    If ``context`` has a non-None ``parse_constant`` attribute it is called
    with the matched text and its result is used; otherwise the value is
    looked up in ``c``.  Returns ``(value, None)``.
    """
    s = match.group(0)
    fn = getattr(context, 'parse_constant', None)
    if fn is None:
        rval = c[s]
    else:
        rval = fn(s)
    return rval, None
# Associate the constant-token regex with JSONConstant.
# NOTE(review): ``pattern`` comes from simplejson.scanner; it presumably
# attaches the compiled regex to the function (JSONNumber reads ``.regex``).
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)


def JSONNumber(match, context):
    """
    Convert a matched JSON number into a Python number.

    Numbers with a fraction or exponent go through ``context.parse_float``
    (default ``float``); all others go through ``context.parse_int``
    (default ``int``).  Returns ``(value, None)``.
    """
    # Re-run the function's own regex over the matched span so the integer,
    # fraction and exponent parts are available as separate groups.
    match = JSONNumber.regex.match(match.string, *match.span())
    integer, frac, exp = match.groups()
    if frac or exp:
        fn = getattr(context, 'parse_float', None) or float
        res = fn(integer + (frac or '') + (exp or ''))
    else:
        fn = getattr(context, 'parse_int', None) or int
        res = fn(integer)
    return res, None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)


# Matches a run of ordinary characters followed by one "interesting"
# character: a closing quote, a backslash, or a control character.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes and the text they stand for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

DEFAULT_ENCODING = "utf-8"


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """
    Scan the JSON string in ``s`` whose opening quote is at ``end - 1``.

    ``encoding`` is used to decode ``str`` chunks to ``unicode`` (utf-8 when
    None).  When ``strict`` is true, a literal control character inside the
    string is an error; otherwise it is kept as-is.

    Returns ``(unicode_value, index_after_closing_quote)``.  Raises
    ``ValueError`` for an unterminated string, an invalid escape, or (when
    strict) a control character.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        elif terminator != '\\':
            # The terminator is a control character.
            if strict:
                # Interpolate the offending character; the template used to be
                # passed through unformatted, leaving a literal "%r" in the
                # error text.
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                if len(esc) != 4:
                    raise ValueError
                uni = int(esc, 16)
                # A high surrogate must be followed by a second \uXXXX escape
                # on builds whose unichr() can represent the combined value.
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if not s[end + 5:end + 7] == '\\u':
                        raise ValueError
                    esc2 = s[end + 7:end + 11]
                    if len(esc2) != 4:
                        raise ValueError
                    uni2 = int(esc2, 16)
                    # Without this range check a non-surrogate second escape
                    # makes the arithmetic below yield an unrelated code
                    # point instead of an error.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end


# Use speedup
try:
    scanstring = c_scanstring
except NameError:
    # The optional C implementation was not imported; use the Python one.
    scanstring = py_scanstring


def JSONString(match, context):
    """
    Scan a JSON string starting just after the matched opening quote.

    ``encoding`` and ``strict`` are read from ``context`` (defaults None and
    True).  Returns whatever ``scanstring`` returns: ``(value, end)``.
    """
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    return scanstring(match.string, match.end(), encoding, strict)
pattern(r'"')(JSONString)


WHITESPACE = re.compile(r'\s*', FLAGS)


def JSONObject(match, context, _w=WHITESPACE.match):
    """
    Parse a JSON object whose opening brace has just been matched.

    Returns ``(pairs, end)`` where ``end`` is the index just past the closing
    brace.  For a non-empty object, ``context.object_hook`` (if not None) is
    applied to the resulting dict.  Raises ``ValueError`` on malformed input.
    """
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    # Trivial empty object
    if nextchar == '}':
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # ``end`` was already advanced past nextchar, hence ``end - 1``.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    object_hook = getattr(context, 'object_hook', None)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)


def JSONArray(match, context, _w=WHITESPACE.match):
    """
    Parse a JSON array whose opening bracket has just been matched.

    Returns ``(values, end)`` where ``end`` is the index just past the
    closing bracket.  Raises ``ValueError`` on malformed input.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # ``end`` was already advanced past nextchar; report the position
            # of the offending character itself, as JSONObject does.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)


# Every token handler, in the order handed to the Scanner.
ANYTHING = [
    JSONObject,
    JSONArray,
    JSONString,
    JSONConstant,
    JSONNumber,
]

JSONScanner = Scanner(ANYTHING)


class JSONDecoder(object):
    """
    Simple JSON decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """
        ``encoding`` determines the encoding used to interpret any ``str``
        objects decoded by this instance (utf-8 by default).  It has no
        effect when decoding ``unicode`` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as ``unicode``.

        ``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded.  By default this is equivalent to
        float(num_str).  This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded.  By default this is equivalent to
        int(num_str).  This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN, null, true, false.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        ``strict``, if true (the default), makes a literal control character
        (code points 0-31) inside a string an error.  If false, such
        characters are accepted and kept in the decoded string.
        """
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant
        self.strict = strict

    def decode(self, s, _w=WHITESPACE.match):
        """
        Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        Leading and trailing whitespace is skipped.  Raises ``ValueError``
        if anything other than whitespace follows the document.
        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        if end != len(s):
            raise ValueError(errmsg("Extra data", s, end, len(s)))
        return obj

    def raw_decode(self, s, **kw):
        """
        Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
        with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        Keyword arguments are forwarded to the scanner's ``iterscan``;
        ``context`` defaults to this decoder.  Raises ``ValueError`` if the
        scanner yields nothing.
        """
        kw.setdefault('context', self)
        try:
            obj, end = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        return obj, end

__all__ = ['JSONDecoder']