"""
Implementation of JSONEncoder
"""
import re

try:
    from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    # The C accelerator is optional; the pure-Python fallback defined below
    # is selected when the name is missing.
    pass

# Resolve the text / byte-string / integer types once at import time.  On
# Python 2 these are exactly the built-ins the encoder has always used
# (``basestring``, ``str`` and ``(int, long)``).  The fallback branch only
# runs on interpreters that lack ``basestring``/``long``; there only text
# strings are treated as JSON strings.
try:
    _string_types = basestring
    _bytes_type = str
    _integer_types = (int, long)
except NameError:
    _string_types = str
    _bytes_type = bytes
    _integer_types = (int,)

# Characters that must be escaped when non-ASCII output is allowed.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped when the output has to be pure ASCII:
# backslash, double quote, and anything outside the printable range ' '..'~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects bytes with the high bit set in a byte string.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character gets a generic \uXXXX escape.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')
FLOAT_REPR = repr


def floatstr(o, allow_nan=True):
    """
    Return the JSON text for the float ``o``.

    Finite values are formatted with ``FLOAT_REPR``.  NaN and the two
    infinities are rendered as ``NaN``, ``Infinity`` and ``-Infinity``
    when ``allow_nan`` is true; otherwise a ``ValueError`` is raised for
    them.
    """
    # Check for specials.  Note that this type of test is processor- and/or
    # platform-specific, so do tests which don't depend on the internals.

    if o != o:
        # NaN is the only float that compares unequal to itself.
        text = 'NaN'
    elif o == INFINITY:
        text = 'Infinity'
    elif o == -INFINITY:
        text = '-Infinity'
    else:
        return FLOAT_REPR(o)

    if not allow_nan:
        raise ValueError("Out of range float values are not JSON compliant: %r"
            % (o,))

    return text


def encode_basestring(s):
    """
    Return a JSON representation of a Python string

    Only the characters matched by ``ESCAPE`` are replaced; everything
    else is passed through unchanged.
    """
    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


def py_encode_basestring_ascii(s):
    """
    Return an ASCII-only JSON representation of a Python string

    A byte string containing bytes in the range 0x80-0xff is first decoded
    as UTF-8.  Every character outside the printable ASCII range is then
    written as a ``\\uXXXX`` escape, using a surrogate pair for code points
    of 0x10000 and above.
    """
    if isinstance(s, _bytes_type) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')
    def replace(match):
        s = match.group(0)
        try:
            return ESCAPE_DCT[s]
        except KeyError:
            n = ord(s)
            if n < 0x10000:
                return '\\u%04x' % (n,)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u%04x\\u%04x' % (s1, s2)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'


# Prefer the C implementation when the optional import at the top succeeded.
try:
    encode_basestring_ascii = c_encode_basestring_ascii
except NameError:
    encode_basestring_ascii = py_encode_basestring_ascii


class JSONEncoder(object):
    """
    Extensible JSON encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).
    """
    # Class attribute kept from the original source; it merely lists the
    # public methods.
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '
    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float, bool or None.
        If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be a unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # NOTE: the nesting depth lives on the instance and is adjusted
        # while the generators below run; if encoding raises part-way
        # through, the matching decrement is skipped and the value stays
        # raised for later calls on the same encoder.
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method with the supplied callable.
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """Return a newline followed by the indentation for the current depth."""
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the JSON chunks for the list or tuple ``lst``.

        ``markers`` is the dict used for circular reference detection, or
        None when that check is disabled.  Raises ``ValueError`` when
        ``lst`` is already being encoded further up the call chain.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the JSON chunks for the dict ``dct``.

        String keys are used as they are (byte strings are decoded first
        when a non-UTF-8 ``encoding`` is configured).  ``True``, ``False``
        and ``None`` become ``true``, ``false`` and ``null``; floats and
        integers are converted to their text form.  Any other key raises
        ``TypeError`` unless ``skipkeys`` is set, in which case the item
        is dropped.

        ``markers`` is the dict used for circular reference detection, or
        None when that check is disabled.  Raises ``ValueError`` when
        ``dct`` is already being encoded further up the call chain.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            keys = list(dct.keys())
            keys.sort()
            items = [(k, dct[k]) for k in keys]
        else:
            # ``iteritems`` avoids building a list on Python 2; mappings
            # without it are walked through ``items``.
            try:
                iteritems = dct.iteritems
            except AttributeError:
                iteritems = dct.items
            items = iteritems()
        _encoding = self.encoding
        _do_decode = (_encoding is not None
            and not (_encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, _bytes_type):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, _string_types):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            #
            # The singleton tests must come before the integer test: bool
            # is a subclass of int, so True/False would otherwise be written
            # as "True"/"False" instead of "true"/"false".
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            elif isinstance(key, _integer_types):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the JSON chunks for an arbitrary object ``o``.

        Dispatches on the type of ``o``; objects of an unsupported type are
        converted through ``default()`` and the result is encoded in turn.
        """
        if isinstance(o, _string_types):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, _bytes_type)
                    and not (_encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, _integer_types):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            # Track the object itself so that a ``default()`` which keeps
            # returning objects that lead back here is reported as a cycle.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """Convert ``o`` with ``default()`` and return an iterator over its chunks."""
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, _string_types):
            if isinstance(o, _bytes_type):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)

__all__ = ['JSONEncoder']