michael@0: """
michael@0: Implementation of JSONEncoder
michael@0: """
michael@0: import re
michael@0: 
michael@0: try:
michael@0:     from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
michael@0: except ImportError:
michael@0:     pass
michael@0: 
# Characters that must be escaped when non-ASCII output is allowed:
# control characters, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for pure-ASCII output: backslash, double
# quote and everything outside the printable range from space to tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects bytes with the high bit set in a byte string.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Maps a single character to its JSON escape sequence.  The short,
# human-friendly escapes are listed explicitly ...
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# ... and every remaining control character falls back to the generic
# \uXXXX form without overriding the short escapes above.
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u%04x' % (i,)

# Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')
# Callable used to render ordinary (finite) floats.
FLOAT_REPR = repr
michael@0: 
def floatstr(o, allow_nan=True):
    """
    Return the JSON text for the float ``o``.

    Finite values are rendered with ``FLOAT_REPR``.  NaN and the two
    infinities are rendered as ``NaN``, ``Infinity`` and ``-Infinity``
    when ``allow_nan`` is true; otherwise a ``ValueError`` is raised for
    them.
    """
    # Detect the special values with plain comparisons only, so the test
    # does not depend on processor- or platform-specific float internals.
    # NaN is the only value that compares unequal to itself.
    if o != o:
        special = 'NaN'
    elif o == INFINITY:
        special = 'Infinity'
    elif o == -INFINITY:
        special = '-Infinity'
    else:
        special = None

    if special is None:
        return FLOAT_REPR(o)
    if allow_nan:
        return special
    raise ValueError("Out of range float values are not JSON compliant: %r"
        % (o,))
michael@0: 
michael@0: 
def encode_basestring(s):
    """
    Return a JSON representation of a Python string

    Every character matched by ``ESCAPE`` is replaced with its entry in
    ``ESCAPE_DCT``; the result is wrapped in double quotes.
    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'
michael@0: 
michael@0: 
def py_encode_basestring_ascii(s):
    """
    Return an ASCII-only JSON representation of a Python string.

    Byte strings containing high-bit bytes are decoded as UTF-8 first.
    Characters with a short escape in ``ESCAPE_DCT`` use it; everything
    else matched by ``ESCAPE_ASCII`` becomes ``\\uXXXX`` (a surrogate pair
    for code points of 0x10000 and above).
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def escape_char(match):
        char = match.group(0)
        short = ESCAPE_DCT.get(char)
        if short is not None:
            return short
        code = ord(char)
        if code >= 0x10000:
            # Outside the BMP: emit a UTF-16 surrogate pair.
            code -= 0x10000
            high = 0xd800 | ((code >> 10) & 0x3ff)
            low = 0xdc00 | (code & 0x3ff)
            return '\\u%04x\\u%04x' % (high, low)
        return '\\u%04x' % (code,)

    body = ESCAPE_ASCII.sub(escape_char, s)
    return '"' + str(body) + '"'
michael@0: 
michael@0: 
# Prefer the C implementation when the optional speedups import at the top
# of the module succeeded; otherwise use the pure-Python version.
if 'c_encode_basestring_ascii' in globals():
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
michael@0: 
michael@0: 
class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    NOTE: the current indentation depth is tracked on the instance
    (``current_indent_level``) while a generator returned by
    ``iterencode()`` is being consumed, so a single encoder should not
    drive several interleaved ``iterencode()`` generators when ``indent``
    is set.
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, unicode, int, long, float,
        bool or None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        self.current_indent_level = 0
        if separators is not None:
            # Shadow the class-level defaults on this instance only.
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadow the ``default`` method with the supplied callable.
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """
        Return a newline followed by the whitespace for the current
        nesting depth (``indent`` spaces per level).
        """
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the JSON chunks for the list or tuple ``lst``.

        ``markers`` is the id -> object mapping used for circular
        reference detection, or None when that check is disabled.
        Raises ``ValueError`` if ``lst`` is already being encoded further
        up the call chain.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            # Finished with this container; it may legitimately appear
            # again elsewhere in the structure.
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the JSON chunks for the dictionary ``dct``.

        Keys are coerced to strings: str/unicode are used as they are
        (str is decoded first when a non-UTF-8 ``encoding`` is set),
        floats go through ``floatstr``, True/False/None become
        ``true``/``false``/``null`` and ints/longs use ``str()``.  Any
        other key raises ``TypeError`` unless ``skipkeys`` is set, in
        which case the item is dropped.

        ``markers`` is the id -> object mapping used for circular
        reference detection, or None when that check is disabled.
        Raises ``ValueError`` if ``dct`` is already being encoded further
        up the call chain.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            items = [(k, dct[k]) for k in sorted(dct.keys())]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        # UTF-8 byte strings are handed to the string encoder untouched.
        _do_decode = (_encoding is not None
            and not (_encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            # bool is a subclass of int, so the True/False checks have to
            # come before the integer branch; otherwise they are never
            # reached and the key is emitted as "True"/"False".
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the JSON chunks for an arbitrary object ``o``.

        Dispatches on the type of ``o``; objects that are not natively
        supported are passed through ``default()`` and the result is
        encoded instead.  ``markers`` is the circular-reference mapping
        or None.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        # The bool singletons are tested before the int branch because
        # bool is a subclass of int.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """
        Convert ``o`` with ``default()`` and return the chunk iterator
        for the converted object.
        """
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            # Maps id(container) -> container for everything currently
            # being encoded.
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
michael@0: 
# Only the encoder class is exported by ``from <module> import *``.
__all__ = ['JSONEncoder']