addon-sdk/source/python-lib/simplejson/encoder.py

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/addon-sdk/source/python-lib/simplejson/encoder.py	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,385 @@
     1.4 +"""
     1.5 +Implementation of JSONEncoder
     1.6 +"""
     1.7 +import re
     1.8 +
     1.9 +try:
    1.10 +    from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
    1.11 +except ImportError:
    1.12 +    pass
    1.13 +
    1.14 +ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
    1.15 +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
    1.16 +HAS_UTF8 = re.compile(r'[\x80-\xff]')
    1.17 +ESCAPE_DCT = {
    1.18 +    '\\': '\\\\',
    1.19 +    '"': '\\"',
    1.20 +    '\b': '\\b',
    1.21 +    '\f': '\\f',
    1.22 +    '\n': '\\n',
    1.23 +    '\r': '\\r',
    1.24 +    '\t': '\\t',
    1.25 +}
    1.26 +for i in range(0x20):
    1.27 +    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
    1.28 +
    1.29 +# Assume this produces an infinity on all machines (probably not guaranteed)
    1.30 +INFINITY = float('1e66666')
    1.31 +FLOAT_REPR = repr
    1.32 +
    1.33 +def floatstr(o, allow_nan=True):
    1.34 +    # Check for specials.  Note that this type of test is processor- and/or
    1.35 +    # platform-specific, so do tests which don't depend on the internals.
    1.36 +
    1.37 +    if o != o:
    1.38 +        text = 'NaN'
    1.39 +    elif o == INFINITY:
    1.40 +        text = 'Infinity'
    1.41 +    elif o == -INFINITY:
    1.42 +        text = '-Infinity'
    1.43 +    else:
    1.44 +        return FLOAT_REPR(o)
    1.45 +
    1.46 +    if not allow_nan:
    1.47 +        raise ValueError("Out of range float values are not JSON compliant: %r"
    1.48 +            % (o,))
    1.49 +
    1.50 +    return text
    1.51 +
    1.52 +
    1.53 +def encode_basestring(s):
    1.54 +    """
    1.55 +    Return a JSON representation of a Python string
    1.56 +    """
    1.57 +    def replace(match):
    1.58 +        return ESCAPE_DCT[match.group(0)]
    1.59 +    return '"' + ESCAPE.sub(replace, s) + '"'
    1.60 +
    1.61 +
    1.62 +def py_encode_basestring_ascii(s):
    1.63 +    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
    1.64 +        s = s.decode('utf-8')
    1.65 +    def replace(match):
    1.66 +        s = match.group(0)
    1.67 +        try:
    1.68 +            return ESCAPE_DCT[s]
    1.69 +        except KeyError:
    1.70 +            n = ord(s)
    1.71 +            if n < 0x10000:
    1.72 +                return '\\u%04x' % (n,)
    1.73 +            else:
    1.74 +                # surrogate pair
    1.75 +                n -= 0x10000
    1.76 +                s1 = 0xd800 | ((n >> 10) & 0x3ff)
    1.77 +                s2 = 0xdc00 | (n & 0x3ff)
    1.78 +                return '\\u%04x\\u%04x' % (s1, s2)
    1.79 +    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
    1.80 +
    1.81 +
    1.82 +try:
    1.83 +    encode_basestring_ascii = c_encode_basestring_ascii
    1.84 +except NameError:
    1.85 +    encode_basestring_ascii = py_encode_basestring_ascii
    1.86 +
    1.87 +
    1.88 +class JSONEncoder(object):
    1.89 +    """
    1.90 +    Extensible JSON <http://json.org> encoder for Python data structures.
    1.91 +
    1.92 +    Supports the following objects and types by default:
    1.93 +    
    1.94 +    +-------------------+---------------+
    1.95 +    | Python            | JSON          |
    1.96 +    +===================+===============+
    1.97 +    | dict              | object        |
    1.98 +    +-------------------+---------------+
    1.99 +    | list, tuple       | array         |
   1.100 +    +-------------------+---------------+
   1.101 +    | str, unicode      | string        |
   1.102 +    +-------------------+---------------+
   1.103 +    | int, long, float  | number        |
   1.104 +    +-------------------+---------------+
   1.105 +    | True              | true          |
   1.106 +    +-------------------+---------------+
   1.107 +    | False             | false         |
   1.108 +    +-------------------+---------------+
   1.109 +    | None              | null          |
   1.110 +    +-------------------+---------------+
   1.111 +
   1.112 +    To extend this to recognize other objects, subclass and implement a
   1.113 +    ``.default()`` method with another method that returns a serializable
   1.114 +    object for ``o`` if possible, otherwise it should call the superclass
   1.115 +    implementation (to raise ``TypeError``).
   1.116 +    """
   1.117 +    __all__ = ['__init__', 'default', 'encode', 'iterencode']
   1.118 +    item_separator = ', '
   1.119 +    key_separator = ': '
   1.120 +    def __init__(self, skipkeys=False, ensure_ascii=True,
   1.121 +            check_circular=True, allow_nan=True, sort_keys=False,
   1.122 +            indent=None, separators=None, encoding='utf-8', default=None):
   1.123 +        """
   1.124 +        Constructor for JSONEncoder, with sensible defaults.
   1.125 +
   1.126 +        If skipkeys is False, then it is a TypeError to attempt
   1.127 +        encoding of keys that are not str, int, long, float or None.  If
   1.128 +        skipkeys is True, such items are simply skipped.
   1.129 +
   1.130 +        If ensure_ascii is True, the output is guaranteed to be str
   1.131 +        objects with all incoming unicode characters escaped.  If
   1.132 +        ensure_ascii is false, the output will be unicode object.
   1.133 +
   1.134 +        If check_circular is True, then lists, dicts, and custom encoded
   1.135 +        objects will be checked for circular references during encoding to
   1.136 +        prevent an infinite recursion (which would cause an OverflowError).
   1.137 +        Otherwise, no such check takes place.
   1.138 +
   1.139 +        If allow_nan is True, then NaN, Infinity, and -Infinity will be
   1.140 +        encoded as such.  This behavior is not JSON specification compliant,
   1.141 +        but is consistent with most JavaScript based encoders and decoders.
   1.142 +        Otherwise, it will be a ValueError to encode such floats.
   1.143 +
   1.144 +        If sort_keys is True, then the output of dictionaries will be
   1.145 +        sorted by key; this is useful for regression tests to ensure
   1.146 +        that JSON serializations can be compared on a day-to-day basis.
   1.147 +
   1.148 +        If indent is a non-negative integer, then JSON array
   1.149 +        elements and object members will be pretty-printed with that
   1.150 +        indent level.  An indent level of 0 will only insert newlines.
   1.151 +        None is the most compact representation.
   1.152 +
   1.153 +        If specified, separators should be a (item_separator, key_separator)
   1.154 +        tuple.  The default is (', ', ': ').  To get the most compact JSON
   1.155 +        representation you should specify (',', ':') to eliminate whitespace.
   1.156 +
   1.157 +        If specified, default is a function that gets called for objects
   1.158 +        that can't otherwise be serialized.  It should return a JSON encodable
   1.159 +        version of the object or raise a ``TypeError``.
   1.160 +
   1.161 +        If encoding is not None, then all input strings will be
   1.162 +        transformed into unicode using that encoding prior to JSON-encoding.
   1.163 +        The default is UTF-8.
   1.164 +        """
   1.165 +
   1.166 +        self.skipkeys = skipkeys
   1.167 +        self.ensure_ascii = ensure_ascii
   1.168 +        self.check_circular = check_circular
   1.169 +        self.allow_nan = allow_nan
   1.170 +        self.sort_keys = sort_keys
   1.171 +        self.indent = indent
   1.172 +        self.current_indent_level = 0
   1.173 +        if separators is not None:
   1.174 +            self.item_separator, self.key_separator = separators
   1.175 +        if default is not None:
   1.176 +            self.default = default
   1.177 +        self.encoding = encoding
   1.178 +
   1.179 +    def _newline_indent(self):
   1.180 +        return '\n' + (' ' * (self.indent * self.current_indent_level))
   1.181 +
   1.182 +    def _iterencode_list(self, lst, markers=None):
   1.183 +        if not lst:
   1.184 +            yield '[]'
   1.185 +            return
   1.186 +        if markers is not None:
   1.187 +            markerid = id(lst)
   1.188 +            if markerid in markers:
   1.189 +                raise ValueError("Circular reference detected")
   1.190 +            markers[markerid] = lst
   1.191 +        yield '['
   1.192 +        if self.indent is not None:
   1.193 +            self.current_indent_level += 1
   1.194 +            newline_indent = self._newline_indent()
   1.195 +            separator = self.item_separator + newline_indent
   1.196 +            yield newline_indent
   1.197 +        else:
   1.198 +            newline_indent = None
   1.199 +            separator = self.item_separator
   1.200 +        first = True
   1.201 +        for value in lst:
   1.202 +            if first:
   1.203 +                first = False
   1.204 +            else:
   1.205 +                yield separator
   1.206 +            for chunk in self._iterencode(value, markers):
   1.207 +                yield chunk
   1.208 +        if newline_indent is not None:
   1.209 +            self.current_indent_level -= 1
   1.210 +            yield self._newline_indent()
   1.211 +        yield ']'
   1.212 +        if markers is not None:
   1.213 +            del markers[markerid]
   1.214 +
   1.215 +    def _iterencode_dict(self, dct, markers=None):
   1.216 +        if not dct:
   1.217 +            yield '{}'
   1.218 +            return
   1.219 +        if markers is not None:
   1.220 +            markerid = id(dct)
   1.221 +            if markerid in markers:
   1.222 +                raise ValueError("Circular reference detected")
   1.223 +            markers[markerid] = dct
   1.224 +        yield '{'
   1.225 +        key_separator = self.key_separator
   1.226 +        if self.indent is not None:
   1.227 +            self.current_indent_level += 1
   1.228 +            newline_indent = self._newline_indent()
   1.229 +            item_separator = self.item_separator + newline_indent
   1.230 +            yield newline_indent
   1.231 +        else:
   1.232 +            newline_indent = None
   1.233 +            item_separator = self.item_separator
   1.234 +        first = True
   1.235 +        if self.ensure_ascii:
   1.236 +            encoder = encode_basestring_ascii
   1.237 +        else:
   1.238 +            encoder = encode_basestring
   1.239 +        allow_nan = self.allow_nan
   1.240 +        if self.sort_keys:
   1.241 +            keys = dct.keys()
   1.242 +            keys.sort()
   1.243 +            items = [(k, dct[k]) for k in keys]
   1.244 +        else:
   1.245 +            items = dct.iteritems()
   1.246 +        _encoding = self.encoding
   1.247 +        _do_decode = (_encoding is not None
   1.248 +            and not (_encoding == 'utf-8'))
   1.249 +        for key, value in items:
   1.250 +            if isinstance(key, str):
   1.251 +                if _do_decode:
   1.252 +                    key = key.decode(_encoding)
   1.253 +            elif isinstance(key, basestring):
   1.254 +                pass
   1.255 +            # JavaScript is weakly typed for these, so it makes sense to
   1.256 +            # also allow them.  Many encoders seem to do something like this.
   1.257 +            elif isinstance(key, float):
   1.258 +                key = floatstr(key, allow_nan)
   1.259 +            elif isinstance(key, (int, long)):
   1.260 +                key = str(key)
   1.261 +            elif key is True:
   1.262 +                key = 'true'
   1.263 +            elif key is False:
   1.264 +                key = 'false'
   1.265 +            elif key is None:
   1.266 +                key = 'null'
   1.267 +            elif self.skipkeys:
   1.268 +                continue
   1.269 +            else:
   1.270 +                raise TypeError("key %r is not a string" % (key,))
   1.271 +            if first:
   1.272 +                first = False
   1.273 +            else:
   1.274 +                yield item_separator
   1.275 +            yield encoder(key)
   1.276 +            yield key_separator
   1.277 +            for chunk in self._iterencode(value, markers):
   1.278 +                yield chunk
   1.279 +        if newline_indent is not None:
   1.280 +            self.current_indent_level -= 1
   1.281 +            yield self._newline_indent()
   1.282 +        yield '}'
   1.283 +        if markers is not None:
   1.284 +            del markers[markerid]
   1.285 +
   1.286 +    def _iterencode(self, o, markers=None):
   1.287 +        if isinstance(o, basestring):
   1.288 +            if self.ensure_ascii:
   1.289 +                encoder = encode_basestring_ascii
   1.290 +            else:
   1.291 +                encoder = encode_basestring
   1.292 +            _encoding = self.encoding
   1.293 +            if (_encoding is not None and isinstance(o, str)
   1.294 +                    and not (_encoding == 'utf-8')):
   1.295 +                o = o.decode(_encoding)
   1.296 +            yield encoder(o)
   1.297 +        elif o is None:
   1.298 +            yield 'null'
   1.299 +        elif o is True:
   1.300 +            yield 'true'
   1.301 +        elif o is False:
   1.302 +            yield 'false'
   1.303 +        elif isinstance(o, (int, long)):
   1.304 +            yield str(o)
   1.305 +        elif isinstance(o, float):
   1.306 +            yield floatstr(o, self.allow_nan)
   1.307 +        elif isinstance(o, (list, tuple)):
   1.308 +            for chunk in self._iterencode_list(o, markers):
   1.309 +                yield chunk
   1.310 +        elif isinstance(o, dict):
   1.311 +            for chunk in self._iterencode_dict(o, markers):
   1.312 +                yield chunk
   1.313 +        else:
   1.314 +            if markers is not None:
   1.315 +                markerid = id(o)
   1.316 +                if markerid in markers:
   1.317 +                    raise ValueError("Circular reference detected")
   1.318 +                markers[markerid] = o
   1.319 +            for chunk in self._iterencode_default(o, markers):
   1.320 +                yield chunk
   1.321 +            if markers is not None:
   1.322 +                del markers[markerid]
   1.323 +
   1.324 +    def _iterencode_default(self, o, markers=None):
   1.325 +        newobj = self.default(o)
   1.326 +        return self._iterencode(newobj, markers)
   1.327 +
   1.328 +    def default(self, o):
   1.329 +        """
   1.330 +        Implement this method in a subclass such that it returns
   1.331 +        a serializable object for ``o``, or calls the base implementation
   1.332 +        (to raise a ``TypeError``).
   1.333 +
   1.334 +        For example, to support arbitrary iterators, you could
   1.335 +        implement default like this::
   1.336 +            
   1.337 +            def default(self, o):
   1.338 +                try:
   1.339 +                    iterable = iter(o)
   1.340 +                except TypeError:
   1.341 +                    pass
   1.342 +                else:
   1.343 +                    return list(iterable)
   1.344 +                return JSONEncoder.default(self, o)
   1.345 +        """
   1.346 +        raise TypeError("%r is not JSON serializable" % (o,))
   1.347 +
   1.348 +    def encode(self, o):
   1.349 +        """
   1.350 +        Return a JSON string representation of a Python data structure.
   1.351 +
   1.352 +        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
   1.353 +        '{"foo": ["bar", "baz"]}'
   1.354 +        """
   1.355 +        # This is for extremely simple cases and benchmarks.
   1.356 +        if isinstance(o, basestring):
   1.357 +            if isinstance(o, str):
   1.358 +                _encoding = self.encoding
   1.359 +                if (_encoding is not None 
   1.360 +                        and not (_encoding == 'utf-8')):
   1.361 +                    o = o.decode(_encoding)
   1.362 +            if self.ensure_ascii:
   1.363 +                return encode_basestring_ascii(o)
   1.364 +            else:
   1.365 +                return encode_basestring(o)
   1.366 +        # This doesn't pass the iterator directly to ''.join() because the
   1.367 +        # exceptions aren't as detailed.  The list call should be roughly
   1.368 +        # equivalent to the PySequence_Fast that ''.join() would do.
   1.369 +        chunks = list(self.iterencode(o))
   1.370 +        return ''.join(chunks)
   1.371 +
   1.372 +    def iterencode(self, o):
   1.373 +        """
   1.374 +        Encode the given object and yield each string
   1.375 +        representation as available.
   1.376 +        
   1.377 +        For example::
   1.378 +            
   1.379 +            for chunk in JSONEncoder().iterencode(bigobject):
   1.380 +                mysocket.write(chunk)
   1.381 +        """
   1.382 +        if self.check_circular:
   1.383 +            markers = {}
   1.384 +        else:
   1.385 +            markers = None
   1.386 +        return self._iterencode(o, markers)
   1.387 +
   1.388 +__all__ = ['JSONEncoder']

mercurial