"""
Implementation of JSONEncoder
"""
import re

try:
    from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    pass

# Python 2/3 compatibility aliases.  On Python 2 ``basestring`` and ``long``
# exist and ``str`` is the byte-string type; on Python 3 they do not, text is
# ``str`` and byte strings are ``bytes``.
try:
    _string_types = basestring
    _binary_type = str
    _integer_types = (int, long)
except NameError:
    _string_types = str
    _binary_type = bytes
    _integer_types = (int,)
# True when the native ``str`` type is a byte string (i.e. Python 2).
_STR_IS_BYTES = _binary_type is str

ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character gets a generic \uXXXX escape.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')
FLOAT_REPR = repr


def floatstr(o, allow_nan=True):
    """
    Return the text used to represent the float ``o`` in the output.

    NaN and the two infinities are rendered as ``NaN``, ``Infinity`` and
    ``-Infinity``; every other value is rendered with ``FLOAT_REPR``.

    Raises ``ValueError`` for NaN/Infinity/-Infinity when ``allow_nan``
    is false.
    """
    # Check for specials.  Note that this type of test is processor- and/or
    # platform-specific, so do tests which don't depend on the internals.
    if o != o:
        # NaN is the only float that compares unequal to itself.
        text = 'NaN'
    elif o == INFINITY:
        text = 'Infinity'
    elif o == -INFINITY:
        text = '-Infinity'
    else:
        return FLOAT_REPR(o)

    if not allow_nan:
        raise ValueError("Out of range float values are not JSON compliant: %r"
            % (o,))

    return text


def encode_basestring(s):
    """
    Return a JSON representation of a Python string
    """
    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


def py_encode_basestring_ascii(s):
    """
    Return an ASCII-only JSON representation of a Python string

    Backslash, double quote and every character outside the printable
    ASCII range (space through ``~``) are replaced by escape sequences.
    Byte strings are decoded as UTF-8 when necessary.
    """
    if isinstance(s, _binary_type):
        # Where ``str`` is a byte string (Python 2) pure-ASCII input can be
        # escaped as-is, so only decode when high bytes are present.  On
        # Python 3 the text patterns cannot be applied to ``bytes``, so the
        # input is always decoded.
        if not _STR_IS_BYTES or HAS_UTF8.search(s) is not None:
            s = s.decode('utf-8')

    def replace(match):
        s = match.group(0)
        try:
            return ESCAPE_DCT[s]
        except KeyError:
            n = ord(s)
            if n < 0x10000:
                return '\\u%04x' % (n,)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u%04x\\u%04x' % (s1, s2)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'


# Prefer the C implementation when the optional import above succeeded.
try:
    encode_basestring_ascii = c_encode_basestring_ascii
except NameError:
    encode_basestring_ascii = py_encode_basestring_ascii


class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float, bool or None.
        If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be a unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # Nesting depth of the container currently being written; only
        # touched while pretty-printing (indent is not None).
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method with the supplied callable.
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """
        Return a newline followed by the indentation for the current level.
        """
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the chunks of the JSON array for the list or tuple ``lst``.

        ``markers`` maps ``id()`` of the containers currently being encoded
        to the containers themselves, or is None when circular reference
        checking is off.  Raises ``ValueError`` if ``lst`` is already in
        ``markers``.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        indented = self.indent is not None
        if indented:
            self.current_indent_level += 1
        try:
            if indented:
                newline_indent = self._newline_indent()
                separator = self.item_separator + newline_indent
                yield newline_indent
            else:
                separator = self.item_separator
            first = True
            for value in lst:
                if first:
                    first = False
                else:
                    yield separator
                for chunk in self._iterencode(value, markers):
                    yield chunk
        finally:
            # Restore the level even when encoding an element fails, so the
            # encoder instance stays usable for later calls.
            if indented:
                self.current_indent_level -= 1
        if indented:
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the chunks of the JSON object for the mapping ``dct``.

        String keys are used as they are (byte-string keys are first decoded
        with ``self.encoding`` unless it is None or 'utf-8'); float, bool,
        None and integer keys are converted to their JSON text and emitted
        as strings.  Any other key is skipped when ``self.skipkeys`` is true
        and raises ``TypeError`` otherwise.  Raises ``ValueError`` if ``dct``
        is already in ``markers``.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        indented = self.indent is not None
        if indented:
            self.current_indent_level += 1
        try:
            if indented:
                newline_indent = self._newline_indent()
                item_separator = self.item_separator + newline_indent
                yield newline_indent
            else:
                item_separator = self.item_separator
            first = True
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            allow_nan = self.allow_nan
            if self.sort_keys:
                keys = sorted(dct.keys())
                items = [(k, dct[k]) for k in keys]
            else:
                # Iterate lazily where the mapping offers it (Python 2).
                try:
                    iteritems = dct.iteritems
                except AttributeError:
                    iteritems = dct.items
                items = iteritems()
            _encoding = self.encoding
            _do_decode = (_encoding is not None
                and not (_encoding == 'utf-8'))
            for key, value in items:
                if isinstance(key, _string_types):
                    if _do_decode and isinstance(key, _binary_type):
                        key = key.decode(_encoding)
                # JavaScript is weakly typed for these, so it makes sense to
                # also allow them.  Many encoders seem to do something like
                # this.
                elif isinstance(key, float):
                    key = floatstr(key, allow_nan)
                # bool is a subclass of int, so True/False must be tested
                # before the integer check or they would come out as
                # "True"/"False".
                elif key is True:
                    key = 'true'
                elif key is False:
                    key = 'false'
                elif key is None:
                    key = 'null'
                elif isinstance(key, _integer_types):
                    key = str(key)
                elif self.skipkeys:
                    continue
                else:
                    raise TypeError("key %r is not a string" % (key,))
                if first:
                    first = False
                else:
                    yield item_separator
                yield encoder(key)
                yield key_separator
                for chunk in self._iterencode(value, markers):
                    yield chunk
        finally:
            # Restore the level even when encoding a member fails, so the
            # encoder instance stays usable for later calls.
            if indented:
                self.current_indent_level -= 1
        if indented:
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the chunks of the JSON text for ``o``, dispatching on its type.

        Objects of unrecognized types are converted through ``self.default``
        and the result is encoded in their place.
        """
        if isinstance(o, _string_types):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, _binary_type)
                    and not (_encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        # The bool singletons are tested before the integer check because
        # bool is a subclass of int.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, _integer_types):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """
        Convert ``o`` with ``self.default`` and return the chunk iterator
        for the converted object.
        """
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, _string_types):
            if isinstance(o, _binary_type):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)

__all__ = ['JSONEncoder']