addon-sdk/source/python-lib/simplejson/encoder.py

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double-key logic according to private browsing
mode or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
One reservation remains: how to convince FindCookie users to test the
condition and pass a nullptr when disabling double-key logic.

michael@0 1 """
michael@0 2 Implementation of JSONEncoder
michael@0 3 """
michael@0 4 import re
michael@0 5
michael@0 6 try:
michael@0 7 from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
michael@0 8 except ImportError:
michael@0 9 pass
michael@0 10
# Matches each character that has to be escaped inside a JSON string
# literal when non-ASCII output is acceptable: control characters,
# backslash and double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Matches backslash, double quote and every character outside the
# printable ASCII range (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character/byte in the 0x80-0xff range.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Character -> escape sequence table.  The short escapes are listed
# explicitly; every remaining control character below 0x20 is filled in
# with its \uXXXX form.
ESCAPE_DCT = dict([
    ('\\', '\\\\'),
    ('"', '\\"'),
    ('\b', '\\b'),
    ('\f', '\\f'),
    ('\n', '\\n'),
    ('\r', '\\r'),
    ('\t', '\\t'),
])
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u%04x' % (i,)

# Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')
# Callable used to turn a float into its textual form.
FLOAT_REPR = repr
michael@0 29
def floatstr(o, allow_nan=True):
    """
    Return the JSON text for the float ``o``.

    NaN and the two infinities are rendered as ``NaN``, ``Infinity`` and
    ``-Infinity``; every other value is rendered with ``FLOAT_REPR``.
    Raises ``ValueError`` for the three special values when ``allow_nan``
    is false.
    """
    # Detect the special values with plain comparisons only, so the test
    # does not depend on processor- or platform-specific float internals.
    special = None
    if o != o:
        special = 'NaN'
    elif o == INFINITY:
        special = 'Infinity'
    elif o == -INFINITY:
        special = '-Infinity'

    if special is None:
        return FLOAT_REPR(o)
    if allow_nan:
        return special
    raise ValueError("Out of range float values are not JSON compliant: %r"
        % (o,))
michael@0 48
michael@0 49
def encode_basestring(s):
    """
    Return a JSON representation of a Python string
    """
    # Every character matched by ESCAPE is swapped for its ESCAPE_DCT entry;
    # the result is then wrapped in double quotes.
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
michael@0 57
michael@0 58
def py_encode_basestring_ascii(s):
    """
    Return an ASCII-only JSON representation of a Python string
    """
    # A byte string containing bytes in the 0x80-0xff range is decoded as
    # UTF-8 before escaping.
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def _escape(found):
        char = found.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint >= 0x10000:
            # Outside the BMP: emit a UTF-16 surrogate pair.
            codepoint -= 0x10000
            high = 0xd800 | ((codepoint >> 10) & 0x3ff)
            low = 0xdc00 | (codepoint & 0x3ff)
            return '\\u%04x\\u%04x' % (high, low)
        return '\\u%04x' % (codepoint,)

    body = ESCAPE_ASCII.sub(_escape, s)
    return '"' + str(body) + '"'
michael@0 77
michael@0 78
# c_encode_basestring_ascii is only bound when the optional
# simplejson._speedups import at the top of the module succeeded; fall back
# to the pure-Python implementation otherwise.
try:
    c_encode_basestring_ascii
except NameError:
    encode_basestring_ascii = py_encode_basestring_ascii
else:
    encode_basestring_ascii = c_encode_basestring_ascii
michael@0 83
michael@0 84
class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation (to
    raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float, bool or None.
        If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding
        and a ValueError is raised when one is found.
        Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # Nesting depth of the container currently being written; only
        # consulted when ``indent`` is set.
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """Return a newline followed by the indentation for the current depth."""
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the JSON chunks for the list or tuple ``lst``.

        ``markers`` is the id -> object dict used for circular reference
        detection, or None when that check is disabled.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            # Done with this container; it may legitimately appear again
            # elsewhere in the structure.
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the JSON chunks for the dict ``dct``.

        Keys that are not strings are converted: floats through
        ``floatstr``, True/False/None to ``true``/``false``/``null`` and
        int/long through ``str``.  Any other key raises ``TypeError``
        unless ``skipkeys`` is set, in which case the item is skipped.
        ``markers`` is the id -> object dict used for circular reference
        detection, or None when that check is disabled.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            items = [(k, dct[k]) for k in sorted(dct.keys())]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        # Byte-string keys are decoded here only for non-UTF-8 encodings.
        _do_decode = (_encoding is not None
            and not (_encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            # The bool singletons must be tested before the int check:
            # bool is a subclass of int, so True/False would otherwise be
            # written as "True"/"False" instead of "true"/"false".
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the JSON chunks for an arbitrary object ``o``.

        Strings, None, booleans, numbers, lists/tuples and dicts are
        handled directly; anything else is converted through
        ``default()`` and the result is encoded in its place.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        # Identity checks for the bool singletons come before the int test
        # because bool is a subclass of int.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """Convert ``o`` with ``default()`` and return the chunk iterator for the result."""
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)


__all__ = ['JSONEncoder']

mercurial