addon-sdk/source/python-lib/simplejson/encoder.py

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double-key logic depending on private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
One reservation remains: how to convince FindCookie callers to test the
condition and pass a nullptr when double-key logic is disabled.

     1 """
     2 Implementation of JSONEncoder
     3 """
     4 import re
     6 try:
     7     from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
     8 except ImportError:
     9     pass
    11 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
    12 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
    13 HAS_UTF8 = re.compile(r'[\x80-\xff]')
    14 ESCAPE_DCT = {
    15     '\\': '\\\\',
    16     '"': '\\"',
    17     '\b': '\\b',
    18     '\f': '\\f',
    19     '\n': '\\n',
    20     '\r': '\\r',
    21     '\t': '\\t',
    22 }
    23 for i in range(0x20):
    24     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
    26 # Assume this produces an infinity on all machines (probably not guaranteed)
    27 INFINITY = float('1e66666')
    28 FLOAT_REPR = repr
    30 def floatstr(o, allow_nan=True):
    31     # Check for specials.  Note that this type of test is processor- and/or
    32     # platform-specific, so do tests which don't depend on the internals.
    34     if o != o:
    35         text = 'NaN'
    36     elif o == INFINITY:
    37         text = 'Infinity'
    38     elif o == -INFINITY:
    39         text = '-Infinity'
    40     else:
    41         return FLOAT_REPR(o)
    43     if not allow_nan:
    44         raise ValueError("Out of range float values are not JSON compliant: %r"
    45             % (o,))
    47     return text
    50 def encode_basestring(s):
    51     """
    52     Return a JSON representation of a Python string
    53     """
    54     def replace(match):
    55         return ESCAPE_DCT[match.group(0)]
    56     return '"' + ESCAPE.sub(replace, s) + '"'
    59 def py_encode_basestring_ascii(s):
    60     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
    61         s = s.decode('utf-8')
    62     def replace(match):
    63         s = match.group(0)
    64         try:
    65             return ESCAPE_DCT[s]
    66         except KeyError:
    67             n = ord(s)
    68             if n < 0x10000:
    69                 return '\\u%04x' % (n,)
    70             else:
    71                 # surrogate pair
    72                 n -= 0x10000
    73                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
    74                 s2 = 0xdc00 | (n & 0x3ff)
    75                 return '\\u%04x\\u%04x' % (s1, s2)
    76     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
    79 try:
    80     encode_basestring_ascii = c_encode_basestring_ascii
    81 except NameError:
    82     encode_basestring_ascii = py_encode_basestring_ascii
    85 class JSONEncoder(object):
    86     """
    87     Extensible JSON <http://json.org> encoder for Python data structures.
    89     Supports the following objects and types by default:
    91     +-------------------+---------------+
    92     | Python            | JSON          |
    93     +===================+===============+
    94     | dict              | object        |
    95     +-------------------+---------------+
    96     | list, tuple       | array         |
    97     +-------------------+---------------+
    98     | str, unicode      | string        |
    99     +-------------------+---------------+
   100     | int, long, float  | number        |
   101     +-------------------+---------------+
   102     | True              | true          |
   103     +-------------------+---------------+
   104     | False             | false         |
   105     +-------------------+---------------+
   106     | None              | null          |
   107     +-------------------+---------------+
   109     To extend this to recognize other objects, subclass and implement a
   110     ``.default()`` method with another method that returns a serializable
   111     object for ``o`` if possible, otherwise it should call the superclass
   112     implementation (to raise ``TypeError``).
   113     """
   114     __all__ = ['__init__', 'default', 'encode', 'iterencode']
   115     item_separator = ', '
   116     key_separator = ': '
   117     def __init__(self, skipkeys=False, ensure_ascii=True,
   118             check_circular=True, allow_nan=True, sort_keys=False,
   119             indent=None, separators=None, encoding='utf-8', default=None):
   120         """
   121         Constructor for JSONEncoder, with sensible defaults.
   123         If skipkeys is False, then it is a TypeError to attempt
   124         encoding of keys that are not str, int, long, float or None.  If
   125         skipkeys is True, such items are simply skipped.
   127         If ensure_ascii is True, the output is guaranteed to be str
   128         objects with all incoming unicode characters escaped.  If
   129         ensure_ascii is false, the output will be unicode object.
   131         If check_circular is True, then lists, dicts, and custom encoded
   132         objects will be checked for circular references during encoding to
   133         prevent an infinite recursion (which would cause an OverflowError).
   134         Otherwise, no such check takes place.
   136         If allow_nan is True, then NaN, Infinity, and -Infinity will be
   137         encoded as such.  This behavior is not JSON specification compliant,
   138         but is consistent with most JavaScript based encoders and decoders.
   139         Otherwise, it will be a ValueError to encode such floats.
   141         If sort_keys is True, then the output of dictionaries will be
   142         sorted by key; this is useful for regression tests to ensure
   143         that JSON serializations can be compared on a day-to-day basis.
   145         If indent is a non-negative integer, then JSON array
   146         elements and object members will be pretty-printed with that
   147         indent level.  An indent level of 0 will only insert newlines.
   148         None is the most compact representation.
   150         If specified, separators should be a (item_separator, key_separator)
   151         tuple.  The default is (', ', ': ').  To get the most compact JSON
   152         representation you should specify (',', ':') to eliminate whitespace.
   154         If specified, default is a function that gets called for objects
   155         that can't otherwise be serialized.  It should return a JSON encodable
   156         version of the object or raise a ``TypeError``.
   158         If encoding is not None, then all input strings will be
   159         transformed into unicode using that encoding prior to JSON-encoding.
   160         The default is UTF-8.
   161         """
   163         self.skipkeys = skipkeys
   164         self.ensure_ascii = ensure_ascii
   165         self.check_circular = check_circular
   166         self.allow_nan = allow_nan
   167         self.sort_keys = sort_keys
   168         self.indent = indent
   169         self.current_indent_level = 0
   170         if separators is not None:
   171             self.item_separator, self.key_separator = separators
   172         if default is not None:
   173             self.default = default
   174         self.encoding = encoding
   176     def _newline_indent(self):
   177         return '\n' + (' ' * (self.indent * self.current_indent_level))
   179     def _iterencode_list(self, lst, markers=None):
   180         if not lst:
   181             yield '[]'
   182             return
   183         if markers is not None:
   184             markerid = id(lst)
   185             if markerid in markers:
   186                 raise ValueError("Circular reference detected")
   187             markers[markerid] = lst
   188         yield '['
   189         if self.indent is not None:
   190             self.current_indent_level += 1
   191             newline_indent = self._newline_indent()
   192             separator = self.item_separator + newline_indent
   193             yield newline_indent
   194         else:
   195             newline_indent = None
   196             separator = self.item_separator
   197         first = True
   198         for value in lst:
   199             if first:
   200                 first = False
   201             else:
   202                 yield separator
   203             for chunk in self._iterencode(value, markers):
   204                 yield chunk
   205         if newline_indent is not None:
   206             self.current_indent_level -= 1
   207             yield self._newline_indent()
   208         yield ']'
   209         if markers is not None:
   210             del markers[markerid]
   212     def _iterencode_dict(self, dct, markers=None):
   213         if not dct:
   214             yield '{}'
   215             return
   216         if markers is not None:
   217             markerid = id(dct)
   218             if markerid in markers:
   219                 raise ValueError("Circular reference detected")
   220             markers[markerid] = dct
   221         yield '{'
   222         key_separator = self.key_separator
   223         if self.indent is not None:
   224             self.current_indent_level += 1
   225             newline_indent = self._newline_indent()
   226             item_separator = self.item_separator + newline_indent
   227             yield newline_indent
   228         else:
   229             newline_indent = None
   230             item_separator = self.item_separator
   231         first = True
   232         if self.ensure_ascii:
   233             encoder = encode_basestring_ascii
   234         else:
   235             encoder = encode_basestring
   236         allow_nan = self.allow_nan
   237         if self.sort_keys:
   238             keys = dct.keys()
   239             keys.sort()
   240             items = [(k, dct[k]) for k in keys]
   241         else:
   242             items = dct.iteritems()
   243         _encoding = self.encoding
   244         _do_decode = (_encoding is not None
   245             and not (_encoding == 'utf-8'))
   246         for key, value in items:
   247             if isinstance(key, str):
   248                 if _do_decode:
   249                     key = key.decode(_encoding)
   250             elif isinstance(key, basestring):
   251                 pass
   252             # JavaScript is weakly typed for these, so it makes sense to
   253             # also allow them.  Many encoders seem to do something like this.
   254             elif isinstance(key, float):
   255                 key = floatstr(key, allow_nan)
   256             elif isinstance(key, (int, long)):
   257                 key = str(key)
   258             elif key is True:
   259                 key = 'true'
   260             elif key is False:
   261                 key = 'false'
   262             elif key is None:
   263                 key = 'null'
   264             elif self.skipkeys:
   265                 continue
   266             else:
   267                 raise TypeError("key %r is not a string" % (key,))
   268             if first:
   269                 first = False
   270             else:
   271                 yield item_separator
   272             yield encoder(key)
   273             yield key_separator
   274             for chunk in self._iterencode(value, markers):
   275                 yield chunk
   276         if newline_indent is not None:
   277             self.current_indent_level -= 1
   278             yield self._newline_indent()
   279         yield '}'
   280         if markers is not None:
   281             del markers[markerid]
   283     def _iterencode(self, o, markers=None):
   284         if isinstance(o, basestring):
   285             if self.ensure_ascii:
   286                 encoder = encode_basestring_ascii
   287             else:
   288                 encoder = encode_basestring
   289             _encoding = self.encoding
   290             if (_encoding is not None and isinstance(o, str)
   291                     and not (_encoding == 'utf-8')):
   292                 o = o.decode(_encoding)
   293             yield encoder(o)
   294         elif o is None:
   295             yield 'null'
   296         elif o is True:
   297             yield 'true'
   298         elif o is False:
   299             yield 'false'
   300         elif isinstance(o, (int, long)):
   301             yield str(o)
   302         elif isinstance(o, float):
   303             yield floatstr(o, self.allow_nan)
   304         elif isinstance(o, (list, tuple)):
   305             for chunk in self._iterencode_list(o, markers):
   306                 yield chunk
   307         elif isinstance(o, dict):
   308             for chunk in self._iterencode_dict(o, markers):
   309                 yield chunk
   310         else:
   311             if markers is not None:
   312                 markerid = id(o)
   313                 if markerid in markers:
   314                     raise ValueError("Circular reference detected")
   315                 markers[markerid] = o
   316             for chunk in self._iterencode_default(o, markers):
   317                 yield chunk
   318             if markers is not None:
   319                 del markers[markerid]
   321     def _iterencode_default(self, o, markers=None):
   322         newobj = self.default(o)
   323         return self._iterencode(newobj, markers)
   325     def default(self, o):
   326         """
   327         Implement this method in a subclass such that it returns
   328         a serializable object for ``o``, or calls the base implementation
   329         (to raise a ``TypeError``).
   331         For example, to support arbitrary iterators, you could
   332         implement default like this::
   334             def default(self, o):
   335                 try:
   336                     iterable = iter(o)
   337                 except TypeError:
   338                     pass
   339                 else:
   340                     return list(iterable)
   341                 return JSONEncoder.default(self, o)
   342         """
   343         raise TypeError("%r is not JSON serializable" % (o,))
   345     def encode(self, o):
   346         """
   347         Return a JSON string representation of a Python data structure.
   349         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
   350         '{"foo": ["bar", "baz"]}'
   351         """
   352         # This is for extremely simple cases and benchmarks.
   353         if isinstance(o, basestring):
   354             if isinstance(o, str):
   355                 _encoding = self.encoding
   356                 if (_encoding is not None 
   357                         and not (_encoding == 'utf-8')):
   358                     o = o.decode(_encoding)
   359             if self.ensure_ascii:
   360                 return encode_basestring_ascii(o)
   361             else:
   362                 return encode_basestring(o)
   363         # This doesn't pass the iterator directly to ''.join() because the
   364         # exceptions aren't as detailed.  The list call should be roughly
   365         # equivalent to the PySequence_Fast that ''.join() would do.
   366         chunks = list(self.iterencode(o))
   367         return ''.join(chunks)
   369     def iterencode(self, o):
   370         """
   371         Encode the given object and yield each string
   372         representation as available.
   374         For example::
   376             for chunk in JSONEncoder().iterencode(bigobject):
   377                 mysocket.write(chunk)
   378         """
   379         if self.check_circular:
   380             markers = {}
   381         else:
   382             markers = None
   383         return self._iterencode(o, markers)
   385 __all__ = ['JSONEncoder']

mercurial