Imported Upstream version 3.5.3+dfsg
Felipe Sateler
11 years ago
0 | simplejson 2.3.2 | |
1 | ||
2 | Copyright (c) 2006 Bob Ippolito | |
3 | ||
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of | |
5 | this software and associated documentation files (the "Software"), to deal in | |
6 | the Software without restriction, including without limitation the rights to | |
7 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |
8 | of the Software, and to permit persons to whom the Software is furnished to do | |
9 | so, subject to the following conditions: | |
10 | ||
11 | The above copyright notice and this permission notice shall be included in all | |
12 | copies or substantial portions of the Software. | |
13 | ||
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
20 | SOFTWARE. |
0 | r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of | |
1 | JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data | |
2 | interchange format. | |
3 | ||
4 | :mod:`simplejson` exposes an API familiar to users of the standard library | |
5 | :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained | |
6 | version of the :mod:`json` library contained in Python 2.6, but maintains | |
7 | compatibility with Python 2.4 and Python 2.5 and (currently) has | |
8 | significant performance advantages, even without using the optional C | |
9 | extension for speedups. | |
10 | ||
11 | Encoding basic Python object hierarchies:: | |
12 | ||
13 | >>> import simplejson as json | |
14 | >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) | |
15 | '["foo", {"bar": ["baz", null, 1.0, 2]}]' | |
16 | >>> print json.dumps("\"foo\bar") | |
17 | "\"foo\bar" | |
18 | >>> print json.dumps(u'\u1234') | |
19 | "\u1234" | |
20 | >>> print json.dumps('\\') | |
21 | "\\" | |
22 | >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) | |
23 | {"a": 0, "b": 0, "c": 0} | |
24 | >>> from StringIO import StringIO | |
25 | >>> io = StringIO() | |
26 | >>> json.dump(['streaming API'], io) | |
27 | >>> io.getvalue() | |
28 | '["streaming API"]' | |
29 | ||
30 | Compact encoding:: | |
31 | ||
32 | >>> import simplejson as json | |
33 | >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) | |
34 | '[1,2,3,{"4":5,"6":7}]' | |
35 | ||
36 | Pretty printing:: | |
37 | ||
38 | >>> import simplejson as json | |
39 | >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ') | |
40 | >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) | |
41 | { | |
42 | "4": 5, | |
43 | "6": 7 | |
44 | } | |
45 | ||
46 | Decoding JSON:: | |
47 | ||
48 | >>> import simplejson as json | |
49 | >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] | |
50 | >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj | |
51 | True | |
52 | >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' | |
53 | True | |
54 | >>> from StringIO import StringIO | |
55 | >>> io = StringIO('["streaming API"]') | |
56 | >>> json.load(io)[0] == 'streaming API' | |
57 | True | |
58 | ||
59 | Specializing JSON object decoding:: | |
60 | ||
61 | >>> import simplejson as json | |
62 | >>> def as_complex(dct): | |
63 | ... if '__complex__' in dct: | |
64 | ... return complex(dct['real'], dct['imag']) | |
65 | ... return dct | |
66 | ... | |
67 | >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', | |
68 | ... object_hook=as_complex) | |
69 | (1+2j) | |
70 | >>> from decimal import Decimal | |
71 | >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') | |
72 | True | |
73 | ||
74 | Specializing JSON object encoding:: | |
75 | ||
76 | >>> import simplejson as json | |
77 | >>> def encode_complex(obj): | |
78 | ... if isinstance(obj, complex): | |
79 | ... return [obj.real, obj.imag] | |
... raise TypeError(repr(obj) + " is not JSON serializable")
81 | ... | |
82 | >>> json.dumps(2 + 1j, default=encode_complex) | |
83 | '[2.0, 1.0]' | |
84 | >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) | |
85 | '[2.0, 1.0]' | |
86 | >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) | |
87 | '[2.0, 1.0]' | |
88 | ||
89 | ||
90 | Using simplejson.tool from the shell to validate and pretty-print:: | |
91 | ||
92 | $ echo '{"json":"obj"}' | python -m simplejson.tool | |
93 | { | |
94 | "json": "obj" | |
95 | } | |
96 | $ echo '{ 1.2:3.4}' | python -m simplejson.tool | |
97 | Expecting property name: line 1 column 2 (char 2) | |
98 | """ | |
# Package version, public API surface, and author metadata.
__version__ = '2.3.2'
__all__ = [
    'dump', 'dumps', 'load', 'loads',
    'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
    'OrderedDict',
]

__author__ = 'Bob Ippolito <bob@redivi.com>'

# Decimal is used by loads(use_decimal=True) to parse JSON numbers losslessly.
from decimal import Decimal

from .decoder import JSONDecoder, JSONDecodeError
from .encoder import JSONEncoder
112 | def _import_OrderedDict(): | |
113 | import collections | |
114 | try: | |
115 | return collections.OrderedDict | |
116 | except AttributeError: | |
117 | import ordered_dict | |
118 | return ordered_dict.OrderedDict | |
119 | OrderedDict = _import_OrderedDict() | |
120 | ||
121 | def _import_c_make_encoder(): | |
122 | try: | |
123 | from ._speedups import make_encoder | |
124 | return make_encoder | |
125 | except ImportError: | |
126 | return None | |
127 | ||
# Shared encoder reused by dump()/dumps() whenever every argument is at
# its default value, avoiding a JSONEncoder construction per call.
_default_encoder = JSONEncoder(
    skipkeys=False,
    ensure_ascii=True,
    check_circular=True,
    allow_nan=True,
    indent=None,
    separators=None,
    encoding='utf-8',
    default=None,
    use_decimal=True,
    namedtuple_as_object=True,
    tuple_as_array=True,
)
141 | ||
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        encoding='utf-8', default=None, use_decimal=True,
        namedtuple_as_object=True, tuple_as_array=True,
        **kw):
    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
    ``.write()``-supporting file-like object).

    If ``skipkeys`` is true, ``dict`` keys that are not basic types
    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``,
    ``None``) are skipped instead of raising a ``TypeError``.

    If ``ensure_ascii`` is false, some chunks written to ``fp`` may be
    ``unicode`` instances, subject to normal Python ``str`` to
    ``unicode`` coercion rules.  Unless ``fp.write()`` explicitly
    understands ``unicode`` (as in ``codecs.getwriter()``) this is
    likely to cause an error.

    If ``check_circular`` is false, the circular reference check for
    container types is skipped; a circular reference will then result in
    an ``OverflowError`` (or worse).

    If ``allow_nan`` is false, serializing out of range ``float`` values
    (``nan``, ``inf``, ``-inf``) raises a ``ValueError``, in strict
    compliance with the JSON specification, instead of using the
    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).

    If *indent* is a string, JSON array elements and object members are
    pretty-printed with a newline followed by that string repeated for
    each level of nesting.  ``None`` (the default) selects the most
    compact representation without any newlines.  For backwards
    compatibility with simplejson earlier than 2.1.0, an integer is also
    accepted and converted to a string with that many spaces.

    If ``separators`` is an ``(item_separator, dict_separator)`` tuple,
    it is used instead of the default ``(', ', ': ')`` separators.
    ``(',', ':')`` is the most compact JSON representation.

    ``encoding`` is the character encoding for str instances, default
    UTF-8.

    ``default(obj)`` is a function that should return a serializable
    version of obj or raise TypeError.  The default simply raises
    TypeError.

    If *use_decimal* is true (default: ``True``), decimal.Decimal is
    natively serialized to JSON with full precision.

    If *namedtuple_as_object* is true (default: ``True``),
    :class:`tuple` subclasses with ``_asdict()`` methods are encoded as
    JSON objects.

    If *tuple_as_array* is true (default: ``True``), :class:`tuple`
    (and subclasses) are encoded as JSON arrays.

    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides
    the ``.default()`` method to serialize additional types), specify it
    with the ``cls`` kwarg.

    """
    # Fast path: with every option at its default, reuse the shared
    # module-level encoder instead of constructing a new one per call.
    all_defaults = (
        not skipkeys and ensure_ascii and check_circular and allow_nan
        and cls is None and indent is None and separators is None
        and encoding == 'utf-8' and default is None and use_decimal
        and namedtuple_as_object and tuple_as_array and not kw)
    if all_defaults:
        encoder = _default_encoder
    else:
        if cls is None:
            cls = JSONEncoder
        encoder = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
            check_circular=check_circular, allow_nan=allow_nan,
            indent=indent, separators=separators, encoding=encoding,
            default=default, use_decimal=use_decimal,
            namedtuple_as_object=namedtuple_as_object,
            tuple_as_array=tuple_as_array,
            **kw)
    # could accelerate with writelines in some versions of Python, at
    # a debuggability cost
    for chunk in encoder.iterencode(obj):
        fp.write(chunk)
221 | ||
222 | ||
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        encoding='utf-8', default=None, use_decimal=True,
        namedtuple_as_object=True,
        tuple_as_array=True,
        **kw):
    """Serialize ``obj`` to a JSON formatted ``str``.

    If ``skipkeys`` is true then ``dict`` keys that are not basic types
    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
    will be skipped instead of raising a ``TypeError``.

    If ``ensure_ascii`` is false, then the return value will be a
    ``unicode`` instance subject to normal Python ``str`` to ``unicode``
    coercion rules instead of being escaped to an ASCII ``str``.

    If ``check_circular`` is false, then the circular reference check
    for container types will be skipped and a circular reference will
    result in an ``OverflowError`` (or worse).

    If ``allow_nan`` is false, then it will be a ``ValueError`` to
    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    strict compliance of the JSON specification, instead of using the
    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).

    If ``indent`` is a string, then JSON array elements and object members
    will be pretty-printed with a newline followed by that string repeated
    for each level of nesting. ``None`` (the default) selects the most compact
    representation without any newlines. For backwards compatibility with
    versions of simplejson earlier than 2.1.0, an integer is also accepted
    and is converted to a string with that many spaces.

    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    then it will be used instead of the default ``(', ', ': ')`` separators.
    ``(',', ':')`` is the most compact JSON representation.

    ``encoding`` is the character encoding for str instances, default is UTF-8.

    ``default(obj)`` is a function that should return a serializable version
    of obj or raise TypeError. The default simply raises TypeError.

    If *use_decimal* is true (default: ``True``) then decimal.Decimal
    will be natively serialized to JSON with full precision.

    If *namedtuple_as_object* is true (default: ``True``),
    :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
    as JSON objects.

    If *tuple_as_array* is true (default: ``True``),
    :class:`tuple` (and subclasses) will be encoded as JSON arrays.

    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    ``.default()`` method to serialize additional types), specify it with
    the ``cls`` kwarg.

    """
    # cached encoder: reuse the shared default encoder when every
    # argument is at its default value.
    if (not skipkeys and ensure_ascii and
        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
        encoding == 'utf-8' and default is None and use_decimal
        and namedtuple_as_object and tuple_as_array and not kw):
        return _default_encoder.encode(obj)
    if cls is None:
        cls = JSONEncoder
    return cls(
        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
        separators=separators, encoding=encoding, default=default,
        use_decimal=use_decimal,
        namedtuple_as_object=namedtuple_as_object,
        tuple_as_array=tuple_as_array,
        **kw).encode(obj)
296 | ||
297 | ||
# Shared decoder reused by load()/loads() when no customization is
# requested, avoiding a JSONDecoder construction per call.
_default_decoder = JSONDecoder(encoding=None, object_hook=None,
                               object_pairs_hook=None)
300 | ||
301 | ||
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None,
        use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
        **kw):
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    a JSON document) to a Python object.

    *encoding* determines the encoding used to interpret any
    :class:`str` objects decoded by this instance (``'utf-8'`` by
    default). It has no effect when decoding :class:`unicode` objects.

    Note that currently only encodings that are a superset of ASCII work,
    strings of other encodings should be passed in as :class:`unicode`.

    *object_hook*, if specified, will be called with the result of every
    JSON object decoded and its return value will be used in place of the
    given :class:`dict`. This can be used to provide custom
    deserializations (e.g. to support JSON-RPC class hinting).

    *object_pairs_hook* is an optional function that will be called with
    the result of any object literal decode with an ordered list of pairs.
    The return value of *object_pairs_hook* will be used instead of the
    :class:`dict`. This feature can be used to implement custom decoders
    that rely on the order that the key and value pairs are decoded (for
    example, :func:`collections.OrderedDict` will remember the order of
    insertion). If *object_hook* is also defined, the *object_pairs_hook*
    takes priority.

    *parse_float*, if specified, will be called with the string of every
    JSON float to be decoded. By default, this is equivalent to
    ``float(num_str)``. This can be used to use another datatype or parser
    for JSON floats (e.g. :class:`decimal.Decimal`).

    *parse_int*, if specified, will be called with the string of every
    JSON int to be decoded. By default, this is equivalent to
    ``int(num_str)``. This can be used to use another datatype or parser
    for JSON integers (e.g. :class:`float`).

    *parse_constant*, if specified, will be called with one of the
    following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
    can be used to raise an exception if invalid JSON numbers are
    encountered.

    If *use_decimal* is true (default: ``False``) then it implies
    parse_float=decimal.Decimal for parity with ``dump``.

    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    kwarg.

    """
    # NOTE(review): *namedtuple_as_object* and *tuple_as_array* are
    # accepted here (signature parity with dump()) but are NOT forwarded
    # to loads() below -- they only affect encoding, so they are silently
    # ignored when decoding; confirm callers do not expect otherwise.
    return loads(fp.read(),
        encoding=encoding, cls=cls, object_hook=object_hook,
        parse_float=parse_float, parse_int=parse_int,
        parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
        use_decimal=use_decimal, **kw)
357 | ||
358 | ||
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None,
        use_decimal=False, **kw):
    """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
    document) to a Python object.

    *encoding* determines how :class:`str` inputs are interpreted
    (``'utf-8'`` by default); it has no effect on :class:`unicode`
    inputs.  Currently only encodings that are a superset of ASCII work;
    strings of other encodings should be passed in as :class:`unicode`.

    *object_hook*, if specified, is called with every decoded JSON
    object and its return value is used in place of the given
    :class:`dict`.  This supports custom deserializations (e.g. JSON-RPC
    class hinting).

    *object_pairs_hook*, if specified, is called with an ordered list of
    (key, value) pairs for every object literal decoded, and its return
    value is used instead of the :class:`dict`.  This supports decoders
    that rely on decode order (e.g. :func:`collections.OrderedDict`
    remembers insertion order).  If *object_hook* is also defined,
    *object_pairs_hook* takes priority.

    *parse_float*, if specified, is called with the string of every JSON
    float to be decoded (default equivalent: ``float(num_str)``); use it
    for another datatype or parser (e.g. :class:`decimal.Decimal`).

    *parse_int*, if specified, is called with the string of every JSON
    int to be decoded (default equivalent: ``int(num_str)``); use it for
    another datatype or parser (e.g. :class:`float`).

    *parse_constant*, if specified, is called with one of
    ``'-Infinity'``, ``'Infinity'``, ``'NaN'``; use it e.g. to raise on
    invalid JSON numbers.

    If *use_decimal* is true (default: ``False``) it implies
    ``parse_float=decimal.Decimal`` for parity with ``dump``.

    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    kwarg.

    """
    # Fast path: with no customization at all, the shared module-level
    # decoder can be reused directly.
    if (cls is None and encoding is None and object_hook is None and
            parse_int is None and parse_float is None and
            parse_constant is None and object_pairs_hook is None
            and not use_decimal and not kw):
        return _default_decoder.decode(s)
    if cls is None:
        cls = JSONDecoder
    # Fold the explicitly-supplied hooks/parsers into **kw so only those
    # actually given are forwarded to the decoder constructor.
    for name, value in (('object_hook', object_hook),
                        ('object_pairs_hook', object_pairs_hook),
                        ('parse_float', parse_float),
                        ('parse_int', parse_int),
                        ('parse_constant', parse_constant)):
        if value is not None:
            kw[name] = value
    if use_decimal:
        if parse_float is not None:
            raise TypeError("use_decimal=True implies parse_float=Decimal")
        kw['parse_float'] = Decimal
    return cls(encoding=encoding, **kw).decode(s)
430 | ||
431 | ||
def _toggle_speedups(enabled):
    """Switch the package between the C ``_speedups`` implementations and
    the pure-Python ones at runtime, then rebuild the cached default
    encoder/decoder so module-level dump/load pick up the change."""
    import simplejson.decoder as dec
    import simplejson.encoder as enc
    import simplejson.scanner as scan
    c_make_encoder = _import_c_make_encoder()
    if enabled:
        # Prefer each C implementation; each `or` falls back to the
        # pure-Python version when the extension did not provide it.
        dec.scanstring = dec.c_scanstring or dec.py_scanstring
        enc.c_make_encoder = c_make_encoder
        enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or
            enc.py_encode_basestring_ascii)
        scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner
    else:
        dec.scanstring = dec.py_scanstring
        enc.c_make_encoder = None
        enc.encode_basestring_ascii = enc.py_encode_basestring_ascii
        scan.make_scanner = scan.py_make_scanner
    # decoder re-exports make_scanner; keep it in sync with scanner.
    dec.make_scanner = scan.make_scanner
    # Rebuild the cached codecs so they bind the newly-selected
    # scanner/encoder implementations.
    global _default_decoder
    _default_decoder = JSONDecoder(
        encoding=None,
        object_hook=None,
        object_pairs_hook=None,
    )
    global _default_encoder
    _default_encoder = JSONEncoder(
        skipkeys=False,
        ensure_ascii=True,
        check_circular=True,
        allow_nan=True,
        indent=None,
        separators=None,
        encoding='utf-8',
        default=None,
    )
0 | """Implementation of JSONDecoder | |
1 | """ | |
2 | import re | |
3 | import sys | |
4 | import struct | |
5 | ||
6 | from .scanner import make_scanner | |
7 | def _import_c_scanstring(): | |
8 | try: | |
9 | from ._speedups import scanstring | |
10 | return scanstring | |
11 | except ImportError: | |
12 | return None | |
13 | c_scanstring = _import_c_scanstring() | |
14 | ||
__all__ = ['JSONDecoder']

# Shared regex flags: VERBOSE permits readable patterns; MULTILINE and
# DOTALL make the patterns behave across embedded newlines.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
18 | ||
def _floatconstants():
    """Return ``(nan, inf, -inf)`` unpacked from raw IEEE-754 bit patterns.

    Built from bytes rather than ``float('nan')``/``float('inf')`` for
    portability on older Python 2 interpreters.
    """
    # 16 bytes: big-endian doubles for NaN (7FF8...) then +inf (7FF0...).
    _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
    # The struct module in Python 2.4 would get frexp() out of range here
    # when an endian is specified in the format string. Fixed in Python 2.5+
    if sys.byteorder != 'big':
        # Byte-swap each 8-byte double individually for little-endian hosts,
        # since the unpack below uses native byte order.
        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
    nan, inf = struct.unpack('dd', _BYTES)
    return nan, inf, -inf

# IEEE special values referenced by the _CONSTANTS lookup table below.
NaN, PosInf, NegInf = _floatconstants()
29 | ||
30 | ||
class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    def __init__(self, msg, doc, pos, end=None):
        # Format the human-readable message eagerly so str(exc) works.
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.end = end
        # Derive line/column of the failure start for error reporting.
        self.lineno, self.colno = linecol(doc, pos)
        if end is not None:
            self.endlineno, self.endcolno = linecol(doc, end)
        else:
            self.endlineno, self.endcolno = None, None
55 | ||
56 | ||
def linecol(doc, pos):
    """Map character offset ``pos`` in ``doc`` to a ``(line, column)`` pair.

    Lines are 1-based.  On the first line the column equals ``pos``
    itself; on later lines it is the offset past the preceding newline.
    """
    newlines = doc.count('\n', 0, pos)
    if newlines:
        column = pos - doc.rindex('\n', 0, pos)
    else:
        column = pos
    return newlines + 1, column
64 | ||
65 | ||
def errmsg(msg, doc, pos, end=None):
    """Build the human-readable decode-error message for ``msg`` at
    offset ``pos`` (optionally spanning to ``end``) within ``doc``."""
    # Note that this function is called from _speedups
    lineno, colno = linecol(doc, pos)
    if end is None:
        return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    return ('%s: line %d column %d - line %d column %d (char %d - %d)'
            % (msg, lineno, colno, endlineno, endcolno, pos, end))
79 | ||
80 | ||
# Literal JSON tokens that decode to non-finite floats.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matches a (possibly empty) run of plain characters, then captures the
# terminator: closing quote, a control character, or a backslash.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Lookup table for the single-character escape sequences.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding assumed for byte strings when the caller supplies none.
DEFAULT_ENCODING = "utf-8"
94 | ||
def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # begin points at the opening quote, for error reporting.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                #msg = "Invalid control character {0!r} at".format(terminator)
                raise JSONDecodeError(msg, s, end)
            else:
                # Non-strict mode: keep the literal control character.
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence: the four hex digits after '\u'.
            esc = s[end + 1:end + 5]
            next_end = end + 5
            if len(esc) != 4:
                msg = "Invalid \\uXXXX escape"
                raise JSONDecodeError(msg, s, end)
            uni = int(esc, 16)
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise JSONDecodeError(msg, s, end)
                esc2 = s[end + 7:end + 11]
                if len(esc2) != 4:
                    raise JSONDecodeError(msg, s, end)
                uni2 = int(esc2, 16)
                # Combine high and low surrogates into one code point.
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
171 | ||
172 | ||
# Use speedup if available; fall back to the pure-Python scanner.
scanstring = c_scanstring or py_scanstring

# Insignificant whitespace permitted between JSON tokens.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'
178 | ||
def JSONObject((s, end), encoding, strict, scan_once, object_hook,
        object_pairs_hook, memo=None,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object from ``s`` starting just after its ``{``.

    ``end`` is the index of the first character after the opening brace.
    Returns ``(obj, end)`` where ``end`` is one past the closing ``}``.
    ``memo`` interns repeated key strings so they share one object.
    """
    # Backwards compatibility
    if memo is None:
        memo = {}
    memo_get = memo.setdefault
    pairs = []
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)
        key = memo_get(key, key)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting : delimiter", s, end)

        end += 1

        # Cheap skip of one or two whitespace chars before the regex
        # fallback; IndexError just means end-of-input here.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        pairs.append((key, value))

        # After the value, skip whitespace and expect ',' or '}'.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        # Skip whitespace before the next key's opening quote.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end - 1)

    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
270 | ||
def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    # Parse a JSON array from ``s`` starting just past the opening '['
    # (index ``end``); return (list_of_values, index_past_closing_bracket).
    # Raises JSONDecodeError on malformed input.
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            # scan_once signals "no value here" via StopIteration.
            raise JSONDecodeError("Expecting object", s, end)
        _append(value)
        # Skip any whitespace after the value, then read the delimiter,
        # which must be either ',' (continue) or ']' (done).
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting , delimiter", s, end)

        # Fast path: skip one or two whitespace characters after the
        # comma before falling back to the whitespace regex.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
306 | ||
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """
        *encoding* determines the encoding used to interpret any
        :class:`str` objects decoded by this instance (``'utf-8'`` by
        default).  It has no effect when decoding :class:`unicode` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as :class:`unicode`.

        *object_hook*, if specified, will be called with the result of every
        JSON object decoded and its return value will be used in place of the
        given :class:`dict`.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        *object_pairs_hook* is an optional function that will be called with
        the result of any object literal decode with an ordered list of pairs.
        The return value of *object_pairs_hook* will be used instead of the
        :class:`dict`.  This feature can be used to implement custom decoders
        that rely on the order that the key and value pairs are decoded (for
        example, :func:`collections.OrderedDict` will remember the order of
        insertion). If *object_hook* is also defined, the *object_pairs_hook*
        takes priority.

        *parse_float*, if specified, will be called with the string of every
        JSON float to be decoded.  By default, this is equivalent to
        ``float(num_str)``. This can be used to use another datatype or parser
        for JSON floats (e.g. :class:`decimal.Decimal`).

        *parse_int*, if specified, will be called with the string of every
        JSON int to be decoded.  By default, this is equivalent to
        ``int(num_str)``.  This can be used to use another datatype or parser
        for JSON integers (e.g. :class:`float`).

        *parse_constant*, if specified, will be called with one of the
        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
        can be used to raise an exception if invalid JSON numbers are
        encountered.

        *strict* controls the parser's behavior when it encounters an
        invalid control character in a string. The default setting of
        ``True`` means that unescaped control characters are parse errors, if
        ``False`` then control characters will be allowed in strings.

        """
        self.encoding = encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Fall back to the builtin converters when no hook is supplied.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        # Entry points the scanner dispatches to for each JSON type.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Caches object key strings within a single decode so repeated
        # keys share one string object; cleared by the scanner.
        self.memo = {}
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        # Anything other than trailing whitespace after the document is
        # an error for decode() (but allowed by raw_decode()).
        if end != len(s):
            raise JSONDecodeError("Extra data", s, end, len(s))
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise JSONDecodeError("No JSON object could be decoded", s, idx)
        return obj, end
0 | """Implementation of JSONEncoder | |
1 | """ | |
2 | import re | |
3 | from decimal import Decimal | |
4 | ||
5 | def _import_speedups(): | |
6 | try: | |
7 | from . import _speedups | |
8 | return _speedups.encode_basestring_ascii, _speedups.make_encoder | |
9 | except ImportError: | |
10 | return None, None | |
11 | c_encode_basestring_ascii, c_make_encoder = _import_speedups() | |
12 | ||
13 | from .decoder import PosInf | |
14 | ||
# Characters that must be escaped inside a JSON string.  U+2028 and
# U+2029 are included because they are line terminators in JavaScript
# and would break embedded output.
ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
# ASCII mode: escape backslash, quote, and everything outside 0x20-0x7e.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects byte strings containing non-ASCII (presumably UTF-8) data.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Short-form escapes; remaining control characters get \uXXXX below.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
    u'\u2028': '\\u2028',
    u'\u2029': '\\u2029',
}
for i in range(0x20):
    #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Floats are formatted with repr().
FLOAT_REPR = repr
34 | ||
def encode_basestring(s):
    """Return a double-quoted JSON representation of a Python string.

    Byte strings containing non-ASCII data are decoded as UTF-8 first;
    characters needing escapes are replaced via the ESCAPE_DCT table.
    """
    if isinstance(s, str):
        if HAS_UTF8.search(s) is not None:
            s = s.decode('utf-8')
    _lookup = ESCAPE_DCT.__getitem__
    return u'"' + ESCAPE.sub(lambda m: _lookup(m.group(0)), s) + u'"'
44 | ||
45 | ||
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    Characters outside the printable ASCII range are emitted as \\uXXXX
    escapes, using UTF-16 surrogate pairs for astral characters.
    """
    if isinstance(s, str):
        if HAS_UTF8.search(s) is not None:
            s = s.decode('utf-8')
    def replace(match):
        ch = match.group(0)
        short = ESCAPE_DCT.get(ch)
        if short is not None:
            return short
        code = ord(ch)
        if code < 0x10000:
            return '\\u%04x' % (code,)
        # Astral character: emit a UTF-16 surrogate pair.
        code -= 0x10000
        lead = 0xd800 | ((code >> 10) & 0x3ff)
        trail = 0xdc00 | (code & 0x3ff)
        return '\\u%04x\\u%04x' % (lead, trail)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'


# Prefer the C implementation from _speedups when it loaded.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
73 | ||
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict, namedtuple  | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Default separators; overridden by the *separators* / *indent*
    # constructor arguments.
    item_separator = ', '
    key_separator = ': '
    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None,
            use_decimal=True, namedtuple_as_object=True,
            tuple_as_array=True):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting. ``None`` (the default) selects the most compact
        representation without any newlines. For backwards compatibility with
        versions of simplejson earlier than 2.1.0, an integer is also accepted
        and is converted to a string with that many spaces.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        If use_decimal is true (not the default), ``decimal.Decimal`` will
        be supported directly by the encoder. For the inverse, decode JSON
        with ``parse_float=decimal.Decimal``.

        If namedtuple_as_object is true (the default), objects with
        ``_asdict()`` methods will be encoded as JSON objects.

        If tuple_as_array is true (the default), tuple (and subclasses) will
        be encoded as JSON arrays.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.use_decimal = use_decimal
        self.namedtuple_as_object = namedtuple_as_object
        self.tuple_as_array = tuple_as_array
        # Pre-2.1.0 compatibility: an integer indent means that many spaces.
        if isinstance(indent, (int, long)):
            indent = ' ' * indent
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Avoid trailing whitespace when pretty-printing.
            self.item_separator = ','
        if default is not None:
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        if self.encoding != 'utf-8':
            # Wrap the encoder so byte strings are decoded with the
            # configured encoding before being escaped.
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                # NaN is the only float that is not equal to itself.
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text


        # key_memo deduplicates encoded key strings in the C encoder.
        key_memo = {}
        # Use the C-accelerated one-shot encoder when available; it does
        # not support indentation, so fall back to Python in that case.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array)
        try:
            return _iterencode(o, 0)
        finally:
            key_memo.clear()
298 | ||
299 | ||
class JSONEncoderForHTML(JSONEncoder):
    """An encoder that produces JSON safe to embed in HTML.

    To embed JSON content in, say, a script tag on a web page, the
    characters &, < and > should be escaped. They cannot be escaped
    with the usual entities (e.g. &amp;) because they are not expanded
    within <script> tags.
    """

    def encode(self, o):
        # Bypass JSONEncoder.encode: its fast paths for bare strings
        # would skip the HTML escaping done in iterencode below.
        joiner = '' if self.ensure_ascii else u''
        return joiner.join(self.iterencode(o, True))

    def iterencode(self, o, _one_shot=False):
        for chunk in super(JSONEncoderForHTML, self).iterencode(
                o, _one_shot):
            yield (chunk.replace('&', '\\u0026')
                        .replace('<', '\\u003c')
                        .replace('>', '\\u003e'))
325 | ||
326 | ||
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal, _namedtuple_as_object, _tuple_as_array,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        Decimal=Decimal,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    # Build and return the pure-Python ``_iterencode(o, indent_level)``
    # generator.  ``markers`` (id -> object), when not None, is used for
    # circular reference detection.

    def _iterencode_list(lst, _current_indent_level):
        # Yield string chunks for a JSON array.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                # After the first item the pending prefix is just the
                # separator; it is emitted together with the next value.
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                # Container or custom object: yield the prefix, then
                # delegate to the appropriate sub-generator.
                yield buf
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                            _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Yield string chunks for a JSON object.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                            _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Top-level dispatch: scalars are yielded directly; containers
        # are delegated; anything else goes through _default().
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, list):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        else:
            _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
            if _asdict and callable(_asdict):
                for chunk in _iterencode_dict(_asdict(), _current_indent_level):
                    yield chunk
            elif (_tuple_as_array and isinstance(o, tuple)):
                for chunk in _iterencode_list(o, _current_indent_level):
                    yield chunk
            elif isinstance(o, dict):
                for chunk in _iterencode_dict(o, _current_indent_level):
                    yield chunk
            elif _use_decimal and isinstance(o, Decimal):
                yield str(o)
            else:
                # Track the object while encoding default()'s result so
                # a default() that returns its argument cannot recurse.
                if markers is not None:
                    markerid = id(o)
                    if markerid in markers:
                        raise ValueError("Circular reference detected")
                    markers[markerid] = o
                o = _default(o)
                for chunk in _iterencode(o, _current_indent_level):
                    yield chunk
                if markers is not None:
                    del markers[markerid]

    return _iterencode
0 | """Drop-in replacement for collections.OrderedDict by Raymond Hettinger | |
1 | ||
2 | http://code.activestate.com/recipes/576693/ | |
3 | ||
4 | """ | |
5 | from UserDict import DictMixin | |
6 | ||
# Modified from original to support Python 2.4, see
# http://code.google.com/p/simplejson/issues/detail?id=53
try:
    # Probe for the builtin; Python 2.4 does not have all().
    all
except NameError:
    def all(seq):
        """Fallback all() for Python 2.4, which lacks the builtin."""
        for item in seq:
            if item:
                continue
            return False
        return True
17 | ||
class OrderedDict(dict, DictMixin):
    """Dictionary that remembers insertion order.

    Internal layout: ``self.__map`` maps each key to a ``[key, prev,
    next]`` node in a circular doubly linked list whose sentinel is
    ``self.__end``; the dict itself stores the values.
    """

    def __init__(self, *args, **kwds):
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            self.__end
        except AttributeError:
            # First initialization: create the sentinel and key map.
            self.clear()
        self.update(*args, **kwds)

    def clear(self):
        self.__end = end = []
        end += [None, end, end]         # sentinel node for doubly linked list
        self.__map = {}                 # key --> [key, prev, next]
        dict.clear(self)

    def __setitem__(self, key, value):
        if key not in self:
            # Link a new node just before the sentinel (i.e. at the tail).
            end = self.__end
            curr = end[1]
            curr[2] = end[1] = self.__map[key] = [key, curr, end]
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        dict.__delitem__(self, key)
        # Unlink the node from the doubly linked list.
        key, prev, next = self.__map.pop(key)
        prev[2] = next
        next[1] = prev

    def __iter__(self):
        # Walk forward from the sentinel, yielding keys in insert order.
        end = self.__end
        curr = end[2]
        while curr is not end:
            yield curr[0]
            curr = curr[2]

    def __reversed__(self):
        # Walk backward from the sentinel.
        end = self.__end
        curr = end[1]
        while curr is not end:
            yield curr[0]
            curr = curr[1]

    def popitem(self, last=True):
        # Remove and return a (key, value) pair; LIFO order by default.
        if not self:
            raise KeyError('dictionary is empty')
        # Modified from original to support Python 2.4, see
        # http://code.google.com/p/simplejson/issues/detail?id=53
        if last:
            key = reversed(self).next()
        else:
            key = iter(self).next()
        value = self.pop(key)
        return key, value

    def __reduce__(self):
        # Pickle support: serialize as (class, (items,)[, instance dict])
        # while temporarily hiding the unpicklable linked-list state.
        items = [[k, self[k]] for k in self]
        tmp = self.__map, self.__end
        del self.__map, self.__end
        inst_dict = vars(self).copy()
        self.__map, self.__end = tmp
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    def keys(self):
        return list(self)

    # Generic implementations from DictMixin build on the primitives above.
    setdefault = DictMixin.setdefault
    update = DictMixin.update
    pop = DictMixin.pop
    values = DictMixin.values
    items = DictMixin.items
    iterkeys = DictMixin.iterkeys
    itervalues = DictMixin.itervalues
    iteritems = DictMixin.iteritems

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, self.items())

    def copy(self):
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        # Order-sensitive comparison against another OrderedDict;
        # order-insensitive against a plain dict.
        if isinstance(other, OrderedDict):
            return len(self)==len(other) and \
                   all(p==q for p, q in zip(self.items(), other.items()))
        return dict.__eq__(self, other)

    def __ne__(self, other):
        return not self == other
0 | """JSON token scanner | |
1 | """ | |
2 | import re | |
3 | def _import_c_make_scanner(): | |
4 | try: | |
5 | from ._speedups import make_scanner | |
6 | return make_scanner | |
7 | except ImportError: | |
8 | return None | |
9 | c_make_scanner = _import_c_make_scanner() | |
10 | ||
__all__ = ['make_scanner']

# Matches a JSON number: integer part with optional fraction and
# optional exponent, captured as three separate groups.
NUMBER_RE = re.compile(
    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
    (re.VERBOSE | re.MULTILINE | re.DOTALL))
16 | ||
def py_make_scanner(context):
    """Build a ``scan_once(string, idx)`` callable from a decoder *context*.

    The returned function parses one JSON value starting at *idx* and
    returns a ``(value, end_index)`` pair; it raises ``StopIteration``
    when no value can be parsed at that position.
    """
    # Hoist context attributes into locals for fast closure access.
    parse_object = context.parse_object
    parse_array = context.parse_array
    parse_string = context.parse_string
    match_number = NUMBER_RE.match
    encoding = context.encoding
    strict = context.strict
    parse_float = context.parse_float
    parse_int = context.parse_int
    parse_constant = context.parse_constant
    object_hook = context.object_hook
    object_pairs_hook = context.object_pairs_hook
    memo = context.memo

    def _scan_once(string, idx):
        try:
            ch = string[idx]
        except IndexError:
            raise StopIteration

        if ch == '"':
            return parse_string(string, idx + 1, encoding, strict)
        if ch == '{':
            return parse_object((string, idx + 1), encoding, strict,
                _scan_once, object_hook, object_pairs_hook, memo)
        if ch == '[':
            return parse_array((string, idx + 1), _scan_once)
        if ch == 'n' and string[idx:idx + 4] == 'null':
            return None, idx + 4
        if ch == 't' and string[idx:idx + 4] == 'true':
            return True, idx + 4
        if ch == 'f' and string[idx:idx + 5] == 'false':
            return False, idx + 5

        number = match_number(string, idx)
        if number is not None:
            integer, frac, exp = number.groups()
            if frac or exp:
                value = parse_float(integer + (frac or '') + (exp or ''))
            else:
                value = parse_int(integer)
            return value, number.end()
        if ch == 'N' and string[idx:idx + 3] == 'NaN':
            return parse_constant('NaN'), idx + 3
        if ch == 'I' and string[idx:idx + 8] == 'Infinity':
            return parse_constant('Infinity'), idx + 8
        if ch == '-' and string[idx:idx + 9] == '-Infinity':
            return parse_constant('-Infinity'), idx + 9
        raise StopIteration

    def scan_once(string, idx):
        # The memo caches object key strings during one decode; it must
        # not leak entries across top-level calls.
        try:
            return _scan_once(string, idx)
        finally:
            memo.clear()

    return scan_once
75 | ||
76 | make_scanner = c_make_scanner or py_make_scanner |
0 | r"""Command-line tool to validate and pretty-print JSON | |
1 | ||
2 | Usage:: | |
3 | ||
4 | $ echo '{"json":"obj"}' | python -m simplejson.tool | |
5 | { | |
6 | "json": "obj" | |
7 | } | |
8 | $ echo '{ 1.2:3.4}' | python -m simplejson.tool | |
9 | Expecting property name: line 1 column 2 (char 2) | |
10 | ||
11 | """ | |
12 | import sys | |
13 | import simplejson as json | |
14 | ||
def main():
    """Validate JSON from infile (or stdin) and pretty-print it to outfile
    (or stdout).

    Usage: tool.py [infile [outfile]]

    Uses OrderedDict to preserve key order and Decimal to preserve number
    precision.  Exits via SystemExit with the parse error message when the
    input is not valid JSON, or with a usage message on bad arguments.
    """
    if len(sys.argv) == 1:
        infile = sys.stdin
        outfile = sys.stdout
    elif len(sys.argv) == 2:
        infile = open(sys.argv[1], 'rb')
        outfile = sys.stdout
    elif len(sys.argv) == 3:
        infile = open(sys.argv[1], 'rb')
        outfile = open(sys.argv[2], 'wb')
    else:
        raise SystemExit(sys.argv[0] + " [infile [outfile]]")
    try:
        obj = json.load(infile,
                        object_pairs_hook=json.OrderedDict,
                        use_decimal=True)
    # NOTE: 'except ValueError, e' (the old Python 2-only spelling) is a
    # syntax error on Python 3; 'as' works on Python 2.6+ and 3.x.
    except ValueError as e:
        raise SystemExit(e)
    finally:
        # Don't leak the file handle we opened (stdin stays open).
        if infile is not sys.stdin:
            infile.close()
    json.dump(obj, outfile, sort_keys=True, indent='    ', use_decimal=True)
    outfile.write('\n')
    if outfile is not sys.stdout:
        outfile.close()


if __name__ == '__main__':
    main()
0 | /* | |
1 | ** Copyright (C) 2005-2010 Erik de Castro Lopo <erikd@mega-nerd.com> | |
2 | ** | |
3 | ** All rights reserved. | |
4 | ** | |
5 | ** Redistribution and use in source and binary forms, with or without | |
6 | ** modification, are permitted provided that the following conditions are | |
7 | ** met: | |
8 | ** | |
9 | ** * Redistributions of source code must retain the above copyright | |
10 | ** notice, this list of conditions and the following disclaimer. | |
11 | ** * Redistributions in binary form must reproduce the above copyright | |
12 | ** notice, this list of conditions and the following disclaimer in | |
13 | ** the documentation and/or other materials provided with the | |
14 | ** distribution. | |
15 | ** * Neither the author nor the names of any contributors may be used | |
16 | ** to endorse or promote products derived from this software without | |
17 | ** specific prior written permission. | |
18 | ** | |
19 | ** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
20 | ** "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | ** TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | ** PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
23 | ** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
24 | ** EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
25 | ** PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |
26 | ** OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
27 | ** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
28 | ** OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |
29 | ** ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | |
31 | ||
32 | /* | |
33 | ** The above modified BSD style license (GPL and LGPL compatible) applies to | |
34 | ** this file. It does not apply to libsndfile itself which is released under | |
35 | ** the GNU LGPL or the libsndfile test suite which is released under the GNU | |
36 | ** GPL. | |
37 | ** This means that this header file can be used under this modified BSD style | |
38 | ** license, but the LGPL still holds for the libsndfile library itself. | |
39 | */ | |
40 | ||
41 | /* | |
42 | ** sndfile.hh -- A lightweight C++ wrapper for the libsndfile API. | |
43 | ** | |
44 | ** All the methods are inlines and all functionality is contained in this | |
45 | ** file. There is no separate implementation file. | |
46 | ** | |
47 | ** API documentation is in the doc/ directory of the source code tarball | |
48 | ** and at http://www.mega-nerd.com/libsndfile/api.html. | |
49 | */ | |
50 | ||
51 | #ifndef SNDFILE_HH | |
52 | #define SNDFILE_HH | |
53 | ||
54 | #include <sndfile.h> | |
55 | ||
56 | #include <string> | |
57 | #include <new> // for std::nothrow | |
58 | ||
59 | class SndfileHandle | |
60 | { private : | |
61 | struct SNDFILE_ref | |
62 | { SNDFILE_ref (void) ; | |
63 | ~SNDFILE_ref (void) ; | |
64 | ||
65 | SNDFILE *sf ; | |
66 | SF_INFO sfinfo ; | |
67 | int ref ; | |
68 | } ; | |
69 | ||
70 | SNDFILE_ref *p ; | |
71 | ||
72 | public : | |
73 | /* Default constructor */ | |
74 | SndfileHandle (void) : p (NULL) {} ; | |
75 | SndfileHandle (const char *path, int mode = SFM_READ, | |
76 | int format = 0, int channels = 0, int samplerate = 0) ; | |
77 | SndfileHandle (std::string const & path, int mode = SFM_READ, | |
78 | int format = 0, int channels = 0, int samplerate = 0) ; | |
79 | SndfileHandle (int fd, bool close_desc, int mode = SFM_READ, | |
80 | int format = 0, int channels = 0, int samplerate = 0) ; | |
81 | ~SndfileHandle (void) ; | |
82 | ||
83 | SndfileHandle (const SndfileHandle &orig) ; | |
84 | SndfileHandle & operator = (const SndfileHandle &rhs) ; | |
85 | ||
86 | /* Mainly for debugging/testing. */ | |
87 | int refCount (void) const { return (p == NULL) ? 0 : p->ref ; } | |
88 | ||
89 | operator bool () const { return (p != NULL) ; } | |
90 | ||
91 | bool operator == (const SndfileHandle &rhs) const { return (p == rhs.p) ; } | |
92 | ||
93 | sf_count_t frames (void) const { return p ? p->sfinfo.frames : 0 ; } | |
94 | int format (void) const { return p ? p->sfinfo.format : 0 ; } | |
95 | int channels (void) const { return p ? p->sfinfo.channels : 0 ; } | |
96 | int samplerate (void) const { return p ? p->sfinfo.samplerate : 0 ; } | |
97 | ||
98 | int error (void) const ; | |
99 | const char * strError (void) const ; | |
100 | ||
101 | int command (int cmd, void *data, int datasize) ; | |
102 | ||
103 | sf_count_t seek (sf_count_t frames, int whence) ; | |
104 | ||
105 | void writeSync (void) ; | |
106 | ||
107 | int setString (int str_type, const char* str) ; | |
108 | ||
109 | const char* getString (int str_type) const ; | |
110 | ||
111 | static int formatCheck (int format, int channels, int samplerate) ; | |
112 | ||
113 | sf_count_t read (short *ptr, sf_count_t items) ; | |
114 | sf_count_t read (int *ptr, sf_count_t items) ; | |
115 | sf_count_t read (float *ptr, sf_count_t items) ; | |
116 | sf_count_t read (double *ptr, sf_count_t items) ; | |
117 | ||
118 | sf_count_t write (const short *ptr, sf_count_t items) ; | |
119 | sf_count_t write (const int *ptr, sf_count_t items) ; | |
120 | sf_count_t write (const float *ptr, sf_count_t items) ; | |
121 | sf_count_t write (const double *ptr, sf_count_t items) ; | |
122 | ||
123 | sf_count_t readf (short *ptr, sf_count_t frames) ; | |
124 | sf_count_t readf (int *ptr, sf_count_t frames) ; | |
125 | sf_count_t readf (float *ptr, sf_count_t frames) ; | |
126 | sf_count_t readf (double *ptr, sf_count_t frames) ; | |
127 | ||
128 | sf_count_t writef (const short *ptr, sf_count_t frames) ; | |
129 | sf_count_t writef (const int *ptr, sf_count_t frames) ; | |
130 | sf_count_t writef (const float *ptr, sf_count_t frames) ; | |
131 | sf_count_t writef (const double *ptr, sf_count_t frames) ; | |
132 | ||
133 | sf_count_t readRaw (void *ptr, sf_count_t bytes) ; | |
134 | sf_count_t writeRaw (const void *ptr, sf_count_t bytes) ; | |
135 | ||
136 | SNDFILE * rawHandle(void); /**< raw access to the handle. SndfileHandle keeps ownership */ | |
137 | SNDFILE * takeOwnership(void); /**< take ownership of handle, iff reference count is 1 */ | |
138 | } ; | |
139 | ||
140 | /*============================================================================== | |
141 | ** Nothing but implementation below. | |
142 | */ | |
143 | ||
144 | inline | |
145 | SndfileHandle::SNDFILE_ref::SNDFILE_ref (void) | |
146 | : ref (1) | |
147 | {} | |
148 | ||
149 | inline | |
150 | SndfileHandle::SNDFILE_ref::~SNDFILE_ref (void) | |
151 | { if (sf != NULL) sf_close (sf) ; } | |
152 | ||
153 | inline | |
154 | SndfileHandle::SndfileHandle (const char *path, int mode, int fmt, int chans, int srate) | |
155 | : p (NULL) | |
156 | { | |
157 | p = new (std::nothrow) SNDFILE_ref () ; | |
158 | ||
159 | if (p != NULL) | |
160 | { p->ref = 1 ; | |
161 | ||
162 | p->sfinfo.frames = 0 ; | |
163 | p->sfinfo.channels = chans ; | |
164 | p->sfinfo.format = fmt ; | |
165 | p->sfinfo.samplerate = srate ; | |
166 | p->sfinfo.sections = 0 ; | |
167 | p->sfinfo.seekable = 0 ; | |
168 | ||
169 | p->sf = sf_open (path, mode, &p->sfinfo) ; | |
170 | } ; | |
171 | ||
172 | return ; | |
173 | } /* SndfileHandle const char * constructor */ | |
174 | ||
175 | inline | |
176 | SndfileHandle::SndfileHandle (std::string const & path, int mode, int fmt, int chans, int srate) | |
177 | : p (NULL) | |
178 | { | |
179 | p = new (std::nothrow) SNDFILE_ref () ; | |
180 | ||
181 | if (p != NULL) | |
182 | { p->ref = 1 ; | |
183 | ||
184 | p->sfinfo.frames = 0 ; | |
185 | p->sfinfo.channels = chans ; | |
186 | p->sfinfo.format = fmt ; | |
187 | p->sfinfo.samplerate = srate ; | |
188 | p->sfinfo.sections = 0 ; | |
189 | p->sfinfo.seekable = 0 ; | |
190 | ||
191 | p->sf = sf_open (path.c_str (), mode, &p->sfinfo) ; | |
192 | } ; | |
193 | ||
194 | return ; | |
195 | } /* SndfileHandle std::string constructor */ | |
196 | ||
197 | inline | |
198 | SndfileHandle::SndfileHandle (int fd, bool close_desc, int mode, int fmt, int chans, int srate) | |
199 | : p (NULL) | |
200 | { | |
201 | if (fd < 0) | |
202 | return ; | |
203 | ||
204 | p = new (std::nothrow) SNDFILE_ref () ; | |
205 | ||
206 | if (p != NULL) | |
207 | { p->ref = 1 ; | |
208 | ||
209 | p->sfinfo.frames = 0 ; | |
210 | p->sfinfo.channels = chans ; | |
211 | p->sfinfo.format = fmt ; | |
212 | p->sfinfo.samplerate = srate ; | |
213 | p->sfinfo.sections = 0 ; | |
214 | p->sfinfo.seekable = 0 ; | |
215 | ||
216 | p->sf = sf_open_fd (fd, mode, &p->sfinfo, close_desc) ; | |
217 | } ; | |
218 | ||
219 | return ; | |
220 | } /* SndfileHandle fd constructor */ | |
221 | ||
222 | inline | |
223 | SndfileHandle::~SndfileHandle (void) | |
224 | { if (p != NULL && --p->ref == 0) | |
225 | delete p ; | |
226 | } /* SndfileHandle destructor */ | |
227 | ||
228 | ||
229 | inline | |
230 | SndfileHandle::SndfileHandle (const SndfileHandle &orig) | |
231 | : p (orig.p) | |
232 | { if (p != NULL) | |
233 | ++p->ref ; | |
234 | } /* SndfileHandle copy constructor */ | |
235 | ||
/* Copy assignment: release our current reference, then share rhs's state.
** The order matters — self-assignment must be rejected before the
** release, or we could delete the very state we are about to adopt. */
inline SndfileHandle &
SndfileHandle::operator = (const SndfileHandle &rhs)
{
	if (&rhs == this)
		return *this ;
	/* Release the old reference (closes the file if we were the last). */
	if (p != NULL && --p->ref == 0)
		delete p ;

	p = rhs.p ;
	if (p != NULL)
		++p->ref ;

	return *this ;
} /* SndfileHandle assignment operator */
250 | ||
/*
** Thin forwarders to the corresponding sf_* functions.
** NOTE(review): these dereference p without a NULL check, so calling any
** of them on a default-constructed (empty) handle is undefined — confirm
** callers always test the handle (operator bool) first.
*/

inline int
SndfileHandle::error (void) const
{	return sf_error (p->sf) ; }

inline const char *
SndfileHandle::strError (void) const
{	return sf_strerror (p->sf) ; }

inline int
SndfileHandle::command (int cmd, void *data, int datasize)
{	return sf_command (p->sf, cmd, data, datasize) ; }

inline sf_count_t
SndfileHandle::seek (sf_count_t frame_count, int whence)
{	return sf_seek (p->sf, frame_count, whence) ; }

inline void
SndfileHandle::writeSync (void)
{	sf_write_sync (p->sf) ; }

inline int
SndfileHandle::setString (int str_type, const char* str)
{	return sf_set_string (p->sf, str_type, str) ; }

inline const char*
SndfileHandle::getString (int str_type) const
{	return sf_get_string (p->sf, str_type) ; }
278 | ||
279 | inline int | |
280 | SndfileHandle::formatCheck (int fmt, int chans, int srate) | |
281 | { | |
282 | SF_INFO sfinfo ; | |
283 | ||
284 | sfinfo.frames = 0 ; | |
285 | sfinfo.channels = chans ; | |
286 | sfinfo.format = fmt ; | |
287 | sfinfo.samplerate = srate ; | |
288 | sfinfo.sections = 0 ; | |
289 | sfinfo.seekable = 0 ; | |
290 | ||
291 | return sf_format_check (&sfinfo) ; | |
292 | } | |
293 | ||
/*---------------------------------------------------------------------*/

/*
** Typed read/write forwarders.  read/write count in items, readf/writef
** in frames, readRaw/writeRaw in bytes; each forwards to the matching
** sf_* function.  NOTE(review): like the accessors above, these assume a
** non-empty handle (p is dereferenced unchecked).
*/

inline sf_count_t
SndfileHandle::read (short *ptr, sf_count_t items)
{	return sf_read_short (p->sf, ptr, items) ; }

inline sf_count_t
SndfileHandle::read (int *ptr, sf_count_t items)
{	return sf_read_int (p->sf, ptr, items) ; }

inline sf_count_t
SndfileHandle::read (float *ptr, sf_count_t items)
{	return sf_read_float (p->sf, ptr, items) ; }

inline sf_count_t
SndfileHandle::read (double *ptr, sf_count_t items)
{	return sf_read_double (p->sf, ptr, items) ; }

inline sf_count_t
SndfileHandle::write (const short *ptr, sf_count_t items)
{	return sf_write_short (p->sf, ptr, items) ; }

inline sf_count_t
SndfileHandle::write (const int *ptr, sf_count_t items)
{	return sf_write_int (p->sf, ptr, items) ; }

inline sf_count_t
SndfileHandle::write (const float *ptr, sf_count_t items)
{	return sf_write_float (p->sf, ptr, items) ; }

inline sf_count_t
SndfileHandle::write (const double *ptr, sf_count_t items)
{	return sf_write_double (p->sf, ptr, items) ; }

inline sf_count_t
SndfileHandle::readf (short *ptr, sf_count_t frame_count)
{	return sf_readf_short (p->sf, ptr, frame_count) ; }

inline sf_count_t
SndfileHandle::readf (int *ptr, sf_count_t frame_count)
{	return sf_readf_int (p->sf, ptr, frame_count) ; }

inline sf_count_t
SndfileHandle::readf (float *ptr, sf_count_t frame_count)
{	return sf_readf_float (p->sf, ptr, frame_count) ; }

inline sf_count_t
SndfileHandle::readf (double *ptr, sf_count_t frame_count)
{	return sf_readf_double (p->sf, ptr, frame_count) ; }

inline sf_count_t
SndfileHandle::writef (const short *ptr, sf_count_t frame_count)
{	return sf_writef_short (p->sf, ptr, frame_count) ; }

inline sf_count_t
SndfileHandle::writef (const int *ptr, sf_count_t frame_count)
{	return sf_writef_int (p->sf, ptr, frame_count) ; }

inline sf_count_t
SndfileHandle::writef (const float *ptr, sf_count_t frame_count)
{	return sf_writef_float (p->sf, ptr, frame_count) ; }

inline sf_count_t
SndfileHandle::writef (const double *ptr, sf_count_t frame_count)
{	return sf_writef_double (p->sf, ptr, frame_count) ; }

inline sf_count_t
SndfileHandle::readRaw (void *ptr, sf_count_t bytes)
{	return sf_read_raw (p->sf, ptr, bytes) ; }

inline sf_count_t
SndfileHandle::writeRaw (const void *ptr, sf_count_t bytes)
{	return sf_write_raw (p->sf, ptr, bytes) ; }
367 | ||
368 | inline SNDFILE * | |
369 | SndfileHandle::rawHandle(void) | |
370 | { | |
371 | if (p) | |
372 | return p->sf; | |
373 | else | |
374 | return NULL; | |
375 | } | |
376 | ||
/* Detach and return the underlying SNDFILE*, transferring ownership to
** the caller.  Returns NULL (and changes nothing) unless this handle is
** the sole reference. */
inline SNDFILE *
SndfileHandle::takeOwnership(void)
{
	if (!p || (p->ref != 1))
		return NULL;

	SNDFILE * ret = p->sf;
	/* Null out sf before deleting p so ~SNDFILE_ref does not close the
	** handle we are handing to the caller. */
	p->sf = NULL;
	delete p;
	p = NULL;
	return ret;
}
389 | ||
390 | #ifdef ENABLE_SNDFILE_WINDOWS_PROTOTYPES | |
391 | ||
/* Wide-character path constructor (Windows only; LPCWSTR comes from the
** Windows headers via sndfile.h when ENABLE_SNDFILE_WINDOWS_PROTOTYPES
** is defined).  Mirrors the const char * constructor but opens via
** sf_wchar_open. */
inline
SndfileHandle::SndfileHandle (LPCWSTR wpath, int mode, int fmt, int chans, int srate)
: p (NULL)
{
	p = new (std::nothrow) SNDFILE_ref () ;

	if (p != NULL)
	{	p->ref = 1 ;

		p->sfinfo.frames = 0 ;
		p->sfinfo.channels = chans ;
		p->sfinfo.format = fmt ;
		p->sfinfo.samplerate = srate ;
		p->sfinfo.sections = 0 ;
		p->sfinfo.seekable = 0 ;

		p->sf = sf_wchar_open (wpath, mode, &p->sfinfo) ;
	} ;

	return ;
} /* SndfileHandle const wchar_t * constructor */
413 | ||
414 | #endif | |
415 | ||
416 | #endif /* SNDFILE_HH */ | |
417 |