diff --git a/PKG-INFO b/PKG-INFO index 5d38520..8570be6 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: cbor -Version: 0.1.21 +Version: 0.1.24 Summary: RFC 7049 - Concise Binary Object Representation Home-page: https://bitbucket.org/bodhisnarkva/cbor Author: Brian Olson diff --git a/c/cbormodule.c b/c/cbormodule.c index 8d68e5a..f400604 100644 --- a/c/cbormodule.c +++ b/c/cbormodule.c @@ -28,6 +28,10 @@ #define IS_PY3 1 #endif + +typedef struct { + unsigned int sort_keys; +} EncodeOptions; // Hey Look! It's a polymorphic object structure in C! @@ -995,28 +999,49 @@ return; } -static int inner_dumps(PyObject* ob, uint8_t* out, uintptr_t* posp); - -static int dumps_dict(PyObject* ob, uint8_t* out, uintptr_t* posp) { +static int inner_dumps(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp); + +static int dumps_dict(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) { uintptr_t pos = *posp; - Py_ssize_t dictiter = 0; + Py_ssize_t dictlen = PyDict_Size(ob); PyObject* key; PyObject* val; - Py_ssize_t dictlen = PyDict_Size(ob); int err; + tag_aux_out(CBOR_MAP, dictlen, out, &pos); - while (PyDict_Next(ob, &dictiter, &key, &val)) { - err = inner_dumps(key, out, &pos); - if (err != 0) { return err; } - err = inner_dumps(val, out, &pos); - if (err != 0) { return err; } - } + + if (optp->sort_keys) { + Py_ssize_t index = 0; + PyObject* keylist = PyDict_Keys(ob); + PyList_Sort(keylist); + + //fprintf(stderr, "sortking keys\n"); + for (index = 0; index < PyList_Size(keylist); index++) { + key = PyList_GetItem(keylist, index); // Borrowed ref + val = PyDict_GetItem(ob, key); // Borrowed ref + err = inner_dumps(optp, key, out, &pos); + if (err != 0) { return err; } + err = inner_dumps(optp, val, out, &pos); + if (err != 0) { return err; } + } + Py_DECREF(keylist); + } else { + Py_ssize_t dictiter = 0; + //fprintf(stderr, "unsorted keys\n"); + while (PyDict_Next(ob, &dictiter, &key, &val)) { + err = inner_dumps(optp, key, out, &pos); + if (err != 0) { return err; } + err = inner_dumps(optp, val, out, &pos); + if (err != 0) { return err; } + } + } + *posp = pos; return 0; } -static void dumps_bignum(uint8_t tag, PyObject* val, uint8_t* out, uintptr_t* posp) { +static void dumps_bignum(EncodeOptions *optp, uint8_t tag, PyObject* val, uint8_t* out, uintptr_t* posp) { uintptr_t pos = (posp != NULL) ? *posp : 0; PyObject* eight = PyLong_FromLong(8); PyObject* bytemask = NULL; @@ -1066,7 +1091,7 @@ *posp = pos; } -static int dumps_tag(PyObject* ob, uint8_t* out, uintptr_t* posp) { +static int dumps_tag(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) { uintptr_t pos = (posp != NULL) ? *posp : 0; int err = 0; @@ -1084,7 +1109,7 @@ long val = PyInt_AsLong(tag_num); if (val > 0) { tag_aux_out(CBOR_TAG, val, out, &pos); - err = inner_dumps(tag_value, out, &pos); + err = inner_dumps(optp, tag_value, out, &pos); } else { PyErr_Format(PyExc_ValueError, "tag cannot be a negative int: %ld", val); err = -1; @@ -1097,7 +1122,7 @@ if (overflow == 0) { if (val >= 0) { tag_aux_out(CBOR_TAG, val, out, &pos); - err = inner_dumps(tag_value, out, &pos); + err = inner_dumps(optp, tag_value, out, &pos); } else { PyErr_Format(PyExc_ValueError, "tag cannot be a negative long: %lld", val); err = -1; @@ -1126,10 +1151,15 @@ // With out=NULL it just counts the length. // return err, 0=OK -static int inner_dumps(PyObject* ob, uint8_t* out, uintptr_t* posp) { +static int inner_dumps(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) { uintptr_t pos = (posp != NULL) ? *posp : 0; - if (PyBool_Check(ob)) { + if (ob == Py_None) { + if (out != NULL) { + out[pos] = CBOR_NULL; + } + pos += 1; + } else if (PyBool_Check(ob)) { if (out != NULL) { if (PyObject_IsTrue(ob)) { out[pos] = CBOR_TRUE; @@ -1138,20 +1168,15 @@ } } pos += 1; - } else if (ob == Py_None) { - if (out != NULL) { - out[pos] = CBOR_NULL; - } - pos += 1; } else if (PyDict_Check(ob)) { - int err = dumps_dict(ob, out, &pos); + int err = dumps_dict(optp, ob, out, &pos); if (err != 0) { return err; } } else if (PyList_Check(ob)) { Py_ssize_t i; Py_ssize_t listlen = PyList_Size(ob); tag_aux_out(CBOR_ARRAY, listlen, out, &pos); for (i = 0; i < listlen; i++) { - int err = inner_dumps(PyList_GetItem(ob, i), out, &pos); + int err = inner_dumps(optp, PyList_GetItem(ob, i), out, &pos); if (err != 0) { return err; } } } else if (PyTuple_Check(ob)) { @@ -1159,7 +1184,7 @@ Py_ssize_t listlen = PyTuple_Size(ob); tag_aux_out(CBOR_ARRAY, listlen, out, &pos); for (i = 0; i < listlen; i++) { - int err = inner_dumps(PyTuple_GetItem(ob, i), out, &pos); + int err = inner_dumps(optp, PyTuple_GetItem(ob, i), out, &pos); if (err != 0) { return err; } } // TODO: accept other enumerables and emit a variable length array @@ -1188,11 +1213,11 @@ PyObject* minusone = PyLong_FromLongLong(-1L); PyObject* val = PyNumber_Subtract(minusone, ob); Py_DECREF(minusone); - dumps_bignum(CBOR_TAG_NEGBIGNUM, val, out, &pos); + dumps_bignum(optp, CBOR_TAG_NEGBIGNUM, val, out, &pos); Py_DECREF(val); } else { // BIG INT - dumps_bignum(CBOR_TAG_BIGNUM, ob, out, &pos); + dumps_bignum(optp, CBOR_TAG_BIGNUM, ob, out, &pos); } } } else if (PyFloat_Check(ob)) { @@ -1219,7 +1244,7 @@ { PyObject* tag_class = getCborTagClass(); if (PyObject_IsInstance(ob, tag_class)) { - int err = dumps_tag(ob, out, &pos); + int err = dumps_tag(optp, ob, out, &pos); if (err != 0) { return err; } handled = 1; } @@ -1247,9 +1272,27 @@ return 0; } +static int _dumps_kwargs(EncodeOptions *optp, PyObject* kwargs) { + if (kwargs == NULL) { + } else if (!PyDict_Check(kwargs)) { + PyErr_Format(PyExc_ValueError, "kwargs not dict: %R\n", kwargs); + return 0; + } else { + PyObject* sort_keys = PyDict_GetItemString(kwargs, "sort_keys"); // Borrowed ref + if (sort_keys != NULL) { + optp->sort_keys = PyObject_IsTrue(sort_keys); + //fprintf(stderr, "sort_keys=%d\n", optp->sort_keys); + } + } + return 1; +} + static PyObject* -cbor_dumps(PyObject* noself, PyObject* args) { +cbor_dumps(PyObject* noself, PyObject* args, PyObject* kwargs) { + PyObject* ob; + EncodeOptions opts = {0}; + EncodeOptions *optp = &opts; is_big_endian(); if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) { ob = PyList_GetItem(args, 0); @@ -1258,6 +1301,13 @@ } else { PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args); return NULL; + } + if (ob == NULL) { + return NULL; + } + + if (!_dumps_kwargs(optp, kwargs)) { + return NULL; } { @@ -1268,7 +1318,7 @@ int err; // first pass just to count length - err = inner_dumps(ob, NULL, &pos); + err = inner_dumps(optp, ob, NULL, &pos); if (err != 0) { return NULL; } @@ -1281,7 +1331,7 @@ return NULL; } - err = inner_dumps(ob, out, NULL); + err = inner_dumps(optp, ob, out, NULL); if (err != 0) { PyMem_Free(out); return NULL; @@ -1295,10 +1345,12 @@ } static PyObject* -cbor_dump(PyObject* noself, PyObject* args) { +cbor_dump(PyObject* noself, PyObject* args, PyObject *kwargs) { // args should be (obj, fp) PyObject* ob; PyObject* fp; + EncodeOptions opts = {0}; + EncodeOptions *optp = &opts; is_big_endian(); if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) { @@ -1311,6 +1363,13 @@ PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args); return NULL; } + if ((ob == NULL) || (fp == NULL)) { + return NULL; + } + + if (!_dumps_kwargs(optp, kwargs)) { + return NULL; + } { // TODO: make this smarter, right now it is justt fp.write(dumps(ob)) @@ -1320,7 +1379,7 @@ int err; // first pass just to count length - err = inner_dumps(ob, NULL, &pos); + err = inner_dumps(optp, ob, NULL, &pos); if (err != 0) { return NULL; } @@ -1333,7 +1392,7 @@ return NULL; } - err = inner_dumps(ob, out, NULL); + err = inner_dumps(optp, ob, out, NULL); if (err != 0) { PyMem_Free(out); return NULL; @@ -1377,12 +1436,12 @@ static PyMethodDef CborMethods[] = { {"loads", cbor_loads, METH_VARARGS, "parse cbor from data buffer to objects"}, - {"dumps", cbor_dumps, METH_VARARGS, + {"dumps", (PyCFunction)cbor_dumps, METH_VARARGS|METH_KEYWORDS, "serialize python object to bytes"}, {"load", cbor_load, METH_VARARGS, "Parse cbor from data buffer to objects.\n" "Takes a file-like object capable of .read(N)\n"}, - {"dump", cbor_dump, METH_VARARGS, + {"dump", (PyCFunction)cbor_dump, METH_VARARGS|METH_KEYWORDS, "Serialize python object to bytes.\n" "dump(obj, fp)\n" "obj: object to output; fp: file-like object to .write() to\n"}, diff --git a/cbor/VERSION.py b/cbor/VERSION.py new file mode 100644 index 0000000..cd3ff78 --- /dev/null +++ b/cbor/VERSION.py @@ -0,0 +1 @@ +'0.1.24' diff --git a/cbor/__init__.py b/cbor/__init__.py index f56f435..1b6ec15 100644 --- a/cbor/__init__.py +++ b/cbor/__init__.py @@ -9,9 +9,11 @@ from .cbor import Tag from .tagmap import TagMapper, ClassTag, UnknownTagException +from .VERSION import __doc__ as __version__ __all__ = [ 'loads', 'dumps', 'load', 'dump', 'Tag', 'TagMapper', 'ClassTag', 'UnknownTagException', + '__version__', ] diff --git a/cbor/cbor.py b/cbor/cbor.py index fa349c6..2dfffb5 100644 --- a/cbor/cbor.py +++ b/cbor/cbor.py @@ -149,27 +149,39 @@ return _encode_type_num(CBOR_TEXT, len(val)) + val -def dumps_array(arr): +def dumps_array(arr, sort_keys=False): head = _encode_type_num(CBOR_ARRAY, len(arr)) - parts = [dumps(x) for x in arr] + parts = [dumps(x, sort_keys=sort_keys) for x in arr] return head + b''.join(parts) if _IS_PY3: - def dumps_dict(d): + def dumps_dict(d, sort_keys=False): head = _encode_type_num(CBOR_MAP, len(d)) parts = [head] - for k,v in d.items(): - parts.append(dumps(k)) - parts.append(dumps(v)) + if sort_keys: + for k in sorted(d.keys()): + v = d[k] + parts.append(dumps(k, sort_keys=sort_keys)) + parts.append(dumps(v, sort_keys=sort_keys)) + else: + for k,v in d.items(): + parts.append(dumps(k, sort_keys=sort_keys)) + parts.append(dumps(v, sort_keys=sort_keys)) return b''.join(parts) else: - def dumps_dict(d): + def dumps_dict(d, sort_keys=False): head = _encode_type_num(CBOR_MAP, len(d)) parts = [head] - for k,v in d.iteritems(): - parts.append(dumps(k)) - parts.append(dumps(v)) + if sort_keys: + for k in sorted(d.iterkeys()): + v = d[k] + parts.append(dumps(k, sort_keys=sort_keys)) + parts.append(dumps(v, sort_keys=sort_keys)) + else: + for k,v in d.iteritems(): + parts.append(dumps(k, sort_keys=sort_keys)) + parts.append(dumps(v, sort_keys=sort_keys)) return b''.join(parts) @@ -179,8 +191,8 @@ return struct.pack('B', CBOR_FALSE) -def dumps_tag(t): - return _encode_type_num(CBOR_TAG, t.tag) + dumps(t.value) +def dumps_tag(t, sort_keys=False): + return _encode_type_num(CBOR_TAG, t.tag) + dumps(t.value, sort_keys=sort_keys) if _IS_PY3: @@ -195,7 +207,7 @@ return isinstance(x, (int, long)) -def dumps(ob): +def dumps(ob, sort_keys=False): if ob is None: return struct.pack('B', CBOR_NULL) if isinstance(ob, bool): @@ -203,28 +215,28 @@ if _is_stringish(ob): return dumps_string(ob) if isinstance(ob, (list, tuple)): - return dumps_array(ob) + return dumps_array(ob, sort_keys=sort_keys) # TODO: accept other enumerables and emit a variable length array if isinstance(ob, dict): - return dumps_dict(ob) + return dumps_dict(ob, sort_keys=sort_keys) if isinstance(ob, float): return dumps_float(ob) if _is_intish(ob): return dumps_int(ob) if isinstance(ob, Tag): - return dumps_tag(ob) + return dumps_tag(ob, sort_keys=sort_keys) raise Exception("don't know how to cbor serialize object of type %s", type(ob)) # same basic signature as json.dump, but with no options (yet) -def dump(obj, fp): +def dump(obj, fp, sort_keys=False): """ obj: Python object to serialize fp: file-like object capable of .write(bytes) """ # this is kinda lame, but probably not inefficient for non-huge objects # TODO: .write() to fp as we go as each inner object is serialized - blob = dumps(obj) + blob = dumps(obj, sort_keys=sort_keys) fp.write(blob) diff --git a/cbor/tagmap.py b/cbor/tagmap.py index 030664f..369fbb9 100644 --- a/cbor/tagmap.py +++ b/cbor/tagmap.py @@ -5,7 +5,7 @@ # fall back to 100% python implementation from .cbor import loads, dumps, load, dump -from .cbor import Tag, CBOR_TAG_CBOR +from .cbor import Tag, CBOR_TAG_CBOR, _IS_PY3 class ClassTag(object): @@ -54,7 +54,11 @@ # can't do this in Python 2.6: #return {k:self.encode(v) for k,v in obj.iteritems()} out = {} - for k,v in obj.iteritems(): + if _IS_PY3: + items = obj.items() + else: + items = obj.iteritems() + for k,v in items: out[k] = self.encode(v) return out # fall through, let underlying cbor.dump decide if it can encode object @@ -77,7 +81,11 @@ return obj if isinstance(obj, dict): # update in place - for k,v in obj.iteritems(): + if _IS_PY3: + items = obj.items() + else: + items = obj.iteritems() + for k,v in items: # assume key is a primitive obj[k] = self.decode(v) return obj diff --git a/cbor.egg-info/PKG-INFO b/cbor.egg-info/PKG-INFO index 5d38520..8570be6 100644 --- a/cbor.egg-info/PKG-INFO +++ b/cbor.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: cbor -Version: 0.1.21 +Version: 0.1.24 Summary: RFC 7049 - Concise Binary Object Representation Home-page: https://bitbucket.org/bodhisnarkva/cbor Author: Brian Olson diff --git a/cbor.egg-info/SOURCES.txt b/cbor.egg-info/SOURCES.txt index 19ddf58..850e225 100644 --- a/cbor.egg-info/SOURCES.txt +++ b/cbor.egg-info/SOURCES.txt @@ -1,6 +1,7 @@ setup.py c/cbor.h c/cbormodule.c +cbor/VERSION.py cbor/__init__.py cbor/cbor.py cbor/cbor_rpc_client.py diff --git a/setup.py b/setup.py index 07cf929..37a25ab 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Thanks! +# to Mic Bowman for a bunch of work and impetus on dumps(,sort_keys=) from distutils.command.build_ext import build_ext from distutils.errors import (CCompilerError, DistutilsExecError, @@ -53,9 +56,12 @@ raise +VERSION = eval(open('cbor/VERSION.py','rb').read()) + + setup_options = dict( name='cbor', - version='0.1.21', + version=VERSION, description='RFC 7049 - Concise Binary Object Representation', long_description=""" An implementation of RFC 7049 - Concise Binary Object Representation (CBOR).