diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..5d38520 --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,27 @@ +Metadata-Version: 1.1 +Name: cbor +Version: 0.1.21 +Summary: RFC 7049 - Concise Binary Object Representation +Home-page: https://bitbucket.org/bodhisnarkva/cbor +Author: Brian Olson +Author-email: bolson@bolson.org +License: Apache +Description: + An implementation of RFC 7049 - Concise Binary Object Representation (CBOR). + + CBOR is comparable to JSON, has a superset of JSON's ability, but serializes to a binary format which is smaller and faster to generate and parse. + + The two primary functions are cbor.loads() and cbor.dumps(). + + This library includes a C implementation which runs 3-5 times faster than the Python standard library's C-accelerated implementation of JSON. This also includes a 100% Python implementation. + +Platform: UNKNOWN +Classifier: Development Status :: 4 - Beta +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: C +Classifier: Topic :: Software Development :: Libraries :: Python Modules diff --git a/c/cbor.h b/c/cbor.h new file mode 100644 index 0000000..ec5511d --- /dev/null +++ b/c/cbor.h @@ -0,0 +1,76 @@ +#ifndef CBOR_H +#define CBOR_H + +#define CBOR_TYPE_MASK 0xE0 /* top 3 bits */ +#define CBOR_INFO_BITS 0x1F /* low 5 bits */ + +#define CBOR_UINT 0x00 +#define CBOR_NEGINT 0x20 +#define CBOR_BYTES 0x40 +#define CBOR_TEXT 0x60 +#define CBOR_ARRAY 0x80 +#define CBOR_MAP 0xA0 +#define CBOR_TAG 0xC0 +#define CBOR_7 0xE0 /* float and other types */ + +#define CBOR_ADDITIONAL_INFORMATION 0x1F + +/* read the "additional information" of a tag byte which is often a + * small literal integer describing the length in bytes of the 
data
+ * item */
+#define IS_SMALL_LITERAL(n) (((n) & 0x1f) < 24)
+#define SMALL_LITERAL(n) ((n) & 0x1f)
+
+
+#define CBOR_UINT8_FOLLOWS 24 // 0x18: argument is in the next 1 byte
+#define CBOR_UINT16_FOLLOWS 25 // 0x19: argument is in the next 2 bytes
+#define CBOR_UINT32_FOLLOWS 26 // 0x1A: argument is in the next 4 bytes
+#define CBOR_UINT64_FOLLOWS 27 // 0x1B: argument is in the next 8 bytes
+#define CBOR_VAR_FOLLOWS 31 // 0x1F: indefinite length, sub-items run until CBOR_BREAK
+
+#define CBOR_UINT8 (CBOR_UINT | CBOR_UINT8_FOLLOWS)
+#define CBOR_UINT16 (CBOR_UINT | CBOR_UINT16_FOLLOWS)
+#define CBOR_UINT32 (CBOR_UINT | CBOR_UINT32_FOLLOWS)
+#define CBOR_UINT64 (CBOR_UINT | CBOR_UINT64_FOLLOWS)
+
+#define CBOR_NEGINT8 (CBOR_NEGINT | CBOR_UINT8_FOLLOWS)
+#define CBOR_NEGINT16 (CBOR_NEGINT | CBOR_UINT16_FOLLOWS)
+#define CBOR_NEGINT32 (CBOR_NEGINT | CBOR_UINT32_FOLLOWS)
+#define CBOR_NEGINT64 (CBOR_NEGINT | CBOR_UINT64_FOLLOWS)
+
+
+#define CBOR_BREAK 0xFF
+
+#define CBOR_FALSE (CBOR_7 | 20)
+#define CBOR_TRUE (CBOR_7 | 21)
+#define CBOR_NULL (CBOR_7 | 22)
+#define CBOR_UNDEFINED (CBOR_7 | 23)
+
+#define CBOR_FLOAT16 (CBOR_7 | 25)
+#define CBOR_FLOAT32 (CBOR_7 | 26)
+#define CBOR_FLOAT64 (CBOR_7 | 27)
+
+
+#define CBOR_TAG_DATE_STRING (0) /* RFC3339 */
+#define CBOR_TAG_DATE_ARRAY (1) /* any number type follows, seconds since 1970-01-01T00:00:00 UTC */
+#define CBOR_TAG_BIGNUM (2) /* big endian byte string follows */
+#define CBOR_TAG_NEGBIGNUM (3) /* big endian byte string follows */
+#define CBOR_TAG_DECIMAL (4) /* [ 10^x exponent, number ] */
+#define CBOR_TAG_BIGFLOAT (5) /* [ 2^x exponent, number ] */
+//#define CBOR_TAG_BASE64URL (21)
+//#define CBOR_TAG_BASE64 (22)
+#define CBOR_TAG_BASE16 (23)
+#define CBOR_TAG_CBOR (24) /* following byte string is embedded CBOR data */
+
+#define CBOR_TAG_URI 32
+//#define CBOR_TAG_BASE64URL 33
+//#define CBOR_TAG_BASE64 34
+#define CBOR_TAG_REGEX 35
+#define CBOR_TAG_MIME 36 /* following text is MIME message, headers, separators and all */
+#define CBOR_TAG_CBOR_FILEHEADER 55799 /* can open a file with 0xd9d9f7 */
+
+
+/* Content-Type: application/cbor */
+
+
+#endif /* CBOR_H */
+diff --git 
a/c/cbormodule.c b/c/cbormodule.c
new file mode 100644
index 0000000..8d68e5a
--- /dev/null
+++ b/c/cbormodule.c
@@ -0,0 +1,1419 @@
+#include "Python.h"
+
+#include "cbor.h"
+
+// NOTE(review): the header names on the bare #include lines below look like
+// they were stripped in extraction -- restore them from upstream before building.
+#include
+#include
+
+//#include
+#include
+
+
+#ifndef DEBUG_LOGGING
+// non-zero makes logprintf() write its messages to stderr
+#define DEBUG_LOGGING 0
+//#define DEBUG_LOGGING 1
+#endif
+
+
+#ifdef Py_InitModule
+// Python 2.7
+
+#define HAS_FILE_READER 1
+#define IS_PY3 0
+
+#else
+
+#define HAS_FILE_READER 0
+#define IS_PY3 1
+
+#endif
+
+// Hey Look! It's a polymorphic object structure in C!
+
+// read(self, len): read len bytes and return them in a buffer, or NULL on error
+// read1(self, uint8_t*): read one byte and return 0 on success
+// return_buffer(self, buffer): release the result of read(self, len)
+// delete(self): destructor. free thiz and contents.
+#define READER_FUNCTIONS \
+    void* (*read)(void* self, Py_ssize_t len); \
+    int (*read1)(void* self, uint8_t* oneByte); \
+    void (*return_buffer)(void* self, void* buffer); \
+    void (*delete)(void* self);
+
+// Point the four function slots of thiz at clazz_read, clazz_read1,
+// clazz_return_buffer and clazz_delete.
+#define SET_READER_FUNCTIONS(thiz, clazz) (thiz)->read = clazz##_read;\
+    (thiz)->read1 = clazz##_read1;\
+    (thiz)->return_buffer = clazz##_return_buffer;\
+    (thiz)->delete = clazz##_delete;
+
+// Common prefix of every reader struct: just the function slots.
+typedef struct _Reader {
+    READER_FUNCTIONS;
+} Reader;
+
+static Reader* NewBufferReader(PyObject* ob);
+static Reader* NewObjectReader(PyObject* ob);
+#if HAS_FILE_READER
+static Reader* NewFileReader(PyObject* ob);
+#endif
+
+
+static PyObject* loads_tag(Reader* rin, uint64_t aux);
+static int loads_kv(PyObject* out, Reader* rin);
+
+// One chunk of an indefinite-length byte string; chunks are chained through
+// next while the item is being decoded.
+typedef struct VarBufferPart {
+    void* start;
+    uint64_t len;
+    struct VarBufferPart* next;
+} VarBufferPart;
+
+
+static int logprintf(const char* fmt, ...) 
{
+    va_list ap;
+    int ret;
+    va_start(ap, fmt);
+#if DEBUG_LOGGING
+    ret = vfprintf(stderr, fmt, ap);
+#else
+    ret = 0;
+#endif
+    va_end(ap);
+    return ret;
+}
+
+// TODO: portably work this out at compile time
+static int _is_big_endian = 0;
+
+// Detect the host byte order by comparing a value with its htonl() form.
+// Stores the answer in _is_big_endian and returns it.
+static int is_big_endian(void) {
+    uint32_t val = 1234;
+    _is_big_endian = val == htonl(val);
+    //logprintf("is_big_endian=%d\n", _is_big_endian);
+    return _is_big_endian;
+}
+
+
+// Decode an IEEE 754 half-precision float from the next 2 bytes of rin.
+// Returns a new PyFloat reference, or NULL if either byte could not be read.
+PyObject* decodeFloat16(Reader* rin) {
+    // float16 parsing adapted from example code in spec
+    uint8_t hibyte, lobyte;
+    int err;
+    int exp;
+    int mant;
+    double val;
+
+    err = rin->read1(rin, &hibyte);
+    if (err) { logprintf("fail in float16[0]\n"); return NULL; }
+    err = rin->read1(rin, &lobyte);
+    if (err) { logprintf("fail in float16[1]\n"); return NULL; }
+
+    // 1 sign bit, 5 exponent bits, 10 mantissa bits
+    exp = (hibyte >> 2) & 0x1f;
+    mant = ((hibyte & 0x3) << 8) | lobyte;
+    if (exp == 0) {
+        // zero or subnormal
+        val = ldexp(mant, -24);
+    } else if (exp != 31) {
+        // normal number: restore the implicit leading 1 (1024 == 1 << 10)
+        val = ldexp(mant + 1024, exp - 25);
+    } else {
+        val = mant == 0 ? INFINITY : NAN;
+    }
+    if (hibyte & 0x80) {
+        val = -val;
+    }
+    return PyFloat_FromDouble(val);
+}
+
+// Decode a big-endian IEEE 754 single from the next 4 bytes of rin.
+// Returns a new PyFloat reference, or NULL if the read failed.
+PyObject* decodeFloat32(Reader* rin) {
+    float val;
+    uint32_t bits;
+    uint8_t* raw = rin->read(rin, 4);
+    if (!raw) { logprintf("fail in float32\n"); return NULL; }
+    // Build the bit pattern arithmetically and copy it with memcpy: the
+    // reader's buffer has no alignment guarantee and reading it through a
+    // float* breaks strict aliasing. This also works for either host order.
+    bits =
+        (((uint32_t)raw[0]) << 24) |
+        (((uint32_t)raw[1]) << 16) |
+        (((uint32_t)raw[2]) << 8) |
+        ((uint32_t)raw[3]);
+    memcpy(&val, &bits, sizeof(val));
+    rin->return_buffer(rin, raw);
+    return PyFloat_FromDouble(val);
+}
+
+// Decode a big-endian IEEE 754 double from the next 8 bytes of rin.
+// Returns a new PyFloat reference, or NULL if the read failed.
+PyObject* decodeFloat64(Reader* rin) {
+    int si;
+    double val;
+    uint64_t aux = 0;
+    uint8_t* raw = rin->read(rin, 8);
+    if (!raw) { logprintf("fail in float64\n"); return NULL; }
+    for (si = 0; si < 8; si++) {
+        aux = aux << 8;
+        aux |= raw[si];
+    }
+    rin->return_buffer(rin, raw);
+    // memcpy instead of *(double*)&aux, which is an aliasing violation
+    memcpy(&val, &aux, sizeof(val));
+    return PyFloat_FromDouble(val);
+}
+
+// parse following int value into *auxP
+// cbor_info is the low 5 bits of the initial byte: 0..23 is the value itself,
+// 24..27 mean a 1/2/4/8 byte big-endian value follows, 31 marks an
+// indefinite-length item (reported as 0).
+// return 0 on success, -1 on fail (with a Python exception set)
+static int handle_info_bits(Reader* rin, uint8_t cbor_info, uint64_t* auxP) {
+    uint64_t aux;
+
+    if (cbor_info <= 23) {
+        // literal value <=23
+        aux = cbor_info;
+    } else if (cbor_info == CBOR_UINT8_FOLLOWS) {
+        uint8_t taux;
+        if (rin->read1(rin, &taux)) { logprintf("fail in uint8\n"); return -1; }
+        aux = taux;
+    } else if (cbor_info == CBOR_UINT16_FOLLOWS) {
+        uint8_t hibyte, lobyte;
+        if (rin->read1(rin, &hibyte)) { logprintf("fail in uint16[0]\n"); return -1; }
+        if (rin->read1(rin, &lobyte)) { logprintf("fail in uint16[1]\n"); return -1; }
+        aux = (hibyte << 8) | lobyte;
+    } else if (cbor_info == CBOR_UINT32_FOLLOWS) {
+        uint8_t* raw = (uint8_t*)rin->read(rin, 4);
+        if (!raw) { logprintf("fail in uint32[1]\n"); return -1; }
+        aux =
+            (((uint64_t)raw[0]) << 24) |
+            (((uint64_t)raw[1]) << 16) |
+            (((uint64_t)raw[2]) << 8) |
+            ((uint64_t)raw[3]);
+        rin->return_buffer(rin, raw);
+    } else if (cbor_info == CBOR_UINT64_FOLLOWS) {
+        int si;
+        uint8_t* raw = (uint8_t*)rin->read(rin, 8);
+        if (!raw) { logprintf("fail in uint64[1]\n"); return -1; }
+        aux = 0;
+        for (si = 0; si < 8; si++) {
+            aux = aux << 8;
+            aux |= raw[si];
+        }
+        rin->return_buffer(rin, raw);
+    } else if (cbor_info == CBOR_VAR_FOLLOWS) {
+        // indefinite length: no argument bytes follow
+        aux = 0;
+    } else {
+        // 28..30 are not assigned by RFC 7049; decoding them as 0 would turn
+        // malformed input into a plausible-looking value.
+        PyErr_Format(PyExc_ValueError, "reserved additional information value %d", (int)cbor_info);
+        return -1;
+    }
+    *auxP = aux;
+    return 0;
+}
+
+static PyObject* inner_loads_c(Reader* rin, uint8_t c);
+
+// Decode one complete item from rin. Returns a new reference or NULL.
+static PyObject* inner_loads(Reader* rin) {
+    uint8_t c;
+    int err;
+
+    err = 
rin->read1(rin, &c);
+    if (err) { logprintf("fail in loads tag\n"); return NULL; }
+    return inner_loads_c(rin, c);
+}
+
+// Release every node of a VarBufferPart list. Each node was allocated in one
+// piece together with its payload, so one free per node is enough.
+static void free_var_buffer_parts(VarBufferPart* parts) {
+    while (parts != NULL) {
+        VarBufferPart* next = parts->next;
+        PyMem_Free(parts);
+        parts = next;
+    }
+}
+
+// Decode an indefinite-length byte string: BYTES chunks up to a BREAK byte.
+// Returns a new bytes object, or NULL with an exception set; the chunk list
+// is released on every path.
+static PyObject* loads_var_bytes(Reader* rin) {
+    // keeps sizeof(VarBufferPart) + total representable as a Py_ssize_t
+    const size_t limit = (size_t)PY_SSIZE_T_MAX - sizeof(VarBufferPart);
+    PyObject* out = NULL;
+    size_t total = 0;
+    VarBufferPart* parts = NULL;
+    VarBufferPart* parts_tail = NULL;
+    uint8_t sc;
+    if (rin->read1(rin, &sc)) { logprintf("r1 fail in var bytes tag\n"); return NULL; }
+    while (sc != CBOR_BREAK) {
+        uint8_t scbor_type = sc & CBOR_TYPE_MASK;
+        uint8_t scbor_info = sc & CBOR_INFO_BITS;
+        uint64_t saux;
+
+        if (scbor_type != CBOR_BYTES) {
+            PyErr_Format(PyExc_ValueError, "expected subordinate BYTES block under VAR BYTES, but got %x", scbor_type);
+            goto fail;
+        }
+        if (handle_info_bits(rin, scbor_info, &saux)) { logprintf("var bytes sub infobits failed\n"); goto fail; }
+        if (saux > (uint64_t)(limit - total)) {
+            PyErr_SetString(PyExc_ValueError, "VAR BYTES chunk length too large");
+            goto fail;
+        }
+        // An empty chunk contributes nothing, and not every reader accepts a
+        // zero-length read, so only touch the reader for non-empty chunks.
+        if (saux != 0) {
+            VarBufferPart* part;
+            void* blob = rin->read(rin, (Py_ssize_t)saux);
+            if (!blob) { logprintf("var bytes sub bytes read failed\n"); goto fail; }
+            part = (VarBufferPart*)PyMem_Malloc(sizeof(VarBufferPart) + (size_t)saux);
+            if (part == NULL) {
+                rin->return_buffer(rin, blob);
+                PyErr_NoMemory();
+                goto fail;
+            }
+            // payload lives directly behind the node header
+            part->start = (void*)(part + 1);
+            memcpy(part->start, blob, (size_t)saux);
+            rin->return_buffer(rin, blob);
+            part->len = saux;
+            part->next = NULL;
+            if (parts_tail == NULL) {
+                parts = part;
+            } else {
+                parts_tail->next = part;
+            }
+            parts_tail = part;
+            total += (size_t)saux;
+        }
+        if (rin->read1(rin, &sc)) { logprintf("r1 fail in var bytes tag\n"); goto fail; }
+    }
+    // Done: copy the chunks straight into the result object.
+    out = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)total);
+    if (out == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR BYTES");
+    } else {
+        char* dst = PyBytes_AsString(out);
+        VarBufferPart* part;
+        for (part = parts; part != NULL; part = part->next) {
+            memcpy(dst, part->start, (size_t)part->len);
+            dst += part->len;
+        }
+    }
+    free_var_buffer_parts(parts);
+    return out;
+fail:
+    free_var_buffer_parts(parts);
+    return NULL;
+}
+
+// Decode an indefinite-length text string: sub-items up to a BREAK byte,
+// joined with the empty string. Returns a new reference or NULL.
+static PyObject* loads_var_text(Reader* rin) {
+    PyObject* out = NULL;
+    PyObject* parts = PyList_New(0);
+    PyObject* joiner = PyUnicode_FromString("");
+    uint8_t sc;
+    if ((parts == NULL) || (joiner == NULL)) { goto done; }
+    if (rin->read1(rin, &sc)) { logprintf("r1 fail in var text tag\n"); goto done; }
+    while (sc != CBOR_BREAK) {
+        int err;
+        PyObject* subitem = inner_loads_c(rin, sc);
+        if (subitem == NULL) { logprintf("fail in var text subitem\n"); goto done; }
+        err = PyList_Append(parts, subitem);
+        Py_DECREF(subitem);
+        if (err != 0) { goto done; }
+        if (rin->read1(rin, &sc)) { logprintf("r1 fail in var text tag\n"); goto done; }
+    }
+    // Done
+    out = PyUnicode_Join(joiner, parts);
+    if (out == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR TEXT");
+    }
+done:
+    Py_XDECREF(joiner);
+    Py_XDECREF(parts);
+    return out;
+}
+
+// Decode a definite-length BYTES (is_text == 0) or TEXT (is_text != 0) item
+// whose payload is aux bytes long. Returns a new reference or NULL.
+static PyObject* loads_counted_string(Reader* rin, uint64_t aux, int is_text) {
+    PyObject* out;
+    void* raw;
+    if (aux > (uint64_t)PY_SSIZE_T_MAX) {
+        // the casts below would otherwise produce a negative length
+        PyErr_SetString(PyExc_ValueError, "string length too large");
+        return NULL;
+    }
+    if (aux == 0) {
+        // nothing to read; readers are never asked for zero bytes
+        raw = (void*)"";
+    } else {
+        raw = rin->read(rin, (Py_ssize_t)aux);
+        if (!raw) {
+            if (is_text) {
+                logprintf("read text failed\n");
+            } else {
+                logprintf("bytes read failed\n");
+            }
+            return NULL;
+        }
+    }
+    if (is_text) {
+        out = PyUnicode_FromStringAndSize((char*)raw, (Py_ssize_t)aux);
+        if (out == NULL) {
+            // replaces whatever exception the constructor raised
+            PyErr_SetString(PyExc_RuntimeError, "unknown error decoding TEXT");
+        }
+    } else {
+        out = PyBytes_FromStringAndSize((char*)raw, (Py_ssize_t)aux);
+        if (out == NULL) {
+            PyErr_SetString(PyExc_RuntimeError, "unknown error decoding BYTES");
+        }
+    }
+    if (aux != 0) {
+        rin->return_buffer(rin, raw);
+    }
+    return out;
+}
+
+// Decode an ARRAY into a new list. With CBOR_VAR_FOLLOWS the items run until
+// a BREAK byte, otherwise exactly aux items follow. The partially built list
+// is released on failure.
+static PyObject* loads_array(Reader* rin, uint8_t cbor_info, uint64_t aux) {
+    PyObject* out;
+    if (cbor_info == CBOR_VAR_FOLLOWS) {
+        uint8_t sc;
+        out = PyList_New(0);
+        if (out == NULL) {
+            PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR ARRAY");
+            return NULL;
+        }
+        if (rin->read1(rin, &sc)) { logprintf("r1 fail in var array tag\n"); goto fail; }
+        while (sc != CBOR_BREAK) {
+            int err;
+            PyObject* subitem = inner_loads_c(rin, sc);
+            if (subitem == NULL) { logprintf("fail in var array subitem\n"); goto fail; }
+            err = PyList_Append(out, subitem);
+            Py_DECREF(subitem);
+            if (err != 0) { goto fail; }
+            if (rin->read1(rin, &sc)) { logprintf("r1 fail in var array tag\n"); goto fail; }
+        }
+    } else {
+        // 64-bit index: aux is 64 bits wide and a narrower counter could wrap
+        uint64_t i;
+        if (aux > (uint64_t)PY_SSIZE_T_MAX) {
+            PyErr_SetString(PyExc_ValueError, "ARRAY length too large");
+            return NULL;
+        }
+        out = PyList_New((Py_ssize_t)aux);
+        if (out == NULL) {
+            PyErr_SetString(PyExc_RuntimeError, "unknown error decoding ARRAY");
+            return NULL;
+        }
+        for (i = 0; i < aux; i++) {
+            PyObject* subitem = inner_loads(rin);
+            if (subitem == NULL) {
+                logprintf("array subitem[%llu] (of %llu) failed\n", (unsigned long long)i, (unsigned long long)aux);
+                goto fail;
+            }
+            PyList_SetItem(out, (Py_ssize_t)i, subitem);
+            // PyList_SetItem became the owner of the reference count of subitem, we don't need to DECREF it
+        }
+    }
+    return out;
+fail:
+    Py_DECREF(out);
+    return NULL;
+}
+
+// Decode a MAP into a new dict. With CBOR_VAR_FOLLOWS the pairs run until a
+// BREAK byte, otherwise exactly aux pairs follow. The partially built dict is
+// released on failure.
+static PyObject* loads_map(Reader* rin, uint8_t cbor_info, uint64_t aux) {
+    PyObject* out = PyDict_New();
+    if (cbor_info == CBOR_VAR_FOLLOWS) {
+        uint8_t sc;
+        if (out == NULL) {
+            PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR MAP");
+            return NULL;
+        }
+        if (rin->read1(rin, &sc)) { logprintf("r1 fail in var map tag\n"); goto fail; }
+        while (sc != CBOR_BREAK) {
+            int err;
+            PyObject* key = inner_loads_c(rin, sc);
+            PyObject* value;
+            if (key == NULL) { logprintf("var map key fail\n"); goto fail; }
+            value = inner_loads(rin);
+            if (value == NULL) {
+                logprintf("var map val vail\n");
+                Py_DECREF(key);
+                goto fail;
+            }
+            // fails for e.g. unhashable keys; must not be ignored
+            err = PyDict_SetItem(out, key, value);
+            Py_DECREF(key);
+            Py_DECREF(value);
+            if (err != 0) { goto fail; }
+
+            if (rin->read1(rin, &sc)) { logprintf("r1 fail in var map tag\n"); goto fail; }
+        }
+    } else {
+        uint64_t i;
+        if (out == NULL) {
+            PyErr_SetString(PyExc_RuntimeError, "unknown error decoding MAP");
+            return NULL;
+        }
+        for (i = 0; i < aux; i++) {
+            if (loads_kv(out, rin) != 0) {
+                logprintf("map kv[%llu] failed\n", (unsigned long long)i);
+                goto fail;
+            }
+        }
+    }
+    return out;
+fail:
+    Py_DECREF(out);
+    return NULL;
+}
+
+// Decode the item whose initial byte c has already been read from rin.
+// Returns a new reference, or NULL with a Python exception set.
+PyObject* inner_loads_c(Reader* rin, uint8_t c) {
+    uint8_t cbor_type = c & CBOR_TYPE_MASK;
+    uint8_t cbor_info = c & CBOR_INFO_BITS;
+    uint64_t aux;
+    PyObject* out = NULL;
+
+    if (cbor_type == CBOR_7) {
+        if (cbor_info == CBOR_UINT16_FOLLOWS) { // float16
+            return decodeFloat16(rin);
+        } else if (cbor_info == CBOR_UINT32_FOLLOWS) { // float32
+            return decodeFloat32(rin);
+        } else if (cbor_info == CBOR_UINT64_FOLLOWS) { // float64
+            return decodeFloat64(rin);
+        }
+        // not a float, fall through to other CBOR_7 interpretations
+    }
+    if (handle_info_bits(rin, cbor_info, &aux)) { logprintf("info bits failed\n"); return NULL; }
+
+    switch (cbor_type) {
+    case CBOR_UINT:
+        out = PyLong_FromUnsignedLongLong(aux);
+        if (out == NULL) {
+            PyErr_SetString(PyExc_RuntimeError, "unknown error decoding UINT");
+        }
+        return out;
+    case CBOR_NEGINT:
+        if (aux > 0x7fffffffffffffff) {
+            // -1 - aux does not fit in a long long; compute it with Python ints
+            PyObject* bignum = PyLong_FromUnsignedLongLong(aux);
+            PyObject* minusOne = PyLong_FromLong(-1);
+            if ((bignum != NULL) && (minusOne != NULL)) {
+                out = PyNumber_Subtract(minusOne, bignum);
+            }
+            Py_XDECREF(minusOne);
+            Py_XDECREF(bignum);
+        } else {
+            out = PyLong_FromLongLong((long long)(((long long)-1) - aux));
+        }
+        if (out == NULL) {
+            PyErr_SetString(PyExc_RuntimeError, "unknown error decoding NEGINT");
+        }
+        return out;
+    case CBOR_BYTES:
+        if (cbor_info == CBOR_VAR_FOLLOWS) {
+            return loads_var_bytes(rin);
+        }
+        return loads_counted_string(rin, aux, 0);
+    case CBOR_TEXT:
+        if (cbor_info == CBOR_VAR_FOLLOWS) {
+            return loads_var_text(rin);
+        }
+        return loads_counted_string(rin, aux, 1);
+    case CBOR_ARRAY:
+        return loads_array(rin, cbor_info, aux);
+    case CBOR_MAP:
+        return loads_map(rin, cbor_info, aux);
+    case CBOR_TAG:
+        return loads_tag(rin, aux);
+    case CBOR_7:
+        if (aux == 20) {
+            out = Py_False;
+            Py_INCREF(out);
+        } else if (aux == 21) {
+            out = Py_True;
+            Py_INCREF(out);
+        } else if (aux == 22) {
+            out = Py_None;
+            Py_INCREF(out);
+        } else if (aux == 23) {
+            // js `undefined`, closest is py None
+            out = Py_None;
+            Py_INCREF(out);
+        }
+        if (out == NULL) {
+            PyErr_Format(PyExc_ValueError, "unknown section 7 marker %02x, aux=%llu", c, (unsigned long long)aux);
+        }
+        return out;
+    default:
+        PyErr_Format(PyExc_RuntimeError, "unknown cbor marker %02x", c);
+        return NULL;
+    }
+    PyErr_SetString(PyExc_RuntimeError, "cbor library internal error moof!");
+    return NULL;
+}
+
+// Decode one key and one value from rin and store the pair in dict out.
+// Returns 0 on success, -1 on failure; no references are kept on failure.
+static int loads_kv(PyObject* out, Reader* rin) {
+    int err;
+    PyObject* key = inner_loads(rin);
+    PyObject* value;
+    if (key == NULL) { logprintf("map key fail\n"); return -1; }
+    value = inner_loads(rin);
+    if (value == NULL) {
+        logprintf("map val fail\n");
+        Py_DECREF(key);
+        return -1;
+    }
+    err = PyDict_SetItem(out, key, value);
+    Py_DECREF(key);
+    Py_DECREF(value);
+    return (err != 0) ? -1 : 0;
+}
+
+static 
PyObject* loads_bignum(Reader* rin, uint8_t c) {
+    // Decode the payload of a bignum tag: c is the initial byte of the BYTES
+    // item and only inline lengths (0..23 bytes) are supported. The payload
+    // is read big-endian into a Python int. Returns a new reference or NULL.
+    PyObject* out = NULL;
+    PyObject* eight = NULL;
+    uint8_t bytes_info = c & CBOR_INFO_BITS;
+    int i;
+
+    if (bytes_info >= 24) {
+        // cast: %lu takes unsigned long, sizeof yields size_t
+        PyErr_Format(PyExc_NotImplementedError, "TODO: TAG BIGNUM for bigger bignum bytes_info=%d, len(ull)=%lu\n", bytes_info, (unsigned long)sizeof(unsigned long long));
+        return NULL;
+    }
+    eight = PyLong_FromLong(8);
+    out = PyLong_FromLong(0);
+    if (eight == NULL) {
+        Py_CLEAR(out);
+    }
+    for (i = 0; (out != NULL) && (i < bytes_info); i++) {
+        PyObject* curbyte;
+        PyObject* tout;
+        uint8_t cb;
+        if (rin->read1(rin, &cb)) {
+            logprintf("r1 fail in bignum %d/%d\n", i, bytes_info);
+            Py_CLEAR(out);
+            break;
+        }
+        // out = (out << 8) | cb, dropping each intermediate as we go
+        tout = PyNumber_Lshift(out, eight);
+        Py_DECREF(out);
+        out = tout;
+        if (out == NULL) { break; }
+        curbyte = PyLong_FromLong(cb);
+        tout = (curbyte != NULL) ? PyNumber_Or(out, curbyte) : NULL;
+        Py_XDECREF(curbyte);
+        Py_DECREF(out);
+        out = tout;
+    }
+    Py_XDECREF(eight);
+    return out;
+}
+
+
+// returns a PyObject for cbor.cbor.Tag, or NULL with an exception set
+// Returned PyObject* is a BORROWED reference from the module dict
+static PyObject* getCborTagClass(void) {
+    PyObject* cbor_module = PyImport_ImportModule("cbor.cbor");
+    PyObject* moddict;
+    PyObject* tag_class;
+    if (cbor_module == NULL) {
+        // import failed; the import machinery already set the exception
+        return NULL;
+    }
+    moddict = PyModule_GetDict(cbor_module);
+    tag_class = PyDict_GetItemString(moddict, "Tag");
+    // moddict and tag_class are 'borrowed reference'
+    if (tag_class == NULL) {
+        // PyDict_GetItemString does not raise for a missing key
+        PyErr_SetString(PyExc_AttributeError, "cbor.cbor has no attribute Tag");
+    }
+    Py_DECREF(cbor_module);
+
+    return tag_class;
+}
+
+
+// Decode the item following a TAG header whose tag number is aux.
+// Tags 2 and 3 followed by BYTES become Python ints; every other tag is
+// wrapped as cbor.cbor.Tag(aux, item). Returns a new reference or NULL.
+static PyObject* loads_tag(Reader* rin, uint64_t aux) {
+    PyObject* out = NULL;
+    // return an object CBORTag(tagnum, nextob)
+    if (aux == CBOR_TAG_BIGNUM) {
+        // If the next object is bytes, interpret it here without making a PyObject for it.
+        uint8_t sc;
+        if (rin->read1(rin, &sc)) { logprintf("r1 fail in bignum tag\n"); return NULL; }
+        if ((sc & CBOR_TYPE_MASK) == CBOR_BYTES) {
+            return loads_bignum(rin, sc);
+        }
+        PyErr_Format(PyExc_ValueError, "TAG BIGNUM not followed by bytes but %02x", sc);
+        return NULL;
+    } else if (aux == CBOR_TAG_NEGBIGNUM) {
+        // If the next object is bytes, interpret it here without making a PyObject for it.
+        uint8_t sc;
+        if (rin->read1(rin, &sc)) { logprintf("r1 fail in negbignum tag\n"); return NULL; }
+        if ((sc & CBOR_TYPE_MASK) == CBOR_BYTES) {
+            PyObject* minusOne;
+            PyObject* tout;
+            out = loads_bignum(rin, sc);
+            if (out == NULL) { logprintf("loads_bignum fail inside TAG_NEGBIGNUM\n"); return NULL; }
+            // the encoded value n stands for -1 - n
+            minusOne = PyLong_FromLong(-1);
+            tout = (minusOne != NULL) ? PyNumber_Subtract(minusOne, out) : NULL;
+            Py_XDECREF(minusOne);
+            Py_DECREF(out);
+            return tout;
+        }
+        PyErr_Format(PyExc_ValueError, "TAG NEGBIGNUM not followed by bytes but %02x", sc);
+        return NULL;
+    }
+    out = inner_loads(rin);
+    if (out == NULL) { return NULL; }
+    {
+        // Each step is skipped once an earlier one failed, so a NULL class
+        // or argument tuple is never called or dereferenced.
+        PyObject* tag_class = getCborTagClass();
+        PyObject* args = (tag_class != NULL) ? Py_BuildValue("(K,O)", (unsigned long long)aux, out) : NULL;
+        PyObject* tout = (args != NULL) ? PyObject_CallObject(tag_class, args) : NULL;
+        Py_XDECREF(args);
+        Py_DECREF(out);
+        // tag_class was just a borrowed reference
+        out = tout;
+    }
+    return out;
+}
+
+
+// Module function: decode one CBOR item from the bytes/bytearray passed as
+// the first positional argument.
+static PyObject*
+cbor_loads(PyObject* noself, PyObject* args) {
+    PyObject* ob;
+    is_big_endian();
+    if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) {
+        ob = PyList_GetItem(args, 0);
+    } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) {
+        ob = PyTuple_GetItem(args, 0);
+    } else {
+        PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args);
+        return NULL;
+    }
+    if (ob == NULL) {
+        // called without arguments: GetItem already raised IndexError
+        return NULL;
+    }
+
+    if (ob == Py_None) {
+        PyErr_SetString(PyExc_ValueError, "got None for buffer to decode in loads");
+        
return NULL;
+    }
+
+    {
+        PyObject* out = NULL;
+        Reader* r = NewBufferReader(ob);
+        if (!r) {
+            return NULL;
+        }
+        out = inner_loads(r);
+        r->delete(r);
+        return out;
+    }
+}
+
+
+#if HAS_FILE_READER
+
+// Reader over a C FILE* taken from a Python 2 file object.
+typedef struct _FileReader {
+    READER_FUNCTIONS;
+    FILE* fin;
+    void* dst;              // reusable read buffer, owned by the reader
+    Py_ssize_t dst_size;    // capacity of dst in bytes
+    Py_ssize_t read_count;  // total bytes read so far
+} FileReader;
+
+// read from a python builtin file which contains a C FILE*
+// Returns the reader's own buffer holding exactly len bytes, or NULL with an
+// exception set. The buffer is only valid until the next read.
+static void* FileReader_read(void* self, Py_ssize_t len) {
+    FileReader* thiz = (FileReader*)self;
+    Py_ssize_t rtotal = 0;
+    uintptr_t opos;
+    //logprintf("file read %d\n", len);
+    if (len < 0) {
+        // would be converted to a huge size_t by fread() below
+        PyErr_SetString(PyExc_ValueError, "negative read length");
+        return NULL;
+    }
+    if (len > thiz->dst_size) {
+        // keep the old buffer reachable if the resize fails
+        void* grown = PyMem_Realloc(thiz->dst, len);
+        if (grown == NULL) {
+            PyErr_NoMemory();
+            return NULL;
+        }
+        thiz->dst = grown;
+        thiz->dst_size = len;
+    } else if ((thiz->dst_size > (128 * 1024)) && (len < 4096)) {
+        // shrink after a large read so one big item does not pin the memory
+        PyMem_Free(thiz->dst);
+        thiz->dst = PyMem_Malloc(len);
+        if (thiz->dst == NULL) {
+            thiz->dst_size = 0;
+            PyErr_NoMemory();
+            return NULL;
+        }
+        thiz->dst_size = len;
+    }
+    opos = (uintptr_t)(thiz->dst);
+    while (1) {
+        // len counts the bytes still missing; rtotal the bytes received
+        size_t rlen = fread((void*)opos, 1, len, thiz->fin);
+        if (rlen == 0) {
+            // file isn't going to give any more
+            PyErr_Format(PyExc_ValueError, "only got %zd bytes with %zd stil to read from file", rtotal, len);
+            PyMem_Free(thiz->dst);
+            thiz->dst = NULL;
+            thiz->dst_size = 0;
+            return NULL;
+        }
+        thiz->read_count += rlen;
+        rtotal += rlen;
+        opos += rlen;
+        len -= rlen;
+        // Finished only when nothing is missing. Comparing rtotal with the
+        // already-decremented len reported success after a short read.
+        if (len <= 0) {
+            if (thiz->dst == NULL) {
+                PyErr_SetString(PyExc_RuntimeError, "known error in file reader, NULL dst");
+                return NULL;
+            }
+            return thiz->dst;
+        }
+    }
+}
+// Read a single byte into *oneByte. Returns 0 on success, -1 (ValueError set)
+// when the file yields nothing.
+static int FileReader_read1(void* self, uint8_t* oneByte) {
+    FileReader* thiz = (FileReader*)self;
+    size_t didread = fread((void*)oneByte, 1, 1, thiz->fin);
+    if (didread == 0) {
+        logprintf("failed to read 1 from file\n");
+        PyErr_SetString(PyExc_ValueError, "got nothing reading 1 from file");
+        return -1;
+    }
+    thiz->read_count++;
+    return 0;
+}
+static void FileReader_return_buffer(void* self, void* buffer) {
+    // Nothing to do, we hold onto the buffer and maybe reuse it for next read
+}
+static void FileReader_delete(void* self) {
+    FileReader* thiz 
= (FileReader*)self;
+    if (thiz->dst) {
+        PyMem_Free(thiz->dst);
+    }
+    PyMem_Free(thiz);
+}
+// Build a FileReader over the FILE* inside Python 2 file object ob.
+// Returns NULL with an exception set on failure.
+static Reader* NewFileReader(PyObject* ob) {
+    FileReader* fr = (FileReader*)PyMem_Malloc(sizeof(FileReader));
+    if (fr == NULL) {
+        PyErr_SetString(PyExc_MemoryError, "failed to allocate FileReader");
+        return NULL;
+    }
+    fr->fin = PyFile_AsFile(ob);
+    if (fr->fin == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "PyFile_AsFile NULL");
+        PyMem_Free(fr);
+        return NULL;
+    }
+    fr->dst = NULL;
+    fr->dst_size = 0;
+    fr->read_count = 0;
+    SET_READER_FUNCTIONS(fr, FileReader);
+    return (Reader*)fr;
+}
+
+#endif /* Python 2.7 FileReader */
+
+
+typedef struct _ObjectReader {
+    READER_FUNCTIONS;
+    PyObject* ob;
+
+    // We got one object with all the bytes necessary, and need to
+    // DECREF it later.
+    PyObject* retval;
+    void* bytes;
+
+    // OR, we got several objects, we DECREFed them as we went, and
+    // need to Free() this buffer at the end.
+    void* dst;
+
+    Py_ssize_t read_count;
+    // set when ob.read() itself raised, so callers keep that exception
+    int exception_is_external;
+} ObjectReader;
+
+// read from a python file-like object which has a .read(n) method
+// Returns a buffer of exactly len bytes that must be handed back through
+// return_buffer(), or NULL with an exception set.
+static void* ObjectReader_read(void* context, Py_ssize_t len) {
+    ObjectReader* thiz = (ObjectReader*)context;
+    Py_ssize_t rtotal = 0;
+    uintptr_t opos = 0;
+    //logprintf("ob read %d\n", len);
+    assert(!thiz->dst);
+    assert(!thiz->bytes);
+    if (len <= 0) {
+        // the loop below would not run and there would be no buffer to return
+        PyErr_Format(PyExc_ValueError, "invalid read length %ld", (long)len);
+        return NULL;
+    }
+    while (rtotal < len) {
+        PyObject* retval = PyObject_CallMethod(thiz->ob, "read", "n", len - rtotal, NULL);
+        Py_ssize_t rlen;
+        if (retval == NULL) {
+            thiz->exception_is_external = 1;
+            logprintf("exception in object.read()\n");
+            return NULL;
+        }
+        if (!PyBytes_Check(retval)) {
+            logprintf("object.read() is not bytes\n");
+            PyErr_SetString(PyExc_ValueError, "expected ob.read() to return a bytes object\n");
+            Py_DECREF(retval);
+            return NULL;
+        }
+        rlen = PyBytes_Size(retval);
+        thiz->read_count += rlen;
+        if (rlen > len - rtotal) {
+            logprintf("object.read() is too much!\n");
+            // casts: %ld / %lu take long / unsigned long, not Py_ssize_t
+            PyErr_Format(PyExc_ValueError, "ob.read() returned %ld bytes but only wanted %lu\n", (long)rlen, (unsigned long)(len - rtotal));
+            Py_DECREF(retval);
+            return NULL;
+        }
+        if (rlen == 0) {
+            // Empty result means EOF in the middle of an item; without this
+            // check the loop would call read() forever.
+            PyErr_Format(PyExc_ValueError, "only got %ld bytes with %ld still to read from object", (long)rtotal, (long)(len - rtotal));
+            Py_DECREF(retval);
+            return NULL;
+        }
+        if (rlen == len) {
+            // best case! All in one call to read()
+            // We _keep_ a reference to retval until later.
+            thiz->retval = retval;
+            thiz->bytes = PyBytes_AsString(retval);
+            assert(thiz->bytes);
+            thiz->dst = NULL;
+            opos = 0;
+            return thiz->bytes;
+        }
+        if (thiz->dst == NULL) {
+            thiz->dst = PyMem_Malloc(len);
+            if (thiz->dst == NULL) {
+                Py_DECREF(retval);
+                PyErr_NoMemory();
+                return NULL;
+            }
+            opos = (uintptr_t)thiz->dst;
+        }
+        // else, not enough all in one go
+        memcpy((void*)opos, PyBytes_AsString(retval), rlen);
+        Py_DECREF(retval);
+        opos += rlen;
+        rtotal += rlen;
+    }
+    assert(thiz->dst);
+    return thiz->dst;
+}
+// Read a single byte into *oneByte by calling ob.read(1).
+// Returns 0 on success, -1 with an exception set otherwise.
+static int ObjectReader_read1(void* self, uint8_t* oneByte) {
+    ObjectReader* thiz = (ObjectReader*)self;
+    PyObject* retval = PyObject_CallMethod(thiz->ob, "read", "i", 1, NULL);
+    Py_ssize_t rlen;
+    int err = -1;
+    if (retval == NULL) {
+        thiz->exception_is_external = 1;
+        //logprintf("call ob read(1) failed\n");
+        return -1;
+    }
+    if (!PyBytes_Check(retval)) {
+        PyErr_SetString(PyExc_ValueError, "expected ob.read() to return a bytes object\n");
+    } else {
+        rlen = PyBytes_Size(retval);
+        thiz->read_count += rlen;
+        if (rlen > 1) {
+            PyErr_Format(PyExc_ValueError, "TODO: raise exception: WAT ob.read() returned %ld bytes but only wanted 1\n", (long)rlen);
+        } else if (rlen == 1) {
+            *oneByte = PyBytes_AsString(retval)[0];
+            err = 0;
+        } else {
+            PyErr_SetString(PyExc_ValueError, "got nothing reading 1");
+        }
+    }
+    // single release point: the error branches used to leak retval
+    Py_DECREF(retval);
+    return err;
+}
+// Release a buffer handed out by ObjectReader_read: either the kept bytes
+// object or the assembled copy.
+static void ObjectReader_return_buffer(void* context, void* buffer) {
+    ObjectReader* thiz = (ObjectReader*)context;
+    if (buffer == thiz->bytes) {
+        Py_DECREF(thiz->retval);
+        thiz->retval = NULL;
+        thiz->bytes = NULL;
+    } else if (buffer == thiz->dst) {
+        PyMem_Free(thiz->dst);
+        thiz->dst = NULL;
+    } else {
+        logprintf("TODO: raise exception, could not release buffer %p, wanted dst=%p or bytes=%p\n", buffer, thiz->dst, thiz->bytes);
+    }
+}
+static void ObjectReader_delete(void* context) {
+    ObjectReader* thiz = 
(ObjectReader*)context;
+    if (thiz->retval != NULL) {
+        Py_DECREF(thiz->retval);
+    }
+    if (thiz->dst != NULL) {
+        PyMem_Free(thiz->dst);
+    }
+    PyMem_Free(thiz);
+}
+// Build a reader that pulls bytes from ob.read(n). ob is stored without
+// taking a reference. Returns NULL with MemoryError set if allocation fails.
+static Reader* NewObjectReader(PyObject* ob) {
+    ObjectReader* r = (ObjectReader*)PyMem_Malloc(sizeof(ObjectReader));
+    if (r == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    r->ob = ob;
+    r->retval = NULL;
+    r->bytes = NULL;
+    r->dst = NULL;
+    r->read_count = 0;
+    r->exception_is_external = 0;
+    SET_READER_FUNCTIONS(r, ObjectReader);
+    return (Reader*)r;
+}
+
+// Reader over an in-memory bytes/bytearray buffer.
+typedef struct _BufferReader {
+    READER_FUNCTIONS;
+    uint8_t* raw;     // start of the buffer
+    Py_ssize_t len;   // bytes not yet consumed
+    uintptr_t pos;    // address of the next unread byte
+} BufferReader;
+
+// read from a buffer, aka loads()
+// Returns a pointer into the underlying buffer (nothing is copied), or NULL
+// with ValueError set when fewer than len bytes remain.
+static void* BufferReader_read(void* context, Py_ssize_t len) {
+    BufferReader* thiz = (BufferReader*)context;
+    //logprintf("br %p %d (%d)\n", thiz, len, thiz->len);
+    // A negative len passes the upper bound check and would move the cursor
+    // backwards, so it has to be rejected explicitly.
+    if ((len >= 0) && (len <= thiz->len)) {
+        void* out = (void*)thiz->pos;
+        thiz->pos += len;
+        thiz->len -= len;
+        assert(out);
+        return out;
+    }
+    PyErr_Format(PyExc_ValueError, "buffer read for %zd but only have %zd\n", len, thiz->len);
+    return NULL;
+}
+// Read a single byte into *oneByte. Returns 0 on success, -1 with
+// LookupError set when the buffer is exhausted.
+static int BufferReader_read1(void* self, uint8_t* oneByte) {
+    BufferReader* thiz = (BufferReader*)self;
+    //logprintf("br %p _1_ (%d)\n", thiz, thiz->len);
+    if (thiz->len <= 0) {
+        PyErr_SetString(PyExc_LookupError, "buffer exhausted");
+        return -1;
+    }
+    *oneByte = *((uint8_t*)thiz->pos);
+    thiz->pos += 1;
+    thiz->len -= 1;
+    return 0;
+}
+static void BufferReader_return_buffer(void* context, void* buffer) {
+    // nothing to do
+}
+static void BufferReader_delete(void* context) {
+    BufferReader* thiz = (BufferReader*)context;
+    PyMem_Free(thiz);
+}
+// Build a reader over the contents of a bytes or bytearray object.
+// Returns NULL with an exception set for other types, empty input or
+// allocation failure; the reader struct is freed on every failure path.
+static Reader* NewBufferReader(PyObject* ob) {
+    BufferReader* r = (BufferReader*)PyMem_Malloc(sizeof(BufferReader));
+    if (r == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    SET_READER_FUNCTIONS(r, BufferReader);
+    if (PyByteArray_Check(ob)) {
+        r->raw = (uint8_t*)PyByteArray_AsString(ob);
+        r->len = PyByteArray_Size(ob);
+    } else if (PyBytes_Check(ob)) {
+        r->raw = (uint8_t*)PyBytes_AsString(ob);
+        r->len = PyBytes_Size(ob);
+    } else {
+        PyErr_SetString(PyExc_ValueError, "input of unknown type not bytes or bytearray");
+        PyMem_Free(r);
+        return NULL;
+    }
+    r->pos = (uintptr_t)r->raw;
+    if (r->len == 0) {
+        PyErr_SetString(PyExc_ValueError, "got zero length string in loads");
+        PyMem_Free(r);
+        return NULL;
+    }
+    if (r->raw == NULL) {
+        PyErr_SetString(PyExc_ValueError, "got NULL buffer for string");
+        PyMem_Free(r);
+        return NULL;
+    }
+    //logprintf("NBR(%llu, %ld)\n", r->pos, r->len);
+    return (Reader*)r;
+}
+
+
+// Module function: decode one CBOR item from the file or file-like object
+// passed as the first positional argument. A stream that yields nothing at
+// all is reported as EOFError.
+static PyObject*
+cbor_load(PyObject* noself, PyObject* args) {
+    PyObject* ob;
+    Reader* reader;
+    PyObject* retval;
+    is_big_endian();
+    if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) {
+        ob = PyList_GetItem(args, 0);
+    } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) {
+        ob = PyTuple_GetItem(args, 0);
+    } else {
+        PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args);
+        return NULL;
+    }
+    if (ob == NULL) {
+        // called without arguments: GetItem already raised IndexError
+        return NULL;
+    }
+
+    if (ob == Py_None) {
+        PyErr_SetString(PyExc_ValueError, "got None for buffer to decode in loads");
+        return NULL;
+    }
+#if HAS_FILE_READER
+    if (PyFile_Check(ob)) {
+        reader = NewFileReader(ob);
+        if (reader == NULL) { return NULL; }
+        retval = inner_loads(reader);
+        if ((retval == NULL) &&
+            (((FileReader*)reader)->read_count == 0) &&
+            (feof(((FileReader*)reader)->fin) != 0)) {
+            // never got anything, started at EOF
+            PyErr_Clear();
+            PyErr_SetString(PyExc_EOFError, "read nothing, apparent EOF");
+        }
+        reader->delete(reader);
+    } else
+#endif
+    {
+        reader = NewObjectReader(ob);
+        if (reader == NULL) { return NULL; }
+        retval = inner_loads(reader);
+        if ((retval == NULL) &&
+            (!((ObjectReader*)reader)->exception_is_external) &&
+            ((ObjectReader*)reader)->read_count == 0) {
+            // never got anything, assume EOF
+            PyErr_Clear();
+            PyErr_SetString(PyExc_EOFError, "read nothing, apparent EOF");
+        }
+        reader->delete(reader);
+    }
+    return retval;
+}
+
+
+// Write the initial byte for cbor_type followed by aux as an 8 byte
+// big-endian value and advance *posp by 9. With out == NULL only *posp moves.
+static void tag_u64_out(uint8_t cbor_type, uint64_t aux, uint8_t* out, uintptr_t* posp) {
+    uintptr_t pos = *posp;
+    if (out != NULL) {
+        out[pos] = cbor_type | CBOR_UINT64_FOLLOWS;
+        out[pos+1] = (aux >> 56) & 
0x0ff;
+        out[pos+2] = (aux >> 48) & 0x0ff;
+        out[pos+3] = (aux >> 40) & 0x0ff;
+        out[pos+4] = (aux >> 32) & 0x0ff;
+        out[pos+5] = (aux >> 24) & 0x0ff;
+        out[pos+6] = (aux >> 16) & 0x0ff;
+        out[pos+7] = (aux >> 8) & 0x0ff;
+        out[pos+8] = aux & 0x0ff;
+    }
+    pos += 9;
+    *posp = pos;
+}
+
+
+// Write the initial byte for cbor_type with argument aux, using the shortest
+// of the five forms (inline, or 1, 2, 4, 8 following bytes, big-endian), and
+// advance *posp by the number of bytes used. With out == NULL nothing is
+// written but *posp still advances, which lets callers measure the size.
+static void tag_aux_out(uint8_t cbor_type, uint64_t aux, uint8_t* out, uintptr_t* posp) {
+    uintptr_t pos = *posp;
+    if (aux <= 23) {
+        // tiny literal
+        if (out != NULL) {
+            out[pos] = cbor_type | aux;
+        }
+        pos += 1;
+    } else if (aux <= 0x0ff) {
+        // one byte value
+        if (out != NULL) {
+            out[pos] = cbor_type | CBOR_UINT8_FOLLOWS;
+            out[pos+1] = aux;
+        }
+        pos += 2;
+    } else if (aux <= 0x0ffff) {
+        // two byte value
+        if (out != NULL) {
+            out[pos] = cbor_type | CBOR_UINT16_FOLLOWS;
+            out[pos+1] = (aux >> 8) & 0x0ff;
+            out[pos+2] = aux & 0x0ff;
+        }
+        pos += 3;
+    } else if (aux <= 0x0ffffffffL) {
+        // four byte value
+        if (out != NULL) {
+            out[pos] = cbor_type | CBOR_UINT32_FOLLOWS;
+            out[pos+1] = (aux >> 24) & 0x0ff;
+            out[pos+2] = (aux >> 16) & 0x0ff;
+            out[pos+3] = (aux >> 8) & 0x0ff;
+            out[pos+4] = aux & 0x0ff;
+        }
+        pos += 5;
+    } else {
+        // eight byte value; tag_u64_out advances *posp itself
+        tag_u64_out(cbor_type, aux, out, posp);
+        return;
+    }
+    *posp = pos;
+    return;
+}
+
+static int inner_dumps(PyObject* ob, uint8_t* out, uintptr_t* posp);
+
+// Encode dict ob as a definite-length MAP: the pair count, then every key
+// and value through inner_dumps. Returns 0 on success or the first non-zero
+// inner_dumps result; *posp is only updated on success.
+static int dumps_dict(PyObject* ob, uint8_t* out, uintptr_t* posp) {
+    uintptr_t pos = *posp;
+    Py_ssize_t dictiter = 0;
+    PyObject* key;
+    PyObject* val;
+    Py_ssize_t dictlen = PyDict_Size(ob);
+    int err;
+    tag_aux_out(CBOR_MAP, dictlen, out, &pos);
+    while (PyDict_Next(ob, &dictiter, &key, &val)) {
+        err = inner_dumps(key, out, &pos);
+        if (err != 0) { return err; }
+        err = inner_dumps(val, out, &pos);
+        if (err != 0) { return err; }
+    }
+    *posp = pos;
+    return 0;
+}
+
+
+static void dumps_bignum(uint8_t tag, PyObject* val, uint8_t* out, uintptr_t* posp) {
+    uintptr_t pos = (posp != NULL) ? 
*posp : 0; + PyObject* eight = PyLong_FromLong(8); + PyObject* bytemask = NULL; + PyObject* nval = NULL; + uint8_t* revbytes = NULL; + int revbytepos = 0; + int val_is_orig = 1; + if (out != NULL) { + bytemask = PyLong_FromLongLong(0x0ff); + revbytes = PyMem_Malloc(23); + } + while (PyObject_IsTrue(val) && (revbytepos < 23)) { + if (revbytes != NULL) { + PyObject* tbyte = PyNumber_And(val, bytemask); + revbytes[revbytepos] = PyLong_AsLong(tbyte); + Py_DECREF(tbyte); + } + revbytepos++; + nval = PyNumber_InPlaceRshift(val, eight); + if (val_is_orig) { + val_is_orig = 0; + } else { + Py_DECREF(val); + } + val = nval; + } + if (revbytes != NULL) { + out[pos] = CBOR_TAG | tag; + pos++; + out[pos] = CBOR_BYTES | revbytepos; + pos++; + revbytepos--; + while (revbytepos >= 0) { + out[pos] = revbytes[revbytepos]; + pos++; + revbytepos--; + } + PyMem_Free(revbytes); + Py_DECREF(bytemask); + } else { + pos += 2 + revbytepos; + } + if (!val_is_orig) { + Py_DECREF(val); + } + Py_DECREF(eight); + *posp = pos; +} + +static int dumps_tag(PyObject* ob, uint8_t* out, uintptr_t* posp) { + uintptr_t pos = (posp != NULL) ? 
*posp : 0; + int err = 0; + + + PyObject* tag_num; + PyObject* tag_value; + err = 0; + + tag_num = PyObject_GetAttrString(ob, "tag"); + if (tag_num != NULL) { + tag_value = PyObject_GetAttrString(ob, "value"); + if (tag_value != NULL) { +#ifdef Py_INTOBJECT_H + if (PyInt_Check(tag_num)) { + long val = PyInt_AsLong(tag_num); + if (val > 0) { + tag_aux_out(CBOR_TAG, val, out, &pos); + err = inner_dumps(tag_value, out, &pos); + } else { + PyErr_Format(PyExc_ValueError, "tag cannot be a negative int: %ld", val); + err = -1; + } + } else +#endif + if (PyLong_Check(tag_num)) { + int overflow = -1; + long long val = PyLong_AsLongLongAndOverflow(tag_num, &overflow); + if (overflow == 0) { + if (val >= 0) { + tag_aux_out(CBOR_TAG, val, out, &pos); + err = inner_dumps(tag_value, out, &pos); + } else { + PyErr_Format(PyExc_ValueError, "tag cannot be a negative long: %lld", val); + err = -1; + } + } else { + PyErr_SetString(PyExc_ValueError, "tag number too large"); + err = -1; + } + } + Py_DECREF(tag_value); + } else { + PyErr_SetString(PyExc_ValueError, "broken Tag object has .tag but not .value"); + err = -1; + } + Py_DECREF(tag_num); + } else { + PyErr_SetString(PyExc_ValueError, "broken Tag object with no .tag"); + err = -1; + } + if (err != 0) { return err; } + + *posp = pos; + return err; +} + + +// With out=NULL it just counts the length. +// return err, 0=OK +static int inner_dumps(PyObject* ob, uint8_t* out, uintptr_t* posp) { + uintptr_t pos = (posp != NULL) ? 
*posp : 0; + + if (PyBool_Check(ob)) { + if (out != NULL) { + if (PyObject_IsTrue(ob)) { + out[pos] = CBOR_TRUE; + } else { + out[pos] = CBOR_FALSE; + } + } + pos += 1; + } else if (ob == Py_None) { + if (out != NULL) { + out[pos] = CBOR_NULL; + } + pos += 1; + } else if (PyDict_Check(ob)) { + int err = dumps_dict(ob, out, &pos); + if (err != 0) { return err; } + } else if (PyList_Check(ob)) { + Py_ssize_t i; + Py_ssize_t listlen = PyList_Size(ob); + tag_aux_out(CBOR_ARRAY, listlen, out, &pos); + for (i = 0; i < listlen; i++) { + int err = inner_dumps(PyList_GetItem(ob, i), out, &pos); + if (err != 0) { return err; } + } + } else if (PyTuple_Check(ob)) { + Py_ssize_t i; + Py_ssize_t listlen = PyTuple_Size(ob); + tag_aux_out(CBOR_ARRAY, listlen, out, &pos); + for (i = 0; i < listlen; i++) { + int err = inner_dumps(PyTuple_GetItem(ob, i), out, &pos); + if (err != 0) { return err; } + } + // TODO: accept other enumerables and emit a variable length array +#ifdef Py_INTOBJECT_H + // PyInt exists in Python 2 but not 3 + } else if (PyInt_Check(ob)) { + long val = PyInt_AsLong(ob); + if (val >= 0) { + tag_aux_out(CBOR_UINT, val, out, &pos); + } else { + tag_aux_out(CBOR_NEGINT, -1 - val, out, &pos); + } +#endif + } else if (PyLong_Check(ob)) { + int overflow = 0; + long long val = PyLong_AsLongLongAndOverflow(ob, &overflow); + if (overflow == 0) { + if (val >= 0) { + tag_aux_out(CBOR_UINT, val, out, &pos); + } else { + tag_aux_out(CBOR_NEGINT, -1L - val, out, &pos); + } + } else { + if (overflow < 0) { + // BIG NEGINT + PyObject* minusone = PyLong_FromLongLong(-1L); + PyObject* val = PyNumber_Subtract(minusone, ob); + Py_DECREF(minusone); + dumps_bignum(CBOR_TAG_NEGBIGNUM, val, out, &pos); + Py_DECREF(val); + } else { + // BIG INT + dumps_bignum(CBOR_TAG_BIGNUM, ob, out, &pos); + } + } + } else if (PyFloat_Check(ob)) { + double val = PyFloat_AsDouble(ob); + tag_u64_out(CBOR_7, *((uint64_t*)(&val)), out, &pos); + } else if (PyBytes_Check(ob)) { + Py_ssize_t len = 
PyBytes_Size(ob); + tag_aux_out(CBOR_BYTES, len, out, &pos); + if (out != NULL) { + memcpy(out + pos, PyBytes_AsString(ob), len); + } + pos += len; + } else if (PyUnicode_Check(ob)) { + PyObject* utf8 = PyUnicode_AsUTF8String(ob); + Py_ssize_t len = PyBytes_Size(utf8); + tag_aux_out(CBOR_TEXT, len, out, &pos); + if (out != NULL) { + memcpy(out + pos, PyBytes_AsString(utf8), len); + } + pos += len; + Py_DECREF(utf8); + } else { + int handled = 0; + { + PyObject* tag_class = getCborTagClass(); + if (PyObject_IsInstance(ob, tag_class)) { + int err = dumps_tag(ob, out, &pos); + if (err != 0) { return err; } + handled = 1; + } + // tag_class was just a borrowed reference + } + + // TODO: other special object serializations here + + if (!handled) { +#if IS_PY3 + PyErr_Format(PyExc_ValueError, "cannot serialize unknown object: %R", ob); +#else + PyObject* badtype = PyObject_Type(ob); + PyObject* badtypename = PyObject_Str(badtype); + PyErr_Format(PyExc_ValueError, "cannot serialize unknown object of type %s", PyString_AsString(badtypename)); + Py_DECREF(badtypename); + Py_DECREF(badtype); +#endif + return -1; + } + } + if (posp != NULL) { + *posp = pos; + } + return 0; +} + +static PyObject* +cbor_dumps(PyObject* noself, PyObject* args) { + PyObject* ob; + is_big_endian(); + if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) { + ob = PyList_GetItem(args, 0); + } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) { + ob = PyTuple_GetItem(args, 0); + } else { + PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args); + return NULL; + } + + { + Py_ssize_t outlen = 0; + uintptr_t pos = 0; + void* out = NULL; + PyObject* obout = NULL; + int err; + + // first pass just to count length + err = inner_dumps(ob, NULL, &pos); + if (err != 0) { + return NULL; + } + + outlen = pos; + + out = PyMem_Malloc(outlen); + if (out == NULL) { + PyErr_NoMemory(); + return NULL; + } + + err = inner_dumps(ob, out, NULL); + if (err != 0) { + PyMem_Free(out); + return NULL; + } + 
+ // TODO: I wish there was a way to do this without this copy. + obout = PyBytes_FromStringAndSize(out, outlen); + PyMem_Free(out); + return obout; + } +} + +static PyObject* +cbor_dump(PyObject* noself, PyObject* args) { + // args should be (obj, fp) + PyObject* ob; + PyObject* fp; + + is_big_endian(); + if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) { + ob = PyList_GetItem(args, 0); + fp = PyList_GetItem(args, 1); + } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) { + ob = PyTuple_GetItem(args, 0); + fp = PyTuple_GetItem(args, 1); + } else { + PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args); + return NULL; + } + + { + // TODO: make this smarter, right now it is justt fp.write(dumps(ob)) + Py_ssize_t outlen = 0; + uintptr_t pos = 0; + void* out = NULL; + int err; + + // first pass just to count length + err = inner_dumps(ob, NULL, &pos); + if (err != 0) { + return NULL; + } + + outlen = pos; + + out = PyMem_Malloc(outlen); + if (out == NULL) { + PyErr_NoMemory(); + return NULL; + } + + err = inner_dumps(ob, out, NULL); + if (err != 0) { + PyMem_Free(out); + return NULL; + } + +#if HAS_FILE_READER + if (PyFile_Check(fp)) { + FILE* fout = PyFile_AsFile(fp); + fwrite(out, 1, outlen, fout); + } else +#endif + { + PyObject* ret; + PyObject* obout = NULL; +#if IS_PY3 + PyObject* writeStr = PyUnicode_FromString("write"); +#else + PyObject* writeStr = PyString_FromString("write"); +#endif + obout = PyBytes_FromStringAndSize(out, outlen); + //logprintf("write %zd bytes to %p.write() as %p\n", outlen, fp, obout); + ret = PyObject_CallMethodObjArgs(fp, writeStr, obout, NULL); + Py_DECREF(writeStr); + Py_DECREF(obout); + if (ret != NULL) { + Py_DECREF(ret); + } else { + // exception in fp.write() + PyMem_Free(out); + return NULL; + } + //logprintf("wrote %zd bytes to %p.write() as %p\n", outlen, fp, obout); + } + PyMem_Free(out); + } + + Py_RETURN_NONE; +} + + +static PyMethodDef CborMethods[] = { + {"loads", cbor_loads, METH_VARARGS, + 
"parse cbor from data buffer to objects"}, + {"dumps", cbor_dumps, METH_VARARGS, + "serialize python object to bytes"}, + {"load", cbor_load, METH_VARARGS, + "Parse cbor from data buffer to objects.\n" + "Takes a file-like object capable of .read(N)\n"}, + {"dump", cbor_dump, METH_VARARGS, + "Serialize python object to bytes.\n" + "dump(obj, fp)\n" + "obj: object to output; fp: file-like object to .write() to\n"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +#ifdef Py_InitModule +// Python 2.7 +PyMODINIT_FUNC +init_cbor(void) +{ + (void) Py_InitModule("cbor._cbor", CborMethods); +} +#else +// Python 3 +PyMODINIT_FUNC +PyInit__cbor(void) +{ + static PyModuleDef modef = { + PyModuleDef_HEAD_INIT, + }; + //modef.m_base = PyModuleDef_HEAD_INIT; + modef.m_name = "cbor._cbor"; + modef.m_doc = NULL; + modef.m_size = 0; + modef.m_methods = CborMethods; + modef.m_reload = NULL; + modef.m_traverse = NULL; + modef.m_clear = NULL; + modef.m_free = NULL; + return PyModule_Create(&modef); +} +#endif + diff --git a/cbor/__init__.py b/cbor/__init__.py new file mode 100644 index 0000000..f56f435 --- /dev/null +++ b/cbor/__init__.py @@ -0,0 +1,17 @@ +#!python + +try: + # try C library _cbor.so + from ._cbor import loads, dumps, load, dump +except: + # fall back to 100% python implementation + from .cbor import loads, dumps, load, dump + +from .cbor import Tag +from .tagmap import TagMapper, ClassTag, UnknownTagException + +__all__ = [ + 'loads', 'dumps', 'load', 'dump', + 'Tag', + 'TagMapper', 'ClassTag', 'UnknownTagException', +] diff --git a/cbor/cbor.py b/cbor/cbor.py new file mode 100644 index 0000000..fa349c6 --- /dev/null +++ b/cbor/cbor.py @@ -0,0 +1,498 @@ +#!python +# -*- Python -*- + +import datetime +import re +import struct +import sys + +_IS_PY3 = sys.version_info[0] >= 3 + +if _IS_PY3: + from io import BytesIO as StringIO +else: + try: + from cStringIO import StringIO + except: + from StringIO import StringIO + + +CBOR_TYPE_MASK = 0xE0 # top 3 bits +CBOR_INFO_BITS = 
0x1F # low 5 bits + + +CBOR_UINT = 0x00 +CBOR_NEGINT = 0x20 +CBOR_BYTES = 0x40 +CBOR_TEXT = 0x60 +CBOR_ARRAY = 0x80 +CBOR_MAP = 0xA0 +CBOR_TAG = 0xC0 +CBOR_7 = 0xE0 # float and other types + +CBOR_UINT8_FOLLOWS = 24 # 0x18 +CBOR_UINT16_FOLLOWS = 25 # 0x19 +CBOR_UINT32_FOLLOWS = 26 # 0x1a +CBOR_UINT64_FOLLOWS = 27 # 0x1b +CBOR_VAR_FOLLOWS = 31 # 0x1f + +CBOR_BREAK = 0xFF + +CBOR_FALSE = (CBOR_7 | 20) +CBOR_TRUE = (CBOR_7 | 21) +CBOR_NULL = (CBOR_7 | 22) +CBOR_UNDEFINED = (CBOR_7 | 23) # js 'undefined' value + +CBOR_FLOAT16 = (CBOR_7 | 25) +CBOR_FLOAT32 = (CBOR_7 | 26) +CBOR_FLOAT64 = (CBOR_7 | 27) + +CBOR_TAG_DATE_STRING = 0 # RFC3339 +CBOR_TAG_DATE_ARRAY = 1 # any number type follows, seconds since 1970-01-01T00:00:00 UTC +CBOR_TAG_BIGNUM = 2 # big endian byte string follows +CBOR_TAG_NEGBIGNUM = 3 # big endian byte string follows +CBOR_TAG_DECIMAL = 4 # [ 10^x exponent, number ] +CBOR_TAG_BIGFLOAT = 5 # [ 2^x exponent, number ] +CBOR_TAG_BASE64URL = 21 +CBOR_TAG_BASE64 = 22 +CBOR_TAG_BASE16 = 23 +CBOR_TAG_CBOR = 24 # following byte string is embedded CBOR data + +CBOR_TAG_URI = 32 +CBOR_TAG_BASE64URL = 33 +CBOR_TAG_BASE64 = 34 +CBOR_TAG_REGEX = 35 +CBOR_TAG_MIME = 36 # following text is MIME message, headers, separators and all +CBOR_TAG_CBOR_FILEHEADER = 55799 # can open a file with 0xd9d9f7 + +_CBOR_TAG_BIGNUM_BYTES = struct.pack('B', CBOR_TAG | CBOR_TAG_BIGNUM) + + +def dumps_int(val): + "return bytes representing int val in CBOR" + if val >= 0: + # CBOR_UINT is 0, so I'm lazy/efficient about not OR-ing it in. 
+ if val <= 23: + return struct.pack('B', val) + if val <= 0x0ff: + return struct.pack('BB', CBOR_UINT8_FOLLOWS, val) + if val <= 0x0ffff: + return struct.pack('!BH', CBOR_UINT16_FOLLOWS, val) + if val <= 0x0ffffffff: + return struct.pack('!BI', CBOR_UINT32_FOLLOWS, val) + if val <= 0x0ffffffffffffffff: + return struct.pack('!BQ', CBOR_UINT64_FOLLOWS, val) + outb = _dumps_bignum_to_bytearray(val) + return _CBOR_TAG_BIGNUM_BYTES + _encode_type_num(CBOR_BYTES, len(outb)) + outb + val = -1 - val + return _encode_type_num(CBOR_NEGINT, val) + + +if _IS_PY3: + def _dumps_bignum_to_bytearray(val): + out = [] + while val > 0: + out.insert(0, val & 0x0ff) + val = val >> 8 + return bytes(out) +else: + def _dumps_bignum_to_bytearray(val): + out = [] + while val > 0: + out.insert(0, chr(val & 0x0ff)) + val = val >> 8 + return b''.join(out) + + +def dumps_float(val): + return struct.pack("!Bd", CBOR_FLOAT64, val) + + +_CBOR_TAG_NEGBIGNUM_BYTES = struct.pack('B', CBOR_TAG | CBOR_TAG_NEGBIGNUM) + + +def _encode_type_num(cbor_type, val): + """For some CBOR primary type [0..7] and an auxiliary unsigned number, return CBOR encoded bytes""" + assert val >= 0 + if val <= 23: + return struct.pack('B', cbor_type | val) + if val <= 0x0ff: + return struct.pack('BB', cbor_type | CBOR_UINT8_FOLLOWS, val) + if val <= 0x0ffff: + return struct.pack('!BH', cbor_type | CBOR_UINT16_FOLLOWS, val) + if val <= 0x0ffffffff: + return struct.pack('!BI', cbor_type | CBOR_UINT32_FOLLOWS, val) + if (((cbor_type == CBOR_NEGINT) and (val <= 0x07fffffffffffffff)) or + ((cbor_type != CBOR_NEGINT) and (val <= 0x0ffffffffffffffff))): + return struct.pack('!BQ', cbor_type | CBOR_UINT64_FOLLOWS, val) + if cbor_type != CBOR_NEGINT: + raise Exception("value too big for CBOR unsigned number: {0!r}".format(val)) + outb = _dumps_bignum_to_bytearray(val) + return _CBOR_TAG_NEGBIGNUM_BYTES + _encode_type_num(CBOR_BYTES, len(outb)) + outb + + +if _IS_PY3: + def _is_unicode(val): + return isinstance(val, str) +else: + def 
_is_unicode(val): + return isinstance(val, unicode) + + +def dumps_string(val, is_text=None, is_bytes=None): + if _is_unicode(val): + val = val.encode('utf8') + is_text = True + is_bytes = False + if (is_bytes) or not (is_text == True): + return _encode_type_num(CBOR_BYTES, len(val)) + val + return _encode_type_num(CBOR_TEXT, len(val)) + val + + +def dumps_array(arr): + head = _encode_type_num(CBOR_ARRAY, len(arr)) + parts = [dumps(x) for x in arr] + return head + b''.join(parts) + + +if _IS_PY3: + def dumps_dict(d): + head = _encode_type_num(CBOR_MAP, len(d)) + parts = [head] + for k,v in d.items(): + parts.append(dumps(k)) + parts.append(dumps(v)) + return b''.join(parts) +else: + def dumps_dict(d): + head = _encode_type_num(CBOR_MAP, len(d)) + parts = [head] + for k,v in d.iteritems(): + parts.append(dumps(k)) + parts.append(dumps(v)) + return b''.join(parts) + + +def dumps_bool(b): + if b: + return struct.pack('B', CBOR_TRUE) + return struct.pack('B', CBOR_FALSE) + + +def dumps_tag(t): + return _encode_type_num(CBOR_TAG, t.tag) + dumps(t.value) + + +if _IS_PY3: + def _is_stringish(x): + return isinstance(x, (str, bytes)) + def _is_intish(x): + return isinstance(x, int) +else: + def _is_stringish(x): + return isinstance(x, (str, basestring, bytes, unicode)) + def _is_intish(x): + return isinstance(x, (int, long)) + + +def dumps(ob): + if ob is None: + return struct.pack('B', CBOR_NULL) + if isinstance(ob, bool): + return dumps_bool(ob) + if _is_stringish(ob): + return dumps_string(ob) + if isinstance(ob, (list, tuple)): + return dumps_array(ob) + # TODO: accept other enumerables and emit a variable length array + if isinstance(ob, dict): + return dumps_dict(ob) + if isinstance(ob, float): + return dumps_float(ob) + if _is_intish(ob): + return dumps_int(ob) + if isinstance(ob, Tag): + return dumps_tag(ob) + raise Exception("don't know how to cbor serialize object of type %s", type(ob)) + + +# same basic signature as json.dump, but with no options (yet) +def 
dump(obj, fp): + """ + obj: Python object to serialize + fp: file-like object capable of .write(bytes) + """ + # this is kinda lame, but probably not inefficient for non-huge objects + # TODO: .write() to fp as we go as each inner object is serialized + blob = dumps(obj) + fp.write(blob) + + +class Tag(object): + def __init__(self, tag=None, value=None): + self.tag = tag + self.value = value + + def __repr__(self): + return "Tag({0!r}, {1!r})".format(self.tag, self.value) + + def __eq__(self, other): + if not isinstance(other, Tag): + return False + return (self.tag == other.tag) and (self.value == other.value) + + +def loads(data): + """ + Parse CBOR bytes and return Python objects. + """ + if data is None: + raise ValueError("got None for buffer to decode in loads") + fp = StringIO(data) + return _loads(fp)[0] + + +def load(fp): + """ + Parse and return object from fp, a file-like object supporting .read(n) + """ + return _loads(fp)[0] + + +_MAX_DEPTH = 100 + + +def _tag_aux(fp, tb): + bytes_read = 1 + tag = tb & CBOR_TYPE_MASK + tag_aux = tb & CBOR_INFO_BITS + if tag_aux <= 23: + aux = tag_aux + elif tag_aux == CBOR_UINT8_FOLLOWS: + data = fp.read(1) + aux = struct.unpack_from("!B", data, 0)[0] + bytes_read += 1 + elif tag_aux == CBOR_UINT16_FOLLOWS: + data = fp.read(2) + aux = struct.unpack_from("!H", data, 0)[0] + bytes_read += 2 + elif tag_aux == CBOR_UINT32_FOLLOWS: + data = fp.read(4) + aux = struct.unpack_from("!I", data, 0)[0] + bytes_read += 4 + elif tag_aux == CBOR_UINT64_FOLLOWS: + data = fp.read(8) + aux = struct.unpack_from("!Q", data, 0)[0] + bytes_read += 8 + else: + assert tag_aux == CBOR_VAR_FOLLOWS, "bogus tag {0:02x}".format(tb) + aux = None + + return tag, tag_aux, aux, bytes_read + + +def _read_byte(fp): + tb = fp.read(1) + if len(tb) == 0: + # I guess not all file-like objects do this + raise EOFError() + return ord(tb) + + +def _loads_var_array(fp, limit, depth, returntags, bytes_read): + ob = [] + tb = _read_byte(fp) + while tb != 
CBOR_BREAK: + (subob, sub_len) = _loads_tb(fp, tb, limit, depth, returntags) + bytes_read += 1 + sub_len + ob.append(subob) + tb = _read_byte(fp) + return (ob, bytes_read + 1) + + +def _loads_var_map(fp, limit, depth, returntags, bytes_read): + ob = {} + tb = _read_byte(fp) + while tb != CBOR_BREAK: + (subk, sub_len) = _loads_tb(fp, tb, limit, depth, returntags) + bytes_read += 1 + sub_len + (subv, sub_len) = _loads(fp, limit, depth, returntags) + bytes_read += sub_len + ob[subk] = subv + tb = _read_byte(fp) + return (ob, bytes_read + 1) + + +if _IS_PY3: + def _loads_array(fp, limit, depth, returntags, aux, bytes_read): + ob = [] + for i in range(aux): + subob, subpos = _loads(fp) + bytes_read += subpos + ob.append(subob) + return ob, bytes_read + def _loads_map(fp, limit, depth, returntags, aux, bytes_read): + ob = {} + for i in range(aux): + subk, subpos = _loads(fp) + bytes_read += subpos + subv, subpos = _loads(fp) + bytes_read += subpos + ob[subk] = subv + return ob, bytes_read +else: + def _loads_array(fp, limit, depth, returntags, aux, bytes_read): + ob = [] + for i in xrange(aux): + subob, subpos = _loads(fp) + bytes_read += subpos + ob.append(subob) + return ob, bytes_read + def _loads_map(fp, limit, depth, returntags, aux, bytes_read): + ob = {} + for i in xrange(aux): + subk, subpos = _loads(fp) + bytes_read += subpos + subv, subpos = _loads(fp) + bytes_read += subpos + ob[subk] = subv + return ob, bytes_read + + +def _loads(fp, limit=None, depth=0, returntags=False): + "return (object, bytes read)" + if depth > _MAX_DEPTH: + raise Exception("hit CBOR loads recursion depth limit") + + tb = _read_byte(fp) + + return _loads_tb(fp, tb, limit, depth, returntags) + +def _loads_tb(fp, tb, limit=None, depth=0, returntags=False): + # Some special cases of CBOR_7 best handled by special struct.unpack logic here + if tb == CBOR_FLOAT16: + data = fp.read(2) + hibyte, lowbyte = struct.unpack_from("BB", data, 0) + exp = (hibyte >> 2) & 0x1F + mant = ((hibyte & 0x03) 
<< 8) | lowbyte + if exp == 0: + val = mant * (2.0 ** -24) + elif exp == 31: + if mant == 0: + val = float('Inf') + else: + val = float('NaN') + else: + val = (mant + 1024.0) * (2 ** (exp - 25)) + if hibyte & 0x80: + val = -1.0 * val + return (val, 3) + elif tb == CBOR_FLOAT32: + data = fp.read(4) + pf = struct.unpack_from("!f", data, 0) + return (pf[0], 5) + elif tb == CBOR_FLOAT64: + data = fp.read(8) + pf = struct.unpack_from("!d", data, 0) + return (pf[0], 9) + + tag, tag_aux, aux, bytes_read = _tag_aux(fp, tb) + + if tag == CBOR_UINT: + return (aux, bytes_read) + elif tag == CBOR_NEGINT: + return (-1 - aux, bytes_read) + elif tag == CBOR_BYTES: + ob, subpos = loads_bytes(fp, aux) + return (ob, bytes_read + subpos) + elif tag == CBOR_TEXT: + raw, subpos = loads_bytes(fp, aux, btag=CBOR_TEXT) + ob = raw.decode('utf8') + return (ob, bytes_read + subpos) + elif tag == CBOR_ARRAY: + if aux is None: + return _loads_var_array(fp, limit, depth, returntags, bytes_read) + return _loads_array(fp, limit, depth, returntags, aux, bytes_read) + elif tag == CBOR_MAP: + if aux is None: + return _loads_var_map(fp, limit, depth, returntags, bytes_read) + return _loads_map(fp, limit, depth, returntags, aux, bytes_read) + elif tag == CBOR_TAG: + ob, subpos = _loads(fp) + bytes_read += subpos + if returntags: + # Don't interpret the tag, return it and the tagged object. + ob = Tag(aux, ob) + else: + # attempt to interpet the tag and the value into a Python object. 
+ ob = tagify(ob, aux) + return ob, bytes_read + elif tag == CBOR_7: + if tb == CBOR_TRUE: + return (True, bytes_read) + if tb == CBOR_FALSE: + return (False, bytes_read) + if tb == CBOR_NULL: + return (None, bytes_read) + if tb == CBOR_UNDEFINED: + return (None, bytes_read) + raise ValueError("unknown cbor tag 7 byte: {:02x}".format(tb)) + + +def loads_bytes(fp, aux, btag=CBOR_BYTES): + # TODO: limit to some maximum number of chunks and some maximum total bytes + if aux is not None: + # simple case + ob = fp.read(aux) + return (ob, aux) + # read chunks of bytes + chunklist = [] + total_bytes_read = 0 + while True: + tb = fp.read(1)[0] + if not _IS_PY3: + tb = ord(tb) + if tb == CBOR_BREAK: + total_bytes_read += 1 + break + tag, tag_aux, aux, bytes_read = _tag_aux(fp, tb) + assert tag == btag, 'variable length value contains unexpected component' + ob = fp.read(aux) + chunklist.append(ob) + total_bytes_read += bytes_read + aux + return (b''.join(chunklist), total_bytes_read) + + +if _IS_PY3: + def _bytes_to_biguint(bs): + out = 0 + for ch in bs: + out = out << 8 + out = out | ch + return out +else: + def _bytes_to_biguint(bs): + out = 0 + for ch in bs: + out = out << 8 + out = out | ord(ch) + return out + + +def tagify(ob, aux): + # TODO: make this extensible? + # cbor.register_tag_handler(tagnumber, tag_handler) + # where tag_handler takes (tagnumber, tagged_object) + if aux == CBOR_TAG_DATE_STRING: + # TODO: parse RFC3339 date string + pass + if aux == CBOR_TAG_DATE_ARRAY: + return datetime.datetime.utcfromtimestamp(ob) + if aux == CBOR_TAG_BIGNUM: + return _bytes_to_biguint(ob) + if aux == CBOR_TAG_NEGBIGNUM: + return -1 - _bytes_to_biguint(ob) + if aux == CBOR_TAG_REGEX: + # Is this actually a good idea? Should we just return the tag and the raw value to the user somehow? 
+ return re.compile(ob) + return Tag(aux, ob) diff --git a/cbor/cbor_rpc_client.py b/cbor/cbor_rpc_client.py new file mode 100644 index 0000000..e801456 --- /dev/null +++ b/cbor/cbor_rpc_client.py @@ -0,0 +1,175 @@ +from __future__ import absolute_import +import logging +import random +import socket +import time + +import cbor + + +logger = logging.getLogger(__name__) + + +class SocketReader(object): + ''' + Simple adapter from socket.recv to file-like-read + ''' + def __init__(self, sock): + self.socket = sock + self.timeout_seconds = 10.0 + + def read(self, num): + start = time.time() + data = self.socket.recv(num) + while len(data) < num: + now = time.time() + if now > (start + self.timeout_seconds): + break + ndat = self.socket.recv(num - len(data)) + if ndat: + data += ndat + return data + + +class CborRpcClient(object): + '''Base class for all client objects. + + This provides common `addr_family`, `address`, and `registry_addresses` + configuration parameters, and manages the connection back to the server. + + Automatic retry and time based fallback is managed from + configuration parameters `retries` (default 5), and + `base_retry_seconds` (default 0.5). Retry time doubles on each + retry. E.g. try 0; wait 0.5s; try 1; wait 1s; try 2; wait 2s; try + 3; wait 4s; try 4; wait 8s; try 5; FAIL. Total time waited just + under base_retry_seconds * (2 ** retries). + + .. automethod:: __init__ + .. automethod:: _rpc + .. automethod:: close + + ''' + + def __init__(self, config=None): + self._socket_family = config.get('addr_family', socket.AF_INET) + # may need to be ('host', port) + self._socket_addr = config.get('address') + if self._socket_family == socket.AF_INET: + if not isinstance(self._socket_addr, tuple): + # python socket standard library insists this be tuple! 
+ tsocket_addr = tuple(self._socket_addr) + assert len(tsocket_addr) == 2, 'address must be length-2 tuple ("hostname", port number), got {!r} tuplified to {!r}'.format(self._socket_addr, tsocket_addr) + self._socket_addr = tsocket_addr + self._socket = None + self._rfile = None + self._local_addr = None + self._message_count = 0 + self._retries = config.get('retries', 5) + self._base_retry_seconds = float(config.get('base_retry_seconds', 0.5)) + + def _conn(self): + # lazy socket opener + if self._socket is None: + try: + self._socket = socket.create_connection(self._socket_addr) + self._local_addr = self._socket.getsockname() + except: + logger.error('error connecting to %r:%r', self._socket_addr[0], + self._socket_addr[1], exc_info=True) + raise + return self._socket + + def close(self): + '''Close the connection to the server. + + The next RPC call will reopen the connection. + + ''' + if self._socket is not None: + self._rfile = None + try: + self._socket.shutdown(socket.SHUT_RDWR) + self._socket.close() + except socket.error: + logger.warn('error closing lockd client socket', + exc_info=True) + self._socket = None + + @property + def rfile(self): + if self._rfile is None: + conn = self._conn() + self._rfile = SocketReader(conn) + return self._rfile + + def _rpc(self, method_name, params): + '''Call a method on the server. + + Calls ``method_name(*params)`` remotely, and returns the results + of that function call. Expected return types are primitives, lists, + and dictionaries. 
+ + :raise Exception: if the server response was a failure + + ''' + mlog = logging.getLogger('cborrpc') + tryn = 0 + delay = self._base_retry_seconds + self._message_count += 1 + message = { + 'id': self._message_count, + 'method': method_name, + 'params': params + } + mlog.debug('request %r', message) + buf = cbor.dumps(message) + + errormessage = None + while True: + try: + conn = self._conn() + conn.send(buf) + response = cbor.load(self.rfile) + mlog.debug('response %r', response) + assert response['id'] == message['id'] + if 'result' in response: + return response['result'] + # From here on out we got a response, the server + # didn't have some weird intermittent error or + # non-connectivity, it gave us an error message. We + # don't retry that, we raise it to the user. + errormessage = response.get('error') + if errormessage and hasattr(errormessage,'get'): + errormessage = errormessage.get('message') + if not errormessage: + errormessage = repr(response) + break + except Exception as ex: + if tryn < self._retries: + tryn += 1 + logger.debug('ex in %r (%s), retrying %s in %s sec...', + method_name, ex, tryn, delay, exc_info=True) + self.close() + time.sleep(delay) + delay *= 2 + continue + logger.error('failed in rpc %r %r', method_name, params, + exc_info=True) + raise + raise Exception(errormessage) + + +if __name__ == '__main__': + import sys + logging.basicConfig(level=logging.DEBUG) + host,port = sys.argv[1].split(':') + if not host: + host = 'localhost' + port = int(port) + client = CborRpcClient({'address':(host,port)}) + print(client._rpc(u'connect', [u'127.0.0.1:5432', u'root', u'aoeu'])) + print(client._rpc(u'put', [[('k1','v1'), ('k2','v2')]])) + #print(client._rpc(u'ping', [])) + #print(client._rpc(u'gnip', [])) + client.close() + diff --git a/cbor/tagmap.py b/cbor/tagmap.py new file mode 100644 index 0000000..030664f --- /dev/null +++ b/cbor/tagmap.py @@ -0,0 +1,134 @@ +try: + # try C library _cbor.so + from ._cbor import loads, dumps, load, 
dump +except: + # fall back to 100% python implementation + from .cbor import loads, dumps, load, dump + +from .cbor import Tag, CBOR_TAG_CBOR + + +class ClassTag(object): + ''' + For some CBOR tag_number, encode/decode Python class_type. + class_type manily used for isintance(foo, class_type) + Call encode_function() taking a Python instance and returning CBOR primitive types. + Call decode_function() on CBOR primitive types and return an instance of the Python class_type (a factory function). + ''' + def __init__(self, tag_number, class_type, encode_function, decode_function): + self.tag_number = tag_number + self.class_type = class_type + self.encode_function = encode_function + self.decode_function = decode_function + + +# TODO: This would be more efficient if it moved into cbor.py and +# cbormodule.c, happening inline so that there is only one traversal +# of the objects. But that would require two implementations. When +# this API has been used more and can be considered settled I should +# do that. -- Brian Olson 20140917_172229 +class TagMapper(object): + ''' + Translate Python objects and CBOR tagged data. + Use the CBOR TAG system to note that some data is of a certain class. + Dump while translating Python objects into a CBOR compatible representation. + Load and translate CBOR primitives back into Python objects. 
+ ''' + def __init__(self, class_tags=None, raise_on_unknown_tag=False): + ''' + class_tags: list of ClassTag objects + ''' + self.class_tags = class_tags + self.raise_on_unknown_tag = raise_on_unknown_tag + + def encode(self, obj): + for ct in self.class_tags: + if (ct.class_type is None) or (ct.encode_function is None): + continue + if isinstance(obj, ct.class_type): + return Tag(ct.tag_number, ct.encode_function(obj)) + if isinstance(obj, (list, tuple)): + return [self.encode(x) for x in obj] + if isinstance(obj, dict): + # assume key is a primitive + # can't do this in Python 2.6: + #return {k:self.encode(v) for k,v in obj.iteritems()} + out = {} + for k,v in obj.iteritems(): + out[k] = self.encode(v) + return out + # fall through, let underlying cbor.dump decide if it can encode object + return obj + + def decode(self, obj): + if isinstance(obj, Tag): + for ct in self.class_tags: + if ct.tag_number == obj.tag: + return ct.decode_function(obj.value) + # unknown Tag + if self.raise_on_unknown_tag: + raise UnknownTagException(str(obj.tag)) + # otherwise, pass it through + return obj + if isinstance(obj, list): + # update in place. cbor only decodes to list, not tuple + for i,v in enumerate(obj): + obj[i] = self.decode(v) + return obj + if isinstance(obj, dict): + # update in place + for k,v in obj.iteritems(): + # assume key is a primitive + obj[k] = self.decode(v) + return obj + # non-recursive object (num,bool,blob,string) + return obj + + def dump(self, obj, fp): + dump(self.encode(obj), fp) + + def dumps(self, obj): + return dumps(self.encode(obj)) + + def load(self, fp): + return self.decode(load(fp)) + + def loads(self, blob): + return self.decode(loads(blob)) + + +class WrappedCBOR(ClassTag): + """Handles Tag 24, where a byte array is sub encoded CBOR. + Unpacks sub encoded object on finding such a tag. + Does not convert anyting into such a tag. 
+ + Usage: +>>> import cbor +>>> import cbor.tagmap +>>> tm=cbor.TagMapper([cbor.tagmap.WrappedCBOR()]) +>>> x = cbor.dumps(cbor.Tag(24, cbor.dumps({"a":[1,2,3]}))) +>>> x +'\xd8\x18G\xa1Aa\x83\x01\x02\x03' +>>> tm.loads(x) +{'a': [1L, 2L, 3L]} +>>> cbor.loads(x) +Tag(24L, '\xa1Aa\x83\x01\x02\x03') +""" + def __init__(self): + super(WrappedCBOR, self).__init__(CBOR_TAG_CBOR, None, None, loads) + + @staticmethod + def wrap(ob): + return Tag(CBOR_TAG_CBOR, dumps(ob)) + + @staticmethod + def dump(ob, fp): + return dump(Tag(CBOR_TAG_CBOR, dumps(ob)), fp) + + @staticmethod + def dumps(ob): + return dumps(Tag(CBOR_TAG_CBOR, dumps(ob))) + + +class UnknownTagException(BaseException): + pass diff --git a/cbor.egg-info/PKG-INFO b/cbor.egg-info/PKG-INFO new file mode 100644 index 0000000..5d38520 --- /dev/null +++ b/cbor.egg-info/PKG-INFO @@ -0,0 +1,27 @@ +Metadata-Version: 1.1 +Name: cbor +Version: 0.1.21 +Summary: RFC 7049 - Concise Binary Object Representation +Home-page: https://bitbucket.org/bodhisnarkva/cbor +Author: Brian Olson +Author-email: bolson@bolson.org +License: Apache +Description: + An implementation of RFC 7049 - Concise Binary Object Representation (CBOR). + + CBOR is comparable to JSON, has a superset of JSON's ability, but serializes to a binary format which is smaller and faster to generate and parse. + + The two primary functions are cbor.loads() and cbor.dumps(). + + This library includes a C implementation which runs 3-5 times faster than the Python standard library's C-accelerated implementanion of JSON. This is also includes a 100% Python implementation. 
+ +Platform: UNKNOWN +Classifier: Development Status :: 4 - Beta +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: C +Classifier: Topic :: Software Development :: Libraries :: Python Modules diff --git a/cbor.egg-info/SOURCES.txt b/cbor.egg-info/SOURCES.txt new file mode 100644 index 0000000..19ddf58 --- /dev/null +++ b/cbor.egg-info/SOURCES.txt @@ -0,0 +1,11 @@ +setup.py +c/cbor.h +c/cbormodule.c +cbor/__init__.py +cbor/cbor.py +cbor/cbor_rpc_client.py +cbor/tagmap.py +cbor.egg-info/PKG-INFO +cbor.egg-info/SOURCES.txt +cbor.egg-info/dependency_links.txt +cbor.egg-info/top_level.txt \ No newline at end of file diff --git a/cbor.egg-info/dependency_links.txt b/cbor.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/cbor.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/cbor.egg-info/top_level.txt b/cbor.egg-info/top_level.txt new file mode 100644 index 0000000..1bd3f40 --- /dev/null +++ b/cbor.egg-info/top_level.txt @@ -0,0 +1 @@ +cbor diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..861a9f5 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,5 @@ +[egg_info] +tag_build = +tag_date = 0 +tag_svn_revision = 0 + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..07cf929 --- /dev/null +++ b/setup.py @@ -0,0 +1,123 @@ +#! /usr/bin/env python +# Copyright 2014 Brian Olson +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from distutils.command.build_ext import build_ext
+from distutils.errors import (CCompilerError, DistutilsExecError,
+                              DistutilsPlatformError)
+import sys
+
+from setuptools import setup, Extension
+
+
+# Exception types that optional_build_ext.build_extension() below converts
+# into BuildError.
+build_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError)
+if sys.platform == 'win32' and sys.version_info > (2, 6):
+    # 2.6's distutils.msvc9compiler can raise an IOError when failing to
+    # find the compiler
+    build_errors += (IOError,)
+
+
+class BuildError(Exception):
+    """Raised if compiling extensions failed."""
+
+
+class optional_build_ext(build_ext):
+    """build_ext implementation with optional C speedups.
+
+    Both overrides delegate to build_ext and re-raise the failures they
+    recognise as BuildError.
+    """
+
+    def run(self):
+        """Run build_ext.run(); a DistutilsPlatformError becomes BuildError."""
+        try:
+            build_ext.run(self)
+        except DistutilsPlatformError:
+            raise BuildError()
+
+    def build_extension(self, ext):
+        """Build ext via build_ext.build_extension().
+
+        Any exception listed in build_errors is re-raised as BuildError.
+        A ValueError is re-raised as BuildError only when its message
+        contains 'path' in single quotes; any other ValueError propagates
+        unchanged.
+        """
+        try:
+            build_ext.build_extension(self, ext)
+        except build_errors as be:
+            raise BuildError(be)
+        except ValueError as ve:
+            # this can happen on Windows 64 bit, see Python issue 7511
+            if "'path'" in str(sys.exc_info()[1]):  # works with Python 2 and 3
+                raise BuildError(ve)
+            raise
+
+
+# Keyword arguments for setup(); the dict( call and the long_description
+# string opened here continue past this section.
+setup_options = dict(
+    name='cbor',
+    version='0.1.21',
+    description='RFC 7049 - Concise Binary Object Representation',
+    long_description="""
+An implementation of RFC 7049 - Concise Binary Object Representation (CBOR).
+
+CBOR is comparable to JSON, has a superset of JSON's ability, but serializes to a binary format which is smaller and faster to generate and parse.
+
+The two primary functions are cbor.loads() and cbor.dumps().
+
+This library includes a C implementation which runs 3-5 times faster than the Python standard library's C-accelerated implementanion of JSON. This is also includes a 100% Python implementation.
+""",
+    author='Brian Olson',
+    author_email='bolson@bolson.org',
+    url='https://bitbucket.org/bodhisnarkva/cbor',
+    packages=['cbor'],
+    package_dir={'cbor':'cbor'},
+    ext_modules=[
+        Extension(
+            'cbor._cbor',
+            include_dirs=['c/'],
+            sources=['c/cbormodule.c'],
+            # 'depends' is the Extension option for files the extension
+            # depends on; 'headers' is not an Extension option and is only
+            # reported as an unknown option.
+            depends=['c/cbor.h'],
+        )
+    ],
+    license='Apache',
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: Apache Software License',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3.3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: C',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+    cmdclass={'build_ext': optional_build_ext},
+)
+
+
+def main():
+    """ Perform setup with optional C speedups.
+
+    On Jython and PyPy 'ext_modules' is removed from setup_options before
+    setup() is called. If setup() raises BuildError, the error is written
+    to stderr, 'ext_modules' is removed and setup() is called once more.
+
+    Optional extension compilation stolen from markupsafe, which again stole
+    it from simplejson. Creds to Bob Ippolito for the original code.
+    """
+    is_jython = 'java' in sys.platform
+    is_pypy = hasattr(sys, 'pypy_translation_info')
+
+    if is_jython or is_pypy:
+        setup_options.pop('ext_modules', None)
+
+    try:
+        setup(**setup_options)
+    except BuildError as be:
+        sys.stderr.write('''
+BUILD ERROR:
+  %s
+RETRYING WITHOUT C EXTENSIONS
+''' % (be,))
+        # pop() instead of del: the key may already have been removed above,
+        # and a KeyError here would hide the build error being handled.
+        setup_options.pop('ext_modules', None)
+        setup(**setup_options)
+
+
+if __name__ == '__main__':
+    main()