Skip to content

Commit

Permalink
Add 'bytes_as_strings' option to Py3.2 'Pickler'/'dump'/'dumps'.
Browse files Browse the repository at this point in the history
  • Loading branch information
tseaver committed Mar 5, 2013
1 parent 920c6cd commit 073bb89
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 28 deletions.
4 changes: 2 additions & 2 deletions CHANGES.rst
Expand Up @@ -4,8 +4,8 @@ CHANGES
0.3 (unreleased)
----------------

- Nothing changed yet.

- Added ``bytes_as_strings`` option to the Python 3.2 version of
``Pickler``, ``dump``, and ``dumps``.

0.2 (2013-03-05)
----------------
Expand Down
157 changes: 133 additions & 24 deletions src/zodbpickle/_pickle_32.c
@@ -1,6 +1,9 @@
#include "Python.h"
#include "structmember.h"

/* Lowercase hexadecimal digits, indexed by nibble value (0-15).
   raw_bytes_escape() indexes this table to emit the two digits of each
   \xXX escape it writes. */
static
const char *Py_hexdigits = "0123456789abcdef"; /* from 3.3's codecs.c */

PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");

Expand Down Expand Up @@ -338,6 +341,8 @@ typedef struct PicklerObject {
int fast_nesting;
int fix_imports; /* Indicate whether Pickler should fix
the name of globals for Python 2.x. */
int bytes_as_strings; /* Indicate whether Pickler should pickle
bytes objects using string opcodes. */
PyObject *fast_memo;
} PicklerObject;

Expand Down Expand Up @@ -771,6 +776,7 @@ _Pickler_New(void)
self->fast = 0;
self->fast_nesting = 0;
self->fix_imports = 0;
self->bytes_as_strings = 0;
self->fast_memo = NULL;

self->memo = PyMemoTable_New();
Expand All @@ -791,10 +797,11 @@ _Pickler_New(void)

static int
_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
PyObject *fix_imports_obj)
PyObject *fix_imports_obj, PyObject *bytes_as_strings_obj)
{
long proto = 0;
int fix_imports;
int bytes_as_strings;

if (proto_obj == NULL || proto_obj == Py_None)
proto = DEFAULT_PROTOCOL;
Expand All @@ -813,10 +820,14 @@ _Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
fix_imports = PyObject_IsTrue(fix_imports_obj);
if (fix_imports == -1)
return -1;
bytes_as_strings = PyObject_IsTrue(bytes_as_strings_obj);
if (bytes_as_strings == -1)
return -1;

self->proto = proto;
self->bin = proto > 0;
self->fix_imports = fix_imports && proto < 3;
self->bytes_as_strings = bytes_as_strings && proto < 3;

return 0;
}
Expand Down Expand Up @@ -1693,11 +1704,54 @@ save_float(PicklerObject *self, PyObject *obj)

return 0;
}

/* Return nonzero when a byte has to be written as a \xXX escape: control
   characters (below 0x20), non-ASCII bytes (0x80 and above), the apostrophe
   and the backslash.  Taking the byte as unsigned char keeps the range tests
   independent of whether plain char is signed on the target platform. */
static int
raw_bytes_needs_escape(unsigned char ch)
{
    return ch < 0x20 || ch >= 0x80 || ch == '\'' || ch == '\\';
}

/* Escape the contents of a bytes object.

   Essentially PyObject_Repr(obj) for bytes, except that the result is itself
   a bytes object and carries neither the b prefix nor the surrounding
   quotes.  Every byte selected by raw_bytes_needs_escape() is replaced by
   the four characters \xXX (lowercase hex); all other bytes are copied
   unchanged.

   obj must be a bytes object; the PyBytes_* macros used here do not check.
   Returns a new reference, or NULL with an exception set on failure. */
static PyObject *
raw_bytes_escape(PyObject *obj)
{
    PyObject *result;
    const unsigned char *data;
    Py_ssize_t i, size, escaped_size;
    char *p;

    size = PyBytes_GET_SIZE(obj);
    /* View the input as unsigned bytes so that neither the comparisons nor
       the nibble extraction below rely on implementation-defined behavior
       of a signed char. */
    data = (const unsigned char *)PyBytes_AS_STRING(obj);

    /* In the worst case every input byte expands to four output bytes;
       refuse inputs for which that length would overflow Py_ssize_t. */
    if (size > PY_SSIZE_T_MAX / 4)
        return PyErr_NoMemory();

    /* First pass: compute the exact output length so the result can be
       allocated once, without an oversized scratch buffer. */
    escaped_size = 0;
    for (i = 0; i < size; i++)
        escaped_size += raw_bytes_needs_escape(data[i]) ? 4 : 1;

    result = PyBytes_FromStringAndSize(NULL, escaped_size);
    if (result == NULL)
        return NULL;

    /* Second pass: fill the freshly created (not yet shared) bytes object.
       For empty input the loop does not run and nothing is written. */
    p = PyBytes_AS_STRING(result);
    for (i = 0; i < size; i++) {
        unsigned char ch = data[i];

        if (raw_bytes_needs_escape(ch)) {
            *p++ = '\\';
            *p++ = 'x';
            *p++ = Py_hexdigits[ch >> 4];
            *p++ = Py_hexdigits[ch & 0xf];
        }
        /* Copy everything else as-is */
        else
            *p++ = (char)ch;
    }

    return result;
}

static int
save_bytes(PicklerObject *self, PyObject *obj)
{
if (self->proto < 3) {
if (self->proto < 3 && !self->bytes_as_strings) {
/* Older pickle protocols do not have an opcode for pickling bytes
objects. Therefore, we need to fake the copy protocol (i.e.,
the __reduce__ method) to permit bytes object unpickling.
Expand Down Expand Up @@ -1754,6 +1808,35 @@ save_bytes(PicklerObject *self, PyObject *obj)
Py_DECREF(reduce_value);
return status;
}
else if (self->bytes_as_strings && !self->bin) {
const char string_op = STRING;
PyObject *encoded = NULL;
Py_ssize_t size;

encoded = raw_bytes_escape(obj);
if (encoded == NULL)
goto error;

if (_Pickler_Write(self, &string_op, 1) < 0)
goto error;

if (_Pickler_Write(self, "'", 1) < 0)
goto error;

size = PyBytes_GET_SIZE(encoded);
if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
goto error;

if (_Pickler_Write(self, "'\n", 2) < 0)
goto error;

Py_DECREF(encoded);
return 0;

error:
Py_XDECREF(encoded);
return -1;
}
else {
Py_ssize_t size;
char header[5];
Expand All @@ -1764,12 +1847,13 @@ save_bytes(PicklerObject *self, PyObject *obj)
return -1;

if (size < 256) {
header[0] = SHORT_BINBYTES;
header[0] = (self->bytes_as_strings ? SHORT_BINSTRING
: SHORT_BINBYTES);
header[1] = (unsigned char)size;
len = 2;
}
else if (size <= 0xffffffffL) {
header[0] = BINBYTES;
header[0] = (self->bytes_as_strings ? BINSTRING : BINBYTES);
header[1] = (unsigned char)(size & 0xff);
header[2] = (unsigned char)((size >> 8) & 0xff);
header[3] = (unsigned char)((size >> 16) & 0xff);
Expand All @@ -1788,9 +1872,6 @@ save_bytes(PicklerObject *self, PyObject *obj)
if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
return -1;

if (memo_put(self, obj) < 0)
return -1;

return 0;
}
}
Expand Down Expand Up @@ -3418,25 +3499,35 @@ PyDoc_STRVAR(Pickler_doc,
"\n"
"If fix_imports is True and protocol is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python\n"
"2.x, so that the pickle data stream is readable with Python 2.x.\n");
"2.x, so that the pickle data stream is readable with Python 2.x.\n"
"\n"
"If bytes_as_strings is True and protocol is less than 3, pickle\n"
"will store byte strings as native strings, i.e. the way Python 2.x\n"
"would've stored them. Be aware that such pickles cannot be\n"
"reliably unpickled on Python 3 if you do not use errors='bytes',\n"
"and even then they might be silently converted to Unicode objects.\n");


static int
Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
{
static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
static char *kwlist[] = {
"file", "protocol", "fix_imports", "bytes_as_strings", 0};
PyObject *file;
PyObject *proto_obj = NULL;
PyObject *fix_imports = Py_True;
PyObject *bytes_as_strings = Py_False;

if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
kwlist, &file, &proto_obj, &fix_imports))
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOO:Pickler",
kwlist, &file, &proto_obj, &fix_imports,
&bytes_as_strings))
return -1;

/* In case of multiple __init__() calls, clear previous content. */
if (self->write != NULL)
(void)Pickler_clear(self);

if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
if (_Pickler_SetProtocol(self, proto_obj, fix_imports, bytes_as_strings) < 0)
return -1;

if (_Pickler_SetOutputStream(self, file) < 0)
Expand Down Expand Up @@ -6014,7 +6105,7 @@ static PyTypeObject Unpickler_Type = {
};

PyDoc_STRVAR(pickle_dump_doc,
"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
"dump(obj, file, protocol=None, *, fix_imports=True, bytes_as_strings=False)\n"
"\n"
"Write a pickled representation of obj to the open file object file. This\n"
"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
Expand All @@ -6034,16 +6125,24 @@ PyDoc_STRVAR(pickle_dump_doc,
"\n"
"If fix_imports is True and protocol is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python 2.x,\n"
"so that the pickle data stream is readable with Python 2.x.\n");
"so that the pickle data stream is readable with Python 2.x.\n"
"\n"
"If bytes_as_strings is True and protocol is less than 3, pickle\n"
"will store byte strings as native strings, i.e. the way Python 2.x\n"
"would've stored them. Be aware that such pickles cannot be\n"
"reliably unpickled on Python 3 if you do not use errors='bytes',\n"
"and even then they might be silently converted to Unicode objects.\n");

static PyObject *
pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
{
static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
static char *kwlist[] = {
"obj", "file", "protocol", "fix_imports", "bytes_as_strings", 0};
PyObject *obj;
PyObject *file;
PyObject *proto = NULL;
PyObject *fix_imports = Py_True;
PyObject *bytes_as_strings = Py_False;
PicklerObject *pickler;

/* fix_imports is a keyword-only argument. */
Expand All @@ -6054,15 +6153,16 @@ pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
return NULL;
}

if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
&obj, &file, &proto, &fix_imports))
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OOO:dump", kwlist,
&obj, &file, &proto, &fix_imports,
&bytes_as_strings))
return NULL;

pickler = _Pickler_New();
if (pickler == NULL)
return NULL;

if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
if (_Pickler_SetProtocol(pickler, proto, fix_imports, bytes_as_strings) < 0)
goto error;

if (_Pickler_SetOutputStream(pickler, file) < 0)
Expand All @@ -6083,7 +6183,7 @@ pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
}

PyDoc_STRVAR(pickle_dumps_doc,
"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
"dumps(obj, protocol=None, *, fix_imports=True, bytes_as_strings=False) -> bytes\n"
"\n"
"Return the pickled representation of the object as a bytes\n"
"object, instead of writing it to a file.\n"
Expand All @@ -6098,16 +6198,24 @@ PyDoc_STRVAR(pickle_dumps_doc,
"\n"
"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python 2.x,\n"
"so that the pickle data stream is readable with Python 2.x.\n");
"so that the pickle data stream is readable with Python 2.x.\n"
"\n"
"If bytes_as_strings is True and protocol is less than 3, pickle\n"
"will store byte strings as native strings, i.e. the way Python 2.x\n"
"would've stored them. Be aware that such pickles cannot be\n"
"reliably unpickled on Python 3 if you do not use errors='bytes',\n"
"and even then they might be silently converted to Unicode objects.\n");

static PyObject *
pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
{
static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
static char *kwlist[] = {
"obj", "protocol", "fix_imports", "bytes_as_strings", 0};
PyObject *obj;
PyObject *proto = NULL;
PyObject *result;
PyObject *fix_imports = Py_True;
PyObject *bytes_as_strings = Py_False;
PicklerObject *pickler;

/* fix_imports is a keyword-only argument. */
Expand All @@ -6118,15 +6226,16 @@ pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
return NULL;
}

if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
&obj, &proto, &fix_imports))
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OOO:dumps", kwlist,
&obj, &proto, &fix_imports,
&bytes_as_strings))
return NULL;

pickler = _Pickler_New();
if (pickler == NULL)
return NULL;

if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
if (_Pickler_SetProtocol(pickler, proto, fix_imports, bytes_as_strings) < 0)
goto error;

if (dump(pickler, obj) < 0)
Expand Down
19 changes: 19 additions & 0 deletions src/zodbpickle/tests/pickletester.py
Expand Up @@ -1332,6 +1332,25 @@ def test_dump_dumps(self):
p = pickle.dumps(b'\x00', protocol=0, bytes_as_strings=True)
self.assertEqual(p, b"S'\\x00'\n.")

def test_save_bytes_roundtrip(self):
# For every protocol in `protocols` and every possible byte value, a
# one-byte bytes object pickled with bytes_as_strings=True must compare
# equal to the original after being loaded back with encoding='bytes'.
for proto in protocols:
for ch in range(256):
data = bytes([ch])
pickled = self.dumps(data, proto, bytes_as_strings=True)
unpickled = self.loads(pickled, encoding='bytes')
self.assertEqual(unpickled, data)

def test_dump_dumps(self):
# see that the bytes_as_strings kwarg is accepted everywhere
# (the other tests go through the Pickler API)
# XXX: this is not very abstract; the only way to test the Python
# implementation of dump and dumps is to remove the C module
f = io.BytesIO()
pickle.dump(b'\x00', f, protocol=0, bytes_as_strings=True)
self.assertEqual(f.getvalue(), b"S'\\x00'\n.")
p = pickle.dumps(b'\x00', protocol=0, bytes_as_strings=True)
self.assertEqual(p, b"S'\\x00'\n.")


class BigmemPickleTests(unittest.TestCase):

Expand Down
3 changes: 1 addition & 2 deletions tox.ini
@@ -1,6 +1,5 @@
[tox]
envlist = py33
##envlist = py32,py33
envlist = py32,py33

[testenv]
deps =
Expand Down

0 comments on commit 073bb89

Please sign in to comment.