From da31f3e1dcec561239d7339ed852141e4791cf88 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 2 Dec 2018 16:12:29 -0500 Subject: [PATCH 1/2] Use (new) buffer protocol in `Pickle.decode` On Python 3, `pickle.loads` is able to take anything that conforms to the (new) buffer protocol. So there has been no issue giving it an `ndarray` to work with. Unfortunately, on Python 2, `pickle.loads` requires a `bytes` object specifically and is not able to take any type implementing the (new) buffer protocol. So we have been going ahead and coercing everything to `bytes` on Python 2. However, it turns out that `cStringIO`'s `StringIO` on Python 2 does support the buffer protocol. Thus a `StringIO` object can be created on Python 2 without copying the data. While this still cannot be used with `pickle.loads`, it can be used with `pickle.load`, which special-cases reading from `StringIO` by leveraging its read function; this amounts to sharing a pointer between `StringIO` and `pickle.load`. This achieves a no-copy unpickler for Python 2. 
ref: http://www.hydrogen18.com/blog/unpickling-buffers.html ref: https://github.com/python/cpython/blob/2.7/Modules/cStringIO.c#L716 ref: https://github.com/python/cpython/blob/2.7/Modules/cStringIO.c#L681 ref: https://github.com/python/cpython/blob/2.7/Modules/cPickle.c#L614 ref: https://github.com/python/cpython/blob/2.7/Modules/cStringIO.c#L160 --- numcodecs/pickles.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/numcodecs/pickles.py b/numcodecs/pickles.py index 7e1c26f7..9c22b9cb 100644 --- a/numcodecs/pickles.py +++ b/numcodecs/pickles.py @@ -6,11 +6,12 @@ from .abc import Codec -from .compat import PY2, ensure_bytes, ensure_contiguous_ndarray +from .compat import PY2, ensure_contiguous_ndarray if PY2: # pragma: py3 no cover import cPickle as pickle + from cStringIO import StringIO else: # pragma: py2 no cover import pickle @@ -48,12 +49,13 @@ def encode(self, buf): return pickle.dumps(buf, protocol=self.protocol) def decode(self, buf, out=None): + buf = ensure_contiguous_ndarray(buf) + if PY2: # pragma: py3 no cover - buf = ensure_bytes(buf) + dec = pickle.load(StringIO(buf)) else: # pragma: py2 no cover - buf = ensure_contiguous_ndarray(buf) + dec = pickle.loads(buf) - dec = pickle.loads(buf) if out is not None: np.copyto(out, dec) return out From 3e003bd87324e72a55de0444603d3d52e1ba15eb Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 2 Dec 2018 16:15:24 -0500 Subject: [PATCH 2/2] Link this PR to `Pickle.decode` release note --- docs/release.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index 762a7523..b349215d 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -7,7 +7,7 @@ Release notes ----- * Handle (new) buffer protocol conforming types in ``Pickle.decode``. - By :user:`John Kirkham `, :issue:`143`. + By :user:`John Kirkham `, :issue:`143`, :issue:`150`. * Fix other ``VLen*`` encode() methods to return numpy arrays as well. 
By :user:`John Kirkham `, :issue:`144`.