Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove LegacyMsgPack codec #218

Merged
merged 5 commits into from Apr 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/release.rst
Expand Up @@ -4,6 +4,9 @@ Release notes
Upcoming Release
----------------

* Remove LegacyMsgPack codec.
By :user:`James Bourbeau <jrbourbeau>`, :issue:`218`.

* Drop support for Python 2.
By :user:`James Bourbeau <jrbourbeau>`, :issue:`220`.

Expand Down
Binary file removed fixture/msgpack/array.00.npy
Binary file not shown.
Binary file removed fixture/msgpack/array.01.npy
Binary file not shown.
Binary file removed fixture/msgpack/array.02.npy
Binary file not shown.
Binary file removed fixture/msgpack/array.03.npy
Binary file not shown.
Binary file removed fixture/msgpack/array.04.npy
Binary file not shown.
Binary file removed fixture/msgpack/array.05.npy
Binary file not shown.
Binary file removed fixture/msgpack/array.06.npy
Binary file not shown.
Binary file removed fixture/msgpack/array.07.npy
Binary file not shown.
4 changes: 0 additions & 4 deletions fixture/msgpack/codec.00/config.json

This file was deleted.

1 change: 0 additions & 1 deletion fixture/msgpack/codec.00/encoded.00.dat

This file was deleted.

Binary file removed fixture/msgpack/codec.00/encoded.01.dat
Binary file not shown.
Binary file removed fixture/msgpack/codec.00/encoded.02.dat
Binary file not shown.
Binary file removed fixture/msgpack/codec.00/encoded.03.dat
Binary file not shown.
1 change: 0 additions & 1 deletion fixture/msgpack/codec.00/encoded.04.dat

This file was deleted.

Binary file removed fixture/msgpack/codec.00/encoded.05.dat
Binary file not shown.
Binary file removed fixture/msgpack/codec.00/encoded.06.dat
Binary file not shown.
Binary file removed fixture/msgpack/codec.00/encoded.07.dat
Binary file not shown.
Binary file added fixture/msgpack2/array.10.npy
Binary file not shown.
Binary file added fixture/msgpack2/codec.00/encoded.10.dat
Binary file not shown.
3 changes: 1 addition & 2 deletions numcodecs/__init__.py
Expand Up @@ -90,9 +90,8 @@
register_codec(Pickle)

try:
from numcodecs.msgpacks import MsgPack, LegacyMsgPack
from numcodecs.msgpacks import MsgPack
register_codec(MsgPack)
register_codec(LegacyMsgPack)
except ImportError: # pragma: no cover
pass

Expand Down
41 changes: 0 additions & 41 deletions numcodecs/msgpacks.py
Expand Up @@ -81,44 +81,3 @@ def __repr__(self):
'MsgPack(raw={!r}, use_bin_type={!r}, use_single_float={!r})'
.format(self.raw, self.use_bin_type, self.use_single_float)
)


class LegacyMsgPack(Codec):
"""Deprecated MsgPack codec.

.. deprecated:: 0.6.0
This codec is maintained to enable decoding of data previously encoded, however
there may be issues with encoding and correctly decoding certain object arrays,
hence the :class:`MsgPack` codec should be used instead for encoding new data.
See https://github.com/zarr-developers/numcodecs/issues/76 and
https://github.com/zarr-developers/numcodecs/pull/77 for more information.

"""

codec_id = 'msgpack'

def __init__(self, encoding='utf-8'):
self.encoding = encoding

def encode(self, buf):
buf = np.asarray(buf)
items = buf.tolist()
items.append(buf.dtype.str)
return msgpack.packb(items, encoding=self.encoding)

def decode(self, buf, out=None):
buf = ensure_contiguous_ndarray(buf)
items = msgpack.unpackb(buf, encoding=self.encoding)
dec = np.array(items[:-1], dtype=items[-1])
if out is not None:
np.copyto(out, dec)
return out
else:
return dec

def get_config(self):
return dict(id=self.codec_id,
encoding=self.encoding)

def __repr__(self):
return 'LegacyMsgPack(encoding=%r)' % self.encoding
76 changes: 10 additions & 66 deletions numcodecs/tests/test_msgpacks.py
@@ -1,17 +1,11 @@
import unittest
import warnings


import numpy as np


try:
from numcodecs.msgpacks import LegacyMsgPack, MsgPack
default_codec = MsgPack()
# N.B., legacy codec is broken, see tests below. Also legacy code generates
# PendingDeprecationWarning due to use of encoding argument, which we ignore here
# as not relevant.
legacy_codec = LegacyMsgPack()
from numcodecs.msgpacks import MsgPack
except ImportError: # pragma: no cover
raise unittest.SkipTest("msgpack not available")

Expand All @@ -35,44 +29,31 @@
np.array(greetings * 100, dtype=object),
np.array([b'foo', b'bar', b'baz'] * 300, dtype=object),
np.array([g.encode('utf-8') for g in greetings] * 100, dtype=object),
np.array([[0, 1], [2, 3]], dtype=object),
]


legacy_arrays = arrays[:8]


def test_encode_decode():

for arr in arrays:
check_encode_decode_array(arr, default_codec)

with warnings.catch_warnings():
warnings.simplefilter('ignore', PendingDeprecationWarning)
for arr in legacy_arrays:
check_encode_decode_array(arr, legacy_codec)
check_encode_decode_array(arr, MsgPack())


def test_config():
for codec in [default_codec, legacy_codec]:
check_config(codec)
check_config(MsgPack())


def test_repr():
check_repr("MsgPack(raw=False, use_bin_type=True, use_single_float=False)")
check_repr("MsgPack(raw=True, use_bin_type=False, use_single_float=True)")
check_repr("LegacyMsgPack(encoding='utf-8')")
check_repr("LegacyMsgPack(encoding='ascii')")


def test_backwards_compatibility():
check_backwards_compatibility(default_codec.codec_id, arrays, [default_codec])
with warnings.catch_warnings():
warnings.simplefilter('ignore', PendingDeprecationWarning)
check_backwards_compatibility(legacy_codec.codec_id, legacy_arrays,
[legacy_codec])
codec = MsgPack()
check_backwards_compatibility(codec.codec_id, arrays, [codec])


def test_non_numpy_inputs():
codec = MsgPack()
# numpy will infer a range of different shapes and dtypes for these inputs.
# Make sure that round-tripping through encode preserves this.
data = [
Expand All @@ -91,38 +72,16 @@ def test_non_numpy_inputs():
[{b"key": b"value"}, [b"list", b"of", b"strings"]],
]
for input_data in data:
actual = default_codec.decode(default_codec.encode(input_data))
actual = codec.decode(codec.encode(input_data))
expect = np.array(input_data)
assert expect.shape == actual.shape
assert np.array_equal(expect, actual)


def test_legacy_codec_broken():
# Simplest demonstration of why the MsgPack codec needed to be changed.
# The LegacyMsgPack codec didn't include shape information in the serialised
# bytes, which gave different shapes in the input and output under certain
# circumstances.
a = np.empty(2, dtype=object)
a[0] = [0, 1]
a[1] = [2, 3]
codec = LegacyMsgPack()
with warnings.catch_warnings():
warnings.simplefilter('ignore', PendingDeprecationWarning)
b = codec.decode(codec.encode(a))
assert a.shape == (2,)
assert b.shape == (2, 2)
assert not np.array_equal(a, b)

# Now show that the MsgPack codec handles this case properly.
codec = MsgPack()
b = codec.decode(codec.encode(a))
assert np.array_equal(a, b)
assert a.shape == b.shape


def test_encode_decode_shape_dtype_preserved():
codec = MsgPack()
for arr in arrays:
actual = default_codec.decode(default_codec.encode(arr))
actual = codec.decode(codec.encode(arr))
assert arr.shape == actual.shape
assert arr.dtype == actual.dtype

Expand Down Expand Up @@ -157,18 +116,3 @@ def test_bytes():
assert not np.array_equal(unicode_arr, b)
assert isinstance(b[0], bytes)
assert b[0] == b'foo'

# legacy codec
codec = LegacyMsgPack()
with warnings.catch_warnings():
warnings.simplefilter('ignore', PendingDeprecationWarning)
# broken for bytes array, round-trips bytes to unicode
b = codec.decode(codec.encode(bytes_arr))
assert not np.array_equal(bytes_arr, b)
assert isinstance(b[0], str)
assert b[0] == 'foo'
# works for unicode array, round-trips unicode to unicode
b = codec.decode(codec.encode(unicode_arr))
assert np.array_equal(unicode_arr, b)
assert isinstance(b[0], str)
assert b[0] == 'foo'