zarr-developers · jakirkham · Apr 1, 2020 · Mar 26, 2020 · Mar 26, 2020 · Mar 26, 2020
diff --git a/docs/release.rst b/docs/release.rst
@@ -4,6 +4,9 @@ Release notes
 Upcoming Release
 ----------------
 
+* Remove LegacyMsgPack codec.
+  By :user:`James Bourbeau <jrbourbeau>`, :issue:`218`.
+
 * Drop support for Python 2.
   By :user:`James Bourbeau <jrbourbeau>`, :issue:`220`.
 

diff --git a/fixture/msgpack/array.00.npy b/fixture/msgpack/array.00.npy
diff --git a/fixture/msgpack/array.01.npy b/fixture/msgpack/array.01.npy
diff --git a/fixture/msgpack/array.02.npy b/fixture/msgpack/array.02.npy
diff --git a/fixture/msgpack/array.03.npy b/fixture/msgpack/array.03.npy
diff --git a/fixture/msgpack/array.04.npy b/fixture/msgpack/array.04.npy
diff --git a/fixture/msgpack/array.05.npy b/fixture/msgpack/array.05.npy
diff --git a/fixture/msgpack/array.06.npy b/fixture/msgpack/array.06.npy
diff --git a/fixture/msgpack/array.07.npy b/fixture/msgpack/array.07.npy
diff --git a/fixture/msgpack/codec.00/config.json b/fixture/msgpack/codec.00/config.json
diff --git a/fixture/msgpack/codec.00/encoded.00.dat b/fixture/msgpack/codec.00/encoded.00.dat
diff --git a/fixture/msgpack/codec.00/encoded.01.dat b/fixture/msgpack/codec.00/encoded.01.dat
diff --git a/fixture/msgpack/codec.00/encoded.02.dat b/fixture/msgpack/codec.00/encoded.02.dat
diff --git a/fixture/msgpack/codec.00/encoded.03.dat b/fixture/msgpack/codec.00/encoded.03.dat
diff --git a/fixture/msgpack/codec.00/encoded.04.dat b/fixture/msgpack/codec.00/encoded.04.dat
diff --git a/fixture/msgpack/codec.00/encoded.05.dat b/fixture/msgpack/codec.00/encoded.05.dat
diff --git a/fixture/msgpack/codec.00/encoded.06.dat b/fixture/msgpack/codec.00/encoded.06.dat
diff --git a/fixture/msgpack/codec.00/encoded.07.dat b/fixture/msgpack/codec.00/encoded.07.dat
diff --git a/fixture/msgpack2/array.10.npy b/fixture/msgpack2/array.10.npy
diff --git a/fixture/msgpack2/codec.00/encoded.10.dat b/fixture/msgpack2/codec.00/encoded.10.dat
diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py
@@ -90,9 +90,8 @@
 register_codec(Pickle)
 
 try:
-    from numcodecs.msgpacks import MsgPack, LegacyMsgPack
+    from numcodecs.msgpacks import MsgPack
     register_codec(MsgPack)
-    register_codec(LegacyMsgPack)
 except ImportError: # pragma: no cover
     pass
 

diff --git a/numcodecs/msgpacks.py b/numcodecs/msgpacks.py
@@ -81,44 +81,3 @@ def __repr__(self):
             'MsgPack(raw={!r}, use_bin_type={!r}, use_single_float={!r})'
             .format(self.raw, self.use_bin_type, self.use_single_float)
         )
-
-
-class LegacyMsgPack(Codec):
-    """Deprecated MsgPack codec.
-
-    .. deprecated:: 0.6.0
-        This codec is maintained to enable decoding of data previously encoded, however
-        there may be issues with encoding and correctly decoding certain object arrays,
-        hence the :class:`MsgPack` codec should be used instead for encoding new data.
-        See https://github.com/zarr-developers/numcodecs/issues/76 and
-        https://github.com/zarr-developers/numcodecs/pull/77 for more information.
-
-    """
-
-    codec_id = 'msgpack'
-
-    def __init__(self, encoding='utf-8'):
-        self.encoding = encoding
-
-    def encode(self, buf):
-        buf = np.asarray(buf)
-        items = buf.tolist()
-        items.append(buf.dtype.str)
-        return msgpack.packb(items, encoding=self.encoding)
-
-    def decode(self, buf, out=None):
-        buf = ensure_contiguous_ndarray(buf)
-        items = msgpack.unpackb(buf, encoding=self.encoding)
-        dec = np.array(items[:-1], dtype=items[-1])
-        if out is not None:
-            np.copyto(out, dec)
-            return out
-        else:
-            return dec
-
-    def get_config(self):
-        return dict(id=self.codec_id,
-                    encoding=self.encoding)
-
-    def __repr__(self):
-        return 'LegacyMsgPack(encoding=%r)' % self.encoding
diff --git a/numcodecs/tests/test_msgpacks.py b/numcodecs/tests/test_msgpacks.py
@@ -1,17 +1,11 @@
 import unittest
-import warnings
 
 
 import numpy as np
 
 
 try:
-    from numcodecs.msgpacks import LegacyMsgPack, MsgPack
-    default_codec = MsgPack()
-    # N.B., legacy codec is broken, see tests below. Also legacy code generates
-    # PendingDeprecationWarning due to use of encoding argument, which we ignore here
-    # as not relevant.
-    legacy_codec = LegacyMsgPack()
+    from numcodecs.msgpacks import MsgPack
 except ImportError:  # pragma: no cover
     raise unittest.SkipTest("msgpack not available")
 
@@ -35,44 +29,31 @@
     np.array(greetings * 100, dtype=object),
     np.array([b'foo', b'bar', b'baz'] * 300, dtype=object),
     np.array([g.encode('utf-8') for g in greetings] * 100, dtype=object),
+    np.array([[0, 1], [2, 3]], dtype=object),
 ]
 
 
-legacy_arrays = arrays[:8]
-
-
 def test_encode_decode():
-
     for arr in arrays:
-        check_encode_decode_array(arr, default_codec)
-
-    with warnings.catch_warnings():
-        warnings.simplefilter('ignore', PendingDeprecationWarning)
-        for arr in legacy_arrays:
-            check_encode_decode_array(arr, legacy_codec)
+        check_encode_decode_array(arr, MsgPack())
 
 
 def test_config():
-    for codec in [default_codec, legacy_codec]:
-        check_config(codec)
+    check_config(MsgPack())
 
 
 def test_repr():
     check_repr("MsgPack(raw=False, use_bin_type=True, use_single_float=False)")
     check_repr("MsgPack(raw=True, use_bin_type=False, use_single_float=True)")
-    check_repr("LegacyMsgPack(encoding='utf-8')")
-    check_repr("LegacyMsgPack(encoding='ascii')")
 
 
 def test_backwards_compatibility():
-    check_backwards_compatibility(default_codec.codec_id, arrays, [default_codec])
-    with warnings.catch_warnings():
-        warnings.simplefilter('ignore', PendingDeprecationWarning)
-        check_backwards_compatibility(legacy_codec.codec_id, legacy_arrays,
-                                      [legacy_codec])
+    codec = MsgPack()
+    check_backwards_compatibility(codec.codec_id, arrays, [codec])
 
 
 def test_non_numpy_inputs():
+    codec = MsgPack()
     # numpy will infer a range of different shapes and dtypes for these inputs.
     # Make sure that round-tripping through encode preserves this.
     data = [
@@ -91,38 +72,16 @@ def test_non_numpy_inputs():
         [{b"key": b"value"}, [b"list", b"of", b"strings"]],
     ]
     for input_data in data:
-        actual = default_codec.decode(default_codec.encode(input_data))
+        actual = codec.decode(codec.encode(input_data))
         expect = np.array(input_data)
         assert expect.shape == actual.shape
         assert np.array_equal(expect, actual)
 
 
-def test_legacy_codec_broken():
-    # Simplest demonstration of why the MsgPack codec needed to be changed.
-    # The LegacyMsgPack codec didn't include shape information in the serialised
-    # bytes, which gave different shapes in the input and output under certain
-    # circumstances.
-    a = np.empty(2, dtype=object)
-    a[0] = [0, 1]
-    a[1] = [2, 3]
-    codec = LegacyMsgPack()
-    with warnings.catch_warnings():
-        warnings.simplefilter('ignore', PendingDeprecationWarning)
-        b = codec.decode(codec.encode(a))
-    assert a.shape == (2,)
-    assert b.shape == (2, 2)
-    assert not np.array_equal(a, b)
-
-    # Now show that the MsgPack codec handles this case properly.
-    codec = MsgPack()
-    b = codec.decode(codec.encode(a))
-    assert np.array_equal(a, b)
-    assert a.shape == b.shape
-
-
 def test_encode_decode_shape_dtype_preserved():
+    codec = MsgPack()
     for arr in arrays:
-        actual = default_codec.decode(default_codec.encode(arr))
+        actual = codec.decode(codec.encode(arr))
         assert arr.shape == actual.shape
         assert arr.dtype == actual.dtype
 
@@ -157,18 +116,3 @@ def test_bytes():
     assert not np.array_equal(unicode_arr, b)
     assert isinstance(b[0], bytes)
     assert b[0] == b'foo'
-
-    # legacy codec
-    codec = LegacyMsgPack()
-    with warnings.catch_warnings():
-        warnings.simplefilter('ignore', PendingDeprecationWarning)
-        # broken for bytes array, round-trips bytes to unicode
-        b = codec.decode(codec.encode(bytes_arr))
-        assert not np.array_equal(bytes_arr, b)
-        assert isinstance(b[0], str)
-        assert b[0] == 'foo'
-        # works for unicode array, round-trips unicode to unicode
-        b = codec.decode(codec.encode(unicode_arr))
-        assert np.array_equal(unicode_arr, b)
-        assert isinstance(b[0], str)
-        assert b[0] == 'foo'