From 590ace62de56317d3f1d65aa311eead7bb1d3d58 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 11 Jan 2022 10:43:33 +0100 Subject: [PATCH 01/29] Implement CuPyCPUCompressor and the meta_array argument This is base of Co-authored-by: John Kirkham --- zarr/core.py | 55 +++++++++++++----- zarr/creation.py | 13 ++++- zarr/cupy.py | 52 +++++++++++++++++ zarr/hierarchy.py | 2 +- zarr/storage.py | 6 +- zarr/tests/test_core.py | 4 +- zarr/tests/test_cupy.py | 122 ++++++++++++++++++++++++++++++++++++++++ zarr/util.py | 111 +++++++++++++++++++++++++++++++++++- 8 files changed, 341 insertions(+), 24 deletions(-) create mode 100644 zarr/cupy.py create mode 100644 zarr/tests/test_cupy.py diff --git a/zarr/core.py b/zarr/core.py index 6f6b468e3..a04c84351 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -7,7 +7,7 @@ from functools import reduce import numpy as np -from numcodecs.compat import ensure_bytes, ensure_ndarray +from numcodecs.compat import ensure_bytes from collections.abc import MutableMapping @@ -44,6 +44,7 @@ normalize_shape, normalize_storage_path, PartialReadBuffer, + ensure_ndarray ) @@ -91,6 +92,12 @@ class Array: .. versionadded:: 2.11 + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + .. versionadded:: 2.12 + Attributes ---------- @@ -122,6 +129,7 @@ class Array: vindex oindex write_empty_chunks + meta_array Methods ------- @@ -155,6 +163,7 @@ def __init__( cache_attrs=True, partial_decompress=False, write_empty_chunks=True, + meta_array=None, ): # N.B., expect at this point store is fully initialized with all # configuration metadata fully specified and normalized @@ -175,6 +184,11 @@ def __init__( self._is_view = False self._partial_decompress = partial_decompress self._write_empty_chunks = write_empty_chunks + self._meta_array = meta_array + if meta_array is not None: + self._meta_array = np.empty_like(meta_array) + else: + self._meta_array = np.empty(()) # initialize metadata self._load_metadata() @@ -487,6 +501,13 @@ def write_empty_chunks(self) -> bool: """ return self._write_empty_chunks + @property + def meta_array(self): + """An array-like instance to use for determining arrays to create and return + to users. + """ + return self._meta_array + def __eq__(self, other): return ( isinstance(other, Array) and @@ -861,7 +882,7 @@ def _get_basic_selection_zd(self, selection, out=None, fields=None): except KeyError: # chunk not initialized - chunk = np.zeros((), dtype=self._dtype) + chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype) if self._fill_value is not None: chunk.fill(self._fill_value) @@ -1165,7 +1186,8 @@ def _get_selection(self, indexer, out=None, fields=None): # setup output array if out is None: - out = np.empty(out_shape, dtype=out_dtype, order=self._order) + out = np.empty_like(self._meta_array, shape=out_shape, + dtype=out_dtype, order=self._order) else: check_array_shape('out', out, out_shape) @@ -1539,9 +1561,13 @@ def set_coordinate_selection(self, selection, value, fields=None): # setup indexer indexer = CoordinateIndexer(selection, self) - # handle value - need to flatten + # handle value - need ndarray-like flatten value if not is_scalar(value, self._dtype): - value = np.asanyarray(value) + try: + value = ensure_ndarray(value) + except TypeError: + # Handle types like `list` or `tuple` + value = np.array(value, like=self._meta_array) if hasattr(value, 'shape') and len(value.shape) > 1: value = value.reshape(-1) @@ -1644,7 +1670,7 @@ def _set_basic_selection_zd(self, selection, value, fields=None): except KeyError: # chunk not initialized - chunk = np.zeros((), dtype=self._dtype) + chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype) if self._fill_value is not None: chunk.fill(self._fill_value) @@ -1704,7 +1730,7 @@ def _set_selection(self, indexer, value, fields=None): pass else: if not hasattr(value, 'shape'): - value = np.asanyarray(value) + value = np.array(value, like=self._meta_array) check_array_shape('value', value, sel_shape) # iterate over chunks in range @@ -1772,8 +1798,9 @@ def _process_chunk( self._dtype != object): dest = out[out_selection] + dest_is_writable = getattr(dest, "writeable", True) write_direct = ( - dest.flags.writeable and + dest_is_writable and ( (self._order == 'C' and dest.flags.c_contiguous) or (self._order == 'F' and dest.flags.f_contiguous) @@ -1800,7 +1827,7 @@ def _process_chunk( if partial_read_decode: cdata.prepare_chunk() # size of chunk - tmp = np.empty(self._chunks, dtype=self.dtype) + tmp = np.empty(self._chunks, dtype=self.dtype, like=self._meta_array) index_selection = PartialChunkIterator(chunk_selection, self.chunks) for start, nitems, partial_out_selection in index_selection: expected_shape = [ @@ -2014,7 +2041,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): if is_scalar(value, self._dtype): # setup array filled with value - chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) + chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array) chunk.fill(value) else: @@ -2034,14 +2061,14 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # chunk not initialized if self._fill_value is not None: - chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) + chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array) chunk.fill(self._fill_value) elif self._dtype == object: chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) else: # N.B., use zeros here so any region beyond the array has consistent # and compressible data - chunk = np.zeros(self._chunks, dtype=self._dtype, order=self._order) + chunk = np.zeros(self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array) else: @@ -2371,7 +2398,7 @@ def append(self, data, axis=0): Parameters ---------- - data : array_like + data : array-like Data to be appended. axis : int Axis along which to append. @@ -2407,7 +2434,7 @@ def _append_nosync(self, data, axis=0): # ensure data is array-like if not hasattr(data, 'shape'): - data = np.asanyarray(data) + data = np.array(data, like=self._meta_array) # ensure shapes are compatible for non-append dimensions self_shape_preserved = tuple(s for i, s in enumerate(self._shape) diff --git a/zarr/creation.py b/zarr/creation.py index d0dad231c..d36849be5 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -19,7 +19,9 @@ def create(shape, chunks=True, dtype=None, compressor='default', fill_value=0, order='C', store=None, synchronizer=None, overwrite=False, path=None, chunk_store=None, filters=None, cache_metadata=True, cache_attrs=True, read_only=False, - object_codec=None, dimension_separator=None, write_empty_chunks=True, **kwargs): + object_codec=None, dimension_separator=None, write_empty_chunks=True, + meta_array=None, + **kwargs): """Create an array. Parameters @@ -80,6 +82,13 @@ def create(shape, chunks=True, dtype=None, compressor='default', as only chunks with non-fill-value data are stored, at the expense of overhead associated with checking the data of each chunk. + .. versionadded:: 2.11 + + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + .. versionadded:: 2.12 Returns ------- @@ -152,7 +161,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', # instantiate array z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer, cache_metadata=cache_metadata, cache_attrs=cache_attrs, read_only=read_only, - write_empty_chunks=write_empty_chunks) + write_empty_chunks=write_empty_chunks, meta_array=meta_array) return z diff --git a/zarr/cupy.py b/zarr/cupy.py new file mode 100644 index 000000000..ad706d922 --- /dev/null +++ b/zarr/cupy.py @@ -0,0 +1,52 @@ +from numcodecs.abc import Codec +from numcodecs.registry import get_codec, register_codec + +from .util import ensure_contiguous_ndarray + + +class CuPyCPUCompressor(Codec): + codec_id = "cupy_cpu_compressor" + + def __init__(self, cpu_compressor: Codec = None): + self.cpu_compressor = cpu_compressor + + def encode(self, buf): + import cupy + + buf = cupy.asnumpy(ensure_contiguous_ndarray(buf)) + if self.cpu_compressor: + buf = self.cpu_compressor.encode(buf) + return buf + + def decode(self, chunk, out=None): + import cupy + + if out is not None: + cpu_out = cupy.asnumpy(out) + else: + cpu_out = None + if self.cpu_compressor: + chunk = self.cpu_compressor.decode(chunk, cpu_out) + if out is None: + cpu_out = chunk + chunk = cupy.asarray(ensure_contiguous_ndarray(chunk)) + if out is not None: + cupy.copyto(out, chunk.view(dtype=out.dtype), casting="no") + chunk = out + return chunk + + def get_config(self): + cc_config = self.cpu_compressor.get_config() if self.cpu_compressor else None + return { + "id": self.codec_id, + "cpu_compressor_config": cc_config, + } + + @classmethod + def from_config(cls, config): + cc_config = config.get("cpu_compressor_config", None) + cpu_compressor = get_codec(cc_config) if cc_config else None + return cls(cpu_compressor=cpu_compressor) + + +register_codec(CuPyCPUCompressor) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 763a5f163..716730e22 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -768,7 +768,7 @@ def create_dataset(self, name, **kwargs): ---------- name : string Array name. - data : array_like, optional + data : array-like, optional Initial data. shape : int or tuple of ints Array shape. diff --git a/zarr/storage.py b/zarr/storage.py index 7f572d35f..3ff15304b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -37,8 +37,7 @@ from numcodecs.compat import ( ensure_bytes, - ensure_text, - ensure_contiguous_ndarray + ensure_text ) from numcodecs.registry import codec_registry @@ -54,7 +53,8 @@ from zarr.util import (buffer_size, json_loads, nolock, normalize_chunks, normalize_dimension_separator, normalize_dtype, normalize_fill_value, normalize_order, - normalize_shape, normalize_storage_path, retry_call) + normalize_shape, normalize_storage_path, retry_call, + ensure_contiguous_ndarray) from zarr._storage.absstore import ABSStore # noqa: F401 from zarr._storage.store import (_listdir_from_keys, diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 742313288..5f6ae0f59 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -12,7 +12,7 @@ from numcodecs import (BZ2, JSON, LZ4, Blosc, Categorize, Delta, FixedScaleOffset, GZip, MsgPack, Pickle, VLenArray, VLenBytes, VLenUTF8, Zlib) -from numcodecs.compat import ensure_bytes, ensure_ndarray +from numcodecs.compat import ensure_bytes from numcodecs.tests.common import greetings from numpy.testing import assert_array_almost_equal, assert_array_equal from pkg_resources import parse_version @@ -35,7 +35,7 @@ init_array, init_group, ) -from zarr.util import buffer_size +from zarr.util import buffer_size, ensure_ndarray from zarr.tests.util import abs_container, skip_test_env_var, have_fsspec # noinspection PyMethodMayBeStatic diff --git a/zarr/tests/test_cupy.py b/zarr/tests/test_cupy.py new file mode 100644 index 000000000..0e787c7e1 --- /dev/null +++ b/zarr/tests/test_cupy.py @@ -0,0 +1,122 @@ +import numpy as np +import pytest + +import zarr.codecs +from zarr.creation import array, empty, full, ones, zeros +from zarr.cupy import CuPyCPUCompressor +from zarr.storage import DirectoryStore, MemoryStore, Store, ZipStore + +cupy = pytest.importorskip("cupy") + + +def init_compressor(compressor) -> CuPyCPUCompressor: + if compressor: + compressor = getattr(zarr.codecs, compressor)() + return CuPyCPUCompressor(compressor) + + +def init_store(tmp_path, store_type) -> Store: + if store_type is DirectoryStore: + return store_type(str(tmp_path / "store")) + if store_type is MemoryStore: + return MemoryStore() + return None + + +@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) +@pytest.mark.parametrize("store_type", [None, DirectoryStore, MemoryStore, ZipStore]) +def test_array(tmp_path, compressor, store_type): + compressor = init_compressor(compressor) + + # with cupy array + store = init_store(tmp_path / "from_cupy_array", store_type) + a = cupy.arange(100) + z = array( + a, chunks=10, compressor=compressor, store=store, meta_array=cupy.empty(()) + ) + assert a.shape == z.shape + assert a.dtype == z.dtype + assert isinstance(a, type(z[:])) + cupy.testing.assert_array_equal(a, z[:]) + + # with array-like + store = init_store(tmp_path / "from_list", store_type) + a = list(range(100)) + z = array( + a, chunks=10, compressor=compressor, store=store, meta_array=cupy.empty(()) + ) + assert (100,) == z.shape + assert np.asarray(a).dtype == z.dtype + cupy.testing.assert_array_equal(a, z[:]) + + # with another zarr array + store = init_store(tmp_path / "from_another_store", store_type) + z2 = array(z, compressor=compressor, store=store, meta_array=cupy.empty(())) + assert z.shape == z2.shape + assert z.chunks == z2.chunks + assert z.dtype == z2.dtype + cupy.testing.assert_array_equal(z[:], z2[:]) + + +@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) +def test_empty(compressor): + z = empty( + 100, + chunks=10, + compressor=init_compressor(compressor), + meta_array=cupy.empty(()), + ) + assert (100,) == z.shape + assert (10,) == z.chunks + + +@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) +def test_zeros(compressor): + z = zeros( + 100, + chunks=10, + compressor=init_compressor(compressor), + meta_array=cupy.empty(()), + ) + assert (100,) == z.shape + assert (10,) == z.chunks + cupy.testing.assert_array_equal(np.zeros(100), z[:]) + + +@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) +def test_ones(compressor): + z = ones( + 100, + chunks=10, + compressor=init_compressor(compressor), + meta_array=cupy.empty(()), + ) + assert (100,) == z.shape + assert (10,) == z.chunks + cupy.testing.assert_array_equal(np.ones(100), z[:]) + + +@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) +def test_full(compressor): + z = full( + 100, + chunks=10, + fill_value=42, + dtype="i4", + compressor=init_compressor(compressor), + meta_array=cupy.empty(()), + ) + assert (100,) == z.shape + assert (10,) == z.chunks + cupy.testing.assert_array_equal(np.full(100, fill_value=42, dtype="i4"), z[:]) + + # nan + z = full( + 100, + chunks=10, + fill_value=np.nan, + dtype="f8", + compressor=init_compressor(compressor), + meta_array=cupy.empty(()), + ) + assert np.all(np.isnan(z[:])) diff --git a/zarr/util.py b/zarr/util.py index 04d350a68..65e2e3646 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -1,3 +1,4 @@ +import array import inspect import json import math @@ -10,13 +11,119 @@ from asciitree import BoxStyle, LeftAligned from asciitree.traversal import Traversal from collections.abc import Iterable -from numcodecs.compat import ensure_ndarray, ensure_text -from numcodecs.registry import codec_registry +from numcodecs.abc import Codec +from numcodecs.compat import ensure_text +from numcodecs.registry import codec_registry, get_codec, register_codec from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo from typing import Any, Callable, Dict, Optional, Tuple, Union +# TODO: move the function to numcodecs +def ensure_ndarray(buf, allow_copy=False, like=np.empty(())): + """Convenience function to coerce `buf` to numpy array like, if it is not already + numpy array like. + + Parameters + ---------- + buf : array-like or bytes-like + A numpy array or any object exporting a buffer interface. + + Returns + ------- + arr : ndarray-like + A numpy array-like, sharing memory with `buf`. + + Notes + ----- + This function will not create a copy under any circumstances if `allow_copy=False`. + """ + + if isinstance(buf, np.ndarray): + # already a numpy array + arr = buf + + elif isinstance(buf, array.array) and buf.typecode in 'cu': + # Guard condition, do not support array.array with unicode type, this is + # problematic because numpy does not support it on all platforms. Also do not + # support char as it was removed in Python 3. + raise TypeError('array.array with char or unicode type is not supported') + + else: + + # TODO: make it possible to register array-like types lazily. + try: + import cupy + if isinstance(buf, cupy.ndarray): + return buf + except ImportError: + pass + + # N.B., first take a memoryview to make sure that we subsequently create a + # numpy array from a memory buffer with no copy + mem = memoryview(buf) + + # instantiate array from memoryview, ensures no copy + arr = np.array(mem, copy=False) + + return arr + + +# TODO: move the function to numcodecs +def ensure_contiguous_ndarray(buf, max_buffer_size=None): + """Convenience function to coerce `buf` to a numpy array, if it is not already a + numpy array. Also ensures that the returned value exports fully contiguous memory, + and supports the new-style buffer interface. If the optional max_buffer_size is + provided, raise a ValueError if the number of bytes consumed by the returned + array exceeds this value. + + Parameters + ---------- + buf : array-like or bytes-like + A numpy array or any object exporting a buffer interface. + max_buffer_size : int + If specified, the largest allowable value of arr.nbytes, where arr + is the returned array. + + Returns + ------- + arr : ndarray + A numpy array, sharing memory with `buf`. + + Notes + ----- + This function will not create a copy under any circumstances, it is guaranteed to + return a view on memory exported by `buf`. + + """ + + # ensure input is a numpy array + arr = ensure_ndarray(buf) + + # check for object arrays, these are just memory pointers, actual memory holding + # item data is scattered elsewhere + if arr.dtype == object: + raise TypeError('object arrays are not supported') + + # check for datetime or timedelta ndarray, the buffer interface doesn't support those + if arr.dtype.kind in 'Mm': + arr = arr.view(np.int64) + + # check memory is contiguous, if so flatten + if arr.flags.c_contiguous or arr.flags.f_contiguous: + # can flatten without copy + arr = arr.reshape(-1, order='A') + + else: + raise ValueError('an array with contiguous memory is required') + + if max_buffer_size is not None and arr.nbytes > max_buffer_size: + msg = "Codec does not support buffers of > {} bytes".format(max_buffer_size) + raise ValueError(msg) + + return arr + + def flatten(arg: Iterable) -> Iterable: for element in arg: if isinstance(element, Iterable) and not isinstance(element, (str, bytes)): From 254bbae373ac6272453e496f16ba873deb69319c Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 11 Jan 2022 11:23:23 +0100 Subject: [PATCH 02/29] Adding meta_array to open_group() --- zarr/hierarchy.py | 31 ++++++++++++++++++++++++++++--- zarr/tests/test_cupy.py | 15 +++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 716730e22..8024fdf91 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -59,6 +59,12 @@ class Group(MutableMapping): synchronizer : object, optional Array synchronizer. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + .. versionadded:: 2.12 + Attributes ---------- store @@ -69,6 +75,7 @@ class Group(MutableMapping): synchronizer attrs info + meta_array Methods ------- @@ -109,7 +116,7 @@ class Group(MutableMapping): """ def __init__(self, store, path=None, read_only=False, chunk_store=None, - cache_attrs=True, synchronizer=None): + cache_attrs=True, synchronizer=None, meta_array=None): store: BaseStore = BaseStore._ensure_store(store) chunk_store: BaseStore = BaseStore._ensure_store(chunk_store) self._store = store @@ -121,6 +128,11 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._key_prefix = '' self._read_only = read_only self._synchronizer = synchronizer + self._meta_array = meta_array + if meta_array is not None: + self._meta_array = np.empty_like(meta_array) + else: + self._meta_array = np.empty(()) # guard conditions if contains_array(store, path=self._path): @@ -198,6 +210,13 @@ def info(self): """Return diagnostic information about the group.""" return self._info + @property + def meta_array(self): + """An array-like instance to use for determining arrays to create and return + to users. + """ + return self._meta_array + def __eq__(self, other): return ( isinstance(other, Group) and @@ -1117,7 +1136,7 @@ def group(store=None, overwrite=False, chunk_store=None, def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=None, - chunk_store=None, storage_options=None): + chunk_store=None, storage_options=None, meta_array=None): """Open a group using file-mode-like semantics. Parameters @@ -1142,6 +1161,11 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. + meta_array : array-like, optional + An array instance to use for determining arrays to create and return + to users. Use `numpy.empty(())` by default. + + .. versionadded:: 2.12 Returns ------- @@ -1202,4 +1226,5 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N read_only = mode == 'r' return Group(store, read_only=read_only, cache_attrs=cache_attrs, - synchronizer=synchronizer, path=path, chunk_store=chunk_store) + synchronizer=synchronizer, path=path, chunk_store=chunk_store, + meta_array=meta_array) diff --git a/zarr/tests/test_cupy.py b/zarr/tests/test_cupy.py index 0e787c7e1..bbf0cb2af 100644 --- a/zarr/tests/test_cupy.py +++ b/zarr/tests/test_cupy.py @@ -2,8 +2,10 @@ import pytest import zarr.codecs +from zarr.core import Array from zarr.creation import array, empty, full, ones, zeros from zarr.cupy import CuPyCPUCompressor +from zarr.hierarchy import open_group from zarr.storage import DirectoryStore, MemoryStore, Store, ZipStore cupy = pytest.importorskip("cupy") @@ -120,3 +122,16 @@ def test_full(compressor): meta_array=cupy.empty(()), ) assert np.all(np.isnan(z[:])) + + +@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) +@pytest.mark.parametrize("store_type", [None, DirectoryStore, MemoryStore, ZipStore]) +def test_group(tmp_path, compressor, store_type): + store = init_store(tmp_path, store_type) + g = open_group(store, meta_array=cupy.empty(())) + g.ones("data", shape=(10, 11), dtype=int, compressor=init_compressor(compressor)) + a = g["data"] + assert a.shape == (10, 11) + assert a.dtype == int + assert isinstance(a, Array) + assert (a[:] == 1).all() From 40d084e55df39c93401e09e5224778960b4024eb Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 11 Jan 2022 14:02:42 +0100 Subject: [PATCH 03/29] CuPyCPUCompressor: clean up and doc --- zarr/cupy.py | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/zarr/cupy.py b/zarr/cupy.py index ad706d922..98dafa62a 100644 --- a/zarr/cupy.py +++ b/zarr/cupy.py @@ -5,30 +5,37 @@ class CuPyCPUCompressor(Codec): + """CPU compressor for CuPy arrays + + This compressor converts CuPy arrays host memory before compressing + the arrays using `compressor`. + + Parameters + ---------- + compressor : numcodecs.abc.Codec + The codec to use for compression and decompression. + """ + codec_id = "cupy_cpu_compressor" - def __init__(self, cpu_compressor: Codec = None): - self.cpu_compressor = cpu_compressor + def __init__(self, compressor: Codec = None): + self.compressor = compressor def encode(self, buf): import cupy buf = cupy.asnumpy(ensure_contiguous_ndarray(buf)) - if self.cpu_compressor: - buf = self.cpu_compressor.encode(buf) + if self.compressor: + buf = self.compressor.encode(buf) return buf def decode(self, chunk, out=None): import cupy - if out is not None: - cpu_out = cupy.asnumpy(out) - else: - cpu_out = None - if self.cpu_compressor: - chunk = self.cpu_compressor.decode(chunk, cpu_out) - if out is None: - cpu_out = chunk + if self.compressor: + cpu_out = None if out is None else cupy.asnumpy(out) + chunk = self.compressor.decode(chunk, cpu_out) + chunk = cupy.asarray(ensure_contiguous_ndarray(chunk)) if out is not None: cupy.copyto(out, chunk.view(dtype=out.dtype), casting="no") @@ -36,17 +43,17 @@ def decode(self, chunk, out=None): return chunk def get_config(self): - cc_config = self.cpu_compressor.get_config() if self.cpu_compressor else None + cc_config = self.compressor.get_config() if self.compressor else None return { "id": self.codec_id, - "cpu_compressor_config": cc_config, + "compressor_config": cc_config, } @classmethod def from_config(cls, config): - cc_config = config.get("cpu_compressor_config", None) - cpu_compressor = get_codec(cc_config) if cc_config else None - return cls(cpu_compressor=cpu_compressor) + cc_config = config.get("compressor_config", None) + compressor = get_codec(cc_config) if cc_config else None + return cls(compressor=compressor) register_codec(CuPyCPUCompressor) From 947152aa647a7c36af8019103514507bacd33597 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 11 Jan 2022 18:41:52 +0100 Subject: [PATCH 04/29] clean up --- zarr/util.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/zarr/util.py b/zarr/util.py index 65e2e3646..70b5cd253 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -20,9 +20,9 @@ # TODO: move the function to numcodecs -def ensure_ndarray(buf, allow_copy=False, like=np.empty(())): - """Convenience function to coerce `buf` to numpy array like, if it is not already - numpy array like. +def ensure_ndarray(buf): + """Convenience function to coerce `buf` to numpy array-like, if it is not + already a numpy array-like. Parameters ---------- @@ -31,12 +31,13 @@ def ensure_ndarray(buf, allow_copy=False, like=np.empty(())): Returns ------- - arr : ndarray-like - A numpy array-like, sharing memory with `buf`. + arr : array-like + A numpy array-like array that shares memory with `buf`. Notes ----- - This function will not create a copy under any circumstances if `allow_copy=False`. + This function will not create a copy under any circumstances, it is guaranteed to + return a view on memory exported by `buf`. """ if isinstance(buf, np.ndarray): @@ -71,11 +72,13 @@ def ensure_ndarray(buf, allow_copy=False, like=np.empty(())): # TODO: move the function to numcodecs def ensure_contiguous_ndarray(buf, max_buffer_size=None): - """Convenience function to coerce `buf` to a numpy array, if it is not already a - numpy array. Also ensures that the returned value exports fully contiguous memory, - and supports the new-style buffer interface. If the optional max_buffer_size is - provided, raise a ValueError if the number of bytes consumed by the returned - array exceeds this value. + """Convenience function to coerce `buf` to numpy array-like, if it is not already + numpy array-like. + + Also ensures that the returned value exports fully contiguous memory, and supports + the new-style buffer interface. If the optional max_buffer_size is provided, raise + a ValueError if the number of bytes consumed by the returned array exceeds this + value. Parameters ---------- @@ -87,8 +90,8 @@ def ensure_contiguous_ndarray(buf, max_buffer_size=None): Returns ------- - arr : ndarray - A numpy array, sharing memory with `buf`. + arr : array-like + A numpy array-like array that shares memory with `buf`. Notes ----- From a60b2f415cca6f2c2c2b9b871f409e1178389c37 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 12 Jan 2022 08:33:49 +0100 Subject: [PATCH 05/29] flake8 --- zarr/core.py | 12 +++++++++--- zarr/util.py | 3 +-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index a04c84351..65cd63100 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -2041,7 +2041,9 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): if is_scalar(value, self._dtype): # setup array filled with value - chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array) + chunk = np.empty( + self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array + ) chunk.fill(value) else: @@ -2061,14 +2063,18 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # chunk not initialized if self._fill_value is not None: - chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array) + chunk = np.empty( + self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array + ) chunk.fill(self._fill_value) elif self._dtype == object: chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order) else: # N.B., use zeros here so any region beyond the array has consistent # and compressible data - chunk = np.zeros(self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array) + chunk = np.zeros( + self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array + ) else: diff --git a/zarr/util.py b/zarr/util.py index 70b5cd253..348c6d622 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -11,9 +11,8 @@ from asciitree import BoxStyle, LeftAligned from asciitree.traversal import Traversal from collections.abc import Iterable -from numcodecs.abc import Codec from numcodecs.compat import ensure_text -from numcodecs.registry import codec_registry, get_codec, register_codec +from numcodecs.registry import codec_registry from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo from typing import Any, Callable, Dict, Optional, Tuple, Union From 8da9f17b5524a15817758db09ed2dbaabe4955a8 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 12 Jan 2022 08:58:43 +0100 Subject: [PATCH 06/29] mypy --- zarr/tests/test_cupy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zarr/tests/test_cupy.py b/zarr/tests/test_cupy.py index bbf0cb2af..3dc999e57 100644 --- a/zarr/tests/test_cupy.py +++ b/zarr/tests/test_cupy.py @@ -1,3 +1,4 @@ +from typing import Optional import numpy as np import pytest @@ -17,7 +18,7 @@ def init_compressor(compressor) -> CuPyCPUCompressor: return CuPyCPUCompressor(compressor) -def init_store(tmp_path, store_type) -> Store: +def init_store(tmp_path, store_type) -> Optional[Store]: if store_type is DirectoryStore: return store_type(str(tmp_path / "store")) if store_type is MemoryStore: From 0bf1cf0a205b7eb1cfaad1ab9b0c42df779004d9 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 12 Jan 2022 12:06:00 +0100 Subject: [PATCH 07/29] Use KVStore when checking for in-memory data Checking against MutableMapping categories all BaseStores as in-memory stores. --- zarr/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 65cd63100..e1ac71a34 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -31,7 +31,7 @@ is_scalar, pop_fields, ) -from zarr.storage import array_meta_key, attrs_key, getsize, listdir, BaseStore +from zarr.storage import KVStore, array_meta_key, attrs_key, getsize, listdir, BaseStore from zarr.util import ( all_equal, InfoReporter, @@ -2153,7 +2153,7 @@ def _encode_chunk(self, chunk): cdata = chunk # ensure in-memory data is immutable and easy to compare - if isinstance(self.chunk_store, MutableMapping): + if isinstance(self.chunk_store, KVStore): cdata = ensure_bytes(cdata) return cdata From 9795f77624325696069fc9422771b8ffb475e743 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 12 Jan 2022 13:11:13 +0100 Subject: [PATCH 08/29] group: the meta_array argument is now used for new arrays --- zarr/hierarchy.py | 10 ++++++---- zarr/tests/test_cupy.py | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 8024fdf91..8ea2ecb7a 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -313,7 +313,7 @@ def typestr(o): def __getstate__(self): return (self._store, self._path, self._read_only, self._chunk_store, - self.attrs.cache, self._synchronizer) + self.attrs.cache, self._synchronizer, self._meta_array) def __setstate__(self, state): self.__init__(*state) @@ -371,11 +371,12 @@ def __getitem__(self, item): if contains_array(self._store, path): return Array(self._store, read_only=self._read_only, path=path, chunk_store=self._chunk_store, - synchronizer=self._synchronizer, cache_attrs=self.attrs.cache) + synchronizer=self._synchronizer, cache_attrs=self.attrs.cache, + meta_array=self._meta_array) elif contains_group(self._store, path): return Group(self._store, read_only=self._read_only, path=path, chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer) + synchronizer=self._synchronizer, meta_array=self._meta_array) else: raise KeyError(item) @@ -898,7 +899,8 @@ def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, cache_attrs = kwargs.get('cache_attrs', self.attrs.cache) a = Array(self._store, path=path, read_only=self._read_only, chunk_store=self._chunk_store, synchronizer=synchronizer, - cache_metadata=cache_metadata, cache_attrs=cache_attrs) + cache_metadata=cache_metadata, cache_attrs=cache_attrs, + meta_array=self._meta_array) shape = normalize_shape(shape) if shape != a.shape: raise TypeError('shape do not match existing array; expected {}, got {}' diff --git a/zarr/tests/test_cupy.py b/zarr/tests/test_cupy.py index 3dc999e57..f5379c4bb 100644 --- a/zarr/tests/test_cupy.py +++ b/zarr/tests/test_cupy.py @@ -135,4 +135,5 @@ def test_group(tmp_path, compressor, store_type): assert a.shape == (10, 11) assert a.dtype == int assert isinstance(a, Array) + assert isinstance(a[:], cupy.ndarray) assert (a[:] == 1).all() From bf03fd78a5b19d528587c76b661d1354c7cb7851 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 12 Jan 2022 21:47:30 +0100 Subject: [PATCH 09/29] flake8 --- zarr/core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index e1ac71a34..0661df7d8 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -9,8 +9,6 @@ import numpy as np from numcodecs.compat import ensure_bytes -from collections.abc import MutableMapping - from zarr.attrs import Attributes from zarr.codecs import AsType, get_codec from zarr.errors import ArrayNotFoundError, ReadOnlyError, ArrayIndexError From d78ce33eae2acd2bf7658702dbc369497e04d136 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 21 Jan 2022 11:30:21 +0100 Subject: [PATCH 10/29] Use empty_like instead of empty Co-authored-by: jakirkham --- zarr/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/core.py b/zarr/core.py index 0661df7d8..6dcbcfc47 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1825,7 +1825,7 @@ def _process_chunk( if partial_read_decode: cdata.prepare_chunk() # size of chunk - tmp = np.empty(self._chunks, dtype=self.dtype, like=self._meta_array) + tmp = np.empty_like(self._meta_array, shape=self._chunks, dtype=self.dtype) index_selection = PartialChunkIterator(chunk_selection, self.chunks) for start, nitems, partial_out_selection in index_selection: expected_shape = [ From c0402e2b7d97b9d1c01632f086971adf0c441593 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 21 Jan 2022 11:35:12 +0100 Subject: [PATCH 11/29] More use of NumPy's *_like API Co-authored-by: jakirkham --- zarr/core.py | 16 ++++++++-------- zarr/hierarchy.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 6dcbcfc47..2a05bf571 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1728,7 +1728,7 @@ def _set_selection(self, indexer, value, fields=None): pass else: if not hasattr(value, 'shape'): - value = np.array(value, like=self._meta_array) + value = np.asanyarray(value, like=self._meta_array) check_array_shape('value', value, sel_shape) # iterate over chunks in range @@ -2039,8 +2039,8 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): if is_scalar(value, self._dtype): # setup array filled with value - chunk = np.empty( - self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array + chunk = np.empty_like( + self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order ) chunk.fill(value) @@ -2061,8 +2061,8 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # chunk not initialized if self._fill_value is not None: - chunk = np.empty( - self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array + chunk = np.empty_like( + self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order ) chunk.fill(self._fill_value) elif self._dtype == object: @@ -2070,8 +2070,8 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): else: # N.B., use zeros here so any region beyond the array has consistent # and compressible data - chunk = np.zeros( - self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array + chunk = np.zeros_like( + self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order ) else: @@ -2438,7 +2438,7 @@ def _append_nosync(self, data, axis=0): # ensure data is array-like if not hasattr(data, 'shape'): - data = np.array(data, like=self._meta_array) + data = np.asanyarray(data, like=self._meta_array) # ensure shapes are compatible for non-append dimensions self_shape_preserved = tuple(s for i, s in enumerate(self._shape) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 8ea2ecb7a..99997497b 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -130,7 +130,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._synchronizer = synchronizer self._meta_array = meta_array if meta_array is not None: - self._meta_array = np.empty_like(meta_array) + self._meta_array = np.empty_like(meta_array, shape=()) else: self._meta_array = np.empty(()) From 39ffef557ac8c0b8a17ff668831ed4f98155e84e Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 21 Jan 2022 12:55:34 +0100 Subject: [PATCH 12/29] Assume that array-like objects that doesn't have a `writeable` flag is writable. --- zarr/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zarr/core.py b/zarr/core.py index 2a05bf571..fcb7ddd84 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1796,6 +1796,8 @@ def _process_chunk( self._dtype != object): dest = out[out_selection] + # Assume that array-like objects that doesn't have a + # `writeable` flag is writable. dest_is_writable = getattr(dest, "writeable", True) write_direct = ( dest_is_writable and From 86a1ec6675c688212ee1e33454118504e5e62f82 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 21 Jan 2022 13:23:37 +0100 Subject: [PATCH 13/29] _meta_array: use shape=() Co-authored-by: jakirkham --- zarr/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/core.py b/zarr/core.py index 2a05bf571..0692a20fa 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -184,7 +184,7 @@ def __init__( self._write_empty_chunks = write_empty_chunks self._meta_array = meta_array if meta_array is not None: - self._meta_array = np.empty_like(meta_array) + self._meta_array = np.empty_like(meta_array, shape=()) else: self._meta_array = np.empty(()) From cb0c02f3e2f65c41049f2bed7fbe2b5f72e96f59 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 9 Feb 2022 14:39:46 +0100 Subject: [PATCH 14/29] use ensure_ndarray_like() and ensure_contiguous_ndarray_like() --- zarr/core.py | 14 ++--- zarr/cupy.py | 7 ++- zarr/storage.py | 13 ++--- zarr/tests/test_core.py | 4 +- zarr/util.py | 113 +--------------------------------------- 5 files changed, 21 insertions(+), 130 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 94cbf39bd..3c9c9f2aa 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -42,7 +42,7 @@ normalize_shape, normalize_storage_path, PartialReadBuffer, - ensure_ndarray + ensure_ndarray_like ) @@ -1562,7 +1562,7 @@ def set_coordinate_selection(self, selection, value, fields=None): # handle value - need ndarray-like flatten value if not is_scalar(value, self._dtype): try: - value = ensure_ndarray(value) + value = ensure_ndarray_like(value) except TypeError: # Handle types like `list` or `tuple` value = np.array(value, like=self._meta_array) @@ -1817,7 +1817,7 @@ def _process_chunk( cdata = cdata.read_full() self._compressor.decode(cdata, dest) else: - chunk = ensure_ndarray(cdata).view(self._dtype) + chunk = ensure_ndarray_like(cdata).view(self._dtype) chunk = chunk.reshape(self._chunks, order=self._order) np.copyto(dest, chunk) return @@ -1884,7 +1884,7 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, """ out_is_ndarray = True try: - out = ensure_ndarray(out) + out = ensure_ndarray_like(out) except TypeError: out_is_ndarray = False @@ -1919,7 +1919,7 @@ def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, """ out_is_ndarray = True try: - out = ensure_ndarray(out) + out = ensure_ndarray_like(out) except TypeError: # pragma: no cover out_is_ndarray = False @@ -2116,7 +2116,7 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): chunk = f.decode(chunk) # view as numpy array with correct dtype - chunk = ensure_ndarray(chunk) + chunk = ensure_ndarray_like(chunk) # special case object dtype, because incorrect handling can lead to # segfaults and other bad things happening if self._dtype != object: @@ -2143,7 +2143,7 @@ def _encode_chunk(self, chunk): chunk = f.encode(chunk) # check object encoding - if ensure_ndarray(chunk).dtype == object: + if ensure_ndarray_like(chunk).dtype == object: raise RuntimeError('cannot write object array without object codec') # compress diff --git a/zarr/cupy.py b/zarr/cupy.py index 98dafa62a..9fbf922d2 100644 --- a/zarr/cupy.py +++ b/zarr/cupy.py @@ -1,8 +1,7 @@ from numcodecs.abc import Codec +from numcodecs.compat import ensure_contiguous_ndarray_like from numcodecs.registry import get_codec, register_codec -from .util import ensure_contiguous_ndarray - class CuPyCPUCompressor(Codec): """CPU compressor for CuPy arrays @@ -24,7 +23,7 @@ def __init__(self, compressor: Codec = None): def encode(self, buf): import cupy - buf = cupy.asnumpy(ensure_contiguous_ndarray(buf)) + buf = cupy.asnumpy(ensure_contiguous_ndarray_like(buf)) if self.compressor: buf = self.compressor.encode(buf) return buf @@ -36,7 +35,7 @@ def decode(self, chunk, out=None): cpu_out = None if out is None else cupy.asnumpy(out) chunk = self.compressor.decode(chunk, cpu_out) - chunk = cupy.asarray(ensure_contiguous_ndarray(chunk)) + chunk = cupy.asarray(ensure_contiguous_ndarray_like(chunk)) if out is not None: cupy.copyto(out, chunk.view(dtype=out.dtype), casting="no") chunk = out diff --git a/zarr/storage.py b/zarr/storage.py index 3ff15304b..00ec66899 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -37,7 +37,8 @@ from numcodecs.compat import ( ensure_bytes, - ensure_text + ensure_text, + ensure_contiguous_ndarray_like ) from numcodecs.registry import codec_registry @@ -53,8 +54,8 @@ from zarr.util import (buffer_size, json_loads, nolock, normalize_chunks, normalize_dimension_separator, normalize_dtype, normalize_fill_value, normalize_order, - normalize_shape, normalize_storage_path, retry_call, - ensure_contiguous_ndarray) + normalize_shape, normalize_storage_path, retry_call + ) from zarr._storage.absstore import ABSStore # noqa: F401 from zarr._storage.store import (_listdir_from_keys, @@ -899,7 +900,7 @@ def __setitem__(self, key, value): key = self._normalize_key(key) # coerce to flat, contiguous array (ideally without copying) - value = ensure_contiguous_ndarray(value) + value = ensure_contiguous_ndarray_like(value) # destination path for key file_path = os.path.join(self.path, key) @@ -1551,7 +1552,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): if self.mode == 'r': raise ReadOnlyError() - value = ensure_contiguous_ndarray(value).view("u1") + value = ensure_contiguous_ndarray_like(value).view("u1") with self.mutex: # writestr(key, value) writes with default permissions from # zipfile (600) that are too restrictive, build ZipInfo for @@ -2392,7 +2393,7 @@ def update(self, *args, **kwargs): kv_list = [] for dct in args: for k, v in dct.items(): - v = ensure_contiguous_ndarray(v) + v = ensure_contiguous_ndarray_like(v) # Accumulate key-value pairs for storage kv_list.append((k, v)) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 5f6ae0f59..742313288 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -12,7 +12,7 @@ from numcodecs import (BZ2, JSON, LZ4, Blosc, Categorize, Delta, FixedScaleOffset, GZip, MsgPack, Pickle, VLenArray, VLenBytes, VLenUTF8, Zlib) -from numcodecs.compat import ensure_bytes +from numcodecs.compat import ensure_bytes, ensure_ndarray from numcodecs.tests.common import greetings from numpy.testing import assert_array_almost_equal, assert_array_equal from pkg_resources import parse_version @@ -35,7 +35,7 @@ init_array, init_group, ) -from zarr.util import buffer_size, ensure_ndarray +from zarr.util import buffer_size from zarr.tests.util import abs_container, skip_test_env_var, have_fsspec # noinspection PyMethodMayBeStatic diff --git a/zarr/util.py b/zarr/util.py index b3bc2865e..1c1fc2439 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -1,4 +1,3 @@ -import array import inspect import json import math @@ -11,121 +10,13 @@ from asciitree import BoxStyle, LeftAligned from asciitree.traversal import Traversal from collections.abc import Iterable -from numcodecs.compat import ensure_text +from numcodecs.compat import ensure_text, ensure_ndarray_like from numcodecs.registry import codec_registry from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo from typing import Any, Callable, Dict, Optional, Tuple, Union -# TODO: move the function to numcodecs -def ensure_ndarray(buf): - """Convenience function to coerce `buf` to numpy array-like, if it is not - already a numpy array-like. - - Parameters - ---------- - buf : array-like or bytes-like - A numpy array or any object exporting a buffer interface. - - Returns - ------- - arr : array-like - A numpy array-like array that shares memory with `buf`. - - Notes - ----- - This function will not create a copy under any circumstances, it is guaranteed to - return a view on memory exported by `buf`. - """ - - if isinstance(buf, np.ndarray): - # already a numpy array - arr = buf - - elif isinstance(buf, array.array) and buf.typecode in 'cu': - # Guard condition, do not support array.array with unicode type, this is - # problematic because numpy does not support it on all platforms. Also do not - # support char as it was removed in Python 3. - raise TypeError('array.array with char or unicode type is not supported') - - else: - - # TODO: make it possible to register array-like types lazily. - try: - import cupy - if isinstance(buf, cupy.ndarray): - return buf - except ImportError: - pass - - # N.B., first take a memoryview to make sure that we subsequently create a - # numpy array from a memory buffer with no copy - mem = memoryview(buf) - - # instantiate array from memoryview, ensures no copy - arr = np.array(mem, copy=False) - - return arr - - -# TODO: move the function to numcodecs -def ensure_contiguous_ndarray(buf, max_buffer_size=None): - """Convenience function to coerce `buf` to numpy array-like, if it is not already - numpy array-like. - - Also ensures that the returned value exports fully contiguous memory, and supports - the new-style buffer interface. If the optional max_buffer_size is provided, raise - a ValueError if the number of bytes consumed by the returned array exceeds this - value. - - Parameters - ---------- - buf : array-like or bytes-like - A numpy array or any object exporting a buffer interface. - max_buffer_size : int - If specified, the largest allowable value of arr.nbytes, where arr - is the returned array. - - Returns - ------- - arr : array-like - A numpy array-like array that shares memory with `buf`. - - Notes - ----- - This function will not create a copy under any circumstances, it is guaranteed to - return a view on memory exported by `buf`. - - """ - - # ensure input is a numpy array - arr = ensure_ndarray(buf) - - # check for object arrays, these are just memory pointers, actual memory holding - # item data is scattered elsewhere - if arr.dtype == object: - raise TypeError('object arrays are not supported') - - # check for datetime or timedelta ndarray, the buffer interface doesn't support those - if arr.dtype.kind in 'Mm': - arr = arr.view(np.int64) - - # check memory is contiguous, if so flatten - if arr.flags.c_contiguous or arr.flags.f_contiguous: - # can flatten without copy - arr = arr.reshape(-1, order='A') - - else: - raise ValueError('an array with contiguous memory is required') - - if max_buffer_size is not None and arr.nbytes > max_buffer_size: - msg = "Codec does not support buffers of > {} bytes".format(max_buffer_size) - raise ValueError(msg) - - return arr - - def flatten(arg: Iterable) -> Iterable: for element in arg: if isinstance(element, Iterable) and not isinstance(element, (str, bytes)): @@ -449,7 +340,7 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: def buffer_size(v) -> int: - return ensure_ndarray(v).nbytes + return ensure_ndarray_like(v).nbytes def info_text_report(items: Dict[Any, Any]) -> str: From 9976f067a42c99b37e2c1a57ef7377b0b34f9318 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 4 Mar 2022 12:15:50 +0100 Subject: [PATCH 15/29] CI: use https://github.com/zarr-developers/numcodecs/pull/305 --- .github/workflows/minimal.yml | 1 + .github/workflows/python-package.yml | 1 + .github/workflows/windows-testing.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml index eb6ebd5d2..5a146dd17 100644 --- a/.github/workflows/minimal.yml +++ b/.github/workflows/minimal.yml @@ -24,6 +24,7 @@ jobs: shell: "bash -l {0}" run: | conda activate minimal + python -m pip install git+https://github.com/madsbk/numcodecs.git@ndarray_like --no-deps python -m pip install . pytest -svx --timeout=300 - name: Fixture generation diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index e0d404b1a..7c2e76842 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -60,6 +60,7 @@ jobs: python -m pip install --upgrade pip python -m pip install -U pip setuptools wheel codecov line_profiler python -m pip install -rrequirements_dev_minimal.txt numpy${{ matrix.numpy_version}} -rrequirements_dev_optional.txt pymongo redis + python -m pip install git+https://github.com/madsbk/numcodecs.git@ndarray_like --no-deps python -m pip install . python -m pip freeze - name: Tests diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index af656aa88..213711918 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -39,6 +39,7 @@ jobs: python -m pip install --upgrade pip python -m pip install -U pip setuptools wheel python -m pip install -r requirements_dev_numpy.txt -r requirements_dev_minimal.txt -r requirements_dev_optional.txt + python -m pip install git+https://github.com/madsbk/numcodecs.git@ndarray_like --no-deps python -m pip install . python -m pip freeze npm install -g azurite From be9099d79acb1f20a886837340f154460762dedd Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 4 Mar 2022 12:42:40 +0100 Subject: [PATCH 16/29] Removed unused code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tobias Kölling --- zarr/core.py | 1 - zarr/hierarchy.py | 1 - 2 files changed, 2 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 3c9c9f2aa..07778bbd2 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -182,7 +182,6 @@ def __init__( self._is_view = False self._partial_decompress = partial_decompress self._write_empty_chunks = write_empty_chunks - self._meta_array = meta_array if meta_array is not None: self._meta_array = np.empty_like(meta_array, shape=()) else: diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 99997497b..65bdc10a7 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -128,7 +128,6 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._key_prefix = '' self._read_only = read_only self._synchronizer = synchronizer - self._meta_array = meta_array if meta_array is not None: self._meta_array = np.empty_like(meta_array, shape=()) else: From 082f299424152e50e97c968f112c2762ac52db42 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 4 Mar 2022 12:49:03 +0100 Subject: [PATCH 17/29] CI: changed minimal NumPy version to v1.20 --- .github/workflows/python-package.yml | 12 ++++++------ tox.ini | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 7c2e76842..1f2379b22 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,10 +16,10 @@ jobs: strategy: matrix: python-version: [3.7, 3.8, 3.9] - numpy_version: ['!=1.21.0', '==1.17.*'] + numpy_version: ['!=1.21.0', '==1.20.*'] exclude: - python-version: 3.9 - numpy_version: '==1.17.*' + numpy_version: '==1.20.*' services: redis: image: redis @@ -37,7 +37,7 @@ jobs: - 27017:27017 steps: - uses: actions/checkout@v2 - with: + with: fetch-depth: 0 - name: Setup Miniconda uses: conda-incubator/setup-miniconda@master @@ -59,13 +59,13 @@ jobs: conda activate zarr-env python -m pip install --upgrade pip python -m pip install -U pip setuptools wheel codecov line_profiler - python -m pip install -rrequirements_dev_minimal.txt numpy${{ matrix.numpy_version}} -rrequirements_dev_optional.txt pymongo redis + python -m pip install -rrequirements_dev_minimal.txt numpy${{matrix.numpy_version}} -rrequirements_dev_optional.txt pymongo redis python -m pip install git+https://github.com/madsbk/numcodecs.git@ndarray_like --no-deps python -m pip install . python -m pip freeze - name: Tests shell: "bash -l {0}" - env: + env: COVERAGE_FILE: .coverage.${{matrix.python-version}}.${{matrix.numpy_version}} ZARR_TEST_ABS: 1 ZARR_TEST_MONGO: 1 @@ -90,4 +90,4 @@ jobs: flake8 zarr mypy zarr - + diff --git a/tox.ini b/tox.ini index 3adc147da..3ce8611fe 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. [tox] -envlist = py37-npy{117,latest}, py38, py39, docs +envlist = py37-npy{120,latest}, py38, py39, docs [testenv] install_command = pip install --no-binary=numcodecs {opts} {packages} @@ -21,7 +21,7 @@ commands = # main unit test runner py{38,39}: pytest -v --cov=zarr --cov-config=.coveragerc zarr # don't collect coverage when running older numpy versions - py37-npy117: pytest -v zarr + py37-npy120: pytest -v zarr # collect coverage and run doctests under py37 py37-npylatest: pytest -v --cov=zarr --cov-config=.coveragerc --doctest-plus zarr --remote-data # generate a coverage report @@ -33,7 +33,7 @@ commands = # print environment for debugging pip freeze deps = - py37-npy117: numpy==1.17.* + py37-npy120: numpy==1.20.* py37-npylatest,py38: -rrequirements_dev_numpy.txt -rrequirements_dev_minimal.txt -rrequirements_dev_optional.txt From 3dd64dd6ff8d5ae8962d33d2736a765505fb7968 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 4 Mar 2022 13:54:29 +0100 Subject: [PATCH 18/29] CI: use numpy>=1.21.* for `mypy` check --- .github/workflows/python-package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 1f2379b22..565ec6b87 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -88,6 +88,7 @@ jobs: run: | conda activate zarr-env flake8 zarr + python -m pip install numpy>=1.21.* mypy zarr From 4c921e699c71aca4de16d33cab2136bd71f5880c Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 15 Mar 2022 16:19:51 +0100 Subject: [PATCH 19/29] Revert "CI: use https://github.com/zarr-developers/numcodecs/pull/305" This reverts commit 9976f067a42c99b37e2c1a57ef7377b0b34f9318. --- .github/workflows/minimal.yml | 1 - .github/workflows/python-package.yml | 1 - .github/workflows/windows-testing.yml | 1 - 3 files changed, 3 deletions(-) diff --git a/.github/workflows/minimal.yml b/.github/workflows/minimal.yml index 5a146dd17..eb6ebd5d2 100644 --- a/.github/workflows/minimal.yml +++ b/.github/workflows/minimal.yml @@ -24,7 +24,6 @@ jobs: shell: "bash -l {0}" run: | conda activate minimal - python -m pip install git+https://github.com/madsbk/numcodecs.git@ndarray_like --no-deps python -m pip install . pytest -svx --timeout=300 - name: Fixture generation diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 565ec6b87..9f2dd7fa2 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -60,7 +60,6 @@ jobs: python -m pip install --upgrade pip python -m pip install -U pip setuptools wheel codecov line_profiler python -m pip install -rrequirements_dev_minimal.txt numpy${{matrix.numpy_version}} -rrequirements_dev_optional.txt pymongo redis - python -m pip install git+https://github.com/madsbk/numcodecs.git@ndarray_like --no-deps python -m pip install . python -m pip freeze - name: Tests diff --git a/.github/workflows/windows-testing.yml b/.github/workflows/windows-testing.yml index 213711918..af656aa88 100644 --- a/.github/workflows/windows-testing.yml +++ b/.github/workflows/windows-testing.yml @@ -39,7 +39,6 @@ jobs: python -m pip install --upgrade pip python -m pip install -U pip setuptools wheel python -m pip install -r requirements_dev_numpy.txt -r requirements_dev_minimal.txt -r requirements_dev_optional.txt - python -m pip install git+https://github.com/madsbk/numcodecs.git@ndarray_like --no-deps python -m pip install . python -m pip freeze npm install -g azurite From 2abc1111d18252336d4958dffc3caae3fa42d878 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Mon, 23 May 2022 19:57:56 -0400 Subject: [PATCH 20/29] fix merge mistake --- zarr/core.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 2a5e076e5..4489d625b 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -4,12 +4,11 @@ import math import operator import re -from collections.abc import MutableMapping from functools import reduce from typing import Any import numpy as np -from numcodecs.compat import ensure_bytes, ensure_ndarray +from numcodecs.compat import ensure_bytes from zarr._storage.store import _prefix_to_attrs_key, assert_zarr_v3_api_available from zarr.attrs import Attributes @@ -170,7 +169,6 @@ def __init__( cache_metadata=True, cache_attrs=True, partial_decompress=False, - write_empty_chunks=False, write_empty_chunks=True, zarr_version=None, meta_array=None, From a7857d6451a692dba0ef014ab63a053464f4d1a1 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 6 Jul 2022 13:25:26 +0200 Subject: [PATCH 21/29] CI: remove manual numpy install --- .github/workflows/python-package.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index a9cc31a07..06d4537e7 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -88,7 +88,6 @@ jobs: run: | conda activate zarr-env flake8 zarr - python -m pip install numpy>=1.21.* mypy zarr From b3fc4883aa08cca89be320f2683a7ed89e88c737 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 6 Jul 2022 14:37:49 +0200 Subject: [PATCH 22/29] pickle: use kwargs --- zarr/core.py | 18 ++++++++++++++---- zarr/hierarchy.py | 14 +++++++++++--- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 4489d625b..8d29fa9e9 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -2387,12 +2387,22 @@ def hexdigest(self, hashname="sha1"): return checksum def __getstate__(self): - return (self._store, self._path, self._read_only, self._chunk_store, - self._synchronizer, self._cache_metadata, self._attrs.cache, - self._partial_decompress, self._write_empty_chunks, self._version) + return { + "store": self._store, + "path": self._path, + "read_only": self._read_only, + "chunk_store": self._chunk_store, + "synchronizer": self._synchronizer, + "cache_metadata": self._cache_metadata, + "cache_attrs": self._attrs.cache, + "partial_decompress": self._partial_decompress, + "write_empty_chunks": self._write_empty_chunks, + "zarr_version": self._version, + "meta_array": self._meta_array, + } def __setstate__(self, state): - self.__init__(*state) + self.__init__(**state) def _synchronized_op(self, f, *args, **kwargs): diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index de95bf720..bcf7e8d8e 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -369,11 +369,19 @@ def typestr(o): return items def __getstate__(self): - return (self._store, self._path, self._read_only, self._chunk_store, - self.attrs.cache, self._synchronizer, self._meta_array) + return { + "store": self._store, + "path": self._path, + "read_only": self._read_only, + "chunk_store": self._chunk_store, + "cache_attrs": self._attrs.cache, + "synchronizer": self._synchronizer, + "zarr_version": self._version, + "meta_array": self._meta_array, + } def __setstate__(self, state): - self.__init__(*state) + self.__init__(**state) def _item_path(self, item): absolute = isinstance(item, str) and item and item[0] == '/' From 2e2b0225c9f28f0423161e356a5b81ad2870c6a8 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 6 Jul 2022 15:00:44 +0200 Subject: [PATCH 23/29] moved CuPyCPUCompressor to the test suite --- zarr/cupy.py | 58 --------------------------------------- zarr/tests/test_cupy.py | 61 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 59 deletions(-) delete mode 100644 zarr/cupy.py diff --git a/zarr/cupy.py b/zarr/cupy.py deleted file mode 100644 index 9fbf922d2..000000000 --- a/zarr/cupy.py +++ /dev/null @@ -1,58 +0,0 @@ -from numcodecs.abc import Codec -from numcodecs.compat import ensure_contiguous_ndarray_like -from numcodecs.registry import get_codec, register_codec - - -class CuPyCPUCompressor(Codec): - """CPU compressor for CuPy arrays - - This compressor converts CuPy arrays host memory before compressing - the arrays using `compressor`. - - Parameters - ---------- - compressor : numcodecs.abc.Codec - The codec to use for compression and decompression. - """ - - codec_id = "cupy_cpu_compressor" - - def __init__(self, compressor: Codec = None): - self.compressor = compressor - - def encode(self, buf): - import cupy - - buf = cupy.asnumpy(ensure_contiguous_ndarray_like(buf)) - if self.compressor: - buf = self.compressor.encode(buf) - return buf - - def decode(self, chunk, out=None): - import cupy - - if self.compressor: - cpu_out = None if out is None else cupy.asnumpy(out) - chunk = self.compressor.decode(chunk, cpu_out) - - chunk = cupy.asarray(ensure_contiguous_ndarray_like(chunk)) - if out is not None: - cupy.copyto(out, chunk.view(dtype=out.dtype), casting="no") - chunk = out - return chunk - - def get_config(self): - cc_config = self.compressor.get_config() if self.compressor else None - return { - "id": self.codec_id, - "compressor_config": cc_config, - } - - @classmethod - def from_config(cls, config): - cc_config = config.get("compressor_config", None) - compressor = get_codec(cc_config) if cc_config else None - return cls(compressor=compressor) - - -register_codec(CuPyCPUCompressor) diff --git a/zarr/tests/test_cupy.py b/zarr/tests/test_cupy.py index f5379c4bb..3cb3cdf31 100644 --- a/zarr/tests/test_cupy.py +++ b/zarr/tests/test_cupy.py @@ -2,16 +2,75 @@ import numpy as np import pytest +from numcodecs.abc import Codec +from numcodecs.compat import ensure_contiguous_ndarray_like +from numcodecs.registry import get_codec, register_codec + import zarr.codecs from zarr.core import Array from zarr.creation import array, empty, full, ones, zeros -from zarr.cupy import CuPyCPUCompressor from zarr.hierarchy import open_group from zarr.storage import DirectoryStore, MemoryStore, Store, ZipStore + cupy = pytest.importorskip("cupy") +class CuPyCPUCompressor(Codec): + """CPU compressor for CuPy arrays + + This compressor converts CuPy arrays host memory before compressing + the arrays using `compressor`. + + Parameters + ---------- + compressor : numcodecs.abc.Codec + The codec to use for compression and decompression. + """ + + codec_id = "cupy_cpu_compressor" + + def __init__(self, compressor: Codec = None): + self.compressor = compressor + + def encode(self, buf): + import cupy + + buf = cupy.asnumpy(ensure_contiguous_ndarray_like(buf)) + if self.compressor: + buf = self.compressor.encode(buf) + return buf + + def decode(self, chunk, out=None): + import cupy + + if self.compressor: + cpu_out = None if out is None else cupy.asnumpy(out) + chunk = self.compressor.decode(chunk, cpu_out) + + chunk = cupy.asarray(ensure_contiguous_ndarray_like(chunk)) + if out is not None: + cupy.copyto(out, chunk.view(dtype=out.dtype), casting="no") + chunk = out + return chunk + + def get_config(self): + cc_config = self.compressor.get_config() if self.compressor else None + return { + "id": self.codec_id, + "compressor_config": cc_config, + } + + @classmethod + def from_config(cls, config): + cc_config = config.get("compressor_config", None) + compressor = get_codec(cc_config) if cc_config else None + return cls(compressor=compressor) + + +register_codec(CuPyCPUCompressor) + + def init_compressor(compressor) -> CuPyCPUCompressor: if compressor: compressor = getattr(zarr.codecs, compressor)() From 18c4c6b780ba12d8fee25aa985b20b74b908ded3 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 2 Aug 2022 12:39:19 +0200 Subject: [PATCH 24/29] doc-meta_array: changed to versionadded:: 2.13 --- zarr/core.py | 2 +- zarr/creation.py | 2 +- zarr/hierarchy.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/zarr/core.py b/zarr/core.py index 71bddad55..e5b204516 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -103,7 +103,7 @@ class Array: An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. - .. versionadded:: 2.12 + .. versionadded:: 2.13 Attributes diff --git a/zarr/creation.py b/zarr/creation.py index 3294695c0..e1c815ed2 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -95,7 +95,7 @@ def create(shape, chunks=True, dtype=None, compressor='default', An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. - .. versionadded:: 2.12 + .. versionadded:: 2.13 Returns ------- diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 0f484ad82..177d1eec7 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -68,7 +68,7 @@ class Group(MutableMapping): An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. - .. versionadded:: 2.12 + .. versionadded:: 2.13 Attributes ---------- @@ -1324,7 +1324,7 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. - .. versionadded:: 2.12 + .. versionadded:: 2.13 Returns ------- From 2efa2fe7bd41aac177afe5bed678378b65d69f33 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 2 Aug 2022 12:50:23 +0200 Subject: [PATCH 25/29] test_cupy: assert meta_array --- zarr/tests/test_cupy.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zarr/tests/test_cupy.py b/zarr/tests/test_cupy.py index 3cb3cdf31..4533b8be7 100644 --- a/zarr/tests/test_cupy.py +++ b/zarr/tests/test_cupy.py @@ -99,6 +99,7 @@ def test_array(tmp_path, compressor, store_type): assert a.shape == z.shape assert a.dtype == z.dtype assert isinstance(a, type(z[:])) + assert isinstance(z.meta_array, type(cupy.empty(()))) cupy.testing.assert_array_equal(a, z[:]) # with array-like @@ -196,3 +197,4 @@ def test_group(tmp_path, compressor, store_type): assert isinstance(a, Array) assert isinstance(a[:], cupy.ndarray) assert (a[:] == 1).all() + assert isinstance(g.meta_array, type(cupy.empty(()))) From 0dc8f93dd3329cace3bc377078a0f5091ce88195 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 11 Aug 2022 09:43:29 +0200 Subject: [PATCH 26/29] test_cupy: test when CuPy isn't available --- zarr/tests/test_cupy.py | 120 ++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 40 deletions(-) diff --git a/zarr/tests/test_cupy.py b/zarr/tests/test_cupy.py index 4533b8be7..37b11321c 100644 --- a/zarr/tests/test_cupy.py +++ b/zarr/tests/test_cupy.py @@ -13,9 +13,6 @@ from zarr.storage import DirectoryStore, MemoryStore, Store, ZipStore -cupy = pytest.importorskip("cupy") - - class CuPyCPUCompressor(Codec): """CPU compressor for CuPy arrays @@ -71,6 +68,28 @@ def from_config(cls, config): register_codec(CuPyCPUCompressor) +class MyArray(np.ndarray): + """Dummy array class to test the `meta_array` argument + + Useful when CuPy isn't available. + + This class also makes some of the functions from the numpy + module available. + """ + + testing = np.testing + + @classmethod + def arange(cls, size): + ret = cls(shape=(size,), dtype="int64") + ret[:] = range(size) + return ret + + @classmethod + def empty(cls, shape): + return cls(shape=shape) + + def init_compressor(compressor) -> CuPyCPUCompressor: if compressor: compressor = getattr(zarr.codecs, compressor)() @@ -85,93 +104,113 @@ def init_store(tmp_path, store_type) -> Optional[Store]: return None -@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) +def ensure_cls(obj): + if isinstance(obj, str): + module, cls_name = obj.rsplit(".", maxsplit=1) + return getattr(pytest.importorskip(module), cls_name) + return obj + + +def ensure_module(module): + if isinstance(module, str): + return pytest.importorskip(module) + return module + + +param_module_and_compressor = [ + (MyArray, None), + ("cupy", init_compressor(None)), + ("cupy", init_compressor("Zlib")), + ("cupy", init_compressor("Blosc")), +] + + +@pytest.mark.parametrize("module, compressor", param_module_and_compressor) @pytest.mark.parametrize("store_type", [None, DirectoryStore, MemoryStore, ZipStore]) -def test_array(tmp_path, compressor, store_type): - compressor = init_compressor(compressor) +def test_array(tmp_path, module, compressor, store_type): + xp = ensure_module(module) - # with cupy array store = init_store(tmp_path / "from_cupy_array", store_type) - a = cupy.arange(100) - z = array( - a, chunks=10, compressor=compressor, store=store, meta_array=cupy.empty(()) - ) + a = xp.arange(100) + z = array(a, chunks=10, compressor=compressor, store=store, meta_array=xp.empty(())) assert a.shape == z.shape assert a.dtype == z.dtype assert isinstance(a, type(z[:])) - assert isinstance(z.meta_array, type(cupy.empty(()))) - cupy.testing.assert_array_equal(a, z[:]) + assert isinstance(z.meta_array, type(xp.empty(()))) + xp.testing.assert_array_equal(a, z[:]) # with array-like store = init_store(tmp_path / "from_list", store_type) a = list(range(100)) - z = array( - a, chunks=10, compressor=compressor, store=store, meta_array=cupy.empty(()) - ) + z = array(a, chunks=10, compressor=compressor, store=store, meta_array=xp.empty(())) assert (100,) == z.shape assert np.asarray(a).dtype == z.dtype - cupy.testing.assert_array_equal(a, z[:]) + xp.testing.assert_array_equal(a, z[:]) # with another zarr array store = init_store(tmp_path / "from_another_store", store_type) - z2 = array(z, compressor=compressor, store=store, meta_array=cupy.empty(())) + z2 = array(z, compressor=compressor, store=store, meta_array=xp.empty(())) assert z.shape == z2.shape assert z.chunks == z2.chunks assert z.dtype == z2.dtype - cupy.testing.assert_array_equal(z[:], z2[:]) + xp.testing.assert_array_equal(z[:], z2[:]) -@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) -def test_empty(compressor): +@pytest.mark.parametrize("module, compressor", param_module_and_compressor) +def test_empty(module, compressor): + xp = ensure_module(module) z = empty( 100, chunks=10, compressor=init_compressor(compressor), - meta_array=cupy.empty(()), + meta_array=xp.empty(()), ) assert (100,) == z.shape assert (10,) == z.chunks -@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) -def test_zeros(compressor): +@pytest.mark.parametrize("module, compressor", param_module_and_compressor) +def test_zeros(module, compressor): + xp = ensure_module(module) z = zeros( 100, chunks=10, compressor=init_compressor(compressor), - meta_array=cupy.empty(()), + meta_array=xp.empty(()), ) assert (100,) == z.shape assert (10,) == z.chunks - cupy.testing.assert_array_equal(np.zeros(100), z[:]) + xp.testing.assert_array_equal(np.zeros(100), z[:]) -@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) -def test_ones(compressor): +@pytest.mark.parametrize("module, compressor", param_module_and_compressor) +def test_ones(module, compressor): + xp = ensure_module(module) z = ones( 100, chunks=10, compressor=init_compressor(compressor), - meta_array=cupy.empty(()), + meta_array=xp.empty(()), ) assert (100,) == z.shape assert (10,) == z.chunks - cupy.testing.assert_array_equal(np.ones(100), z[:]) + xp.testing.assert_array_equal(np.ones(100), z[:]) -@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) -def test_full(compressor): +@pytest.mark.parametrize("module, compressor", param_module_and_compressor) +def test_full(module, compressor): + xp = ensure_module(module) z = full( 100, chunks=10, fill_value=42, dtype="i4", compressor=init_compressor(compressor), - meta_array=cupy.empty(()), + meta_array=xp.empty(()), ) assert (100,) == z.shape assert (10,) == z.chunks - cupy.testing.assert_array_equal(np.full(100, fill_value=42, dtype="i4"), z[:]) + xp.testing.assert_array_equal(np.full(100, fill_value=42, dtype="i4"), z[:]) # nan z = full( @@ -180,21 +219,22 @@ def test_full(compressor): fill_value=np.nan, dtype="f8", compressor=init_compressor(compressor), - meta_array=cupy.empty(()), + meta_array=xp.empty(()), ) assert np.all(np.isnan(z[:])) -@pytest.mark.parametrize("compressor", [None, "Zlib", "Blosc"]) +@pytest.mark.parametrize("module, compressor", param_module_and_compressor) @pytest.mark.parametrize("store_type", [None, DirectoryStore, MemoryStore, ZipStore]) -def test_group(tmp_path, compressor, store_type): +def test_group(tmp_path, module, compressor, store_type): + xp = ensure_module(module) store = init_store(tmp_path, store_type) - g = open_group(store, meta_array=cupy.empty(())) + g = open_group(store, meta_array=xp.empty(())) g.ones("data", shape=(10, 11), dtype=int, compressor=init_compressor(compressor)) a = g["data"] assert a.shape == (10, 11) assert a.dtype == int assert isinstance(a, Array) - assert isinstance(a[:], cupy.ndarray) + assert isinstance(a[:], xp.ndarray) assert (a[:] == 1).all() - assert isinstance(g.meta_array, type(cupy.empty(()))) + assert isinstance(g.meta_array, type(xp.empty(()))) From fdcf9495c7d4a342b21ac92a7a5e3a79ce8ed47b Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 11 Aug 2022 09:44:25 +0200 Subject: [PATCH 27/29] renamed: test_cupy.py -> test_meta_array.py --- zarr/tests/{test_cupy.py => test_meta_array.py} | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) rename zarr/tests/{test_cupy.py => test_meta_array.py} (94%) diff --git a/zarr/tests/test_cupy.py b/zarr/tests/test_meta_array.py similarity index 94% rename from zarr/tests/test_cupy.py rename to zarr/tests/test_meta_array.py index 37b11321c..58aa61fa1 100644 --- a/zarr/tests/test_cupy.py +++ b/zarr/tests/test_meta_array.py @@ -162,7 +162,7 @@ def test_empty(module, compressor): z = empty( 100, chunks=10, - compressor=init_compressor(compressor), + compressor=compressor, meta_array=xp.empty(()), ) assert (100,) == z.shape @@ -175,7 +175,7 @@ def test_zeros(module, compressor): z = zeros( 100, chunks=10, - compressor=init_compressor(compressor), + compressor=compressor, meta_array=xp.empty(()), ) assert (100,) == z.shape @@ -189,7 +189,7 @@ def test_ones(module, compressor): z = ones( 100, chunks=10, - compressor=init_compressor(compressor), + compressor=compressor, meta_array=xp.empty(()), ) assert (100,) == z.shape @@ -205,7 +205,7 @@ def test_full(module, compressor): chunks=10, fill_value=42, dtype="i4", - compressor=init_compressor(compressor), + compressor=compressor, meta_array=xp.empty(()), ) assert (100,) == z.shape @@ -218,7 +218,7 @@ def test_full(module, compressor): chunks=10, fill_value=np.nan, dtype="f8", - compressor=init_compressor(compressor), + compressor=compressor, meta_array=xp.empty(()), ) assert np.all(np.isnan(z[:])) @@ -230,11 +230,11 @@ def test_group(tmp_path, module, compressor, store_type): xp = ensure_module(module) store = init_store(tmp_path, store_type) g = open_group(store, meta_array=xp.empty(())) - g.ones("data", shape=(10, 11), dtype=int, compressor=init_compressor(compressor)) + g.ones("data", shape=(10, 11), dtype=int, compressor=compressor) a = g["data"] assert a.shape == (10, 11) assert a.dtype == int assert isinstance(a, Array) - assert isinstance(a[:], xp.ndarray) + assert isinstance(a[:], type(xp.empty(()))) assert (a[:] == 1).all() assert isinstance(g.meta_array, type(xp.empty(()))) From d40593b8e08d31ac1543ebf9837e68f88284ad26 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 11 Aug 2022 12:37:07 +0200 Subject: [PATCH 28/29] removed ensure_cls() --- zarr/tests/test_meta_array.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/zarr/tests/test_meta_array.py b/zarr/tests/test_meta_array.py index 58aa61fa1..798bdd8ca 100644 --- a/zarr/tests/test_meta_array.py +++ b/zarr/tests/test_meta_array.py @@ -104,13 +104,6 @@ def init_store(tmp_path, store_type) -> Optional[Store]: return None -def ensure_cls(obj): - if isinstance(obj, str): - module, cls_name = obj.rsplit(".", maxsplit=1) - return getattr(pytest.importorskip(module), cls_name) - return obj - - def ensure_module(module): if isinstance(module, str): return pytest.importorskip(module) From 149d511194c28593b6b9564957c684a3b5cd66bc Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 11 Aug 2022 12:37:25 +0200 Subject: [PATCH 29/29] Added "# pragma: no cover" to the CuPyCPUCompressor test class --- zarr/tests/test_meta_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/tests/test_meta_array.py b/zarr/tests/test_meta_array.py index 798bdd8ca..6172af3be 100644 --- a/zarr/tests/test_meta_array.py +++ b/zarr/tests/test_meta_array.py @@ -13,7 +13,7 @@ from zarr.storage import DirectoryStore, MemoryStore, Store, ZipStore -class CuPyCPUCompressor(Codec): +class CuPyCPUCompressor(Codec): # pragma: no cover """CPU compressor for CuPy arrays This compressor converts CuPy arrays host memory before compressing