zarr-developers · joshmoore · Sep 8, 2022 · Jan 11, 2022 · Jan 11, 2022 · Jan 11, 2022
diff --git a/zarr/core.py b/zarr/core.py
@@ -7,9 +7,7 @@
 from functools import reduce
 
 import numpy as np
-from numcodecs.compat import ensure_bytes, ensure_ndarray
-
-from collections.abc import MutableMapping
+from numcodecs.compat import ensure_bytes
 
 from zarr.attrs import Attributes
 from zarr.codecs import AsType, get_codec
@@ -31,7 +29,7 @@
     is_scalar,
     pop_fields,
 )
-from zarr.storage import array_meta_key, attrs_key, getsize, listdir, BaseStore
+from zarr.storage import KVStore, array_meta_key, attrs_key, getsize, listdir, BaseStore
 from zarr.util import (
     all_equal,
     InfoReporter,
@@ -44,6 +42,7 @@
     normalize_shape,
     normalize_storage_path,
     PartialReadBuffer,
+    ensure_ndarray
 )
 
 
@@ -91,6 +90,12 @@ class Array:
 
         .. versionadded:: 2.11
 
+    meta_array : array-like, optional
+        An array instance to use for determining arrays to create and return
+        to users. Use `numpy.empty(())` by default.
+
+        .. versionadded:: 2.12
+
 
     Attributes
     ----------
@@ -122,6 +127,7 @@ class Array:
     vindex
     oindex
     write_empty_chunks
+    meta_array
 
     Methods
     -------
@@ -155,6 +161,7 @@ def __init__(
         cache_attrs=True,
         partial_decompress=False,
         write_empty_chunks=True,
+        meta_array=None,
     ):
         # N.B., expect at this point store is fully initialized with all
         # configuration metadata fully specified and normalized
@@ -175,6 +182,11 @@ def __init__(
         self._is_view = False
         self._partial_decompress = partial_decompress
         self._write_empty_chunks = write_empty_chunks
+        # Keep a zero-dimensional prototype only: its type selects the array library.
+        if meta_array is not None:
+            self._meta_array = np.empty_like(meta_array, shape=())
+        else:
+            self._meta_array = np.empty(())
 
         # initialize metadata
         self._load_metadata()
@@ -487,6 +499,13 @@ def write_empty_chunks(self) -> bool:
         """
         return self._write_empty_chunks
 
+    @property
+    def meta_array(self):
+        """An array-like instance to use for determining arrays to create and return
+        to users.
+        """
+        return self._meta_array
+
     def __eq__(self, other):
         return (
             isinstance(other, Array) and
@@ -861,7 +880,7 @@ def _get_basic_selection_zd(self, selection, out=None, fields=None):
 
         except KeyError:
             # chunk not initialized
-            chunk = np.zeros((), dtype=self._dtype)
+            chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype)
             if self._fill_value is not None:
                 chunk.fill(self._fill_value)
 
@@ -1165,7 +1184,8 @@ def _get_selection(self, indexer, out=None, fields=None):
 
         # setup output array
         if out is None:
-            out = np.empty(out_shape, dtype=out_dtype, order=self._order)
+            out = np.empty_like(self._meta_array, shape=out_shape,
+                                dtype=out_dtype, order=self._order)
         else:
             check_array_shape('out', out, out_shape)
 
@@ -1539,9 +1559,13 @@ def set_coordinate_selection(self, selection, value, fields=None):
         # setup indexer
         indexer = CoordinateIndexer(selection, self)
 
-        # handle value - need to flatten
+        # handle value - need ndarray-like flatten value
         if not is_scalar(value, self._dtype):
-            value = np.asanyarray(value)
+            try:
+                value = ensure_ndarray(value)
+            except TypeError:
+                # Handle types like `list` or `tuple`
+                value = np.array(value, like=self._meta_array)
         if hasattr(value, 'shape') and len(value.shape) > 1:
             value = value.reshape(-1)
 
@@ -1644,7 +1668,7 @@ def _set_basic_selection_zd(self, selection, value, fields=None):
 
         except KeyError:
             # chunk not initialized
-            chunk = np.zeros((), dtype=self._dtype)
+            chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype)
             if self._fill_value is not None:
                 chunk.fill(self._fill_value)
 
@@ -1704,7 +1728,7 @@ def _set_selection(self, indexer, value, fields=None):
             pass
         else:
             if not hasattr(value, 'shape'):
-                value = np.asanyarray(value)
+                value = np.array(value, like=self._meta_array)
             check_array_shape('value', value, sel_shape)
 
         # iterate over chunks in range
@@ -1772,8 +1796,9 @@ def _process_chunk(
                 self._dtype != object):
 
             dest = out[out_selection]
+            dest_is_writable = getattr(dest.flags, "writeable", True)  # absent flag => writable
             write_direct = (
-                dest.flags.writeable and
+                dest_is_writable and
                 (
                     (self._order == 'C' and dest.flags.c_contiguous) or
                     (self._order == 'F' and dest.flags.f_contiguous)
@@ -1800,7 +1825,7 @@ def _process_chunk(
             if partial_read_decode:
                 cdata.prepare_chunk()
                 # size of chunk
-                tmp = np.empty(self._chunks, dtype=self.dtype)
+                tmp = np.empty(self._chunks, dtype=self.dtype, like=self._meta_array)
                 index_selection = PartialChunkIterator(chunk_selection, self.chunks)
                 for start, nitems, partial_out_selection in index_selection:
                     expected_shape = [
@@ -2014,7 +2039,9 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
             if is_scalar(value, self._dtype):
 
                 # setup array filled with value
-                chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
+                chunk = np.empty(
+                    self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array
+                )
                 chunk.fill(value)
 
             else:
@@ -2034,14 +2061,18 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
 
                 # chunk not initialized
                 if self._fill_value is not None:
-                    chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
+                    chunk = np.empty(
+                        self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array
+                    )
                     chunk.fill(self._fill_value)
                 elif self._dtype == object:
                     chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
                 else:
                     # N.B., use zeros here so any region beyond the array has consistent
                     # and compressible data
-                    chunk = np.zeros(self._chunks, dtype=self._dtype, order=self._order)
+                    chunk = np.zeros(
+                        self._chunks, dtype=self._dtype, order=self._order, like=self._meta_array
+                    )
 
             else:
 
@@ -2120,7 +2151,7 @@ def _encode_chunk(self, chunk):
             cdata = chunk
 
         # ensure in-memory data is immutable and easy to compare
-        if isinstance(self.chunk_store, MutableMapping):
+        if isinstance(self.chunk_store, KVStore):
             cdata = ensure_bytes(cdata)
 
         return cdata
@@ -2371,7 +2402,7 @@ def append(self, data, axis=0):
 
         Parameters
         ----------
-        data : array_like
+        data : array-like
             Data to be appended.
         axis : int
             Axis along which to append.
@@ -2407,7 +2438,7 @@ def _append_nosync(self, data, axis=0):
 
         # ensure data is array-like
         if not hasattr(data, 'shape'):
-            data = np.asanyarray(data)
+            data = np.array(data, like=self._meta_array)
 
         # ensure shapes are compatible for non-append dimensions
         self_shape_preserved = tuple(s for i, s in enumerate(self._shape)

diff --git a/zarr/creation.py b/zarr/creation.py
@@ -19,7 +19,9 @@ def create(shape, chunks=True, dtype=None, compressor='default',
            fill_value=0, order='C', store=None, synchronizer=None,
            overwrite=False, path=None, chunk_store=None, filters=None,
            cache_metadata=True, cache_attrs=True, read_only=False,
-           object_codec=None, dimension_separator=None, write_empty_chunks=True, **kwargs):
+           object_codec=None, dimension_separator=None, write_empty_chunks=True,
+           meta_array=None,
+           **kwargs):
     """Create an array.
 
     Parameters
@@ -80,6 +82,13 @@ def create(shape, chunks=True, dtype=None, compressor='default',
         as only chunks with non-fill-value data are stored, at the expense
         of overhead associated with checking the data of each chunk.
 
+        .. versionadded:: 2.11
+
+    meta_array : array-like, optional
+        An array instance to use for determining arrays to create and return
+        to users. Use `numpy.empty(())` by default.
+
+        .. versionadded:: 2.12
 
     Returns
     -------
@@ -152,7 +161,7 @@ def create(shape, chunks=True, dtype=None, compressor='default',
     # instantiate array
     z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer,
               cache_metadata=cache_metadata, cache_attrs=cache_attrs, read_only=read_only,
-              write_empty_chunks=write_empty_chunks)
+              write_empty_chunks=write_empty_chunks, meta_array=meta_array)
 
     return z
 

diff --git a/zarr/cupy.py b/zarr/cupy.py
@@ -0,0 +1,59 @@
+from numcodecs.abc import Codec
+from numcodecs.registry import get_codec, register_codec
+
+from .util import ensure_contiguous_ndarray
+
+
+class CuPyCPUCompressor(Codec):
+    """CPU compressor for CuPy arrays
+
+    This compressor copies CuPy arrays to host memory before compressing
+    them with `compressor`; decoding hands the data back as a CuPy array.
+
+    Parameters
+    ----------
+    compressor : numcodecs.abc.Codec, optional
+        The codec to use for compression and decompression; None skips both.
+    """
+
+    codec_id = "cupy_cpu_compressor"  # stored under "id" by get_config()
+
+    def __init__(self, compressor: Codec = None):
+        self.compressor = compressor  # falsy => encode/decode only move the data
+
+    def encode(self, buf):
+        import cupy  # deferred: cupy is imported only when encode() runs
+
+        buf = cupy.asnumpy(ensure_contiguous_ndarray(buf))  # device -> host copy
+        if self.compressor:
+            buf = self.compressor.encode(buf)
+        return buf
+
+    def decode(self, chunk, out=None):
+        import cupy  # deferred: cupy is imported only when decode() runs
+
+        if self.compressor:
+            cpu_out = None if out is None else cupy.asnumpy(out)  # host staging buffer
+            chunk = self.compressor.decode(chunk, cpu_out)
+
+        chunk = cupy.asarray(ensure_contiguous_ndarray(chunk))  # host -> device
+        if out is not None:
+            cupy.copyto(out, chunk.view(dtype=out.dtype), casting="no")  # fill `out` in place
+            chunk = out  # return the caller's buffer rather than the temporary
+        return chunk
+
+    def get_config(self):
+        cc_config = self.compressor.get_config() if self.compressor else None  # nested config
+        return {
+            "id": self.codec_id,
+            "compressor_config": cc_config,
+        }
+
+    @classmethod
+    def from_config(cls, config):
+        cc_config = config.get("compressor_config", None)  # key written by get_config()
+        compressor = get_codec(cc_config) if cc_config else None  # rebuild the wrapped codec
+        return cls(compressor=compressor)
+
+
+register_codec(CuPyCPUCompressor)  # runs at import time