diff --git a/changes/2863.feature.md b/changes/2863.feature.md
new file mode 100644
index 0000000000..a8347157d9
--- /dev/null
+++ b/changes/2863.feature.md
@@ -0,0 +1,7 @@
+Added GPU-accelerated Zstd Codec
+
+This adds support for decoding with the Zstd Codec on NVIDIA GPUs using the
+nvidia-nvcomp library.
+
+With `zarr.config.enable_gpu()`, buffers will be decoded using the GPU
+and the output will reside in device memory.
diff --git a/docs/user-guide/config.md b/docs/user-guide/config.md
index 21fe9b5def..4ab8e33077 100644
--- a/docs/user-guide/config.md
+++ b/docs/user-guide/config.md
@@ -39,6 +39,25 @@ first register the implementations in the registry and then select them in the c
 For example, an implementation of the bytes codec in a class `'custompackage.NewBytesCodec'`,
 requires the value of `codecs.bytes.name` to be `'custompackage.NewBytesCodec'`.
 
+## Codecs
+
+Zarr and zarr-python split the logical codec definition from the implementation.
+The Zarr metadata serialized in the store specifies just the codec name and
+configuration. To resolve the specific implementation (the Python class used at
+runtime to encode or decode data), zarr-python looks up the codec name in the
+codec registry.
+
+For example, after calling `zarr.config.enable_gpu()`, an nvcomp-based
+codec will be used:
+
+```python
+>>> with zarr.config.enable_gpu():
+...     print(zarr.config.get('codecs.zstd'))
+zarr.codecs.gpu.NvcompZstdCodec
+```
+
+## Default Configuration
+
 This is the current default configuration:
 
 ```python exec="true" session="config" source="above" result="ansi"
diff --git a/docs/user-guide/gpu.md b/docs/user-guide/gpu.md
index 3317bdf065..a54874a85b 100644
--- a/docs/user-guide/gpu.md
+++ b/docs/user-guide/gpu.md
@@ -2,15 +2,6 @@
 
 Zarr can use GPUs to accelerate your workload by running `zarr.Config.enable_gpu`.
 
-!!! note
-    `zarr-python` currently supports reading the ndarray data into device (GPU)
-    memory as the final stage of the codec pipeline. Data will still be read into
-    or copied to host (CPU) memory for encoding and decoding.
-
-    In the future, codecs will be available compressing and decompressing data on
-    the GPU, avoiding the need to move data between the host and device for
-    compression and decompression.
-
 ## Reading data into device memory
 
 [`zarr.config`][] configures Zarr to use GPU memory for the data
@@ -29,3 +20,9 @@ type(z[:10, :10])
 ```
 
 Note that the output type is a `cupy.ndarray` rather than a NumPy array.
+
+For supported codecs, data will be decoded using the GPU via the [nvcomp] library.
+See [runtime-configuration][] for more details. Issues and feature requests for NVIDIA nvCOMP can be reported in the nvcomp [issue tracker].
+
+[nvcomp]: https://docs.nvidia.com/cuda/nvcomp/samples/python_samples.html
+[issue tracker]: https://github.com/NVIDIA/CUDALibrarySamples/issues
\ No newline at end of file
diff --git a/docs/user-guide/gpu.rst b/docs/user-guide/gpu.rst
new file mode 100644
index 0000000000..cd4f6c5eaf
--- /dev/null
+++ b/docs/user-guide/gpu.rst
@@ -0,0 +1,34 @@
+.. _user-guide-gpu:
+
+Using GPUs with Zarr
+====================
+
+Zarr can use GPUs to accelerate your workload by running
+:meth:`zarr.config.enable_gpu`.
+
+Reading data into device memory
+-------------------------------
+
+:meth:`zarr.config.enable_gpu` configures Zarr to use GPU memory for the data
+buffers used internally by Zarr.
+
+.. code-block:: python
+
+   >>> import zarr
+   >>> import cupy as cp  # doctest: +SKIP
+   >>> zarr.config.enable_gpu()  # doctest: +SKIP
+   >>> store = zarr.storage.MemoryStore()  # doctest: +SKIP
+   >>> z = zarr.create_array(  # doctest: +SKIP
+   ...     store=store, shape=(100, 100), chunks=(10, 10), dtype="float32",
+   ... )
+   >>> type(z[:10, :10])  # doctest: +SKIP
+   cupy.ndarray
+
+Note that the output type is a ``cupy.ndarray`` rather than a NumPy array.
+
+For supported codecs, data will be decoded using the GPU via the `nvcomp`_
+library. See :ref:`user-guide-config` for more details. Issues and feature requests
+for NVIDIA nvCOMP can be reported in the `nvcomp issue tracker`_.
+
+.. _nvcomp: https://docs.nvidia.com/cuda/nvcomp/samples/python_samples.html
+.. _nvcomp issue tracker: https://github.com/NVIDIA/CUDALibrarySamples/issues
diff --git a/pyproject.toml b/pyproject.toml
index 6164f69382..27d39a8e1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,6 +67,7 @@ remote = [
 ]
 gpu = [
     "cupy-cuda12x",
+    "nvidia-nvcomp-cu12",
 ]
 cli = ["typer"]
 # Development extras
diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py
index 4c621290e7..512e3252cb 100644
--- a/src/zarr/codecs/__init__.py
+++ b/src/zarr/codecs/__init__.py
@@ -3,6 +3,7 @@
 from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle
 from zarr.codecs.bytes import BytesCodec, Endian
 from zarr.codecs.crc32c_ import Crc32cCodec
+from zarr.codecs.gpu import NvcompZstdCodec
 from zarr.codecs.gzip import GzipCodec
 from zarr.codecs.numcodecs import (
     BZ2,
@@ -41,6 +42,7 @@
     "Crc32cCodec",
     "Endian",
     "GzipCodec",
+    "NvcompZstdCodec",
     "ShardingCodec",
     "ShardingCodecIndexLocation",
     "TransposeCodec",
diff --git a/src/zarr/codecs/gpu.py b/src/zarr/codecs/gpu.py
new file mode 100644
index 0000000000..88df70fff6
--- /dev/null
+++ b/src/zarr/codecs/gpu.py
@@ -0,0 +1,178 @@
+from __future__ import annotations
+
+import asyncio
+from dataclasses import dataclass
+from functools import cached_property
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from zarr.abc.codec import BytesBytesCodec
+from zarr.core.common import JSON, parse_named_configuration
+from zarr.registry import register_codec
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+    from typing import Self
+
+    from zarr.core.array_spec import ArraySpec
+    from zarr.core.buffer import Buffer
+
+try:
+    import cupy as cp
+except ImportError:  # pragma: no cover
+    cp = None
+
+try:
+    from nvidia import nvcomp
+except ImportError:  # pragma: no cover
+    nvcomp = None
+
+
+def _parse_zstd_level(data: JSON) -> int:
+    if isinstance(data, int):
+        if data >= 23:
+            raise ValueError(f"Value must be less than or equal to 22. Got {data} instead.")
+        return data
+    raise TypeError(f"Got value with type {type(data)}, but expected an int.")
+
+
+def _parse_checksum(data: JSON) -> bool:
+    if isinstance(data, bool):
+        return data
+    raise TypeError(f"Expected bool. Got {type(data)}.")
+
+
+@dataclass(frozen=True)
+class NvcompZstdCodec(BytesBytesCodec):
+    is_fixed_size = True
+
+    level: int = 0
+    checksum: bool = False
+
+    def __init__(self, *, level: int = 0, checksum: bool = False) -> None:
+        # TODO: Set CUDA device appropriately here and also set CUDA stream
+
+        level_parsed = _parse_zstd_level(level)
+        checksum_parsed = _parse_checksum(checksum)
+
+        object.__setattr__(self, "level", level_parsed)
+        object.__setattr__(self, "checksum", checksum_parsed)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, JSON]) -> Self:
+        _, configuration_parsed = parse_named_configuration(data, "zstd")
+        return cls(**configuration_parsed)  # type: ignore[arg-type]
+
+    def to_dict(self) -> dict[str, JSON]:
+        return {
+            "name": "zstd",
+            "configuration": {"level": self.level, "checksum": self.checksum},
+        }
+
+    @cached_property
+    def _zstd_codec(self) -> nvcomp.Codec:
+        device = cp.cuda.Device()  # Select the current default device
+        stream = cp.cuda.get_current_stream()  # Use the current default stream
+        return nvcomp.Codec(
+            algorithm="Zstd",
+            bitstream_kind=nvcomp.BitstreamKind.RAW,
+            device_id=device.id,
+            cuda_stream=stream.ptr,
+        )
+
+    def _convert_to_nvcomp_arrays(
+        self,
+        chunks_and_specs: Iterable[tuple[Buffer | None, ArraySpec]],
+    ) -> tuple[list[nvcomp.Array], list[int]]:
+        none_indices = [i for i, (b, _) in enumerate(chunks_and_specs) if b is None]
+        filtered_inputs = [b.as_array_like() for b, _ in chunks_and_specs if b is not None]
+        # TODO: add CUDA stream here
+        return nvcomp.as_arrays(filtered_inputs), none_indices
+
+    def _convert_from_nvcomp_arrays(
+        self,
+        arrays: Iterable[nvcomp.Array],
+        chunks_and_specs: Iterable[tuple[Buffer | None, ArraySpec]],
+    ) -> Iterable[Buffer | None]:
+        return [
+            spec.prototype.buffer.from_array_like(cp.array(a, dtype=np.dtype("B"), copy=False))
+            if a
+            else None
+            for a, (_, spec) in zip(arrays, chunks_and_specs, strict=True)
+        ]
+
+    async def decode(
+        self,
+        chunks_and_specs: Iterable[tuple[Buffer | None, ArraySpec]],
+    ) -> Iterable[Buffer | None]:
+        """Decodes a batch of chunks.
+        Chunks can be None in which case they are ignored by the codec.
+
+        Parameters
+        ----------
+        chunks_and_specs : Iterable[tuple[Buffer | None, ArraySpec]]
+            Ordered set of encoded chunks with their accompanying chunk spec.
+
+        Returns
+        -------
+        Iterable[Buffer | None]
+        """
+        chunks_and_specs = list(chunks_and_specs)
+
+        # Convert to nvcomp arrays
+        filtered_inputs, none_indices = self._convert_to_nvcomp_arrays(chunks_and_specs)
+
+        outputs = self._zstd_codec.decode(filtered_inputs) if len(filtered_inputs) > 0 else []
+
+        # Record an event on the current stream for synchronization
+        event = cp.cuda.Event()
+        event.record()
+        # Wait for the decode to complete in a separate async thread
+        await asyncio.to_thread(event.synchronize)
+
+        for index in none_indices:
+            outputs.insert(index, None)
+
+        return self._convert_from_nvcomp_arrays(outputs, chunks_and_specs)
+
+    async def encode(
+        self,
+        chunks_and_specs: Iterable[tuple[Buffer | None, ArraySpec]],
+    ) -> Iterable[Buffer | None]:
+        """Encodes a batch of chunks.
+        Chunks can be None in which case they are ignored by the codec.
+
+        Parameters
+        ----------
+        chunks_and_specs : Iterable[tuple[Buffer | None, ArraySpec]]
+            Ordered set of to-be-encoded chunks with their accompanying chunk spec.
+
+        Returns
+        -------
+        Iterable[Buffer | None]
+        """
+        # TODO: Make this actually async
+        chunks_and_specs = list(chunks_and_specs)
+
+        # Convert to nvcomp arrays
+        filtered_inputs, none_indices = self._convert_to_nvcomp_arrays(chunks_and_specs)
+
+        outputs = self._zstd_codec.encode(filtered_inputs) if len(filtered_inputs) > 0 else []
+
+        # Record an event on the current stream for synchronization
+        event = cp.cuda.Event()
+        event.record()
+        # Wait for the encode to complete in a separate async thread
+        await asyncio.to_thread(event.synchronize)
+
+        for index in none_indices:
+            outputs.insert(index, None)
+
+        return self._convert_from_nvcomp_arrays(outputs, chunks_and_specs)
+
+    def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
+        raise NotImplementedError
+
+
+register_codec("zstd", NvcompZstdCodec)
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 59ca8f5929..5cda8044c2 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -28,7 +28,6 @@
 from zarr.codecs._v2 import V2Codec
 from zarr.codecs.bytes import BytesCodec
 from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
-from zarr.codecs.zstd import ZstdCodec
 from zarr.core._info import ArrayInfo
 from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, parse_array_config
 from zarr.core.attributes import Attributes
@@ -128,6 +127,7 @@
     _parse_array_array_codec,
     _parse_array_bytes_codec,
     _parse_bytes_bytes_codec,
+    get_codec_class,
     get_pipeline_class,
 )
 from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path
@@ -5036,9 +5036,9 @@ def default_compressors_v3(dtype: ZDType[Any, Any]) -> tuple[BytesBytesCodec, ..
     """
     Given a data type, return the default compressors for that data type.
 
-    This is just a tuple containing ``ZstdCodec``
+    This is just a tuple containing an instance of the default "zstd" codec class.
     """
-    return (ZstdCodec(),)
+    return (cast(BytesBytesCodec, get_codec_class("zstd")()),)
 
 
 def default_serializer_v3(dtype: ZDType[Any, Any]) -> ArrayBytesCodec:
diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py
index bfe977c50f..f0242ee8b4 100644
--- a/src/zarr/core/buffer/gpu.py
+++ b/src/zarr/core/buffer/gpu.py
@@ -8,9 +8,6 @@
     cast,
 )
 
-import numpy as np
-import numpy.typing as npt
-
 from zarr.core.buffer import core
 from zarr.core.buffer.core import ArrayLike, BufferPrototype, NDArrayLike
 from zarr.errors import ZarrUserWarning
@@ -23,8 +20,9 @@
     from collections.abc import Iterable
     from typing import Self
 
-    from zarr.core.common import BytesLike
+    import numpy.typing as npt
 
+    from zarr.core.common import BytesLike
 try:
     import cupy as cp
 except ImportError:
@@ -54,14 +52,14 @@ class Buffer(core.Buffer):
 
     def __init__(self, array_like: ArrayLike) -> None:
         if cp is None:
-            raise ImportError(
+            raise ImportError(  # pragma: no cover
                 "Cannot use zarr.buffer.gpu.Buffer without cupy. Please install cupy."
             )
 
         if array_like.ndim != 1:
             raise ValueError("array_like: only 1-dim allowed")
-        if array_like.dtype != np.dtype("B"):
-            raise ValueError("array_like: only byte dtype allowed")
+        if array_like.dtype.itemsize != 1:
+            raise ValueError("array_like: only dtypes with itemsize=1 allowed")
 
         if not hasattr(array_like, "__cuda_array_interface__"):
             # Slow copy based path for arrays that don't support the __cuda_array_interface__
@@ -108,13 +106,13 @@ def as_numpy_array(self) -> npt.NDArray[Any]:
         return cast("npt.NDArray[Any]", cp.asnumpy(self._data))
 
     def __add__(self, other: core.Buffer) -> Self:
-        other_array = other.as_array_like()
-        assert other_array.dtype == np.dtype("B")
-        gpu_other = Buffer(other_array)
-        gpu_other_array = gpu_other.as_array_like()
-        return self.__class__(
-            cp.concatenate((cp.asanyarray(self._data), cp.asanyarray(gpu_other_array)))
-        )
+        other_array = cp.asanyarray(other.as_array_like())
+        left = self._data
+        if left.dtype != other_array.dtype:
+            other_array = other_array.view(left.dtype)
+
+        buffer = cp.concatenate([left, other_array])
+        return type(self)(buffer)
 
 
 class NDBuffer(core.NDBuffer):
@@ -144,7 +142,7 @@ class NDBuffer(core.NDBuffer):
 
     def __init__(self, array: NDArrayLike) -> None:
         if cp is None:
-            raise ImportError(
+            raise ImportError(  # pragma: no cover
                 "Cannot use zarr.buffer.gpu.NDBuffer without cupy. Please install cupy."
             )
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 5d463ec79c..a3cc9bb2eb 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -74,7 +74,15 @@ def enable_gpu(self) -> ConfigSet:
         Configure Zarr to use GPUs where possible.
         """
         return self.set(
-            {"buffer": "zarr.buffer.gpu.Buffer", "ndbuffer": "zarr.buffer.gpu.NDBuffer"}
+            {
+                "buffer": "zarr.buffer.gpu.Buffer",
+                "ndbuffer": "zarr.buffer.gpu.NDBuffer",
+                "codecs": {"zstd": "zarr.codecs.gpu.NvcompZstdCodec"},
+                "codec_pipeline": {
+                    "path": "zarr.core.codec_pipeline.BatchedCodecPipeline",
+                    "batch_size": 65536,
+                },
+            }
         )
 
 
diff --git a/tests/test_api.py b/tests/test_api.py
index 30f648a815..e96bd2f4bb 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -14,6 +14,7 @@
     from collections.abc import Callable
     from pathlib import Path
 
+    from zarr.abc.codec import Codec
     from zarr.abc.store import Store
     from zarr.core.common import JSON, MemoryOrder, ZarrFormat
 
@@ -41,6 +42,7 @@
     save_array,
     save_group,
 )
+from zarr.codecs import NvcompZstdCodec
 from zarr.core.buffer import NDArrayLike
 from zarr.errors import (
     ArrayNotFoundError,
@@ -1390,14 +1392,15 @@ def test_api_exports() -> None:
     assert zarr.api.asynchronous.__all__ == zarr.api.synchronous.__all__
 
 
-@gpu_test
+@gpu_test  # type: ignore[misc,unused-ignore]
 @pytest.mark.parametrize(
     "store",
     ["local", "memory", "zip"],
     indirect=True,
 )
 @pytest.mark.parametrize("zarr_format", [None, 2, 3])
-def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None:
+@pytest.mark.parametrize("codec", ["auto", NvcompZstdCodec()])
+def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None, codec: str | Codec) -> None:
     import cupy as cp
 
     if zarr_format == 2:
@@ -1405,7 +1408,7 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None:
         # array to bytes.
         compressors = None
     else:
-        compressors = "auto"
+        compressors = codec
 
     with zarr.config.enable_gpu():
         src = cp.random.uniform(size=(100, 100))  # allocate on the device
diff --git a/tests/test_buffer.py b/tests/test_buffer.py
index b50e5abb67..43321ea2b0 100644
--- a/tests/test_buffer.py
+++ b/tests/test_buffer.py
@@ -193,6 +193,19 @@ def test_numpy_buffer_prototype() -> None:
         ndbuffer.as_scalar()
 
 
+@gpu_test
+def test_gpu_buffer_raises() -> None:
+    import cupy as cp
+
+    arr = cp.empty((10, 10), dtype="B")
+    with pytest.raises(ValueError, match="array_like: only 1-dim allowed"):
+        gpu.Buffer(arr)
+
+    arr = cp.arange(12, dtype="int32")
+    with pytest.raises(ValueError, match="array_like: only dtypes"):
+        gpu.Buffer(arr)
+
+
 @gpu_test
 def test_gpu_buffer_prototype() -> None:
     buffer = gpu.buffer_prototype.buffer.create_zero_length()
diff --git a/tests/test_codecs/test_codecs.py b/tests/test_codecs/test_codecs.py
index 1884d501a5..7d7b4ed8aa 100644
--- a/tests/test_codecs/test_codecs.py
+++ b/tests/test_codecs/test_codecs.py
@@ -16,12 +16,14 @@
     GzipCodec,
     ShardingCodec,
     TransposeCodec,
+    ZstdCodec,
 )
 from zarr.core.buffer import default_buffer_prototype
 from zarr.core.indexing import BasicSelection, morton_order_iter
 from zarr.core.metadata.v3 import ArrayV3Metadata
 from zarr.dtype import UInt8
 from zarr.errors import ZarrUserWarning
+from zarr.registry import register_codec
 from zarr.storage import StorePath
 
 if TYPE_CHECKING:
@@ -362,3 +364,22 @@ async def test_resize(store: Store) -> None:
     assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None
     assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None
     assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is None
+
+
+def test_uses_default_codec() -> None:
+    class MyZstdCodec(ZstdCodec):
+        pass
+
+    register_codec("zstd", MyZstdCodec)
+
+    with zarr.config.set(
+        {"codecs": {"zstd": f"{MyZstdCodec.__module__}.{MyZstdCodec.__qualname__}"}}
+    ):
+        a = zarr.create_array(
+            StorePath(zarr.storage.MemoryStore(), path="mycodec"),
+            shape=(10, 10),
+            chunks=(10, 10),
+            dtype="int32",
+        )
+    assert a.metadata.zarr_format == 3
+    assert isinstance(a.metadata.codecs[-1], MyZstdCodec)
diff --git a/tests/test_codecs/test_nvcomp.py b/tests/test_codecs/test_nvcomp.py
new file mode 100644
index 0000000000..9b5554d9a3
--- /dev/null
+++ b/tests/test_codecs/test_nvcomp.py
@@ -0,0 +1,190 @@
+import contextlib
+import typing
+from collections.abc import Iterator
+
+import numpy as np
+import pytest
+
+import zarr
+from zarr.abc.store import Store
+from zarr.buffer.gpu import buffer_prototype
+from zarr.codecs import NvcompZstdCodec
+from zarr.core.array_spec import ArrayConfig, ArraySpec
+from zarr.storage import StorePath
+from zarr.testing.utils import gpu_test
+
+if typing.TYPE_CHECKING:
+    from zarr.core.common import JSON
+
+
+# The type-ignores below are needed because GPU libraries are not reliably
+# available in the pre-commit mypy environment.
+
+
+@gpu_test  # type: ignore[misc,unused-ignore]
+@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"])
+@pytest.mark.parametrize(
+    "checksum",
+    [
+        False,
+    ],
+)
+@pytest.mark.parametrize(
+    "selection",
+    [
+        (slice(None), slice(None)),  # everything
+        (slice(4, None), slice(4, None)),  # top-left chunk is empty
+    ],
+)
+def test_nvcomp_zstd(store: Store, checksum: bool, selection: tuple[slice, slice]) -> None:
+    import cupy as cp
+
+    with zarr.config.enable_gpu():
+        data = cp.arange(0, 256, dtype="uint16").reshape((16, 16))
+
+        a = zarr.create_array(
+            StorePath(store, path="nvcomp_zstd"),
+            shape=data.shape,
+            chunks=(4, 4),
+            dtype=data.dtype,
+            fill_value=0,
+            compressors=NvcompZstdCodec(level=0, checksum=checksum),
+        )
+
+        a[*selection] = data[*selection]
+
+        if selection == (slice(None), slice(None)):
+            cp.testing.assert_array_equal(data[*selection], a[*selection])
+            cp.testing.assert_array_equal(data[:, :], a[:, :])
+        else:
+            assert a.nchunks_initialized < a.nchunks
+            expected = cp.full(data.shape, a.fill_value)
+            expected[*selection] = data[*selection]
+            cp.testing.assert_array_equal(expected[*selection], a[*selection])
+            cp.testing.assert_array_equal(expected[:, :], a[:, :])
+
+
+@gpu_test  # type: ignore[misc,unused-ignore]
+@pytest.mark.parametrize("host_encode", [True, False])
+def test_gpu_codec_compatibility(host_encode: bool) -> None:
+    # Ensure that we can decode CPU-encoded data with the GPU
+    # and GPU-encoded data with the CPU
+    import cupy as cp
+
+    @contextlib.contextmanager
+    def gpu_context() -> Iterator[None]:
+        with zarr.config.enable_gpu():
+            yield
+
+    if host_encode:
+        # CPU encode, GPU decode
+        write_ctx: contextlib.AbstractContextManager[None] = contextlib.nullcontext()
+        read_ctx: contextlib.AbstractContextManager[None] = gpu_context()
+        write_data = np.arange(16, dtype="int32").reshape(4, 4)
+        read_data = cp.array(write_data)
+        xp = cp
+        # MemoryStore holds Buffers; we write a CPU buffer, but read a GPU buffer,
+        # which emits a warning.
+        expected_warning: pytest.WarningsRecorder | contextlib.AbstractContextManager[None] = (
+            pytest.warns(zarr.errors.ZarrUserWarning)
+        )
+    else:
+        # GPU encode, CPU decode
+        write_ctx = gpu_context()
+        read_ctx = contextlib.nullcontext()
+        write_data = cp.arange(16, dtype="int32").reshape(4, 4)
+        read_data = write_data.get()
+        xp = np
+        expected_warning = contextlib.nullcontext()
+
+    store = zarr.storage.MemoryStore()
+
+    with write_ctx:
+        z = zarr.create_array(
+            store=store,
+            shape=write_data.shape,
+            chunks=(4, 4),
+            dtype=write_data.dtype,
+        )
+        z[:] = write_data
+
+    with read_ctx, expected_warning:
+        # We need to reopen z, because `z.codec_pipeline` is set at creation
+        z = zarr.open_array(store=store, mode="r")
+        result = z[:]
+        assert isinstance(result, type(read_data))
+        xp.testing.assert_array_equal(result, read_data)
+
+
+@gpu_test  # type: ignore[misc,unused-ignore]
+def test_invalid_raises() -> None:
+    with pytest.raises(ValueError):
+        NvcompZstdCodec(level=100, checksum=False)
+
+    with pytest.raises(TypeError):
+        NvcompZstdCodec(level="100", checksum=False)  # type: ignore[arg-type,unused-ignore]
+
+    with pytest.raises(TypeError):
+        NvcompZstdCodec(checksum="False")  # type: ignore[arg-type,unused-ignore]
+
+
+@gpu_test  # type: ignore[misc,unused-ignore]
+def test_uses_default_codec() -> None:
+    with zarr.config.enable_gpu():
+        a = zarr.create_array(
+            StorePath(zarr.storage.MemoryStore(), path="nvcomp_zstd"),
+            shape=(10, 10),
+            chunks=(10, 10),
+            dtype="int32",
+        )
+    assert a.metadata.zarr_format == 3
+    assert isinstance(a.metadata.codecs[-1], NvcompZstdCodec)
+
+
+@gpu_test  # type: ignore[misc,unused-ignore]
+def test_nvcomp_from_dict() -> None:
+    config: dict[str, JSON] = {
+        "name": "zstd",
+        "configuration": {
+            "level": 0,
+            "checksum": False,
+        },
+    }
+    codec = NvcompZstdCodec.from_dict(config)
+    assert codec.level == 0
+    assert codec.checksum is False
+
+
+@gpu_test  # type: ignore[misc,unused-ignore]
+def test_compute_encoded_chunk_size() -> None:
+    codec = NvcompZstdCodec(level=0, checksum=False)
+    with pytest.raises(NotImplementedError):
+        codec.compute_encoded_size(
+            _input_byte_length=0,
+            _chunk_spec=ArraySpec(
+                shape=(10, 10),
+                dtype=zarr.core.dtype.npy.int.Int32(),
+                fill_value=0,
+                config=ArrayConfig(order="C", write_empty_chunks=False),
+                prototype=buffer_prototype,
+            ),
+        )
+
+
+@gpu_test  # type: ignore[misc,unused-ignore]
+async def test_nvcomp_zstd_encode_none() -> None:
+    codec = NvcompZstdCodec(level=0, checksum=False)
+    chunks_and_specs = [
+        (
+            None,
+            ArraySpec(
+                shape=(10, 10),
+                dtype=zarr.core.dtype.npy.int.Int32(),
+                fill_value=0,
+                config=ArrayConfig(order="C", write_empty_chunks=False),
+                prototype=buffer_prototype,
+            ),
+        )
+    ]
+    result = await codec.encode(chunks_and_specs)
+    assert result == [None]
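
---

For anyone trying the feature out, here is a minimal usage sketch assembled from the docs and tests above. It is not part of the diff: it assumes a CUDA-capable GPU with the `gpu` extra installed (`cupy-cuda12x` and `nvidia-nvcomp-cu12`), and it simply combines `zarr.config.enable_gpu()` with `zarr.create_array` the same way `docs/user-guide/gpu.md` and `tests/test_codecs/test_nvcomp.py` do.

```python
# Minimal sketch, not part of the diff: requires a CUDA GPU plus the "gpu"
# extra (cupy-cuda12x, nvidia-nvcomp-cu12). Under enable_gpu() the "zstd"
# entry in the codec registry resolves to NvcompZstdCodec, so the default
# compressor runs through nvCOMP and reads return device-resident arrays.
import cupy as cp

import zarr

with zarr.config.enable_gpu():
    store = zarr.storage.MemoryStore()
    z = zarr.create_array(store=store, shape=(100, 100), chunks=(10, 10), dtype="float32")
    z[:] = cp.random.uniform(size=(100, 100))  # data stays in device memory
    print(type(z[:10, :10]))            # cupy.ndarray
    print(type(z.metadata.codecs[-1]))  # zarr.codecs.gpu.NvcompZstdCodec
```

Reading the same store without `enable_gpu()` falls back to the CPU `ZstdCodec`, which is what `test_gpu_codec_compatibility` exercises in both directions.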