From 8808f9149ae1b407c20cbbf5193544b77532d8a8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 17 Aug 2025 11:23:53 -0700 Subject: [PATCH] Optimize getitem with empty chunks --- changes/3368.misc.rst | 2 ++ src/zarr/abc/codec.py | 5 +++++ src/zarr/codecs/sharding.py | 6 ++---- src/zarr/core/array.py | 3 +-- 4 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 changes/3368.misc.rst diff --git a/changes/3368.misc.rst b/changes/3368.misc.rst new file mode 100644 index 0000000000..92c90cff33 --- /dev/null +++ b/changes/3368.misc.rst @@ -0,0 +1,2 @@ +Improved performance of reading arrays by not unnecessarily using +the fill value. diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py index d5c995d2ca..ec100af4e5 100644 --- a/src/zarr/abc/codec.py +++ b/src/zarr/abc/codec.py @@ -423,6 +423,11 @@ async def read( The second slice selection determines where in the output array the chunk data will be written. The ByteGetter is used to fetch the necessary bytes. The chunk spec contains information about the construction of an array from the bytes. + + If the Store returns ``None`` for a chunk, then the chunk was not + written and the implementation must set the values of that chunk (or + ``out``) to the fill value for the array. + out : NDBuffer """ ... diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 888d258649..6501942a31 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -452,11 +452,10 @@ async def _decode_single( ) # setup output array - out = chunk_spec.prototype.nd_buffer.create( + out = chunk_spec.prototype.nd_buffer.empty( shape=shard_shape, dtype=shard_spec.dtype.to_native_dtype(), order=shard_spec.order, - fill_value=0, ) shard_dict = await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard) @@ -499,11 +498,10 @@ async def _decode_partial_single( ) # setup output array - out = shard_spec.prototype.nd_buffer.create( + out = shard_spec.prototype.nd_buffer.empty( shape=indexer.shape, dtype=shard_spec.dtype.to_native_dtype(), order=shard_spec.order, - fill_value=0, ) indexed_chunks = list(indexer) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 2ce33df7ba..8bf6c089dd 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1350,11 +1350,10 @@ async def _get_selection( f"shape of out argument doesn't match. Expected {indexer.shape}, got {out.shape}" ) else: - out_buffer = prototype.nd_buffer.create( + out_buffer = prototype.nd_buffer.empty( shape=indexer.shape, dtype=out_dtype, order=self.order, - fill_value=self.metadata.fill_value, ) if product(indexer.shape) > 0: # need to use the order from the metadata for v2