Description
What happened?
I opened a file on HSDS containing only a scalar string dataset. Opening it fails with ValueError: zip() argument 2 is longer than argument 1 (full traceback below).
What did you expect to happen?
The dataset should have opened without error.
Minimal Complete Verifiable Example
# This requires a configured writable HSDS server in ~/.hscfg
import h5pyd
import xarray as xr

# Create a file containing a single scalar (shape ()) string dataset.
with h5pyd.File("/test/rho/y.h5", "w") as f:
    f.create_dataset("foo", (), dtype=bytes)

# Opening the file fails with the ValueError shown in the traceback below.
xr.open_datatree("/test/rho/y.h5", engine="h5netcdf", driver="h5pyd")
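The failure can also be demonstrated without an HSDS server. Below is a minimal sketch of the failing logic in H5NetCDFStore.open_store_variable, under the assumption that h5pyd reports a non-empty chunk shape for the scalar dataset while its dimension list is empty:

# Assumed values for the scalar variable created above:
dimensions = ()  # a scalar variable has no dimensions
chunks = (1,)    # hypothetical chunk shape reported by h5pyd
# zip(..., strict=True) raises because the iterables differ in length:
dict(zip(dimensions, chunks, strict=True))
# ValueError: zip() argument 2 is longer than argument 1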
MVCE confirmation
- Minimal example — the example is as focused as reasonably possible to demonstrate the underlying issue in xarray.
- Complete example — the example is self-contained, including all data and the text of any traceback.
- Verifiable example — the example copy & pastes into an IPython prompt or Binder notebook, returning the result.
- New issue — a search of GitHub Issues suggests this is not a duplicate.
- Recent environment — the issue occurs with the latest version of xarray and its dependencies.
Relevant log output
Cell In[36], line 4
2 with h5pyd.File("/test/rho/y.h5", "w") as f:
3 f.create_dataset('foo', (), dtype=bytes)
----> 4 xr.open_datatree("/test/rho/y.h5", engine='h5netcdf', driver='h5pyd',)
File ~/git/xarray/xarray/backends/api.py:1130, in open_datatree(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
1118 decoders = _resolve_decoders_kwargs(
1119 decode_cf,
1120 open_backend_dataset_parameters=(),
(...) 1126 decode_coords=decode_coords,
1127 )
1128 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
-> 1130 backend_tree = backend.open_datatree(
1131 filename_or_obj,
1132 drop_variables=drop_variables,
1133 **decoders,
1134 **kwargs,
1135 )
1137 tree = _datatree_from_backend_datatree(
1138 backend_tree,
1139 filename_or_obj,
(...) 1149 **kwargs,
1150 )
1152 return tree
File ~/git/xarray/xarray/backends/h5netcdf_.py:514, in H5netcdfBackendEntrypoint.open_datatree(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, format, group, lock, invalid_netcdf, phony_dims, decode_vlen_strings, driver, driver_kwds, **kwargs)
493 def open_datatree(
494 self,
495 filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
(...) 512 **kwargs,
513 ) -> DataTree:
--> 514 groups_dict = self.open_groups_as_dict(
515 filename_or_obj,
516 mask_and_scale=mask_and_scale,
517 decode_times=decode_times,
518 concat_characters=concat_characters,
519 decode_coords=decode_coords,
520 drop_variables=drop_variables,
521 use_cftime=use_cftime,
522 decode_timedelta=decode_timedelta,
523 format=format,
524 group=group,
525 lock=lock,
526 invalid_netcdf=invalid_netcdf,
527 phony_dims=phony_dims,
528 decode_vlen_strings=decode_vlen_strings,
529 driver=driver,
530 driver_kwds=driver_kwds,
531 **kwargs,
532 )
534 return datatree_from_dict_with_io_cleanup(groups_dict)
File ~/git/xarray/xarray/backends/h5netcdf_.py:590, in H5netcdfBackendEntrypoint.open_groups_as_dict(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, format, group, lock, invalid_netcdf, phony_dims, decode_vlen_strings, driver, driver_kwds, **kwargs)
588 store_entrypoint = StoreBackendEntrypoint()
589 with close_on_error(group_store):
--> 590 group_ds = store_entrypoint.open_dataset(
591 group_store,
592 mask_and_scale=mask_and_scale,
593 decode_times=decode_times,
594 concat_characters=concat_characters,
595 decode_coords=decode_coords,
596 drop_variables=drop_variables,
597 use_cftime=use_cftime,
598 decode_timedelta=decode_timedelta,
599 )
601 if group:
602 group_name = str(NodePath(path_group).relative_to(parent))
File ~/git/xarray/xarray/backends/store.py:44, in StoreBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)
30 def open_dataset(
31 self,
32 filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
(...) 40 decode_timedelta=None,
41 ) -> Dataset:
42 assert isinstance(filename_or_obj, AbstractDataStore)
---> 44 vars, attrs = filename_or_obj.load()
45 encoding = filename_or_obj.get_encoding()
47 vars, attrs, coord_names = conventions.decode_cf_variables(
48 vars,
49 attrs,
(...) 56 decode_timedelta=decode_timedelta,
57 )
File ~/git/xarray/xarray/backends/common.py:312, in AbstractDataStore.load(self)
293 def load(self):
294 """
295 This loads the variables and attributes simultaneously.
296 A centralized loading function makes it easier to create
(...) 309 are requested, so care should be taken to make sure its fast.
310 """
311 variables = FrozenDict(
--> 312 (_decode_variable_name(k), v) for k, v in self.get_variables().items()
313 )
314 attributes = FrozenDict(self.get_attrs())
315 return variables, attributes
File ~/git/xarray/xarray/backends/h5netcdf_.py:262, in H5NetCDFStore.get_variables(self)
261 def get_variables(self):
--> 262 return FrozenDict(
263 (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items()
264 )
File ~/git/xarray/xarray/core/utils.py:457, in FrozenDict(*args, **kwargs)
456 def FrozenDict(*args, **kwargs) -> Frozen:
--> 457 return Frozen(dict(*args, **kwargs))
File ~/git/xarray/xarray/backends/h5netcdf_.py:263, in <genexpr>(.0)
261 def get_variables(self):
262 return FrozenDict(
--> 263 (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items()
264 )
File ~/git/xarray/xarray/backends/h5netcdf_.py:222, in H5NetCDFStore.open_store_variable(self, name, var)
216 encoding = {
217 "chunksizes": var.chunks,
218 "fletcher32": var.fletcher32,
219 "shuffle": var.shuffle,
220 }
221 if var.chunks:
--> 222 encoding["preferred_chunks"] = dict(
223 zip(var.dimensions, var.chunks, strict=True)
224 )
225 # Convert h5py-style compression options to NetCDF4-Python
226 # style, if possible
227 if var.compression == "gzip":
ValueError: zip() argument 2 is longer than argument 1
Anything else we need to know?
Running the example requires a writable HSDS server and the latest (prerelease) h5netcdf package from https://github.com/h5netcdf/h5netcdf. Install it with, e.g.:
pip install git+https://github.com/h5netcdf/h5netcdf
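For illustration, a hypothetical guard in H5NetCDFStore.open_store_variable that would sidestep the error (a sketch only, not a proposed patch; it assumes skipping preferred_chunks is acceptable for variables without dimensions):

# Hypothetical guard: only record preferred chunks when the variable
# has dimensions to pair the chunk sizes with.
if var.chunks and var.dimensions:
    encoding["preferred_chunks"] = dict(
        zip(var.dimensions, var.chunks, strict=True)
    )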
Environment
INSTALLED VERSIONS
commit: None
python: 3.12.9 | packaged by conda-forge | (main, Feb 14 2025, 08:00:06) [GCC 13.3.0]
python-bits: 64
OS: Linux
OS-release: 6.8.0-53-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: ('en_US', 'UTF-8')
libhdf5: 1.14.6
libnetcdf: None
xarray: 0.1.dev5847+g0184702
pandas: 2.2.3
numpy: 2.2.3
scipy: None
netCDF4: None
pydap: None
h5netcdf: 0.1.0.dev278+g0622911
h5py: 3.13.0
zarr: None
cftime: None
nc_time_axis: None
iris: None
bottleneck: None
dask: None
distributed: None
matplotlib: None
cartopy: None
seaborn: None
numbagg: None
fsspec: 2025.2.0
cupy: None
pint: None
sparse: None
flox: None
numpy_groupies: None
setuptools: 75.8.2
pip: 25.0.1
conda: None
pytest: None
mypy: None
IPython: 9.0.0
sphinx: None