diff --git a/CHANGES.md b/CHANGES.md index 97939edfc..f0f19b19e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,10 @@ ## Changes in 0.5.0.devX (in development) -* From 0.4.1: Fixed time-series performance drop (#299). +### Fixes +* From 0.4.1: Fixed time-series performance drop (#299). + +* Fixed `xcube gen` CLI tool to correctly insert time slices into an + existing cube stored as Zarr (#317). * When creating an ImageGeom from a dataset, correct the height if it would otherwise give a maximum latitude >90°. diff --git a/test/core/gen/test_gen.py b/test/core/gen/test_gen.py index a5877f34c..c976408b4 100644 --- a/test/core/gen/test_gen.py +++ b/test/core/gen/test_gen.py @@ -11,7 +11,7 @@ def clean_up(): - files = ['l2c-single.nc', 'l2c-single.zarr', 'l2c.nc', 'l2c.zarr', 'l2c_1x80x60.zarr', + files = ['l2c-single.nc', 'l2c-single.zarr', 'l2c.nc', 'l2c.zarr', 'l2c_presorted.zarr', 'l2c_1x80x60.zarr', 'l2c_1x80x80.zarr', 'l2c_packed.zarr', 'l2c_packed_1x80x80.zarr'] for file in files: rimraf(file) @@ -105,6 +105,14 @@ def test_process_inputs_append_multiple_zarr(self): time_coverage_end='2017-01-03T12:00:00.000000000')) def test_process_inputs_insert_multiple_zarr(self): + status, output = gen_cube_wrapper( + [get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c_presorted.zarr') + self.assertEqual(True, status) + self.assertTrue('\nstep 9 of 9: creating input slice in l2c_presorted.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: appending input slice to l2c_presorted.zarr...\n' in output) + self.assertFalse('\nstep 9 of 9: inserting input slice before index 0 in l2c_presorted.zarr...\n' in output) status, output = gen_cube_wrapper( [get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), @@ -119,6 +127,9 @@ def test_process_inputs_insert_multiple_zarr(self): expected_extra_attrs=dict(date_modified=None, time_coverage_start='2016-12-31T12:00:00.000000000', time_coverage_end='2017-01-03T12:00:00.000000000')) + ds_presorted = xr.open_zarr('l2c_presorted.zarr') + ds_insert = xr.open_zarr('l2c.zarr') + np.testing.assert_allclose(ds_presorted.analysed_sst.values, ds_insert.analysed_sst.values) def test_process_inputs_replace_multiple_zarr(self): status, output = gen_cube_wrapper( @@ -249,6 +260,81 @@ def test_process_packed_zarr(self): self.assertEqual(65535, ds_packed.analysed_sst.encoding['_FillValue']) self.assertEqual((1, 80, 80), ds_packed.analysed_sst.encoding['chunks']) + def test_insert_packed_zarr(self): + status, output = gen_cube_wrapper( + [get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c_presorted.zarr', + output_writer_params={'chunksizes': {'lon': 80, 'lat': 80}, + 'packing': {'analysed_sst': {'scale_factor': 0.07324442274239326, + 'add_offset': -300.0, + 'dtype': 'uint16', + '_FillValue': 65535}}}) + self.assertEqual(True, status) + self.assertTrue('\nstep 9 of 9: creating input slice in l2c_presorted.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: appending input slice to l2c_presorted.zarr...\n' in output) + self.assertFalse('\nstep 9 of 9: inserting input slice before index 0 in l2c_presorted.zarr...\n' in output) + status, output = gen_cube_wrapper( + [get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c.zarr', + output_writer_params={'chunksizes': {'lon': 80, 'lat': 80}, + 'packing': {'analysed_sst': {'scale_factor': 0.07324442274239326, + 'add_offset': -300.0, + 'dtype': 'uint16', + '_FillValue': 65535}}}, + no_sort_mode=True) + self.assertEqual(True, status) + self.assertTrue('\nstep 9 of 9: creating input slice in l2c.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: appending input slice to l2c.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: inserting input slice before index 0 in l2c.zarr...\n' in output) + self.assert_cube_ok(xr.open_zarr('l2c.zarr'), + expected_time_dim=3, + expected_extra_attrs=dict(date_modified=None, + time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-03T12:00:00.000000000')) + ds_presorted = xr.open_zarr('l2c_presorted.zarr') + ds_insert = xr.open_zarr('l2c.zarr') + np.testing.assert_allclose(ds_presorted.analysed_sst.values, ds_insert.analysed_sst.values) + + def test_replace_packed_zarr(self): + status, output = gen_cube_wrapper( + [get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c_presorted.zarr', + output_writer_params={'chunksizes': {'lon': 80, 'lat': 80}, + 'packing': {'analysed_sst': {'scale_factor': 0.07324442274239326, + 'add_offset': -300.0, + 'dtype': 'uint16', + '_FillValue': 65535}}}) + self.assertEqual(True, status) + self.assertTrue('\nstep 9 of 9: creating input slice in l2c_presorted.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: appending input slice to l2c_presorted.zarr...\n' in output) + self.assertFalse('\nstep 9 of 9: inserting input slice before index 0 in l2c_presorted.zarr...\n' in output) + status, output = gen_cube_wrapper( + [get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c.zarr', + output_writer_params={'chunksizes': {'lon': 80, 'lat': 80}, + 'packing': {'analysed_sst': {'scale_factor': 0.07324442274239326, + 'add_offset': -300.0, + 'dtype': 'uint16', + '_FillValue': 65535}}}, + no_sort_mode=True) + self.assertEqual(True, status) + self.assertTrue('\nstep 9 of 9: creating input slice in l2c.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: appending input slice to l2c.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: replacing input slice at index 1 in l2c.zarr...\n' in output) + self.assert_cube_ok(xr.open_zarr('l2c.zarr'), + expected_time_dim=3, + expected_extra_attrs=dict(date_modified=None, + time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-03T12:00:00.000000000')) + ds_presorted = xr.open_zarr('l2c_presorted.zarr') + ds_insert = xr.open_zarr('l2c.zarr') + np.testing.assert_allclose(ds_presorted.analysed_sst.values, ds_insert.analysed_sst.values) + def test_process_compressed_zarr(self): status, output = gen_cube_wrapper( [get_inputdata_path('201701??-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], diff --git a/xcube/core/timeslice.py b/xcube/core/timeslice.py index 9878b34d3..bd13f2763 100644 --- a/xcube/core/timeslice.py +++ b/xcube/core/timeslice.py @@ -145,6 +145,7 @@ def update_time_slice(store: Union[str, MutableMapping], insert_mode = mode == 'insert' time_var_names = [] + encoding = {} with xr.open_zarr(store) as cube: for var_name in cube.variables: var = cube[var_name] @@ -152,12 +153,12 @@ def update_time_slice(store: Union[str, MutableMapping], if var.dims[0] != 'time': raise ValueError(f"dimension 'time' of variable {var_name!r} must be first dimension") time_var_names.append(var_name) + encoding[var_name] = cube[var_name].encoding if chunk_sizes: time_slice = chunk_dataset(time_slice, chunk_sizes, format_name='zarr') - temp_dir = tempfile.TemporaryDirectory(prefix='xcube-time-slice-', suffix='.zarr') - time_slice.to_zarr(temp_dir.name) + time_slice.to_zarr(temp_dir.name, encoding=encoding) slice_root_group = zarr.open(temp_dir.name, mode='r') slice_arrays = dict(slice_root_group.arrays())