From 1f56861244726af600b9f75018c95783d4600f87 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Tue, 23 Jun 2020 10:26:38 +0200 Subject: [PATCH 1/4] Added test for case, that input slice is inserted into a cube with variable packing --- test/core/gen/test_gen.py | 50 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/test/core/gen/test_gen.py b/test/core/gen/test_gen.py index a5877f34c..7a1372027 100644 --- a/test/core/gen/test_gen.py +++ b/test/core/gen/test_gen.py @@ -11,7 +11,7 @@ def clean_up(): - files = ['l2c-single.nc', 'l2c-single.zarr', 'l2c.nc', 'l2c.zarr', 'l2c_1x80x60.zarr', + files = ['l2c-single.nc', 'l2c-single.zarr', 'l2c.nc', 'l2c.zarr', 'l2c_presorted.zarr', 'l2c_1x80x60.zarr', 'l2c_1x80x80.zarr', 'l2c_packed.zarr', 'l2c_packed_1x80x80.zarr'] for file in files: rimraf(file) @@ -105,6 +105,14 @@ def test_process_inputs_append_multiple_zarr(self): time_coverage_end='2017-01-03T12:00:00.000000000')) def test_process_inputs_insert_multiple_zarr(self): + status, output = gen_cube_wrapper( + [get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c_presorted.zarr') + self.assertEqual(True, status) + self.assertTrue('\nstep 9 of 9: creating input slice in l2c_presorted.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: appending input slice to l2c_presorted.zarr...\n' in output) + self.assertFalse('\nstep 9 of 9: inserting input slice before index 0 in l2c_presorted.zarr...\n' in output) status, output = gen_cube_wrapper( [get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), @@ -119,6 +127,9 @@ def test_process_inputs_insert_multiple_zarr(self): expected_extra_attrs=dict(date_modified=None, time_coverage_start='2016-12-31T12:00:00.000000000', time_coverage_end='2017-01-03T12:00:00.000000000')) + ds_presorted = xr.open_zarr('l2c_presorted.zarr') + ds_insert = xr.open_zarr('l2c.zarr') + np.testing.assert_allclose(ds_presorted.analysed_sst.values, ds_insert.analysed_sst.values) def test_process_inputs_replace_multiple_zarr(self): status, output = gen_cube_wrapper( @@ -249,6 +260,43 @@ def test_process_packed_zarr(self): self.assertEqual(65535, ds_packed.analysed_sst.encoding['_FillValue']) self.assertEqual((1, 80, 80), ds_packed.analysed_sst.encoding['chunks']) + def test_insert_packed_zarr(self): + status, output = gen_cube_wrapper( + [get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c_presorted.zarr', + output_writer_params={'chunksizes': {'lon': 80, 'lat': 80}, + 'packing': {'analysed_sst': {'scale_factor': 0.07324442274239326, + 'add_offset': -300.0, + 'dtype': 'uint16', + '_FillValue': 65535}}}) + self.assertEqual(True, status) + self.assertTrue('\nstep 9 of 9: creating input slice in l2c_presorted.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: appending input slice to l2c_presorted.zarr...\n' in output) + self.assertFalse('\nstep 9 of 9: inserting input slice before index 0 in l2c_presorted.zarr...\n' in output) + status, output = gen_cube_wrapper( + [get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c.zarr', + output_writer_params={'chunksizes': {'lon': 80, 'lat': 80}, + 'packing': {'analysed_sst': {'scale_factor': 0.07324442274239326, + 'add_offset': -300.0, + 'dtype': 'uint16', + '_FillValue': 65535}}}, + no_sort_mode=True) + self.assertEqual(True, status) + self.assertTrue('\nstep 9 of 9: creating input slice in l2c.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: appending input slice to l2c.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: inserting input slice before index 0 in l2c.zarr...\n' in output) + self.assert_cube_ok(xr.open_zarr('l2c.zarr'), + expected_time_dim=3, + expected_extra_attrs=dict(date_modified=None, + time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-03T12:00:00.000000000')) + ds_presorted = xr.open_zarr('l2c_presorted.zarr') + ds_insert = xr.open_zarr('l2c.zarr') + np.testing.assert_allclose(ds_presorted.analysed_sst.values, ds_insert.analysed_sst.values) + def test_process_compressed_zarr(self): status, output = gen_cube_wrapper( [get_inputdata_path('201701??-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], From 2c41f07a4b5c0fdd1c7a20999dbf59b0babeb911 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Tue, 23 Jun 2020 11:06:56 +0200 Subject: [PATCH 2/4] adding copy of store attrs to time_slice which is appended or replaced - not working yet --- xcube/core/timeslice.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xcube/core/timeslice.py b/xcube/core/timeslice.py index 0a9d0ff8e..c341f7aba 100644 --- a/xcube/core/timeslice.py +++ b/xcube/core/timeslice.py @@ -155,7 +155,9 @@ def update_time_slice(store: Union[str, MutableMapping], if chunk_sizes: time_slice = chunk_dataset(time_slice, chunk_sizes, format_name='zarr') - + ds = zarr.open_group(store, mode='r') + time_slice = time_slice.copy() + time_slice.attrs.update(ds.attrs) temp_dir = tempfile.TemporaryDirectory(prefix='xcube-time-slice-', suffix='.zarr') time_slice.to_zarr(temp_dir.name) slice_root_group = zarr.open(temp_dir.name, mode='r') From 694d28eae126cd0ff50574fc34fed427d7c265bb Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 24 Jun 2020 08:47:12 +0200 Subject: [PATCH 3/4] Fixed incorrect insert of time slices when packing of variables is defined. Addressing issue #317. Added test for replacing time slices when packing of variables is defined. --- CHANGES.md | 3 +++ test/core/gen/test_gen.py | 38 ++++++++++++++++++++++++++++++++++++++ xcube/core/timeslice.py | 7 +++---- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 57e5ef872..2e8a3c563 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,9 @@ ## Changes in 0.5.0.devX (in development) +### Fixes * From 0.4.1: Fixed time-series performance drop (#299). +* Fixed incorrect handling input slices with defined packing of variables when inserting + into a xcube dataset in zarr (#317). ### New in 0.5.0.dev2 diff --git a/test/core/gen/test_gen.py b/test/core/gen/test_gen.py index 7a1372027..c976408b4 100644 --- a/test/core/gen/test_gen.py +++ b/test/core/gen/test_gen.py @@ -297,6 +297,44 @@ def test_insert_packed_zarr(self): ds_insert = xr.open_zarr('l2c.zarr') np.testing.assert_allclose(ds_presorted.analysed_sst.values, ds_insert.analysed_sst.values) + def test_replace_packed_zarr(self): + status, output = gen_cube_wrapper( + [get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c_presorted.zarr', + output_writer_params={'chunksizes': {'lon': 80, 'lat': 80}, + 'packing': {'analysed_sst': {'scale_factor': 0.07324442274239326, + 'add_offset': -300.0, + 'dtype': 'uint16', + '_FillValue': 65535}}}) + self.assertEqual(True, status) + self.assertTrue('\nstep 9 of 9: creating input slice in l2c_presorted.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: appending input slice to l2c_presorted.zarr...\n' in output) + self.assertFalse('\nstep 9 of 9: inserting input slice before index 0 in l2c_presorted.zarr...\n' in output) + status, output = gen_cube_wrapper( + [get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'), + get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c.zarr', + output_writer_params={'chunksizes': {'lon': 80, 'lat': 80}, + 'packing': {'analysed_sst': {'scale_factor': 0.07324442274239326, + 'add_offset': -300.0, + 'dtype': 'uint16', + '_FillValue': 65535}}}, + no_sort_mode=True) + self.assertEqual(True, status) + self.assertTrue('\nstep 9 of 9: creating input slice in l2c.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: appending input slice to l2c.zarr...\n' in output) + self.assertTrue('\nstep 9 of 9: replacing input slice at index 1 in l2c.zarr...\n' in output) + self.assert_cube_ok(xr.open_zarr('l2c.zarr'), + expected_time_dim=3, + expected_extra_attrs=dict(date_modified=None, + time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-03T12:00:00.000000000')) + ds_presorted = xr.open_zarr('l2c_presorted.zarr') + ds_insert = xr.open_zarr('l2c.zarr') + np.testing.assert_allclose(ds_presorted.analysed_sst.values, ds_insert.analysed_sst.values) + def test_process_compressed_zarr(self): status, output = gen_cube_wrapper( [get_inputdata_path('201701??-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], diff --git a/xcube/core/timeslice.py b/xcube/core/timeslice.py index c341f7aba..280811540 100644 --- a/xcube/core/timeslice.py +++ b/xcube/core/timeslice.py @@ -145,6 +145,7 @@ def update_time_slice(store: Union[str, MutableMapping], insert_mode = mode == 'insert' time_var_names = [] + encoding = {} with xr.open_zarr(store) as cube: for var_name in cube.variables: var = cube[var_name] @@ -152,14 +153,12 @@ def update_time_slice(store: Union[str, MutableMapping], if var.dims[0] != 'time': raise ValueError(f"dimension 'time' of variable {var_name!r} must be first dimension") time_var_names.append(var_name) + encoding[var_name] = cube[var_name].encoding if chunk_sizes: time_slice = chunk_dataset(time_slice, chunk_sizes, format_name='zarr') - ds = zarr.open_group(store, mode='r') - time_slice = time_slice.copy() - time_slice.attrs.update(ds.attrs) temp_dir = tempfile.TemporaryDirectory(prefix='xcube-time-slice-', suffix='.zarr') - time_slice.to_zarr(temp_dir.name) + time_slice.to_zarr(temp_dir.name, encoding=encoding) slice_root_group = zarr.open(temp_dir.name, mode='r') slice_arrays = dict(slice_root_group.arrays()) From 5f37b7b6566ce99e7dbda20112ba7e8fd6e332d0 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 24 Jun 2020 13:22:37 +0200 Subject: [PATCH 4/4] ajusted CHANGES.md based on comment of forman. --- CHANGES.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ff12b7696..f0f19b19e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,8 +2,9 @@ ### Fixes * From 0.4.1: Fixed time-series performance drop (#299). -* Fixed incorrect handling input slices with defined packing of variables when inserting - into a xcube dataset in zarr (#317). + +* Fixed `xcube gen` CLI tool to correctly insert time slices into an + existing cube stored as Zarr (#317). * When creating an ImageGeom from a dataset, correct the height if it would otherwise give a maximum latitude >90°.