In [49]:
import xarray as xr
import pandas as pd
import numpy as np
import zarr

In [50]:
DATASET_PATH = "test2.zarr"

In [51]:
def create_dataset(datetime):
    """Create a spatial, single time step dataset with two variables."""
    dims = ("time", "lat", "lon")
    time = [pd.to_datetime(datetime)]
    w = 4000
    h = 2000
    lon = np.linspace(0, 4, w)
    lat = np.linspace(50, 52, h)
    precipitation_var = xr.DataArray(np.random.rand(1, h, w), coords=(time, lat, lon), dims=("time", "lat", "lon"))
    temperature_var = xr.DataArray(np.random.rand(1, h, w), coords=(time, lat, lon), dims=("time", "lat", "lon"))
    ds = xr.Dataset({"precipitation": precipitation_var, "temperature": temperature_var})
    return ds
    

## Save a single time step dataset with default chunking

In [52]:
ds = create_dataset("2018-01-01")

In [53]:
ds

<xarray.Dataset>
Dimensions:        (lat: 2000, lon: 4000, time: 1)
Coordinates:
  * time           (time) datetime64[ns] 2018-01-01
  * lat            (lat) float64 50.0 50.0 50.0 50.0 ... 52.0 52.0 52.0 52.0
  * lon            (lon) float64 0.0 0.001 0.002001 0.003001 ... 3.998 3.999 4.0
Data variables:
    precipitation  (time, lat, lon) float64 0.8486 0.4054 ... 0.005686 0.9161
    temperature    (time, lat, lon) float64 0.4486 0.9025 ... 0.3882 0.5572

In [54]:
ds.time.encoding

{}

In [55]:
ds.precipitation.encoding

{}

In [56]:
ds.to_zarr(DATASET_PATH, mode="w")

<xarray.backends.zarr.ZarrStore at 0x7fb5f128d6d8>

In [57]:
ds.close()

## Inspect default chunking

In [58]:
ds = xr.open_zarr(DATASET_PATH)

In [59]:
ds

<xarray.Dataset>
Dimensions:        (lat: 2000, lon: 4000, time: 1)
Coordinates:
  * lat            (lat) float64 50.0 50.0 50.0 50.0 ... 52.0 52.0 52.0 52.0
  * lon            (lon) float64 0.0 0.001 0.002001 0.003001 ... 3.998 3.999 4.0
  * time           (time) datetime64[ns] 2018-01-01
Data variables:
    precipitation  (time, lat, lon) float64 dask.array<shape=(1, 2000, 4000), chunksize=(1, 250, 500)>
    temperature    (time, lat, lon) float64 dask.array<shape=(1, 2000, 4000), chunksize=(1, 250, 500)>

In [60]:
ds.time.encoding

{'chunks': (1,),
 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),
 'filters': None,
 'units': 'days since 2018-01-01 00:00:00',
 'calendar': 'proleptic_gregorian',
 'dtype': dtype('int64')}

In [61]:
ds.precipitation.encoding

{'chunks': (1, 250, 500),
 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),
 'filters': None,
 '_FillValue': nan,
 'dtype': dtype('float64')}

In [62]:
ds.close()

## -----------------------------------

In [63]:
root_group = zarr.open("test2.zarr", mode='a')

In [64]:
for i in range(3, 11, 2):
    ds = create_dataset(f"2018-01-0{i}")
    for var_name, var_array in root_group.arrays():
        var = ds[var_name]
        if 'time' in var.dims:            
            time_axis = var.dims.index('time')
            var_array.append(var, axis=time_axis)


## -----------------------------------

In [65]:
ds2 = xr.open_zarr("test2.zarr", decode_times=False)

In [66]:
ds2

<xarray.Dataset>
Dimensions:        (lat: 2000, lon: 4000, time: 5)
Coordinates:
  * lat            (lat) float64 50.0 50.0 50.0 50.0 ... 52.0 52.0 52.0 52.0
  * lon            (lon) float64 0.0 0.001 0.002001 0.003001 ... 3.998 3.999 4.0
  * time           (time) int64 0 1514937600000000000 ... 1515456000000000000
Data variables:
    precipitation  (time, lat, lon) float64 dask.array<shape=(5, 2000, 4000), chunksize=(1, 250, 500)>
    temperature    (time, lat, lon) float64 dask.array<shape=(5, 2000, 4000), chunksize=(1, 250, 500)>

In [67]:
ds2.time.encoding

{'chunks': (1,),
 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),
 'filters': None,
 'dtype': dtype('int64')}

In [68]:
ds2.precipitation.encoding

{'chunks': (1, 250, 500),
 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),
 'filters': None,
 '_FillValue': nan,
 'dtype': dtype('float64')}

In [69]:
ds2.close()

In [70]:
ds2


<xarray.Dataset>
Dimensions:        (lat: 2000, lon: 4000, time: 5)
Coordinates:
  * lat            (lat) float64 50.0 50.0 50.0 50.0 ... 52.0 52.0 52.0 52.0
  * lon            (lon) float64 0.0 0.001 0.002001 0.003001 ... 3.998 3.999 4.0
  * time           (time) int64 0 1514937600000000000 ... 1515456000000000000
Data variables:
    precipitation  (time, lat, lon) float64 dask.array<shape=(5, 2000, 4000), chunksize=(1, 250, 500)>
    temperature    (time, lat, lon) float64 dask.array<shape=(5, 2000, 4000), chunksize=(1, 250, 500)>

In [71]:
dict(ds2.dims)

{'lat': 2000, 'lon': 4000, 'time': 5}

In [72]:
zarr.group()

<zarr.hierarchy.Group '/'>

In [73]:
ds1 = xr.open_zarr("test.zarr", decode_times=False)

In [None]:
zarr.tree(ds2)

In [77]:
import sys

In [80]:
zarr.convenience.copy_all(ds1, ds2, log=sys.stdout)

ValueError: dest must be a group, got <xarray.Dataset>
Dimensions:        (lat: 2000, lon: 4000, time: 5)
Coordinates:
  * lat            (lat) float64 50.0 50.0 50.0 50.0 ... 52.0 52.0 52.0 52.0
  * lon            (lon) float64 0.0 0.001 0.002001 0.003001 ... 3.998 3.999 4.0
  * time           (time) int64 0 1514937600000000000 ... 1515456000000000000
Data variables:
    precipitation  (time, lat, lon) float64 dask.array<shape=(5, 2000, 4000), chunksize=(1, 250, 500)>
    temperature    (time, lat, lon) float64 dask.array<shape=(5, 2000, 4000), chunksize=(1, 250, 500)>