In [3]:
import xarray as xr
import pandas as pd
import numpy as np
import zarr

In [4]:
DATASET_PATH = "test.zarr"

In [36]:
def create_dataset(datetime):
    """Create a spatial, single time step dataset with two variables."""
    dims = ("time", "lat", "lon")
    time = [pd.to_datetime(datetime)]
    w = 4000
    h = 2000
    lon = np.linspace(0, 4, w)
    lat = np.linspace(50, 52, h)
    precipitation_var = xr.DataArray(np.random.rand(1, h, w), coords=(time, lat, lon), dims=("time", "lat", "lon"))
    temperature_var = xr.DataArray(np.random.rand(1, h, w), coords=(time, lat, lon), dims=("time", "lat", "lon"))
    ds = xr.Dataset({"precipitation": precipitation_var, "temperature": temperature_var})
    return ds
    

## Save a single time step dataset with default chunking

In [37]:
ds = create_dataset("2018-01-01")

In [38]:
ds

<xarray.Dataset>
Dimensions:        (lat: 2000, lon: 4000, time: 1)
Coordinates:
  * time           (time) datetime64[ns] 2018-01-01
  * lat            (lat) float64 50.0 50.0 50.0 50.0 50.0 50.01 50.01 50.01 ...
  * lon            (lon) float64 0.0 0.001 0.002001 0.003001 0.004001 ...
Data variables:
    precipitation  (time, lat, lon) float64 0.2631 0.9521 0.9499 0.1651 ...
    temperature    (time, lat, lon) float64 0.2243 0.5968 0.6761 0.02927 ...

In [39]:
ds.time.encoding

{}

In [40]:
ds.precipitation.encoding

{}

In [41]:
ds.to_zarr(DATASET_PATH, mode="w")

<xarray.backends.zarr.ZarrStore at 0x176431503c8>

In [42]:
ds.close()

## Inspect default chunking

In [43]:
ds = xr.open_zarr(DATASET_PATH)

In [44]:
ds

<xarray.Dataset>
Dimensions:        (lat: 2000, lon: 4000, time: 1)
Coordinates:
  * lat            (lat) float64 50.0 50.0 50.0 50.0 50.0 50.01 50.01 50.01 ...
  * lon            (lon) float64 0.0 0.001 0.002001 0.003001 0.004001 ...
  * time           (time) datetime64[ns] 2018-01-01
Data variables:
    precipitation  (time, lat, lon) float64 dask.array<shape=(1, 2000, 4000), chunksize=(1, 250, 500)>
    temperature    (time, lat, lon) float64 dask.array<shape=(1, 2000, 4000), chunksize=(1, 250, 500)>

In [45]:
ds.time.encoding

{'chunks': (1,),
 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),
 'filters': None,
 'units': 'days since 2018-01-01 00:00:00',
 'calendar': 'proleptic_gregorian',
 'dtype': dtype('int64')}

In [46]:
ds.precipitation.encoding

{'chunks': (1, 250, 500),
 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),
 'filters': None,
 '_FillValue': nan,
 'dtype': dtype('float64')}

In [47]:
ds.close()

## -----------------------------------

In [309]:
root_group = zarr.open("test.zarr", mode='a')

In [310]:
for i in range(2, 10):
    ds = create_dataset(f"2018-01-0{i}")
    for var_name, var_array in root_group.arrays():
        var = ds[var_name]
        if 'time' in var.dims:            
            time_axis = var.dims.index('time')
            var_array.append(var, axis=time_axis)


## -----------------------------------

In [311]:
ds2 = xr.open_zarr("test.zarr", decode_times=False)

In [312]:
ds2

<xarray.Dataset>
Dimensions:        (lat: 2000, lon: 4000, time: 9)
Coordinates:
  * lat            (lat) float64 50.0 50.0 50.0 50.0 50.0 50.01 50.01 50.01 ...
  * lon            (lon) float64 0.0 0.001 0.002001 0.003001 0.004001 ...
  * time           (time) int64 0 1514851200000000000 1514937600000000000 ...
Data variables:
    precipitation  (time, lat, lon) float64 dask.array<shape=(9, 2000, 4000), chunksize=(1, 200, 400)>
    temperature    (time, lat, lon) float64 dask.array<shape=(9, 2000, 4000), chunksize=(1, 200, 400)>

In [315]:
ds2.time.encoding

{'chunks': (1,),
 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),
 'filters': None,
 'dtype': dtype('int64')}

In [313]:
ds2.precipitation.encoding

{'chunks': (1, 200, 400),
 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),
 'filters': None,
 '_FillValue': nan,
 'dtype': dtype('float64')}

In [314]:
ds2.close()

In [316]:
ds2


<xarray.Dataset>
Dimensions:        (lat: 2000, lon: 4000, time: 9)
Coordinates:
  * lat            (lat) float64 50.0 50.0 50.0 50.0 50.0 50.01 50.01 50.01 ...
  * lon            (lon) float64 0.0 0.001 0.002001 0.003001 0.004001 ...
  * time           (time) int64 0 1514851200000000000 1514937600000000000 ...
Data variables:
    precipitation  (time, lat, lon) float64 dask.array<shape=(9, 2000, 4000), chunksize=(1, 200, 400)>
    temperature    (time, lat, lon) float64 dask.array<shape=(9, 2000, 4000), chunksize=(1, 200, 400)>

In [319]:
dict(ds2.dims)

{'lat': 2000, 'lon': 4000, 'time': 9}