In [1]:
# Environment setup, left commented out; uncomment to install the dependencies.
# NOTE(review): if re-enabled, prefer `%pip install` over `!pip install` so the
# packages are installed into the running kernel's environment.
# !pip install -e ".[icechunk]"
# !pip install git+https://github.com/mpiannucci/kerchunk@v3
# !pip install fsspec s3fs

In [2]:
import shutil
import warnings

import fsspec
import xarray as xr
from icechunk import IcechunkStore, StorageConfig, StoreConfig, VirtualRefConfig

from virtualizarr import open_virtual_dataset

# Ignore every UserWarning from here on (presumably to keep cell output readable).
# NOTE(review): this is a blanket filter; it also hides UserWarnings that may matter.
warnings.filterwarnings("ignore", category=UserWarning)

In [3]:
# Anonymous (anon=True) S3 filesystem handle from fsspec.
fs = fsspec.filesystem("s3", anon=True)

# Glob the 202408 OISST files, prefix every match with "s3://", and keep the
# resulting URLs in sorted order.
oisst_files = sorted(
    "s3://" + match
    for match in fs.glob(
        "s3://noaa-cdr-sea-surface-temp-optimum-interpolation-pds/data/v2.1/avhrr/202408/oisst-avhrr-v02r01.*.nc"
    )
)

In [4]:
# Storage options handed to the reader: anonymous access, fill cache disabled,
# cache type "none".
so = {"anon": True, "default_fill_cache": False, "default_cache_type": "none"}

# Open the first two files as virtual datasets, passing an empty `indexes` mapping.
virtual_datasets = [
    open_virtual_dataset(
        path,
        reader_options={"storage_options": so},
        indexes={},
    )
    for path in oisst_files[:2]
]

In [5]:
# Combine the per-file virtual datasets into one dataset along the "time" dimension.
virtual_ds = xr.concat(
    virtual_datasets,
    dim="time",
    combine_attrs="override",
    compat="override",
    coords="minimal",
)

In [6]:
# Start from a clean slate: remove any repository directory left by a previous
# run. Uses shutil instead of a shell escape so the cell also works where `rm`
# is unavailable; ignore_errors=True mirrors `rm -rf`, which does not fail when
# the path is missing.
shutil.rmtree("./noaa-cdr-icechunk", ignore_errors=True)

In [7]:
# Store configuration whose virtual-reference config is anonymous S3 access in
# region "us-east-1".
virtual_ref_store_config = StoreConfig(
    virtual_ref_config=VirtualRefConfig.s3_anonymous(region="us-east-1")
)

# Filesystem-backed storage rooted at ./noaa-cdr-icechunk.
storage_config = StorageConfig.filesystem("./noaa-cdr-icechunk")

In [8]:
# Create a new store from the storage and store configurations defined above.
store = IcechunkStore.create(
    config=virtual_ref_store_config,
    storage=storage_config,
)

In [9]:
# Write the combined virtual dataset into the store.
virtual_ds.virtualize.to_icechunk(store)

In [10]:
# Commit the write with a descriptive message; the string shown in the recorded
# output is the value returned by commit() (presumably the new snapshot id).
store.commit("first 2 days of 202408 data")

'VEA6RE6TYKDAN90Y42EG'

In [11]:
# Debug check: fetch the raw bytes stored under the key "time/c/0".
# NOTE(review): this reaches through the private `_store` attribute and relies on
# top-level `await`, which only works in an async-aware kernel such as IPython.
await store._store.get("time/c/0")

b'x^cx\xd3\xe2\x06\x00\x04\x16\x01\xb7'

In [12]:
# Open the store with xarray (Zarr format 3, no consolidated metadata) and
# display the resulting dataset.
ds = xr.open_zarr(
    store,
    zarr_format=3,
    consolidated=False,
)
ds

## Append

In [13]:
# Open the next two files (indices 2 and 3) as virtual datasets. Reuses the
# storage options `so` defined for the first batch so that both batches are read
# with identical settings (anonymous access with caching disabled) instead of a
# separately spelled-out, slightly different options dict.
virtual_datasets_a = [
    open_virtual_dataset(url, indexes={}, reader_options={"storage_options": so})
    for url in oisst_files[2:4]
]

In [14]:
# Combine the two additional virtual datasets along the "time" dimension, using
# the same concat options as for the first batch.
virtual_ds_a = xr.concat(
    virtual_datasets_a,
    dim="time",
    combine_attrs="override",
    compat="override",
    coords="minimal",
)

In [15]:
# Display the combined virtual dataset that is about to be appended.
virtual_ds_a

In [16]:
# Re-open the existing repository in append mode ("a").
append_store = IcechunkStore.open_existing(
    mode="a",
    config=virtual_ref_store_config,
    storage=storage_config,
)

In [17]:
# Debug check before appending: fetch the raw bytes stored under "time/c/0" from
# the re-opened store.
# NOTE(review): uses the private `_store` attribute and top-level `await`.
await append_store._store.get("time/c/0")

b'x^cx\xd3\xe2\x06\x00\x04\x16\x01\xb7'

In [18]:
# Write the additional virtual dataset into the store, appending along "time".
virtual_ds_a.virtualize.to_icechunk(append_store, append_dim="time")

In [19]:
# Commit the appended data; the recorded output is the value returned by commit().
append_store.commit("wrote 2 more days of data")

'8W1DS346WSZ8YJHARP8G'

In [20]:
# Open the repository once more, this time read-only (mode "r"), to inspect the
# result of the append.
read_store = IcechunkStore.open_existing(
    mode="r",
    config=virtual_ref_store_config,
    storage=storage_config,
)

In [21]:
# Debug check after appending: fetch the raw bytes stored under "time/c/0".
# NOTE(review): the recorded output below differs from the bytes returned for the
# same key before the append, so the append changed what this key resolves to.
# Uses the private `_store` attribute and top-level `await`.
await read_store._store.get("time/c/0")

b'\x00\x00\x00>\x00\x00\xc0>\x00\x00 ?\x00\x00`?\x00\x00\x90?\x00\x00\xb0?\x00\x00\xd0?\x00\x00\xf0?\x00\x00\x08@\x00\x00\x18@\x00\x00(@\x00\x008@\x00\x00H@\x00\x00X@\x00\x00h@\x00\x00x@\x00\x00\x84@\x00\x00\x8c@\x00\x00\x94@\x00\x00\x9c@\x00\x00\xa4@\x00\x00\xac@\x00\x00\xb4@\x00\x00\xbc@\x00\x00\xc4@\x00\x00\xcc@\x00\x00\xd4@\x00\x00\xdc@\x00\x00\xe4@\x00\x00\xec@\x00\x00\xf4@\x00\x00\xfc@\x00\x00\x02A\x00\x00\x06A\x00\x00\nA\x00\x00\x0eA\x00\x00\x12A\x00\x00\x16A\x00\x00\x1aA\x00\x00\x1eA\x00\x00"A\x00\x00&A\x00\x00*A\x00\x00.A\x00\x002A\x00\x006A\x00\x00:A\x00\x00>A\x00\x00BA\x00\x00FA\x00\x00JA\x00\x00NA\x00\x00RA\x00\x00VA\x00\x00ZA\x00\x00^A\x00\x00bA\x00\x00fA\x00\x00jA\x00\x00nA\x00\x00rA\x00\x00vA\x00\x00zA\x00\x00~A\x00\x00\x81A\x00\x00\x83A\x00\x00\x85A\x00\x00\x87A\x00\x00\x89A\x00\x00\x8bA\x00\x00\x8dA\x00\x00\x8fA\x00\x00\x91A\x00\x00\x93A\x00\x00\x95A\x00\x00\x97A\x00\x00\x99A\x00\x00\x9bA\x00\x00\x9dA\x00\x00\x9fA\x00\x00\xa1A\x00\x00\xa3A\x00\x00\xa5A\x00\x00\xa7A\x00

In [22]:
# Re-open the store with xarray after the append and display the dataset.
# NOTE(review): in the recorded run this cell fails with "Failed to decode
# variable 'time'". The bytes read back for "time/c/0" after the append differ
# from the pre-append bytes, so the append step is the place to investigate.
ds = xr.open_zarr(
    read_store,
    zarr_format=3,
    consolidated=False,
)
ds

error: Failed to decode variable 'time': Error -3 while decompressing data: unknown compression method