In [1]:
# Optional one-time setup: uncomment the lines below to install the packages
# this notebook imports. NOTE(review): in a notebook, `%pip` is generally
# preferred over `!pip` so the install targets the running kernel's environment.
# !pip install -e ".[icechunk]"
# !pip install git+https://github.com/mpiannucci/kerchunk@v3
# !pip install fsspec s3fs

In [2]:
!pip show icechunk

Name: icechunk
Version: 0.1.0a7
Summary: Transactional storage engine for Zarr designed for use on cloud object storage
Home-page: https://github.com/earth-mover/icechunk
Author: Earthmover PBC
Author-email: 
License: Apache-2.0
Location: /Users/aimeebarciauskas/github/virtualizarr/venv/lib/python3.12/site-packages
Requires: zarr
Required-by: 


In [1]:
import shutil
import warnings

import fsspec
import xarray as xr
from icechunk import IcechunkStore, StorageConfig, StoreConfig, VirtualRefConfig

from virtualizarr import open_virtual_dataset

warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
# Anonymous S3 filesystem handle for the NOAA OISST bucket.
fs = fsspec.filesystem("s3", anon=True)

# List the August 2024 daily files, put the "s3://" scheme back on each match
# (presumably fs.glob returns paths without it -- confirm), and sort the
# resulting URLs.
oisst_files = sorted(
    "s3://" + key
    for key in fs.glob(
        "s3://noaa-cdr-sea-surface-temp-optimum-interpolation-pds/data/v2.1/avhrr/202408/oisst-avhrr-v02r01.*.nc"
    )
)

In [3]:
# Storage options forwarded to the reader: anonymous access with the
# fill cache disabled and the cache type set to "none".
so = {"anon": True, "default_fill_cache": False, "default_cache_type": "none"}

# One virtual dataset per file, for the first two files in the sorted listing.
# indexes={} passes an empty index mapping (presumably so no in-memory indexes
# are built -- confirm against the virtualizarr documentation).
virtual_datasets = [
    open_virtual_dataset(
        path,
        indexes={},
        reader_options={"storage_options": so},
    )
    for path in oisst_files[:2]
]

In [4]:
# Join the per-file virtual datasets into a single dataset along "time".
# The override/minimal settings ask xarray not to compare or expand the
# variables and attributes shared by the inputs.
virtual_ds = xr.concat(
    virtual_datasets,
    dim="time",
    compat="override",
    coords="minimal",
    combine_attrs="override",
)

In [5]:
# Start from a clean slate: delete any store directory left by a previous run.
# shutil.rmtree replaces the shell `rm -rf` so the cell also works where no
# POSIX shell is available; ignore_errors=True keeps it a no-op when the
# directory does not exist.
shutil.rmtree("./noaa-cdr-icechunk", ignore_errors=True)

In [6]:
# Store settings: virtual references are configured through
# VirtualRefConfig.s3_anonymous for region "us-east-1".
virtual_ref_store_config = StoreConfig(
    virtual_ref_config=VirtualRefConfig.s3_anonymous(region="us-east-1")
)

# Filesystem-backed storage rooted at ./noaa-cdr-icechunk.
storage_config = StorageConfig.filesystem("./noaa-cdr-icechunk")

In [7]:
# Create a new, writable Icechunk store from the storage and store
# configuration defined earlier in the notebook.
store = IcechunkStore.create(
    storage=storage_config,
    config=virtual_ref_store_config,
    read_only=False,
)

In [11]:
store.commit("first 2 days of 202408 data")

'JKZCDPMTJ3ETZFY2KXKG'

In [10]:
virtual_ds.virtualize.to_icechunk(store)

b'x^cx\xd3\xe2\x06\x00\x04\x16\x01\xb7'

In [None]:
store.commit("first 2 days of 202408 data")

In [None]:
# Debug peek: read the raw value stored under key "time/c/0" through the
# store's private `_store` attribute (presumably the first chunk of the
# `time` array -- TODO confirm).
await store._store.get("time/c/0")

In [11]:
# Open the store as a Zarr v3 hierarchy without consolidated metadata,
# then display the resulting dataset.
ds = xr.open_zarr(
    store,
    zarr_format=3,
    consolidated=False,
)
ds

## Append

In [12]:
# Virtual datasets for the third and fourth files in the sorted listing,
# opened with anonymous access.
virtual_datasets_a = [
    open_virtual_dataset(
        path,
        indexes={},
        reader_options={"storage_options": {"anon": True}},
    )
    for path in oisst_files[2:4]
]

In [13]:
# Join the two additional virtual datasets along "time", using the same
# override/minimal options as the first concatenation in this notebook.
virtual_ds_a = xr.concat(
    virtual_datasets_a,
    dim="time",
    compat="override",
    coords="minimal",
    combine_attrs="override",
)

In [14]:
# Display the concatenated virtual dataset for the two additional days.
virtual_ds_a

In [15]:
# Re-open the existing store in writable mode for the append.
append_store = IcechunkStore.open_existing(
    storage=storage_config,
    config=virtual_ref_store_config,
    read_only=False,
)

In [16]:
# Debug peek before appending: read the raw value stored under key
# "time/c/0" through the private `_store` attribute; the captured output
# shows a bytes value.
await append_store._store.get("time/c/0")

b'x^cx\xd3\xe2\x06\x00\x04\x16\x01\xb7'

In [17]:
# Write the additional days into the re-opened store, appending along "time".
virtual_ds_a.virtualize.to_icechunk(
    append_store,
    append_dim="time",
)

In [18]:
# Commit the appended data; the call returns a string identifier, which is
# displayed as the cell output.
append_store.commit("wrote 2 more days of data")

'Q0VZ42WZ1RG45H8CTNK0'

In [19]:
# Open the same store again, read-only this time, to check what was written.
read_store = IcechunkStore.open_existing(
    read_only=True,
    storage=storage_config,
    config=virtual_ref_store_config,
)

In [20]:
# Debug peek after the append: read the raw value stored under key
# "time/c/0" through the private `_store` attribute of the read-only store;
# the captured output shows a bytes value.
await read_store._store.get("time/c/0")

b'x^cx\xd3\xe2\x06\x00\x04\x16\x01\xb7'

In [21]:
# Open the read-only store as a Zarr v3 hierarchy without consolidated
# metadata and display the dataset.
ds = xr.open_zarr(
    read_store,
    zarr_format=3,
    consolidated=False,
)
ds