# Appending to an Icechunk Store with Virtual References
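This notebook demonstrates how to append virtual references to an existing Icechunk store: it writes and commits an initial set of files, then appends more files along the `time` dimension.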

Please ensure the correct dependencies are installed before starting.

In [None]:
# Uncomment to install this notebook's dependencies (`%pip` installs into the running kernel's environment).
# %pip install "virtualizarr[icechunk,hdf]" ipykernel s3fs

In [None]:
import warnings

import fsspec
import icechunk
import xarray as xr
from obstore.store import from_url

from virtualizarr import open_virtual_dataset
from virtualizarr.parsers import HDFParser
from virtualizarr.registry import ObjectStoreRegistry

# Suppress every UserWarning for the rest of the kernel session (presumably to
# keep cell output tidy). The filter is global, so it also hides UserWarnings
# raised by code unrelated to this walkthrough.
warnings.filterwarnings("ignore", category=UserWarning)

# Before you start

Identify the dataset you will be using and build a list of the files from which the virtual Icechunk store will be generated.

In [None]:
# Anonymous S3 filesystem: the bucket is listed without credentials.
fs = fsspec.filesystem("s3", anon=True)

# List the August 2024 (202408) OISST NetCDF files. The listing is kept under
# its own name so that `oisst_files` only ever holds full URLs.
oisst_keys = fs.glob(
    "s3://noaa-cdr-sea-surface-temp-optimum-interpolation-pds/data/v2.1/avhrr/202408/oisst-avhrr-v02r01.*.nc"
)

# The listed paths lack the protocol, so prepend it; sort for a stable order.
oisst_files = sorted("s3://" + key for key in oisst_keys)

# The walkthrough writes files [0:2] and later appends files [2:4]. Fail here,
# with a clear message, instead of later on a short or empty slice.
assert len(oisst_files) >= 4, (
    f"expected at least 4 OISST files for 202408, found {len(oisst_files)}"
)

### Define our VirtualiZarr `Parser` and `ObjectStoreRegistry`

In [None]:
# Parser applied to each `.nc` file when building its virtual dataset.
parser = HDFParser()

# Object store for the bucket (requests are not signed), registered under the
# bucket URL and handed to `open_virtual_dataset` via `registry`.
bucket = "s3://noaa-cdr-sea-surface-temp-optimum-interpolation-pds"
store = from_url(bucket, region="us-east-1", skip_signature=True)
registry = ObjectStoreRegistry({bucket: store})

## Create virtual datasets with VirtualiZarr's `open_virtual_dataset`

In [None]:
def open_oisst_day(url):
    """Open one OISST file as a virtual dataset.

    The coordinate variables listed in `loadable_variables` are passed so that
    they are loaded as regular arrays rather than kept as virtual references.
    """
    return open_virtual_dataset(
        url=url,
        parser=parser,
        registry=registry,
        loadable_variables=["time", "lat", "lon", "zlev"],
    )


# Virtual datasets for the first two files in the listing.
virtual_datasets = [open_oisst_day(file_url) for file_url in oisst_files[:2]]

In [None]:
# Stack the per-file virtual datasets along `time`.
# - combine_attrs="override": take the attributes of the first dataset.
# - compat="override": do not compare non-concatenated variables; use the first.
# - coords="minimal": only concatenate coordinates that already contain `time`.
virtual_ds = xr.concat(
    virtual_datasets,
    dim="time",
    combine_attrs="override",
    compat="override",
    coords="minimal",
)

In [None]:
# Display the concatenated virtual dataset built from the first two files.
virtual_ds

In [None]:
# Clean up the store if running this notebook multiple times.
# Uncomment the line below to delete the local ./noaa-cdr-icechunk directory
# that the repository in the next section is created in.
#!rm -rf ./noaa-cdr-icechunk/

## Initialize the Icechunk Store

In [None]:
# The repository itself lives on the local filesystem.
storage = icechunk.local_filesystem_storage("./noaa-cdr-icechunk")

# Describe where the virtual chunks live: anything under this S3 prefix is
# served by an anonymous S3 store in us-east-1.
oisst_container = icechunk.VirtualChunkContainer(
    url_prefix="s3://noaa-cdr-sea-surface-temp-optimum-interpolation-pds/",
    store=icechunk.s3_store(region="us-east-1", anonymous=True),
)

config = icechunk.RepositoryConfig.default()
config.set_virtual_chunk_container(oisst_container)

# NOTE(review): depending on the installed icechunk version, reading the
# virtual chunk *data* later may additionally require passing
# `authorize_virtual_chunk_access=` when creating/opening the repository;
# confirm against the icechunk documentation.
repo = icechunk.Repository.create(storage, config)

# Writable session on `main`, used for the initial write below.
session = repo.writable_session("main")

## Write the virtual datasets to the icechunk store and commit

In [None]:
# Write the combined virtual dataset into the writable session's store.
# Nothing is committed until `session.commit(...)` runs in the next cell.
virtual_ds.virtualize.to_icechunk(session.store)

In [None]:
# Commit the session's pending write to `main` with a descriptive message.
session.commit("first 2 days of 202408 data")

## Check your work!

In [None]:
# Open what was just written (Zarr v3, no consolidated metadata) and display it.
ds = xr.open_zarr(
    session.store,
    zarr_format=3,
    consolidated=False,
)
ds

# Append

None of that was new; it is basically a repeat of what is in the [Icechunk docs](https://icechunk.io/icechunk-python/virtual/). To append, we follow the same steps to create a virtual dataset from the next files, open a fresh writable session, and pass an `append_dim` argument to the `to_icechunk` function.

In [None]:
# Same options as for the initial write; only the files differ.
append_open_options = {
    "parser": parser,
    "registry": registry,
    "loadable_variables": ["time", "lat", "lon", "zlev"],
}

# Virtual datasets for the third and fourth files in the listing.
virtual_datasets_a = [
    open_virtual_dataset(url=file_url, **append_open_options)
    for file_url in oisst_files[2:4]
]

In [None]:
# Combine the two new virtual datasets along `time`, with the same concat
# settings as the initial write.
append_concat_options = {
    "dim": "time",
    "coords": "minimal",
    "compat": "override",
    "combine_attrs": "override",
}
virtual_ds_a = xr.concat(virtual_datasets_a, **append_concat_options)

In [None]:
# Open a new writable session on `main` for the append; the earlier `session`
# has already been committed.
append_session = repo.writable_session("main")

In [None]:
# `append_dim="time"` appends the new data along `time` to what is already in
# the store, rather than writing a fresh dataset.
virtual_ds_a.virtualize.to_icechunk(append_session.store, append_dim="time")

In [None]:
# Commit the appended data to `main`.
append_session.commit("wrote 2 more days of data")

# Check that it worked!

In [None]:
# Open a read-only session on the `main` branch to inspect what was committed.
read_session = repo.readonly_session(branch="main")

In [None]:
# Re-open the store through the read-only session and display it; the `time`
# dimension should now span all four files.
ds = xr.open_zarr(
    read_session.store,
    zarr_format=3,
    consolidated=False,
)
ds