In [1]:
import xarray as xr
from obstore.store import MemoryStore, S3Store
from virtualizarr.manifests.store import ObjectStoreRegistry

from hrrrparser import HRRRParser

#### AWS PDS HRRR bucket information

In [2]:
# Pieces of the object URLs assembled in the file_urls cell below:
# URL scheme, bucket name, and key prefix inside the bucket.
scheme = "s3://"
bucket = "noaa-hrrr-bdp-pds"
# presumably the 2025-07-10 run directory for the CONUS domain -- confirm against the bucket layout
prefix = "hrrr.20250710/conus"

In [3]:
# Single parser instance, reused for every file; it is called later as
# parser(url=..., registry=...) to build one manifest store per GRIB2 file.
parser = HRRRParser()

#### Generate the ObjectStoreRegistry to be used for virtual chunk access.

In [4]:
# S3 store for the bucket. skip_signature=True presumably means requests are
# sent unsigned, so no AWS credentials are needed -- confirm in the obstore docs.
object_store = S3Store(
    bucket=bucket,
    skip_signature=True,
)
# Map the "<scheme><bucket>" URL prefix to the store; registry.resolve(url) is
# used later to turn a full URL into (store, path inside the store).
registry = ObjectStoreRegistry({f"{scheme}{bucket}": object_store})

In [5]:
# Full URLs of the GRIB2 files to virtualize; the trailing slash (if any) is
# stripped from the prefix so the joined URL never contains "//".
file_urls = [
    f"{scheme}{bucket}/{prefix.rstrip('/')}/{grib_name}"
    for grib_name in (
        "hrrr.t22z.wrfsfcf16.grib2",
        "hrrr.t23z.wrfsfcf16.grib2",
    )
]
file_urls

['s3://noaa-hrrr-bdp-pds/hrrr.20250710/conus/hrrr.t22z.wrfsfcf16.grib2',
 's3://noaa-hrrr-bdp-pds/hrrr.20250710/conus/hrrr.t23z.wrfsfcf16.grib2']

#### Use intermediate memory cache for scanning
The Gribberish library needs to scan every message in the GRIB file, which can generate a lot of HTTP requests to S3 and increase latency. Instead, we can use obstore to create a `MemoryStore`, read the whole file into it, and scan the data from memory as if it were coming from the S3 bucket, without the latency associated with a large number of requests.

In [8]:
def cache_and_open_manifest_store(url, scheme, bucket):
    """Parse the file at ``url`` from an in-memory copy instead of the remote store.

    The URL is resolved through the module-level ``registry``, the whole object
    is fetched with a single ``get`` call, and the bytes are written to a fresh
    ``MemoryStore`` under the same path. The module-level ``parser`` is then
    called with a registry that maps ``f"{scheme}{bucket}"`` to that
    ``MemoryStore``, so its reads for this URL prefix go to memory.

    Parameters
    ----------
    url : str
        Full URL of the file to parse; passed to ``parser`` unchanged.
    scheme : str
        URL scheme, including the ``://`` separator as used in this notebook.
    bucket : str
        Bucket name; together with ``scheme`` it forms the registry key.

    Returns
    -------
    The object returned by ``parser(url=..., registry=...)``.
    """
    remote_store, object_path = registry.resolve(url)

    # Download the complete object up front, then stage it in memory under the
    # same path it has in the remote store.
    payload = remote_store.get(object_path).bytes()
    in_memory = MemoryStore()
    in_memory.put(object_path, payload)

    # Same URL prefix as the real registry, but backed by the in-memory copy.
    return parser(
        url=url,
        registry=ObjectStoreRegistry({f"{scheme}{bucket}": in_memory}),
    )

In [10]:
# One manifest store per URL, in the same order as file_urls.
manifest_stores = [
    cache_and_open_manifest_store(grib_url, scheme, bucket)
    for grib_url in file_urls
]
manifest_stores

[<virtualizarr.manifests.store.ManifestStore at 0x1099a7250>,
 <virtualizarr.manifests.store.ManifestStore at 0x1099a6d50>]

#### Open a VirtualiZarr ManifestStore directly as a Zarr store.

In [19]:
# Open the second manifest store (index 1) with xarray's zarr engine; the store
# object is passed directly in place of a path.
# NOTE(review): consolidated=False presumably skips the consolidated-metadata
# lookup and zarr_format=3 selects Zarr v3 -- confirm against the xarray docs.
ds = xr.open_dataset(
    manifest_stores[1], engine="zarr", consolidated=False, zarr_format=3
)
ds

In [12]:
# Load the tmp_hag values at index 2 along the "hag" dimension as an array
# ("hag" is presumably height above ground -- confirm with the parser docs).
ds["tmp_hag"].isel(hag=2).values

array([[[[294.26507568, 294.26507568, 294.26507568, ..., 302.32757568,
          302.32757568, 302.26507568],
         [294.26507568, 294.26507568, 294.26507568, ..., 302.26507568,
          302.26507568, 302.26507568],
         [294.26507568, 294.26507568, 294.26507568, ..., 302.26507568,
          302.26507568, 302.20257568],
         ...,
         [286.89007568, 286.89007568, 286.95257568, ..., 286.57757568,
          286.45257568, 286.39007568],
         [286.89007568, 286.89007568, 286.95257568, ..., 286.45257568,
          286.39007568, 286.26507568],
         [286.89007568, 286.89007568, 286.89007568, ..., 286.32757568,
          286.26507568, 286.20257568]]]], shape=(1, 1, 1059, 1799))

In [13]:
# Longitude at the first grid point (index 0 along both x and y).
ds["longitude"].isel(x=0, y=0).values

array(-122.719528)

In [14]:
# Open every manifest store as its own dataset, in the same order as manifest_stores.
vdss = [
    xr.open_dataset(store, engine="zarr", consolidated=False, zarr_format=3)
    for store in manifest_stores
]

In [15]:
# Merge the per-file datasets into one, letting xarray order them by their
# coordinate values. Per the xarray docs, combine_attrs="override" copies the
# attributes from the first dataset without comparing them across datasets.
combined_vds = xr.combine_by_coords(vdss, combine_attrs="override")
combined_vds

In [17]:
# tmp_isobar values at grid index y=100, x=100 from the combined dataset.
# NOTE(review): the first two entries of each row in the output are 0.0 --
# confirm whether these are fill/missing values rather than real temperatures.
combined_vds["tmp_isobar"].isel(y=100, x=100).values

array([[[  0.        ,   0.        , 266.31001282, 283.92590332,
         295.52978516, 292.79974365, 290.33868408]],

       [[  0.        ,   0.        , 266.54208374, 283.99029541,
         295.74993896, 290.52233887, 291.21044922]]])