In [1]:
import glob
import re
import matplotlib as plt
import numpy as np
import scipy as sp
import xarray as xr
import intake
import intake_esm
import pandas as pd

In [2]:
# --- Local file-system locations -------------------------------------------
# Scratch space; get_pbs_cluster() puts the Dask spill and log directories under it.
lustre_scratch = "/lustre/desc1/scratch/harshah"
# Root of the project data on GDEX and two sub-directories derived from it.
gdex_data = "/gdex/data/special_projects/pythia_2024"
annual_means = gdex_data + '/annual_means/'
zarr_path = gdex_data + "/tas_zarr/"

# --- Remote catalog location -----------------------------------------------
# The ERA5 intake-ESM catalog JSON is addressed relative to the GDEX HTTPS endpoint.
gdex_url = 'https://data.gdex.ucar.edu/'
era5_catalog = gdex_url + 'special_projects/pythia_2024/pythia_intake_catalogs/era5_catalog.json'

# Echo the resolved URL so the reader can see which catalog is opened further down.
print(era5_catalog)

https://data.gdex.ucar.edu/special_projects/pythia_2024/pythia_intake_catalogs/era5_catalog.json


In [3]:
# Cluster back-end switches, consulted in this order by the cluster-selection cell:
# PBS first, then Dask Gateway; with both False a LocalCluster is used.
USE_PBS_SCHEDULER = True
# This flag was previously undefined: switching USE_PBS_SCHEDULER to False made the
# `elif USE_DASK_GATEWAY` test raise NameError before any fallback could run.
USE_DASK_GATEWAY = False

In [4]:
# Create a PBS cluster object
def get_pbs_cluster():
    """Return a ``dask_jobqueue.PBSCluster`` configured for single-core worker jobs.

    Each worker job requests one CPU and 4 GB from PBS on the ``casper`` queue
    with a three-hour walltime. Spill and log directories are placed under the
    notebook-level ``lustre_scratch`` path.
    """
    from dask_jobqueue import PBSCluster

    worker_job_settings = {
        'job_name': 'dask-osdf-24',
        'cores': 1,
        'memory': '4GiB',
        'processes': 1,
        'local_directory': lustre_scratch + '/dask/spill',
        'log_directory': lustre_scratch + '/dask/logs/',
        'resource_spec': 'select=1:ncpus=1:mem=4GB',
        'queue': 'casper',
        'walltime': '3:00:00',
        # Network interface name; 'ib0' is the alternative that was tried earlier.
        'interface': 'ext',
    }
    return PBSCluster(**worker_job_settings)

def get_gateway_cluster():
    """Return a new Dask Gateway cluster with adaptive scaling switched on.

    The cluster is requested from a default-constructed ``Gateway`` and is told
    to adapt between 2 and 4 workers.
    """
    from dask_gateway import Gateway

    gateway_cluster = Gateway().new_cluster()
    # Let the gateway add or remove workers within these bounds.
    gateway_cluster.adapt(minimum=2, maximum=4)
    return gateway_cluster

def get_local_cluster():
    """Create a cluster using the Jupyter server's resources.

    Returns:
        A ``distributed.LocalCluster`` that has been asked to scale to 6 workers.
    """
    # Only LocalCluster is needed here; the `performance_report` name that used
    # to be imported alongside it was never referenced in this function.
    from distributed import LocalCluster

    cluster = LocalCluster()
    cluster.scale(6)
    return cluster

In [5]:
from distributed import Client

# Choose which helper builds the cluster. The flags are checked in priority
# order (PBS, then Dask Gateway), and the local cluster is the fallback.
if USE_PBS_SCHEDULER:
    cluster_factory = get_pbs_cluster
elif USE_DASK_GATEWAY:
    cluster_factory = get_gateway_cluster
else:
    cluster_factory = get_local_cluster
cluster = cluster_factory()

# Attach a client to the chosen cluster.
client = Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 37833 instead


In [6]:
# Request a fixed pool of workers and block until all of them have connected,
# then show the cluster widget (including the dashboard URL) as the cell output.
n_workers = 5
cluster.scale(n_workers)
client.wait_for_workers(n_workers)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/37833/status,Workers: 5
Total threads: 5,Total memory: 20.00 GiB

0,1
Comm: tcp://128.117.208.96:43731,Workers: 5
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/37833/status,Total threads: 5
Started: 1 minute ago,Total memory: 20.00 GiB

0,1
Comm: tcp://128.117.208.174:36515,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/46775/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.174:45723,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-evhdzejh,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-evhdzejh
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 124.82 MiB,Spilled bytes: 0 B
Read bytes: 321.73 MiB,Write bytes: 38.83 MiB

0,1
Comm: tcp://128.117.208.174:33703,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/46013/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.174:33173,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-eloszrbb,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-eloszrbb
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 124.85 MiB,Spilled bytes: 0 B
Read bytes: 323.16 MiB,Write bytes: 38.71 MiB

0,1
Comm: tcp://128.117.208.174:36961,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/44833/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.174:35711,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-d0c1rumh,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-d0c1rumh
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 124.78 MiB,Spilled bytes: 0 B
Read bytes: 322.14 MiB,Write bytes: 38.86 MiB

0,1
Comm: tcp://128.117.208.175:39413,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/46779/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.175:33193,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-ismynx8a,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-ismynx8a
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 126.86 MiB,Spilled bytes: 0 B
Read bytes: 669.41 MiB,Write bytes: 25.01 MiB

0,1
Comm: tcp://128.117.208.173:42837,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/39357/status,Memory: 4.00 GiB
Nanny: tcp://128.117.208.173:40741,
Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-ckj7rzqv,Local directory: /lustre/desc1/scratch/harshah/dask/spill/dask-scratch-space/worker-ckj7rzqv
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 51.55 MiB,Spilled bytes: 0 B
Read bytes: 5.77 MiB,Write bytes: 3.10 MiB


In [7]:
# Show metadata (version, install location, dependencies) for the intake-esm
# package installed in the kernel's environment.
%pip show intake-esm

Name: intake-esm
Version: 2025.7.9
Summary: An intake plugin for parsing an Earth System Model (ESM) catalog and loading netCDF files and/or Zarr stores into Xarray datasets.
Home-page: https://intake-esm.readthedocs.io
Author: 
Author-email: 
License: Apache Software License 2.0
Location: /glade/u/home/harshah/venvs/osdf/lib/python3.10/site-packages
Requires: dask, fastprogress, fsspec, intake, itables, netCDF4, pandas, polars, pydantic, pydap, requests, xarray, zarr
Required-by: ecgtools
Note: you may need to restart the kernel to use updated packages.


In [8]:
# Open the ERA5 intake-ESM catalog from its JSON URL and display the datastore.
# NOTE(review): the output saved with this cell is an OSError ("Failed to parse
# value for CONTENT_RANGE header") raised during a 'csv scan' step, so every
# later cell that uses era5_cat has no recorded output. TODO: confirm whether
# the catalog can be read from this URL with the installed intake-esm version.
era5_cat = intake.open_esm_datastore(era5_catalog)
era5_cat

OSError: object-store error: Generic HTTP error: Failed to parse value for CONTENT_RANGE header: "bytes 0-53851469/*"

This error occurred with the following context stack:
	[1] 'csv scan'
	[2] 'slice'
	[3] 'sink'


In [None]:
# Display the catalog's `df` attribute, the tabular listing of catalog entries
# (presumably a pandas DataFrame — confirm).
era5_cat.df

In [None]:
# Narrow the catalog to hourly VAR_2T entries for the year 2000 and show the result.
temp_cat = era5_cat.search(
    variable='VAR_2T',
    frequency='hourly',
    year=2000,
)
temp_cat

In [None]:
# Keyword arguments passed to temp_cat.to_dataset_dict as xarray_open_kwargs;
# the files are opened with the 'h5netcdf' engine.
xarray_open_kwargs = {
    'engine': 'h5netcdf',
    'chunks': {},  # empty dict: no explicit chunk sizes are given here
    'backend_kwargs': {}  # no additional backend arguments are passed
}

In [None]:
%%time
# Load the datasets matched by temp_cat into a dict, passing the engine and
# chunk options defined in xarray_open_kwargs; %%time reports how long it takes.
dset_temp = temp_cat.to_dataset_dict(xarray_open_kwargs=xarray_open_kwargs)

In [None]:
# Display the dictionary returned by to_dataset_dict so its keys are visible.
dset_temp

In [None]:
# Pick the dataset stored under the 'an.sfc' key.
# NOTE(review): the key is hard-coded; confirm it against the keys of dset_temp.
temps = dset_temp['an.sfc']

In [None]:
# Plot the VAR_2T field at the first time index.
temps.VAR_2T.isel(time=0).plot()

In [None]:
# Shut down in order: disconnect the client first, then stop the cluster and
# its workers. Previously only the cluster was closed, which left the Client
# created earlier in the notebook still connected.
client.close()
cluster.close()