# DART-CAM6 reanalysis diagnostic plots

- This notebook is adapted from the DART-CAM6 example notebook on AWS
- https://ncar-dart-cam6.s3-us-west-2.amazonaws.com/examples/plot-ensemble-values.html

### Input Data Access

- This notebook illustrates how to make diagnostic plots from the DART reanalysis hosted on NCAR's GLADE storage.
- https://rda.ucar.edu/datasets/d345001/#
- This data is open access and can be accessed via three protocols: 1) POSIX (if you have access to NCAR's HPC), 2) HTTPS, and 3) OSDF, each using an intake-ESM catalog.

In [1]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
import intake
import numpy as np
import pandas as pd
import xarray as xr
import seaborn as sns
import re
import matplotlib.pyplot as plt

In [2]:
import fsspec.implementations.http as fshttp
from pelicanfs.core import PelicanFileSystem, PelicanMap, OSDFFileSystem 

In [3]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [4]:
# Year strings; judging by the names, the start/end of an initial and a final
# period -- TODO confirm.
# NOTE(review): no cell visible in this notebook reads these names.
init_year0, init_year1 = '1991', '2020'
final_year0, final_year1 = '2071', '2100'

In [5]:
# Scratch directory; the Dask cluster below puts its spill files and logs under it.
rda_scratch = '/glade/campaign/collections/rda/scratch/harshah'

# intake-ESM catalogs for dataset d345001, one per access protocol.
# The 'posix' catalog is a GLADE filesystem path, so it is only reachable
# from NCAR's HPC systems.
catalog_urls = {
    'osdf':  'https://data.rda.ucar.edu/d345001/catalogs/d345001-osdf-zarr.json',
    'https': 'https://data.rda.ucar.edu/d345001/catalogs/d345001-https-zarr.json',
    'posix': '/glade/campaign/collections/rda/data/d345001/catalogs/d345001-posix-zarr.json',
}

# Change this key to switch protocol instead of (un)commenting URLs.
access_protocol = 'osdf'
cat_url = catalog_urls[access_protocol]
print(cat_url)

https://data.rda.ucar.edu/d345001/catalogs/d345001-osdf-zarr.json


## Create a PBS cluster

In [6]:
# Describe the PBS batch job that hosts each Dask worker.
cluster = PBSCluster(
    # Scheduling: job name, queue and wall-clock limit.
    job_name='dask-wk24-hpc',
    queue='casper',
    walltime='5:00:00',
    # Resources per job: a single core running a single worker process.
    cores=1,
    processes=1,
    memory='8GiB',
    resource_spec='select=1:ncpus=1:mem=8GB',
    # Worker spill space and job logs both live under the scratch directory.
    local_directory=f'{rda_scratch}/dask/spill',
    log_directory=f'{rda_scratch}/dask/logs/',
    # Network interface used by Dask ('ib0' was the commented-out alternative).
    interface='ext',
)

In [7]:
# Number of worker jobs to request.
n_workers = 8

# Attach a client to the cluster, request the workers, and block until
# every one of them has connected.
client = Client(cluster)
cluster.scale(n_workers)
client.wait_for_workers(n_workers)

# Show the cluster widget (it carries the dashboard link).
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Workers: 8
Total threads: 8,Total memory: 64.00 GiB

0,1
Comm: tcp://128.117.208.98:45735,Workers: 8
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/8787/status,Total threads: 8
Started: Just now,Total memory: 64.00 GiB

0,1
Comm: tcp://128.117.208.177:36621,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/39041/status,Memory: 8.00 GiB
Nanny: tcp://128.117.208.177:41399,
Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-byk5spun,Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-byk5spun
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 126.78 MiB,Spilled bytes: 0 B
Read bytes: 317.69 kiB,Write bytes: 437.28 kiB

0,1
Comm: tcp://128.117.208.178:46269,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/44125/status,Memory: 8.00 GiB
Nanny: tcp://128.117.208.178:36171,
Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-0_7nbyx0,Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-0_7nbyx0
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 51.52 MiB,Spilled bytes: 0 B
Read bytes: 22.35 MiB,Write bytes: 1.22 MiB

0,1
Comm: tcp://128.117.208.175:36453,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/36015/status,Memory: 8.00 GiB
Nanny: tcp://128.117.208.175:46723,
Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-f_n9mvi6,Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-f_n9mvi6
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 124.96 MiB,Spilled bytes: 0 B
Read bytes: 850.83 MiB,Write bytes: 2.42 GiB

0,1
Comm: tcp://128.117.208.176:36365,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/37857/status,Memory: 8.00 GiB
Nanny: tcp://128.117.208.176:38347,
Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-nnbroe4y,Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-nnbroe4y
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 130.18 MiB,Spilled bytes: 0 B
Read bytes: 237.54 MiB,Write bytes: 437.51 MiB

0,1
Comm: tcp://128.117.208.178:44453,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/35725/status,Memory: 8.00 GiB
Nanny: tcp://128.117.208.178:35373,
Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-i8ihdn8d,Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-i8ihdn8d
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 51.46 MiB,Spilled bytes: 0 B
Read bytes: 20.05 MiB,Write bytes: 1.40 MiB

0,1
Comm: tcp://128.117.208.176:33179,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/42919/status,Memory: 8.00 GiB
Nanny: tcp://128.117.208.176:39281,
Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-05pr1jk3,Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-05pr1jk3
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 128.68 MiB,Spilled bytes: 0 B
Read bytes: 237.54 MiB,Write bytes: 437.40 MiB

0,1
Comm: tcp://128.117.208.176:35937,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/46825/status,Memory: 8.00 GiB
Nanny: tcp://128.117.208.176:45685,
Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-uh7ujcs8,Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-uh7ujcs8
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 134.18 MiB,Spilled bytes: 0 B
Read bytes: 236.70 MiB,Write bytes: 436.44 MiB

0,1
Comm: tcp://128.117.208.178:36641,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/38499/status,Memory: 8.00 GiB
Nanny: tcp://128.117.208.178:43375,
Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-0jvog2na,Local directory: /glade/campaign/collections/rda/scratch/harshah/dask/spill/dask-scratch-space/worker-0jvog2na
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 51.47 MiB,Spilled bytes: 0 B
Read bytes: 2.71 MiB,Write bytes: 1.88 MiB


## Load DART Reanalysis data from RDA using an intake catalog

In [8]:
# Open the intake-ESM catalog that cat_url points to; the bare name on the
# last line displays the catalog summary.
col = intake.open_esm_datastore(cat_url)
col

Unnamed: 0,unique
variable,11
long_name,11
units,6
standard_name,10
vertical_levels,2
component,2
spatial_domain,1
start_time,2
end_time,2
frequency,2


### Load data into xarray using catalog

In [9]:
# Name of the variable to plot; also used as the catalog search value.
data_var = 'PS'

# Keep only the catalog entries whose 'variable' column equals data_var,
# then display the summary of the reduced catalog.
col_subset = col.search(variable=data_var)
col_subset

Unnamed: 0,unique
variable,1
long_name,1
units,1
standard_name,1
vertical_levels,1
component,1
spatial_domain,1
start_time,1
end_time,1
frequency,1


In [10]:
# Show the 'path' column of the subset, i.e. where the matched store(s) live.
col_subset.df['path'].values

array(['osdf:///ncar/rda/d345001/weekly/PS.zarr'], dtype=object)

In [11]:
# Show the complete catalog row(s) for the selected variable.
col_subset.df

Unnamed: 0,variable,long_name,units,standard_name,vertical_levels,component,spatial_domain,start_time,end_time,frequency,path
0,PS,Surface pressure,Pa,surface_air_pressure,1,atm,global,2011-01-03T00:00:00,2019-12-30T00:00:00,weekly,osdf:///ncar/rda/d345001/weekly/PS.zarr


### Convert catalog subset to a dictionary of xarray datasets, and use the first one.

In [12]:
# Open every store in the catalog subset as an xarray dataset.
# `xarray_open_kwargs` is the current name for what used to be passed as
# `zarr_kwargs` (deprecated in intake-esm); the options are handed to xarray
# when each store is opened.
dsets = col_subset.to_dataset_dict(xarray_open_kwargs={"consolidated": True})
print(f"\nDataset dictionary keys:\n {dsets.keys()}")

# Load the first dataset and display a summary.
dataset_key = next(iter(dsets))
ds = dsets[dataset_key]
ds


--> The keys in the returned dictionary of datasets are constructed as follows:
	'variable.frequency.component.vertical_levels'



Dataset dictionary keys:
 dict_keys(['PS.weekly.atm.1'])


Unnamed: 0,Array,Chunk
Bytes,15.52 GiB,50.00 MiB
Shape,"(80, 471, 192, 288)","(80, 80, 32, 32)"
Dask graph,324 chunks in 2 graph layers,324 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.52 GiB 50.00 MiB Shape (80, 471, 192, 288) (80, 80, 32, 32) Dask graph 324 chunks in 2 graph layers Data type float64 numpy.ndarray",80  1  288  192  471,

Unnamed: 0,Array,Chunk
Bytes,15.52 GiB,50.00 MiB
Shape,"(80, 471, 192, 288)","(80, 80, 32, 32)"
Dask graph,324 chunks in 2 graph layers,324 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [13]:
# Pick the first entry of the dataset dictionary.
dataset_key = list(dsets)[0]
ds = dsets[dataset_key]

# Store name built from the dataset key.
# NOTE(review): store_name is assigned again in the plotting cell before use.
store_name = f"{dataset_key}.zarr"

# Display a summary of the selected dataset.
ds

Unnamed: 0,Array,Chunk
Bytes,15.52 GiB,50.00 MiB
Shape,"(80, 471, 192, 288)","(80, 80, 32, 32)"
Dask graph,324 chunks in 2 graph layers,324 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 15.52 GiB 50.00 MiB Shape (80, 471, 192, 288) (80, 80, 32, 32) Dask graph 324 chunks in 2 graph layers Data type float64 numpy.ndarray",80  1  288  192  471,

Unnamed: 0,Array,Chunk
Bytes,15.52 GiB,50.00 MiB
Shape,"(80, 471, 192, 288)","(80, 80, 32, 32)"
Dask graph,324 chunks in 2 graph layers,324 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


### Define Plot Functions

#### Get consistently shaped data slices for both 2D and 3D variables.

In [14]:
def getSlice(ds, data_var):
    '''Return ``ds[data_var]`` reduced to a single vertical level if it has one.

    A variable without a ``lev`` dimension is returned untouched.  Otherwise
    the last entry of ``ds.lev`` is selected (taken to be the level closest
    to the Earth's surface) and the result is squeezed, which makes it
    suitable for 2-D diagnostic plots.
    '''
    field = ds[data_var]

    # Nothing to reduce for variables that have no vertical dimension.
    if 'lev' not in field.dims:
        return field

    surface_lev = ds.lev.values[-1]
    return field.sel(lev = surface_lev).squeeze()

#### Get lat/lon dimension names

In [15]:
def getSpatialDimensionNames(data_slice):
    '''Return ``[lat_dim, lon_dim]``, the horizontal dimension names of the slice.

    ``'lat'`` and ``'lon'`` are used when they appear in ``data_slice.dims``;
    otherwise the corresponding name falls back to ``'slat'`` / ``'slon'``.
    '''
    present = data_slice.dims
    # (preferred name, fallback name) for latitude, then longitude.
    candidates = (('lat', 'slat'), ('lon', 'slon'))
    return [
        preferred if preferred in present else fallback
        for preferred, fallback in candidates
    ]

#### Produce Time Series Spaghetti Plot of Ensemble Members

In [16]:
def plot_timeseries(ds, data_var, store_name):
    '''Create a three-panel spaghetti plot for a given variable.

    For every ensemble member the spatial maximum, mean and minimum are
    taken at each time step.  Each statistic gets its own panel: one faint
    red line per member drawn over a white band that spans the member-wise
    range, on a light grey background.

    Parameters
    ----------
    ds : dataset containing ``data_var``, with ``time`` and ``member_id``
        dimensions.
    data_var : name of the variable to plot.
    store_name : text used as the figure's super-title.

    Returns
    -------
    The matplotlib figure holding the three panels.
    '''
    figWidth = 25
    figHeight = 20
    linewidth = 0.5

    data_slice = getSlice(ds, data_var)
    spatial_dims = getSpatialDimensionNames(data_slice)

    # Fall back to an empty axis label when the variable carries no units.
    unit_string = ds[data_var].attrs.get('units', '')

    # Persist the slice so it's read from disk only once.
    # This is faster when data values are reused many times.
    data_slice = data_slice.persist()

    # (word used in the panel title, spatial reduction), top to bottom.
    panels = [
        ('Maxima', data_slice.max(dim = spatial_dims)),
        ('Means', data_slice.mean(dim = spatial_dims)),
        ('Minima', data_slice.min(dim = spatial_dims)),
    ]

    fig, axs = plt.subplots(len(panels), 1, figsize=(figWidth, figHeight))

    for ax, (label, member_vals) in zip(axs, panels):
        # Put time first so matplotlib draws one line per ensemble member
        # whatever the dimension order in the store, and evaluate the
        # reduction once rather than once per use below.
        member_vals = member_vals.transpose('time', 'member_id').compute()

        # Envelope of the statistic across ensemble members.
        rangeMaxs = member_vals.max(dim = 'member_id')
        rangeMins = member_vals.min(dim = 'member_id')

        ax.set_facecolor('lightgrey')
        ax.fill_between(ds.time, rangeMins, rangeMaxs, linewidth=linewidth, color='white')
        ax.plot(ds.time, member_vals, linewidth=linewidth, color='red', alpha=0.1)
        ax.set_title(f'Ensemble Member {label} Over Time', fontsize=20)
        ax.set_ylabel(unit_string)

    fig.suptitle(store_name, fontsize=25)

    return fig

#### Actually Create Spaghetti Plot Showing All Ensemble Members

In [None]:
%%time

# Figure super-title: the variable name followed by ".zarr".
store_name = f'{data_var}.zarr'
# Build the ensemble time-series figure; the %%time magic at the top of this
# cell reports how long that takes.
fig = plot_timeseries(ds, data_var, store_name)


#### Release dask workers

In [None]:
# Disconnect the client first so it is not left attached to a scheduler that
# no longer exists, then shut the cluster down to release the PBS worker jobs.
client.close()
cluster.close()