# Access the CESM2 Large Ensemble on AWS through the AWS Open Data origin and compute the global mean surface temperature (GMST)

In [1]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

import intake
import numpy as np
import pandas as pd
import xarray as xr
import s3fs
import seaborn as sns
import re
import nest_asyncio
nest_asyncio.apply()

In [2]:
from pelicanfs.core import PelicanFileSystem, PelicanMap
import fsspec.implementations.http as fshttp

In [3]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [4]:
# Build a PelicanFS filesystem object from the director URL below.
pelican_director = 'https://osdf-director.osg-htc.org/'
pelfs = PelicanFileSystem(pelican_director)
# List '/ncar-cesm2-lens/' through that filesystem.
# NOTE(review): the recorded output of this cell is a RuntimeError. Presumably
# '/ncar-cesm2-lens/' is not a path this director can resolve -- TODO confirm
# the correct path against the PelicanFS documentation.
pelfs.ls('/ncar-cesm2-lens/')

RuntimeError: 

In [5]:
# Wrap the Zarr store in a PelicanMap backed by `pelfs`, then open it with xr.open_zarr.
# NOTE(review): the recorded output of this cell is a RuntimeError. The root
# given to PelicanMap is an 's3://' URL, unlike the plain path passed to
# `pelfs.ls` -- presumably PelicanMap expects a path of that kind instead;
# TODO confirm against the PelicanFS documentation.
pel_zarr = PelicanMap('s3://ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-TREFHT.zarr', pelfs)
print(pel_zarr)
# Open the mapped store as a Zarr dataset and display it.
test = xr.open_zarr(pel_zarr)
test

RuntimeError: 

# Comments
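- Using PelicanFS to access the `ncar-cesm2-lens` data currently fails: both the `pelfs.ls` call and the `PelicanMap` / `xr.open_zarr` attempt above raise a `RuntimeError`.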

In [7]:
# Create a PBS cluster object. Each job is configured with 1 core, 1 worker
# process and 8 GiB of memory, on the 'casper' queue with a 2-hour walltime.
# NOTE(review): `rda_scratch` is not defined in any cell shown before this one
# (execution count 6 is missing), so this cell raises NameError on a fresh
# kernel unless it is set to a scratch-directory path first -- TODO confirm
# where it is meant to be defined.
cluster = PBSCluster(
    job_name = 'dask-wk24-hpc',
    cores = 1,
    memory = '8GiB',
    processes = 1,
    local_directory = rda_scratch+'/dask/spill',  # worker-local directory under the scratch path
    resource_spec = 'select=1:ncpus=1:mem=8GB',
    queue = 'casper',
    walltime = '2:00:00',
    # Alternative network interface, kept for reference:
    #interface = 'ib0'
    interface = 'ext'
)

# Access the same data directly from the AWS S3 bucket using intake-esm, for comparison

In [8]:
# Open the collection description file (JSON) with intake-esm. The file is
# hosted in the NCAR/cesm2-le-aws GitHub repository. Displaying `catalog`
# lists the number of unique values in each catalog column.
catalog = intake.open_esm_datastore(
    'https://raw.githubusercontent.com/NCAR/cesm2-le-aws/main/intake-catalogs/aws-cesm2-le.json'
)
catalog

Unnamed: 0,unique
variable,53
long_name,51
component,4
experiment,2
forcing_variant,2
frequency,3
vertical_levels,3
spatial_domain,3
units,20
start_time,4


In [9]:
# Narrow the catalog to the daily TREFHT (reference height temperature) entries.
catalog_subset = catalog.search(variable='TREFHT', frequency='daily')
catalog_subset

Unnamed: 0,unique
variable,1
long_name,1
component,1
experiment,2
forcing_variant,2
frequency,1
vertical_levels,1
spatial_domain,1
units,1
start_time,2


In [10]:
# Show the matching catalog entries as a table (one row per Zarr store).
catalog_subset.df

Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,TREFHT,reference height temperature,atm,historical,cmip6,daily,1.0,global,K,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
1,TREFHT,reference height temperature,atm,historical,smbb,daily,1.0,global,K,1850-01-01 12:00:00,2014-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-histori...
2,TREFHT,reference height temperature,atm,ssp370,cmip6,daily,1.0,global,K,2015-01-01 12:00:00,2100-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-ssp370-...
3,TREFHT,reference height temperature,atm,ssp370,smbb,daily,1.0,global,K,2015-01-01 12:00:00,2100-12-31 12:00:00,s3://ncar-cesm2-lens/atm/daily/cesm2LE-ssp370-...


In [11]:
# Print the full store path of the first entry (the table display truncates it).
catalog_subset.df.loc[0,'path']

's3://ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-TREFHT.zarr'

In [12]:
# Load every entry of the subset into a dictionary of datasets.
# 'anon': True is passed as a storage option so the S3 stores are read
# without AWS credentials.
dsets = catalog_subset.to_dataset_dict(storage_options={'anon':True})


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency.forcing_variant'


In [13]:
# List the dataset keys (format: component.experiment.frequency.forcing_variant).
dsets.keys()

dict_keys(['atm.historical.daily.cmip6', 'atm.ssp370.daily.smbb', 'atm.ssp370.daily.cmip6', 'atm.historical.daily.smbb'])

In [14]:
# GMST function ###
# calculate global means

def get_lat_name(ds):
    """Return the name of the latitude coordinate found in ``ds.coords``.

    The candidates 'lat' and 'latitude' are checked in that order and the
    first one present is returned.

    Raises:
        RuntimeError: if neither candidate is a coordinate of ``ds``.
    """
    candidates = ('lat', 'latitude')
    match = next((name for name in candidates if name in ds.coords), None)
    if match is None:
        raise RuntimeError("Couldn't find a latitude coordinate")
    return match

def global_mean(ds):
    """Return the cosine-of-latitude weighted mean of ``ds``.

    The weights are cos(latitude in radians), rescaled so that their own mean
    is 1. The weighted data are then averaged over every dimension of ``ds``
    except 'time' and 'member_id', which are kept in the result.
    """
    latitude = ds[get_lat_name(ds)]
    cos_weight = np.cos(np.deg2rad(latitude))
    # Normalise in place so the weights average to one.
    cos_weight /= cos_weight.mean()
    kept_dims = {'time', 'member_id'}
    reduce_dims = set(ds.dims).difference(kept_dims)
    weighted = ds * cos_weight
    return weighted.mean(reduce_dims)

In [15]:
# Connect a Dask client to the PBS cluster created earlier. Displaying the
# client shows the dashboard link and the current worker count.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/43743/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/43743/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.98:43177,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/43743/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [16]:
# Ask the cluster to scale to 8. The recorded output still shows 0 workers,
# presumably because the PBS jobs had not started yet when it was displayed.
cluster.scale(8)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/43743/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.98:43177,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/43743/status,Total threads: 0
Started: Just now,Total memory: 0 B


### Calculate GMST 

#### Now compute (spatially weighted) Global Mean