# Access AWS CESM2 using the AWS open data origin data and compute GMST

In [1]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

import intake
import numpy as np
import pandas as pd
import xarray as xr
import s3fs
import seaborn as sns
import re
import nest_asyncio
nest_asyncio.apply()

In [2]:
import fsspec.implementations.http as fshttp
from pelicanfs.core import PelicanFileSystem, PelicanMap, OSDFFileSystem 

In [3]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [4]:
# file = PelicanMap('/chtc/PUBLIC/eturetsky/ncar-subset/ncar/monthly/cesm2LE-historical-cmip6-FLUT.zarr', pelfs)
# test = xr.open_dataset(file, engine='zarr')
# test

In [5]:
pelfs = PelicanFileSystem("pelican://osg-htc.org")
pelfs.ls('/ncar/rda/ds559.0')

[{'name': '/ncar/rda/ds559.0/wy2015', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy2018', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy1989', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy2009', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/index.html', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy1986', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy2013', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy1995', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy2016', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy2007', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy1999', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy2002', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy1996', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/wy2010', 'size': None, 'type': 'file'},
 {'name': '/ncar/rda/ds559.0/w

In [22]:
pelfs = PelicanFileSystem("https://osdf-director.osg-htc.org/")
pelfs.ls('/aws-opendata/us-west-2/ncar-cesm2-lens/')

[]

In [29]:
pelfs = PelicanFileSystem("pelican://osg-htc.org")
pelfs.ls('/aws-opendata/us-west-1/')

FileNotFoundError: https://west-1-aws-opendata-s3-osdf-pelican-origin.osdf-prod.chtc.io:8443/aws-opendata/us-west-1

In [7]:
pelfs = OSDFFileSystem("https://osdf-director.osg-htc.org/")
pelfs.ls('/aws-opendata/us-west-2/')

TypeError: OSDFFileSystem.__init__() takes 1 positional argument but 2 were given

In [32]:
test = xr.open_dataset("osdf://aws-opendata/us-west-2/ncar-cesm2-lens/monthly/cesm2LE-historical-cmip6-FLUT.zarr", engine='zarr')
test

InvalidMetadata: 

In [12]:
pelican_director = 'https://osdf-director.osg-htc.org/'
pelfs = PelicanFileSystem(pelican_director)
pelfs.ls('/us-west-2/')

BadDirectorResponse: 

In [13]:
#Try passing the url to xr.open_dataset
pel_zarr = PelicanMap('s3://ncar-cesm2-lens/atm/daily/cesm2LE-historical-cmip6-TREFHT.zarr', pelfs)
print(pel_zarr)
#
test = xr.open_zarr(pel_zarr)
test

<fsspec.mapping.FSMap object at 0x148a6c9f5ed0>


InvalidMetadata: 

# Comments
- Using PelicanFS to access 

In [None]:
# Create a PBS cluster object
cluster = PBSCluster(
    job_name = 'dask-wk24-hpc',
    cores = 1,
    memory = '8GiB',
    processes = 1,
    local_directory = rda_scratch+'/dask/spill',
    resource_spec = 'select=1:ncpus=1:mem=8GB',
    queue = 'casper',
    walltime = '2:00:00',
    #interface = 'ib0'
    interface = 'ext'
)

# Access the data from the AWS bucket using intake to compare

In [None]:
# Open collection description file using intake
catalog = intake.open_esm_datastore(
    'https://raw.githubusercontent.com/NCAR/cesm2-le-aws/main/intake-catalogs/aws-cesm2-le.json'
)
catalog

In [None]:
catalog_subset = catalog.search(variable='TREFHT', frequency='daily')
catalog_subset

In [None]:
catalog_subset.df

In [None]:
catalog_subset.df.loc[0,'path']

In [None]:
dsets = catalog_subset.to_dataset_dict(storage_options={'anon':True})

In [None]:
dsets.keys()

In [None]:
# GMST function ###
# calculate global means

def get_lat_name(ds):
    for lat_name in ['lat', 'latitude']:
        if lat_name in ds.coords:
            return lat_name
    raise RuntimeError("Couldn't find a latitude coordinate")

def global_mean(ds):
    lat = ds[get_lat_name(ds)]
    weight = np.cos(np.deg2rad(lat))
    weight /= weight.mean()
    other_dims = set(ds.dims) - {'time','member_id'}
    return (ds * weight).mean(other_dims)

In [None]:
client = Client(cluster)
client

In [None]:
cluster.scale(8)
cluster

### Calculate GMST 

#### Now compute (spatially weighted) Global Mean