# Access AWS CESM2 Large Ensemble data and compute global mean surface temperature (GMST)

In [1]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

import intake
import numpy as np
import pandas as pd
import xarray as xr
# import s3fs
import seaborn as sns

In [2]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [3]:
# Scratch directory; the Dask cluster's spill and log directories are placed under it.
rda_scratch = '/gpfs/csfs1/collections/rda/scratch/harshah'
# URL of the JSON catalog that is opened below with intake.open_esm_datastore.
catalog_url = 'https://data.rda.ucar.edu/harshah/intake_catalogs/osdf/cesm2-lens-aws-osdf/aws-cesm2-le.json'

In [19]:
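# Helper functions for computing the global mean surface temperature (GMST):
# locate the latitude coordinate, then take a latitude-weighted global mean.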
def get_lat_name(ds):
    """Return the name of the latitude coordinate found in ``ds.coords``.

    The candidates are tried in order: 'lat' first, then 'latitude'.

    Raises
    ------
    RuntimeError
        If neither candidate name is present in ``ds.coords``.
    """
    candidates = ('lat', 'latitude')
    found = next((name for name in candidates if name in ds.coords), None)
    if found is None:
        raise RuntimeError("Couldn't find a latitude coordinate")
    return found

def global_mean(ds):
    """Return the cosine-of-latitude weighted mean of ``ds`` over its spatial dimensions.

    Every dimension of ``ds`` other than 'time' and 'member_id' is averaged
    out; those two dimensions (when present) are kept.

    Parameters
    ----------
    ds : xarray.DataArray or xarray.Dataset
        Must carry a latitude coordinate named 'lat' or 'latitude', in degrees.

    Returns
    -------
    Same xarray type as ``ds``, reduced over the non-time, non-member dimensions.

    Raises
    ------
    RuntimeError
        Propagated from ``get_lat_name`` when no latitude coordinate is found.
    """
    lat = ds[get_lat_name(ds)]
    # cos(lat) is used as the per-latitude weight
    # (presumably a proxy for grid-cell area on a regular lat/lon grid).
    weight = np.cos(np.deg2rad(lat))
    other_dims = [dim for dim in ds.dims if dim not in ('time', 'member_id')]
    # xarray's weighted reduction divides by the sum of the weights of the
    # *valid* cells, so fields with missing values are not biased the way a
    # pre-normalised `(ds * weight).mean()` would be.
    return ds.weighted(weight).mean(other_dims)

In [4]:
# Define the PBS-backed Dask cluster: every job hosts a single one-core
# worker process with 8 GiB of memory.
cluster = PBSCluster(
    # Scheduler-side job settings
    job_name='dask-wk24-hpc',
    queue='casper',
    walltime='5:00:00',
    resource_spec='select=1:ncpus=1:mem=8GB',
    # Per-worker sizing
    cores=1,
    processes=1,
    memory='8GiB',
    # Spill files and job logs both go under the scratch directory
    local_directory=rda_scratch + '/dask/spill',
    log_directory=rda_scratch + '/dask/logs/',
    # Network interface ('ib0' was the previously used alternative)
    # interface='ib0'
    interface='ext',
)

In [5]:
# Connect a Dask distributed client to the PBS cluster; the bare name displays its summary.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/33381/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/33381/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.93:43591,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/33381/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [6]:
# Ask the cluster for 8 workers, then display the cluster summary.
# NOTE(review): the summary can still report 0 workers right after this call,
# presumably until the PBS jobs have started — confirm on the dashboard.
cluster.scale(8)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/33381/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.93:43591,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/33381/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [7]:
# Load the catalog at catalog_url as an intake-esm datastore and display its summary
col = intake.open_esm_datastore(catalog_url)
col

Unnamed: 0,unique
Unnamed: 0,322
variable,53
long_name,51
component,4
experiment,2
forcing_variant,2
frequency,3
vertical_levels,3
spatial_domain,3
units,20


In [8]:
# Restrict the catalog to the monthly TREFHT entries and display the summary
cesm_temp = col.search(variable='TREFHT', frequency='monthly')
cesm_temp

Unnamed: 0,unique
Unnamed: 0,4
variable,1
long_name,1
component,1
experiment,2
forcing_variant,2
frequency,1
vertical_levels,1
spatial_domain,1
units,1


In [9]:
# List the store paths of the catalog entries that matched the search.
cesm_temp.df['path'].values

array(['osdf:///ncar/rda/d010092/atm/monthly/cesm2LE-historical-cmip6-TREFHT.zarr',
       'osdf:///ncar/rda/d010092/atm/monthly/cesm2LE-historical-smbb-TREFHT.zarr',
       'osdf:///ncar/rda/d010092/atm/monthly/cesm2LE-ssp370-cmip6-TREFHT.zarr',
       'osdf:///ncar/rda/d010092/atm/monthly/cesm2LE-ssp370-smbb-TREFHT.zarr'],
      dtype=object)

In [10]:
# Open the matching entries as a dictionary of datasets, keyed by
# 'component.experiment.frequency.forcing_variant'.
dsets_cesm = cesm_temp.to_dataset_dict()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency.forcing_variant'


In [11]:
# Show which dataset keys are available.
dsets_cesm.keys()

dict_keys(['atm.historical.monthly.cmip6', 'atm.historical.monthly.smbb', 'atm.ssp370.monthly.cmip6', 'atm.ssp370.monthly.smbb'])

In [12]:
# Pick the cmip6 forcing variant for both experiments: historical and ssp370.
historical_cmip6 = dsets_cesm['atm.historical.monthly.cmip6']
future_cmip6     = dsets_cesm['atm.ssp370.monthly.cmip6']

In [13]:
# Display the ssp370 dataset to inspect its variables, shapes and chunking.
future_cmip6 

Unnamed: 0,Array,Chunk
Bytes,16.12 kiB,16.12 kiB
Shape,"(1032, 2)","(1032, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 16.12 kiB 16.12 kiB Shape (1032, 2) (1032, 2) Dask graph 1 chunks in 2 graph layers Data type object numpy.ndarray",2  1032,

Unnamed: 0,Array,Chunk
Bytes,16.12 kiB,16.12 kiB
Shape,"(1032, 2)","(1032, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.63 GiB,126.56 MiB
Shape,"(50, 1032, 192, 288)","(1, 600, 192, 288)"
Dask graph,100 chunks in 2 graph layers,100 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.63 GiB 126.56 MiB Shape (50, 1032, 192, 288) (1, 600, 192, 288) Dask graph 100 chunks in 2 graph layers Data type float32 numpy.ndarray",50  1  288  192  1032,

Unnamed: 0,Array,Chunk
Bytes,10.63 GiB,126.56 MiB
Shape,"(50, 1032, 192, 288)","(1, 600, 192, 288)"
Dask graph,100 chunks in 2 graph layers,100 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [17]:
# Append the ssp370 run to the historical run along the time axis and keep
# only the TREFHT variable.
# (A `.dropna(dim='member_id')` step on the concatenated dataset was tried
# here previously and is left disabled.)
merge_ds_cmip6 = xr.concat([historical_cmip6, future_cmip6], dim='time')['TREFHT']
merge_ds_cmip6

Unnamed: 0,Array,Chunk
Bytes,31.02 GiB,126.56 MiB
Shape,"(50, 3012, 192, 288)","(1, 600, 192, 288)"
Dask graph,300 chunks in 5 graph layers,300 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 31.02 GiB 126.56 MiB Shape (50, 3012, 192, 288) (1, 600, 192, 288) Dask graph 300 chunks in 5 graph layers Data type float32 numpy.ndarray",50  1  288  192  3012,

Unnamed: 0,Array,Chunk
Bytes,31.02 GiB,126.56 MiB
Shape,"(50, 3012, 192, 288)","(1, 600, 192, 288)"
Dask graph,300 chunks in 5 graph layers,300 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


### Calculate GMST 

#### Compute annual means and the (spatially weighted) global mean

In [18]:
# Annual means: bin the monthly series by calendar year and average each bin.
# 'YS' (year-start) is the current spelling of the deprecated 'AS' alias.
# NOTE(review): every month gets equal weight regardless of its length — confirm that is intended.
ds_cmip6_annual = merge_ds_cmip6.resample(time='YS').mean()
ds_cmip6_annual

Unnamed: 0,Array,Chunk
Bytes,2.59 GiB,216.00 kiB
Shape,"(50, 251, 192, 288)","(1, 1, 192, 288)"
Dask graph,12550 chunks in 760 graph layers,12550 chunks in 760 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 2.59 GiB 216.00 kiB Shape (50, 251, 192, 288) (1, 1, 192, 288) Dask graph 12550 chunks in 760 graph layers Data type float32 numpy.ndarray",50  1  288  192  251,

Unnamed: 0,Array,Chunk
Bytes,2.59 GiB,216.00 kiB
Shape,"(50, 251, 192, 288)","(1, 1, 192, 288)"
Dask graph,12550 chunks in 760 graph layers,12550 chunks in 760 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [20]:
%%time
gmst_cmip6 = global_mean(ds_cmip6_annual)
gmst_cmip6 = gmst_cmip6.rename('gmst')
gmst_cmip6

CPU times: user 275 ms, sys: 10.1 ms, total: 285 ms
Wall time: 305 ms


Unnamed: 0,Array,Chunk
Bytes,98.05 kiB,8 B
Shape,"(50, 251)","(1, 1)"
Dask graph,12550 chunks in 764 graph layers,12550 chunks in 764 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 98.05 kiB 8 B Shape (50, 251) (1, 1) Dask graph 12550 chunks in 764 graph layers Data type float64 numpy.ndarray",251  50,

Unnamed: 0,Array,Chunk
Bytes,98.05 kiB,8 B
Shape,"(50, 251)","(1, 1)"
Dask graph,12550 chunks in 764 graph layers,12550 chunks in 764 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


### Turn the result into a data frame and plot

In [None]:
# %%time
# gmst_cmip6_df = gmst_cmip6.to_dataframe().reset_index()
# gmst_cmip6_df.head()

In [None]:
# %%time
# sns.relplot(data=gmst_cmip6_df, x="time", y="gmst", hue='member_id',kind="line", ci="sd", aspect=2)

### Compute anomaly and plot

In [30]:
# Anomaly relative to the mean of gmst_cmip6 over all of its dimensions
# (every member and every year), not relative to a fixed baseline period.
gmst_cmip6_ano = gmst_cmip6 - gmst_cmip6.mean()

In [None]:
%%time
# Average the anomaly across ensemble members and plot the resulting series.
gmst_cmip6_ano.mean(dim='member_id').plot()