# Bias-correct CESM2 LENS temperature data using ERA5 reanalysis

In [1]:
# Display output of plots directly in Notebook
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

import intake
import numpy as np
import pandas as pd
import xarray as xr
import s3fs
import seaborn as sns
import re
import nest_asyncio
nest_asyncio.apply()

In [2]:
import fsspec.implementations.http as fshttp
from pelicanfs.core import PelicanFileSystem, PelicanMap, OSDFFileSystem 

In [3]:
import dask 
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.distributed import performance_report

In [4]:
# Root scratch directory; used below to build the Dask spill/log directories
# and the path of the local intake catalog.
# NOTE(review): this is an absolute, user-specific path — adjust before running as another user.
rda_scratch = '/gpfs/csfs1/collections/rda/scratch/harshah'

## Create a PBS cluster

In [5]:
# Define a PBS-backed Dask cluster. Each PBS job runs a single worker process
# with one core and 8GiB of memory; spill files and job logs go under rda_scratch.
cluster = PBSCluster(
    # --- PBS job settings ---
    job_name='dask-wk24-hpc',
    queue='casper',
    walltime='2:00:00',
    resource_spec='select=1:ncpus=1:mem=8GB',
    # --- per-job Dask worker settings ---
    cores=1,
    processes=1,
    memory='8GiB',
    # --- scratch locations ---
    local_directory=rda_scratch + '/dask/spill',
    log_directory=rda_scratch + '/dask/logs/',
    # --- networking (alternative: interface='ib0') ---
    interface='ext',
)

In [6]:
# Request 20 Dask workers from the cluster
cluster.scale(20)

In [7]:
# Display the cluster summary (dashboard link, worker count, threads, memory)
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/42183/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.96:38307,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/harshah/proxy/42183/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Load CESM LENS2 temperature data

In [8]:
# Open the intake-esm catalog stored as JSON under the scratch directory
# (the 'posix' copy of aws-cesm2-le.json), then display its summary.
cesm_cat = intake.open_esm_datastore(rda_scratch + '/intake_catalogs/posix/aws-cesm2-le.json')
cesm_cat

Unnamed: 0,unique
Unnamed: 0,322
variable,53
long_name,51
component,4
experiment,2
forcing_variant,2
frequency,3
vertical_levels,3
spatial_domain,3
units,20


In [9]:
# Narrow the catalog to the daily TREFHTMX entries (long_name in the catalog:
# "maximum reference height temperature over outp..."), then display the summary.
cesm_temp = cesm_cat.search(variable ='TREFHTMX', frequency ='daily')
cesm_temp

Unnamed: 0,unique
Unnamed: 0,4
variable,1
long_name,1
component,1
experiment,2
forcing_variant,2
frequency,1
vertical_levels,1
spatial_domain,1
units,1


In [10]:
# Inspect the matching catalog rows (experiment, forcing variant, time range, path)
cesm_temp.df

Unnamed: 0.1,Unnamed: 0,variable,long_name,component,experiment,forcing_variant,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,18,TREFHTMX,maximum reference height temperature over outp...,atm,historical,cmip6,daily,1.0,global,K,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
1,41,TREFHTMX,maximum reference height temperature over outp...,atm,historical,smbb,daily,1.0,global,K,1850-01-01 12:00:00,2014-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
2,63,TREFHTMX,maximum reference height temperature over outp...,atm,ssp370,cmip6,daily,1.0,global,K,2015-01-01 12:00:00,2100-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...
3,84,TREFHTMX,maximum reference height temperature over outp...,atm,ssp370,smbb,daily,1.0,global,K,2015-01-01 12:00:00,2100-12-31 12:00:00,/glade/campaign/collections/rda/transfer/chifa...


In [11]:
# Load every matching catalog entry into a dictionary of datasets; keys follow
# the pattern 'component.experiment.frequency.forcing_variant'.
dsets_cesm = cesm_temp.to_dataset_dict()


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency.forcing_variant'


In [12]:
# Pick out the historical and ssp370 datasets for each forcing variant.
# smbb-forced members:
historical_smbb, future_smbb = (
    dsets_cesm['atm.historical.daily.smbb'],
    dsets_cesm['atm.ssp370.daily.smbb'],
)

# cmip6-forced members:
historical_cmip6, future_cmip6 = (
    dsets_cesm['atm.historical.daily.cmip6'],
    dsets_cesm['atm.ssp370.daily.cmip6'],
)

In [None]:
%%time
merge_ds_smbb = xr.concat([historical_smbb, future_smbb], dim='time')
merge_ds_smbb = merge_ds_smbb.dropna(dim='member_id')

merge_ds_cmip6= xr.concat([historical_cmip6, future_cmip6], dim='time')
merge_ds_cmip6 = merge_ds_cmip6.dropna(dim='member_id')

In [None]:
# Extract the temperature variable from the merged datasets.
# The catalog search that produced these datasets selected 'TREFHTMX' only, so
# the datasets have no 'TREFHT' variable (accessing `.TREFHT` raises AttributeError).
t_smbb = merge_ds_smbb['TREFHTMX']
t_cmip6 = merge_ds_cmip6['TREFHTMX']

# 1961-1990 subset of the cmip6-forced members
t_ref = t_cmip6.sel(time=slice('1961', '1990'))

# Comments
- Using PelicanFS to access the data (TODO: finish this note — the target dataset/endpoint is not stated)

In [None]:
# Create a PBS cluster object: one single-core worker process with 8GiB per PBS job.
# NOTE(review): this rebinds `cluster`, which an earlier cell already created and
# scaled; the earlier cluster is not closed here — confirm whether both are needed.
cluster = PBSCluster(
    job_name = 'dask-wk24-hpc',
    cores = 1,
    memory = '8GiB',
    processes = 1,
    local_directory = rda_scratch+'/dask/spill',
    # Send PBS job logs to the same scratch log directory as the earlier cluster
    # definition in this notebook, rather than the default location.
    log_directory = rda_scratch + '/dask/logs/',
    resource_spec = 'select=1:ncpus=1:mem=8GB',
    queue = 'casper',
    walltime = '2:00:00',
    #interface = 'ib0'
    interface = 'ext'
)

# Access the data from the AWS bucket using intake, for comparison with the local catalog

In [None]:
# Open the collection description file (aws-cesm2-le.json, fetched from the
# NCAR/cesm2-le-aws GitHub repository) using intake, then display its summary
catalog = intake.open_esm_datastore(
    'https://raw.githubusercontent.com/NCAR/cesm2-le-aws/main/intake-catalogs/aws-cesm2-le.json'
)
catalog

In [None]:
# Narrow the AWS catalog to daily TREFHT entries, then display the summary.
# NOTE(review): the local-catalog search earlier in this notebook uses 'TREFHTMX',
# not 'TREFHT' — confirm which variable is intended for the comparison.
catalog_subset = catalog.search(variable='TREFHT', frequency='daily')
catalog_subset

In [None]:
# Inspect the matching catalog rows as a table
catalog_subset.df

In [None]:
# Show the 'path' value of the catalog row with index label 0
catalog_subset.df.loc[0,'path']

In [None]:
# Load the matching entries into a dictionary of datasets.
# 'anon': True is forwarded as a storage option — presumably anonymous (credential-free)
# access to the public bucket; confirm against the storage backend in use.
dsets = catalog_subset.to_dataset_dict(storage_options={'anon':True})

In [None]:
# List the keys of the loaded dataset dictionary
dsets.keys()

In [None]:
# GMST helper functions:
# compute latitude-weighted global means

def get_lat_name(ds):
    """Return the name of the latitude coordinate of ``ds``.

    The names 'lat' and 'latitude' are tried in that order, and the first one
    found in ``ds.coords`` is returned.

    Raises:
        RuntimeError: if neither name is present in ``ds.coords``.
    """
    candidates = ('lat', 'latitude')
    match = next((name for name in candidates if name in ds.coords), None)
    if match is None:
        raise RuntimeError("Couldn't find a latitude coordinate")
    return match

def global_mean(ds):
    """Average ``ds`` over every dimension except 'time' and 'member_id',
    weighting by the cosine of latitude.

    The latitude coordinate is located with ``get_lat_name``. The cosine weights
    are divided by their own mean before being multiplied into ``ds``.

    NOTE(review): the weights are normalised over all latitudes regardless of
    missing values in ``ds`` — confirm the inputs are NaN-free where that matters.
    """
    lat_coord = ds[get_lat_name(ds)]
    cos_lat = np.cos(np.deg2rad(lat_coord))
    # Normalise in place so the weights have mean 1
    cos_lat /= cos_lat.mean()
    reduce_over = {dim for dim in ds.dims if dim not in ('time', 'member_id')}
    weighted = ds * cos_lat
    return weighted.mean(reduce_over)

In [None]:
# Connect a Dask client to the cluster and display its summary
client = Client(cluster)
client

In [None]:
# Request 8 Dask workers from the cluster, then display the cluster summary
cluster.scale(8)
cluster

### Calculate GMST 

#### Now compute (spatially weighted) Global Mean