In [1]:
%matplotlib inline

import xarray as xr
import intake
import pandas as pd
import xarray as xr

# util.py is in the local directory
# it contains code that is common across project notebooks
# or routines that are too extensive and might otherwise clutter
# the notebook design
import util 



In [2]:
# Search facets for the CMIP6 catalog query; every cell below reads these.
this_experiment_id = [
    'historical',
    'ssp126',
    'ssp245',
    'ssp370',
    'ssp585',
]
this_variable_id = 'tas'
this_table_id = 'Amon'
this_grid_label = 'gn'

In [3]:
def createDataDict():
    """Open the CMIP6 catalog, search it with the notebook's facets and load the matches.

    The catalog file is picked by ``util.is_ncar_host()``: the GLADE catalog
    when it returns a true value, the Pangeo catalog otherwise. The search is
    driven by the module-level ``this_experiment_id``, ``this_table_id``,
    ``this_variable_id`` and ``this_grid_label``.

    Returns
    -------
    dataset_info
        The table of matching catalog entries (``cat.df``).
    dset_dict
        The dictionary returned by ``cat.to_dataset_dict``. Datasets are
        opened with ``decode_times=False``, so time coordinates stay numeric.
    modelnames : list of str
        The distinct ``source_id`` values in ``dataset_info``, sorted so the
        order is the same on every run.
    """
    if util.is_ncar_host():
        catalog_path = "../catalogs/glade-cmip6.json"
    else:
        catalog_path = "../catalogs/pangeo-cmip6.json"
    col = intake.open_esm_datastore(catalog_path)

    cat = col.search(
        experiment_id=this_experiment_id,
        table_id=this_table_id,
        variable_id=this_variable_id,
        grid_label=this_grid_label,
    )
    dataset_info = cat.df

    dset_dict = cat.to_dataset_dict(
        zarr_kwargs={'consolidated': True, 'decode_times': False},
        cdf_kwargs={'chunks': {}, 'decode_times': False},
    )

    # A bare list(set(...)) yields a different order in every kernel session
    # (string hashing is randomized); sorting keeps the result reproducible.
    modelnames = sorted(set(dataset_info['source_id']))

    return dataset_info, dset_dict, modelnames

In [4]:
# Run the catalog search once; the three results are reused by the cells below.
dataset_info, dset_dict, modelnames = createDataDict()

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 60 group(s)


In [9]:
# Collect the spatially averaged `this_variable_id` field of one model for
# every experiment in `this_experiment_id`.
modelname = 'CESM2'

# Catalog rows for this model. Neither the subset nor the institution depends
# on the experiment, so both are resolved once instead of on every iteration.
dataset_info_subset = dataset_info[dataset_info['source_id']==modelname]
institution_ids = sorted(set(dataset_info_subset['institution_id']))
if not institution_ids:
    raise ValueError("No catalog entries found for source_id '" + modelname + "'")
# sorted() makes the pick reproducible should a model be listed under
# more than one institution.
institution_id = institution_ids[0]

scenarios_for1model = dict()
for expname in this_experiment_id:
    # Dataset keys start with the activity: 'CMIP' for the historical run,
    # 'ScenarioMIP' for every other experiment requested here.
    activity_id = 'CMIP' if expname=='historical' else 'ScenarioMIP'
    nametag = '.'.join([activity_id, institution_id, modelname, expname,
                        this_table_id, this_grid_label])
    if nametag in dset_dict:
        field = dset_dict[nametag][this_variable_id]
        # Some datasets name their horizontal dimensions 'latitude'/'longitude'
        # rather than 'lat'/'lon'. Inspect the array itself instead of
        # special-casing a single dataset key, so any such dataset works.
        if 'lat' in field.dims:
            spatial_dims = ['lat', 'lon']
        else:
            spatial_dims = ['latitude', 'longitude']
        # NOTE(review): this is a plain mean over grid cells, not an
        # area-weighted mean -- confirm that is what is intended.
        thisdata = field.mean(dim=spatial_dims)
    else:
        # Sentinel kept as-is: experiments without a dataset map to this string.
        thisdata = 'No data'
    scenarios_for1model[expname] = thisdata

In [10]:
# Show the per-experiment results for `modelname`: each value is the spatially
# averaged DataArray, or the string 'No data' when no dataset was found.
scenarios_for1model

{'historical': <xarray.DataArray 'tas' (member_id: 11, time: 3960)>
 dask.array<mean_agg-aggregate, shape=(11, 3960), dtype=float32, chunksize=(1, 950), chunktype=numpy.ndarray>
 Coordinates:
   * time       (time) float64 0.0 708.0 1.416e+03 ... 1.444e+06 1.445e+06
   * member_id  (member_id) <U9 'r10i1p1f1' 'r11i1p1f1' ... 'r8i1p1f1' 'r9i1p1f1',
 'ssp126': <xarray.DataArray 'tas' (member_id: 1, time: 1032)>
 dask.array<mean_agg-aggregate, shape=(1, 1032), dtype=float32, chunksize=(1, 817), chunktype=numpy.ndarray>
 Coordinates:
   * time       (time) int64 0 708 1416 2148 2880 ... 750420 751152 751884 752616
   * member_id  (member_id) <U8 'r1i1p1f1',
 'ssp245': <xarray.DataArray 'tas' (member_id: 1, time: 1032)>
 dask.array<mean_agg-aggregate, shape=(1, 1032), dtype=float32, chunksize=(1, 600), chunktype=numpy.ndarray>
 Coordinates:
   * time       (time) int64 0 708 1416 2148 2880 ... 750420 751152 751884 752616
   * member_id  (member_id) <U8 'r1i1p1f1',
 'ssp370': <xarray.DataArr