In [13]:
####### TODO: consider regridding every model onto a common grid

### Set up Workspace

In [7]:
%matplotlib inline

import xarray as xr
import intake
import pandas as pd

# util.py is in the local directory
# it contains code that is common across project notebooks
# or routines that are too extensive and might otherwise clutter
# the notebook design
import util 

### Choose Settings for what to load into dictionary

In [8]:
# Search facets for the catalog query; createDataDict() and CreateDataFrame()
# read these module-level names directly.
this_experiment_id = ["historical", "ssp585"]  # experiments -> DataFrame columns
this_variable_id = "tas"    # variable pulled out of each loaded dataset
this_table_id = "Amon"      # table_id facet of the search
this_grid_label = "gn"      # grid_label facet of the search

### Load Data into Data Dictionary

In [9]:
def createDataDict():
    """Search the CMIP6 catalog and load every matching dataset.

    The catalog file is picked by ``util.is_ncar_host()``. The search is
    driven by the module-level settings ``this_experiment_id``,
    ``this_table_id``, ``this_variable_id`` and ``this_grid_label``.

    Returns
    -------
    dataset_info
        The ``df`` table of the search result (one row per catalog entry).
    dset_dict
        The dictionary returned by ``to_dataset_dict`` for the search result.
    modelnames : list of str
        Unique ``source_id`` values found by the search, in sorted order.
    """
    if util.is_ncar_host():
        catalog_path = "../catalogs/glade-cmip6.json"
    else:
        catalog_path = "../catalogs/pangeo-cmip6.json"
    col = intake.open_esm_datastore(catalog_path)

    cat = col.search(
        experiment_id=this_experiment_id,
        table_id=this_table_id,
        variable_id=this_variable_id,
        grid_label=this_grid_label,
    )
    dataset_info = cat.df

    # decode_times=False is passed for both storage formats, so the time
    # coordinate is left as the raw numbers stored in the files.
    dset_dict = cat.to_dataset_dict(
        zarr_kwargs={'consolidated': True, 'decode_times': False},
        cdf_kwargs={'chunks': {}, 'decode_times': False},
    )

    # Sort the unique model names: list(set(...)) yields an order that can
    # change from one kernel session to the next, which would reshuffle the
    # rows of any table indexed by this list.
    modelnames = sorted(set(dataset_info['source_id']))

    return dataset_info, dset_dict, modelnames

In [10]:
# Run the catalog search and load once; later cells read these three names.
dataset_info, dset_dict, modelnames = createDataDict()

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 28 group(s)


### Create Pandas DataFrame

Structure: rows = models; columns = experiments; each cell = that model's dataset for the experiment (or the string 'No data' if none was loaded)

In [11]:
def CreateDataFrame():
    """Arrange the loaded datasets in a model-by-experiment table.

    Reads the module-level ``modelnames``, ``dataset_info``, ``dset_dict``,
    ``this_experiment_id``, ``this_table_id`` and ``this_grid_label``.

    Returns
    -------
    pandas.DataFrame
        Indexed by model name, one column per entry of
        ``this_experiment_id``. Each cell holds the matching entry of
        ``dset_dict``, or the string ``'No data'`` when no key for that
        model/experiment pair exists in ``dset_dict``.
    """
    df = pd.DataFrame(index=modelnames)

    # The institutions listed for a model do not depend on the experiment,
    # so look them up once. Sorting keeps the lookup order reproducible.
    institutions_by_model = {
        modelname: sorted(set(
            dataset_info.loc[dataset_info['source_id'] == modelname, 'institution_id']
        ))
        for modelname in modelnames
    }

    for expname in this_experiment_id:
        # 'historical' keys start with CMIP; every other experiment is
        # treated as a ScenarioMIP run.
        activity_id = 'CMIP' if expname == 'historical' else 'ScenarioMIP'

        expvals = []
        for modelname in modelnames:
            thisdata = 'No data'
            # A model can appear under more than one institution_id in the
            # catalog table. Try each of them instead of betting on a single
            # arbitrary one, otherwise a dataset that was loaded can be
            # reported as missing.
            for institution_id in institutions_by_model[modelname]:
                nametag = '.'.join([activity_id, institution_id, modelname,
                                    expname, this_table_id, this_grid_label])
                if nametag in dset_dict:
                    thisdata = dset_dict[nametag]
                    break
            expvals.append(thisdata)
        df[expname] = expvals
    return df

In [14]:
# Build the model-by-experiment table, then display one entry as a spot check.
df = CreateDataFrame()
df.loc['GISS-E2-1-G-CC', 'historical'][this_variable_id]

<xarray.DataArray 'tas' (member_id: 1, time: 1980, lat: 90, lon: 144)>
dask.array<transpose, shape=(1, 1980, 90, 144), dtype=float32, chunksize=(1, 1980, 90, 144), chunktype=numpy.ndarray>
Coordinates:
  * lon        (lon) float64 1.25 3.75 6.25 8.75 ... 351.2 353.8 356.2 358.8
  * time       (time) int64 0 708 1416 2148 ... 1442460 1443192 1443924 1444656
  * lat        (lat) float64 -89.0 -87.0 -85.0 -83.0 ... 83.0 85.0 87.0 89.0
  * member_id  (member_id) <U8 'r1i1p1f1'
Attributes:
    cell_measures:  area: areacella
    cell_methods:   area: time: mean
    comment:        near-surface (usually, 2 meter) air temperature
    long_name:      Near-Surface Air Temperature
    standard_name:  air_temperature
    units:          K

In [None]:
### Changed this