In [13]:
####### CONSIDER REGRIDDING EVERYTHING

### Set up Workspace

In [2]:
%matplotlib inline

import xarray as xr
import intake
import pandas as pd
import xarray as xr

# util.py is in the local directory
# it contains code that is common across project notebooks
# or routines that are too extensive and might otherwise clutter
# the notebook design
import util 



### Choose Settings for what to load into dictionary

In [3]:
this_experiment_id = ['historical','ssp585']
this_variable_id = 'tas'
this_table_id = 'Amon'
this_grid_label='gn'

### Load Data into Data Dictionary

In [4]:
def createDataDict():
    if util.is_ncar_host():
        col = intake.open_esm_datastore("../catalogs/glade-cmip6.json")
    else:
        col = intake.open_esm_datastore("../catalogs/pangeo-cmip6.json")
    
    cat = col.search(experiment_id=this_experiment_id, \
                     table_id=this_table_id, \
                     variable_id=this_variable_id, \
                     grid_label=this_grid_label)
    dataset_info = cat.df
    
    dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True, 'decode_times': False}, 
                                cdf_kwargs={'chunks': {}, 'decode_times': False})
    #dset_dict.keys()
    
    source_ids = cat.df['source_id']
    modelnames = list(set(source_ids))
    
    return dataset_info, dset_dict, modelnames

In [5]:
[dataset_info, dset_dict, modelnames]=createDataDict()

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 28 group(s)


### Create Pandas Dataset

Structure: rows = models; columns = scenarios; data = timeseries

In [6]:
def CreateDataFrame1():
    df = pd.DataFrame(index=modelnames) 
    for expname in this_experiment_id:
        experiment_id=expname
        expvals = [None] * len(modelnames)
        df[expname]=expvals
        if expname=='historical':
            activity_id='CMIP'
        else:
            activity_id='ScenarioMIP'
        for modelname in modelnames:
            source_id = modelname
            dataset_info_subset = dataset_info[dataset_info['source_id']==source_id]
            institution_id = list(set(dataset_info_subset['institution_id']))[0]
            nametag = activity_id+'.'+institution_id+'.'+source_id+'.'+experiment_id+'.'+this_table_id+'.'+this_grid_label
            if nametag in dset_dict:
                if nametag=='CMIP.UA.MCM-UA-1-0.historical.Amon.gn':
                    thisdata=dset_dict[nametag][this_variable_id].mean(dim=['latitude','longitude'])
                else:
                    thisdata=dset_dict[nametag][this_variable_id].mean(dim=['lat','lon'])
            else:
                thisdata='No data'
            df[expname][modelname]=thisdata
    return df

In [13]:
df = CreateDataFrame1()

### WANT TO MAKE A MULTIMODEL DATA STRUCTURE WITH DIMENSIONS OF: MODELNAME, TIME, SCENARIO

Beyond this point, this is me trying to experiment with different data structures, etc. Doesn't really work/make sense

In [22]:
modelnames_to_test_time_type = ['NESM3', 'BCC-CSM2-MR', 'NorCPM1', 'UKESM1-0-LL', 'GISS-E2-1-G-CC', 'SAM0-UNICON', 'CanESM5', 'MCM-UA-1-0', 'GISS-E2-1-G', 'MIROC6', 'MRI-ESM2-0', 'BCC-ESM1', 'MIROC-ES2L', 'CESM2-WACCM', 'CESM2', 'CAMS-CSM1-0', 'GISS-E2-1-H', 'HadGEM3-GC31-LL']
for modelname in modelnames_to_test_time_type:
    print(df['historical'][modelname]['time'].attrs['calendar'])

gregorian
365_day
noleap
360_day
noleap
noleap
365_day
noleap
noleap
proleptic_gregorian
proleptic_gregorian
365_day
gregorian
noleap
noleap
365_day
noleap
360_day


In [None]:
nametag='CMIP.NCAR.CESM2.historical.Amon.gn'
thistime=dset_dict[nametag]['time']
thisxr =    xr.DataArray(coords=[modelnames, thistime], dims=['modelnames', 'time'])
thisxr[modelnames=='NorCPM1']=dset_dict[nametag]['tas']

In [29]:
experiment_id='historical'
activity_id='CMIP'
for modelname in modelnames:
    source_id = modelname
    dataset_info_subset = dataset_info[dataset_info['source_id']==source_id]
    institution_id = list(set(dataset_info_subset['institution_id']))[0]
    nametag = activity_id+'.'+institution_id+'.'+source_id+'.'+experiment_id+'.'+this_table_id+'.'+this_grid_label
    if nametag in dset_dict:
        print(nametag)
        if nametag=='CMIP.UA.MCM-UA-1-0.historical.Amon.gn':
            thisdata=dset_dict[nametag][this_variable_id].mean(dim=['latitude','longitude'])
        else:
            thisdata=dset_dict[nametag][this_variable_id].mean(dim=['lat','lon'])
    else:
        thisdata='No data'

CMIP.NCC.NorCPM1.historical.Amon.gn
CMIP.CAMS.CAMS-CSM1-0.historical.Amon.gn
CMIP.BCC.BCC-ESM1.historical.Amon.gn
CMIP.UA.MCM-UA-1-0.historical.Amon.gn
CMIP.NASA-GISS.GISS-E2-1-G-CC.historical.Amon.gn
CMIP.BCC.BCC-CSM2-MR.historical.Amon.gn
CMIP.MRI.MRI-ESM2-0.historical.Amon.gn
CMIP.NUIST.NESM3.historical.Amon.gn
CMIP.NASA-GISS.GISS-E2-1-G.historical.Amon.gn
CMIP.NASA-GISS.GISS-E2-1-H.historical.Amon.gn
CMIP.MOHC.HadGEM3-GC31-LL.historical.Amon.gn
CMIP.MIROC.MIROC6.historical.Amon.gn
CMIP.NCAR.CESM2.historical.Amon.gn
CMIP.NCAR.CESM2-WACCM.historical.Amon.gn
CMIP.CCCma.CanESM5.historical.Amon.gn
CMIP.MIROC.MIROC-ES2L.historical.Amon.gn
CMIP.SNU.SAM0-UNICON.historical.Amon.gn
CMIP.MOHC.UKESM1-0-LL.historical.Amon.gn
