# Gather CESM2 SMYLE and DP runs 

In [1]:
import zarr
import numpy as np
import xarray as xr
from glob import glob

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

## File info

In [3]:
# Example day_1 (daily) time-series directory for one case:
# /glade/campaign/cesm/development/espwg/CESM2-DP/timeseries/b.e21.BSMYLE.f09_g17.2000-11.011/atm/proc/tseries/day_1
#
# Surface-temperature variables that are listed here for reference but NOT gathered:
# 'TS': 'Surface temperature (radiative) [K]',
# 'TSMN': 'Surface Temperature Min [K]',
# 'TSMX': 'Surface temperature Max [K]',

# Variables to gather: the keys are the variable names used to locate the files
# and select the data; the values are human-readable descriptions with units.
# Each description is unique so the min/max variants can be told apart from the mean.
varname_dict = {
    'PSL': 'Sea level pressure [Pa]',
    'TREFHT': '2-m Air Temperature [K]',
    'TREFHTMN': '2-m Air Temperature Min [K]',
    'TREFHTMX': '2-m Air Temperature Max [K]',
    'QREFHT': '2-m Specific Humidity [kg/kg]',
    'PRECT': 'Total Precipitation [m/s]',
    'PRECSC': 'Convective snow rate (water equivalent) [m/s]',
    'PRECSL': 'Large-scale (stable) snow rate (water equivalent) [m/s]',
    'TMQ': 'Total (vertically integrated) precipitable water [kg/m2]',
    'FLDS': 'Downwelling longwave flux at surface [W/m2]',
    'FSDS': 'Downwelling solar flux at surface [W/m2]',
    'U10': '10-m wind speed [m/s]',
}


## Single year SMYLE + DP

In [4]:
# Initialization year: selects the `{year}-11` case directories and anchors
# the time slices applied when the DP and SMYLE runs are merged.
year = 1958

In [5]:
# Case-name prefix shared by the DP and SMYLE runs; the per-member case
# directory is built as f'{base_case}.{year}-11.{member:03d}'.
base_case = 'b.e21.BSMYLE.f09_g17'
# Root directories under which the per-case DP and SMYLE output is looked up.
base_dir_DP = '/glade/campaign/cesm/development/espwg/CESM2-DP/timeseries/'
base_dir_SMYLE = '/glade/campaign/cesm/development/espwg/SMYLE/archive/'

**DP collect**

In [6]:
# Ensemble member numbers 11..30 (formatted as 011..030 in the case names).
members = np.arange(11, 31, 1)
# Passing `chunks` makes xarray open the files as chunked (dask-backed) arrays.
open_kwargs = dict(engine="netcdf4", chunks={"time": 6, "lev": 12, "lat": 192, "lon": 288})

# One merged dataset (all requested variables) per ensemble member.
ds_full_DP = []

for mem in members:

    str_member = f'{mem:03d}'
    fn_daily = base_dir_DP + f'{base_case}.{year}-11.{str_member}/atm/proc/tseries/day_1/'

    ds_collect = []
    for varname in varname_dict:
        # glob() returns matches in arbitrary order; sort so the pick is deterministic.
        # NOTE(review): only the first match is opened -- if a variable is split
        # across several files, the remaining files are ignored.
        fn_matches = sorted(glob(fn_daily + f'*{varname}.*'))
        if not fn_matches:
            raise FileNotFoundError(
                f'No file matching "*{varname}.*" found in {fn_daily}'
            )
        # Keep only the requested variable (plus its coordinates).
        ds_collect.append(xr.open_dataset(fn_matches[0], **open_kwargs)[[varname]])

    ds_full_DP.append(xr.merge(ds_collect))

# Overwrite every member's lat/lon with the first member's values so all members
# carry identical coordinates (presumably to avoid misalignment from tiny
# floating-point differences when concatenating -- TODO confirm).
lat = ds_full_DP[0]['lat'].values
lon = ds_full_DP[0]['lon'].values

for ds in ds_full_DP:
    ds['lat'] = lat
    ds['lon'] = lon

**SMYLE collect**

In [7]:
# Ensemble member numbers 11..30 (formatted as 011..030 in the case names).
members = np.arange(11, 31, 1)
# Passing `chunks` makes xarray open the files as chunked (dask-backed) arrays.
open_kwargs = dict(engine="netcdf4", chunks={"time": 6, "lev": 12, "lat": 192, "lon": 288})

# One merged dataset (all requested variables) per ensemble member.
ds_full_SMYLE = []

for mem in members:

    str_member = f'{mem:03d}'
    fn_daily = base_dir_SMYLE + f'{base_case}.{year}-11.{str_member}/atm/proc/tseries/day_1/'

    ds_collect = []
    for varname in varname_dict:
        # glob() returns matches in arbitrary order; sort so the pick is deterministic.
        # NOTE(review): only the first match is opened -- if a variable is split
        # across several files, the remaining files are ignored.
        fn_matches = sorted(glob(fn_daily + f'*{varname}.*'))
        if not fn_matches:
            raise FileNotFoundError(
                f'No file matching "*{varname}.*" found in {fn_daily}'
            )
        # Keep only the requested variable (plus its coordinates).
        ds_collect.append(xr.open_dataset(fn_matches[0], **open_kwargs)[[varname]])

    ds_full_SMYLE.append(xr.merge(ds_collect))

# Overwrite every member's lat/lon with the first member's values so all members
# carry identical coordinates (presumably to avoid misalignment from tiny
# floating-point differences when concatenating -- TODO confirm).
lat = ds_full_SMYLE[0]['lat'].values
lon = ds_full_SMYLE[0]['lon'].values

for ds in ds_full_SMYLE:
    ds['lat'] = lat
    ds['lon'] = lon

**Merge**

In [8]:
# Stack the per-member DP datasets along a new "member" dimension labelled with
# the member numbers, then keep Nov 1 of year+2 through Dec 31 of year+10.
ds_DP = (
    xr.concat(ds_full_DP, dim=xr.DataArray(members, dims="member", name="member"))
    .assign_coords(member=("member", members))
    .sel(time=slice(f'{year+2}-11-01T00', f'{year+10}-12-31T00'))
)

# Same stacking for SMYLE, keeping Nov 1 of year through Oct 31 of year+2.
ds_SMYLE = (
    xr.concat(ds_full_SMYLE, dim=xr.DataArray(members, dims="member", name="member"))
    .assign_coords(member=("member", members))
    .sel(time=slice(f'{year}-11-01T00', f'{year+2}-10-31T00'))
)

In [9]:
# Join the SMYLE period and the DP period end-to-end along time, then rechunk
# so each chunk holds all 20 members for 6 time steps on the full lat/lon grid.
ds_year = (
    xr.concat([ds_SMYLE, ds_DP], dim='time')
    .chunk({"member": 20, "time": 6, "lat": 192, "lon": 288})
)

In [10]:
# Names of the variables in the merged dataset that get an encoding entry.
varnames = list(ds_year.keys())
# Variables that would need the 5-element (with lev) chunking; none are listed,
# so every variable currently receives the 4-element chunking below.
varname_4D = []

chunk_size_3d = dict(chunks=(20, 6, 192, 288)) # member, time, lat, lon
chunk_size_4d = dict(chunks=(20, 6, 12, 192, 288)) # member, time, lev, lat, lon
compress = zarr.Blosc(cname='zstd', clevel=1, shuffle=zarr.Blosc.SHUFFLE, blocksize=0)

# Per-variable encoding: shared compressor plus the chunk layout for its rank.
dict_encoding = {
    var: {
        'compressor': compress,
        **(chunk_size_4d if var in varname_4D else chunk_size_3d),
    }
    for var in varnames
}

In [14]:
# Spot check: read a single PSL value by positional index
# (member index 11, time index 1999, lat index 99, lon index 122).
ds_year['PSL'].isel({'member': 11, 'time': 1999, 'lat': 99, 'lon': 122}).values

array(100783.04, dtype=float32)

In [4]:
# Destination Zarr store for the gathered ensemble, named by initialization year.
save_name = (
    '/glade/derecho/scratch/ksha/EPRI_data/CESM2_SMYLE/'
    f'SMYLE_{year}-11-01_daily_ensemble.zarr'
)
# The write itself is commented out; uncomment to (over)write the store using
# the per-variable encoding built above.
# ds_year.to_zarr(save_name, mode='w', consolidated=True, compute=True, encoding=dict_encoding)

In [37]:
# fn = '/glade/derecho/scratch/ksha/EPRI_data/CESM2_SMYLE/SMYLE_1958-11-01_daily_ensemble.zarr'
# ds = xr.open_zarr(fn)