# Calculate ERA5 climatology using shifted windows

In [None]:
import os
import sys
import yaml
from glob import glob
from datetime import datetime

import numpy as np
import xarray as xr

In [None]:
# ------------------------------------------------- #
# interpolation utils
# from scipy.interpolate import griddata
import scipy.interpolate as spint
from scipy.spatial import Delaunay
import itertools

def interp_weights(xy, uv, d=2):
    """Precompute linear (barycentric) interpolation weights from ``xy`` to ``uv``.

    A Delaunay triangulation is built on the source points ``xy``. Each target
    point in ``uv`` is located in one simplex of that triangulation and is
    expressed in barycentric coordinates of the simplex's vertices. The
    returned pair can be reused with ``interpolate`` for any number of fields
    defined on ``xy``.

    Parameters
    ----------
    xy : array-like, shape (n_source, d)
        Source point coordinates.
    uv : array-like, shape (n_target, d)
        Target point coordinates.
    d : int, optional
        Spatial dimension of the points (default 2).

    Returns
    -------
    vertices : numpy.ndarray, shape (n_target, d + 1)
        Indices into ``xy`` of the enclosing simplex's vertices.
    weights : numpy.ndarray, shape (n_target, d + 1)
        Barycentric weights matching ``vertices``. Rows belonging to target
        points outside the convex hull of ``xy`` are NaN, so interpolated
        values there come out as NaN instead of a silent extrapolation.
    """
    tri = Delaunay(xy)
    simplex = tri.find_simplex(uv)

    # find_simplex reports -1 for points outside the triangulation; np.take
    # would then pick the *last* simplex, so those rows are masked below.
    vertices = np.take(tri.simplices, simplex, axis=0)
    temp = np.take(tri.transform, simplex, axis=0)

    # transform[:, :d, :] is the affine map to barycentric coordinates and
    # transform[:, d, :] is the offset it is applied relative to.
    delta = uv - temp[:, d]
    bary = np.einsum('njk,nk->nj', temp[:, :d, :], delta)

    # The last barycentric coordinate follows from the weights summing to one.
    weights = np.hstack((bary, 1 - bary.sum(axis=1, keepdims=True)))
    weights[simplex == -1] = np.nan
    return vertices, weights

def interpolate(values, vtx, wts):
    """Apply precomputed vertex indices and weights to a field.

    Parameters
    ----------
    values : array-like
        Field on the source points. It is indexed in flattened order, so its
        flattened layout must match the point order the weights were built on.
    vtx : array-like of int, shape (n_target, k)
        For each target point, the flat indices of the source points used.
    wts : array-like, shape (n_target, k)
        Weights matching ``vtx``.

    Returns
    -------
    numpy.ndarray, shape (n_target,)
        Weighted sum of the selected source values for every target point.
    """
    # np.take without an axis indexes the flattened input.
    corner_values = np.take(values, vtx)
    return np.einsum('nj,nj->n', corner_values, wts)

In [None]:
# Put ../libs/ (resolved against the current working directory) at the front
# of the module search path so that verif_utils can be imported; it is used
# below as `vu` (e.g. vu.ds_subset_everything).
sys.path.insert(0, os.path.realpath('../libs/'))
import verif_utils as vu

In [None]:
# Load the YAML configuration that supplies variable lists, rename maps and
# file locations used by the cells below.
config_name = os.path.realpath('verif_config.yml')

# Pin the encoding instead of relying on the platform/locale default, so the
# file is read the same way on every machine.
with open(config_name, 'r', encoding='utf-8') as stream:
    conf = yaml.safe_load(stream)

## Import climatology from WeatherBench 2

In [None]:
# Variables (and levels) to keep, as listed in the config.
variables_levels = conf['ERA5_weatherbench']['verif_variables']

# Open the ERA5 climatology zarr store from the WeatherBench 2 bucket and
# reduce it to the requested variables.
ds_ERA5_clim = vu.ds_subset_everything(
    xr.open_zarr(
        'gs://weatherbench2/datasets/era5-hourly-climatology/1990-2019_6h_1440x721.zarr'),
    variables_levels,
)

# Unify coordinate and variable names, then collapse the length-1 'level'
# dimension and remove the scalar 'level' coordinate that squeeze leaves behind.
ds_ERA5_clim = (
    ds_ERA5_clim
    .rename({'latitude': 'lat', 'longitude': 'lon'})
    .rename(conf['ERA5_weatherbench']['rename_variables'])
    .squeeze('level')
    .drop_vars('level')
)

## Save raw

In [None]:
# # -------------------------------------------------------- #
# # Save raw
# save_name = conf['ERA5_weatherbench']['save_loc_clim'] + 'ERA5_clim_1990_2019_6h.nc'
# print('Save to {}'.format(save_name))
# ds_ERA5_clim.to_netcdf(save_name)

## Interpolation

In [None]:
# Source grid: 1-D lat/lon axes of the ERA5 climatology and their 2-D mesh.
# (The *_IFS suffix is only the naming used in this notebook; the values come
# from ds_ERA5_clim.)
y_IFS = np.array(ds_ERA5_clim['lat'])
x_IFS = np.array(ds_ERA5_clim['lon'])
lon_IFS, lat_IFS = np.meshgrid(x_IFS, y_IFS)

# Target grid: latitude/longitude read from the geo file named in the config.
OURS_dataset = xr.open_dataset(conf['geo']['geo_file_nc'])
y_OURS = np.array(OURS_dataset['latitude'])
x_OURS = np.array(OURS_dataset['longitude'])
lon_OURS, lat_OURS = np.meshgrid(x_OURS, y_OURS)

# Both meshgrid outputs share one shape; for 1-D axes it is (len(lat), len(lon)).
shape_OURS = lat_OURS.shape

In [None]:
# Names of the climatology variables to regrid and the lengths of the two
# climatology time axes.
list_var_names = list(ds_ERA5_clim.data_vars)
L_dayofyear = ds_ERA5_clim.sizes['dayofyear']
L_hour = ds_ERA5_clim.sizes['hour']

# Precomputed regridding weights: a dict stored inside a 0-d object array,
# unwrapped with .item().
# NOTE(review): allow_pickle=True unpickles arbitrary objects on load; only
# point this at a trusted file.
# Presumably 'vtx'/'wts' are the two outputs of interp_weights -- confirm
# against whatever produced the file.
temp_data = np.load(conf['geo']['regrid_weights_numpy'], allow_pickle=True).item()
vtx = temp_data['vtx']
wts = temp_data['wts']

In [None]:
# Output container: keeps the source dayofyear/hour axes and carries the
# target-grid lat/lon coordinates.
ds_clim_regrid = xr.Dataset()
ds_clim_regrid['dayofyear'] = ds_ERA5_clim['dayofyear']
ds_clim_regrid['hour'] = ds_ERA5_clim['hour']
ds_clim_regrid = ds_clim_regrid.assign_coords({'lat': y_OURS, 'lon': x_OURS})

for var_name in list_var_names:

    # (dayofyear, hour, lat, lon) buffer on the target grid; every entry is
    # overwritten in the loop below.
    regridded = np.empty((L_dayofyear, L_hour) + shape_OURS)

    print('Processing var {}'.format(var_name))

    # Walk every (dayofyear, hour) pair, hour varying fastest.
    for i_day, i_hour in np.ndindex(L_dayofyear, L_hour):

        # one 2-D climatology field on the source grid
        field = ds_ERA5_clim[var_name].isel(dayofyear=i_day, hour=i_hour)

        # Linear interpolation with the preloaded vertices/weights, i.e. the
        # griddata(method='linear') computation without rebuilding the
        # triangulation for each field.
        regridded[i_day, i_hour, ...] = np.reshape(
            interpolate(field, vtx, wts), shape_OURS)

    # np.array --> xr.DataArray, stored under the same variable name
    ds_clim_regrid[var_name] = xr.DataArray(
        regridded,
        coords={
            'dayofyear': ds_ERA5_clim['dayofyear'],
            'hour': ds_ERA5_clim['hour'],
            'lat': y_OURS,
            'lon': x_OURS,
        },
        dims=['dayofyear', 'hour', 'lat', 'lon'],
    )

# Write the regridded climatology next to the other climatology outputs.
save_name = conf['ERA5_weatherbench']['save_loc_clim'] + 'ERA5_clim_1990_2019_6h_interp.nc'
print('Save to {}'.format(save_name))
ds_clim_regrid.to_netcdf(save_name)