# Create residual norm files for CREDIT

In [1]:
import os
import yaml
import copy
import numpy as np
import xarray as xr

In [2]:
from scipy.stats import gmean

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

## 1 deg residual coeff

In [4]:
# Load the data_preprocessing config (variable information).
# realpath() resolves the file name against the current working directory.
config_name = os.path.realpath('data_config_6h.yml')

with open(config_name, mode='r') as config_file:
    conf = yaml.safe_load(config_file)

In [11]:
# Number of vertical levels; the loading cell reads one mean/std file per
# level index in range(N_levels) for each upper-air variable.
N_levels = 16
# Root directory of the 1-deg data. Paths are built from it by plain string
# concatenation, so the trailing '/' is required.
base_dir = '/glade/derecho/scratch/ksha/CREDIT_data/ERA5_mlevel_1deg_stage1/'

In [6]:
# get variable names
# NOTE(review): the [:-5] slice assumes the last five keys of conf['residual']
# are non-variable settings (such as 'save_loc' and 'prefix') -- confirm
# against the YAML whenever the config changes.
varnames = list(conf['residual'].keys())
varnames = varnames[:-5] # remove save_loc and others

varname_upper = ['U', 'V', 'T', 'Q']
# Everything that is not an upper-air variable is read from a single file.
# Filtering the list (instead of taking a set difference) keeps the config
# order, so the variable order is the same in every kernel session.
varname_surf = [name for name in varnames if name not in varname_upper]

# collect computed mean and variance values
# See "qsub_STEP01_compute_mean_std.ipynb"
MEAN_values = {}
STD_values = {}

for varname in varname_surf:
    # one file per variable; entry 0 goes to MEAN_values, entry 1 to STD_values
    save_name = conf['residual']['save_loc'] + '{}_mean_std_{}.npy'.format(conf['residual']['prefix'], varname)
    mean_std = np.load(save_name)
    MEAN_values[varname] = mean_std[0]
    STD_values[varname] = mean_std[1]

for varname in varname_upper:

    # -------------------------------------------- #
    # allocate all levels, NaN-filled (row 0 -> MEAN_values, row 1 -> STD_values)
    mean_std_all_levels = np.full((2, N_levels), np.nan)

    for i_level in range(N_levels):
        # one file per (variable, level index)
        save_name = conf['residual']['save_loc'] + '{}_level{}_mean_std_{}.npy'.format(
            conf['residual']['prefix'], i_level, varname)
        mean_std = np.load(save_name)
        mean_std_all_levels[:, i_level] = mean_std

    # -------------------------------------------- #
    # save
    MEAN_values[varname] = np.copy(mean_std_all_levels[0, :])
    STD_values[varname] = np.copy(mean_std_all_levels[1, :])

In [7]:
# Split the stored values into the single-file (surface) group and the
# per-level (upper-air) group. Values are looked up by variable name rather
# than by slicing list(STD_values.values()) by position, so the split does not
# depend on the order in which STD_values happened to be filled.
std_val_all = list(STD_values.values())
std_val_surf = np.array([STD_values[name] for name in varname_surf])
std_val_upper = [STD_values[name] for name in varname_upper]

In [9]:
# Pool the surface values and every upper-air level into one array, then use
# the geometric mean of their square roots as the common scaling factor.
std_concat = np.concatenate([std_val_surf, *std_val_upper])
std_g = gmean(np.sqrt(std_concat))

In [12]:
# Open the 1979 store as a template; the next cell reads its 'level' coordinate.
ds_example = xr.open_zarr(base_dir+'all_in_one/ERA5_mlevel_1deg_6h_lev16_1979.zarr')

In [13]:
# ------------------------------------------------------- #
# Build an xr.Dataset holding the scaled coefficient of every variable.
# The level coordinate is copied from the example store.
level = np.array(ds_example['level'])

ds_std_6h = xr.Dataset(coords={"level": level})

for varname, stored_value in STD_values.items():
    # <--- var to std and divided by std_g
    scaled = np.sqrt(stored_value) / std_g
    if np.ndim(scaled) == 1:
        # 1-D values: attach the level dimension and coordinate
        entry = xr.DataArray(scaled, dims=["level",], coords={"level": level}, name=varname)
    else:
        # anything else (e.g. a 0-d value) is stored without dims/coords
        entry = xr.DataArray(scaled, name=varname)
    ds_std_6h[varname] = entry

In [14]:
# Write the scaled coefficients to NetCDF; the comparison cell reads this file back.
ds_std_6h.to_netcdf(base_dir+'mean_std/residual_6h_1979_2018_1deg.nc')

In [15]:
# ------------------------------------------------------- #
# Compare with my old ones
# Both files are read fully into memory and closed straight away. A lazily
# opened handle left on the new file can block re-running the to_netcdf cell
# that overwrites it.
with xr.open_dataset(base_dir+'mean_std/residual_6h_1979_2018_1deg.nc') as ds_new:
    new_std = ds_new.load()
with xr.open_dataset('/glade/campaign/cisl/aiml/ksha/CREDIT/residual_6h_1979_2018_16lev_0.25deg.nc') as ds_old:
    old_std = ds_old.load()

# Print the new values first, then the old ones, for every variable.
for varname in varnames:
    print('=============== {} ================='.format(varname))
    print(np.array(new_std[varname]))
    print(np.array(old_std[varname]))

[2.22877341 0.53753047 0.62650154 0.56019855 1.15624953 1.5771914
 1.78807265 1.68954145 1.61768614 1.480396   1.41090533 1.27505268
 0.71039864 0.47997947 0.47196743 0.46551926]
[2.16997428 0.53433262 0.62570473 0.55720494 1.20616546 1.67091602
 1.91372493 1.75973037 1.68885545 1.54658143 1.48259999 1.36626287
 0.80051818 0.5225825  0.51008686 0.50359098]
1.5569376753269437
1.58681366782801
0.10954554882597509
0.10572432808125728
[1.48663303 0.63306474 0.54263562 0.49398754 0.35977363 0.63002081
 0.67261589 0.40241168 0.40389084 0.38125369 0.35218385 0.35031988
 0.35146155 0.36032256 0.40865243 0.44163144]
[1.44029965 0.62705213 0.54127914 0.49537273 0.36188953 0.6233748
 0.6603371  0.39599639 0.39746314 0.37602801 0.34882406 0.35038579
 0.35424132 0.36267483 0.40894171 0.43969869]
0.4449076539389654
0.43393328129223074
[0.97168118 0.63959321 0.63282095 0.78192577 0.85711048 0.85819257
 1.26184492 1.45473614 1.40217659 1.35780377 1.37412031 1.45085895
 1.61508644 1.69310209 1.65991458

## CESM grid residual coeff

In [4]:
# Load the data_preprocessing config (variable information) for the CESM grid.
# realpath() resolves the file name against the current working directory.
config_name = os.path.realpath('data_config_cesm_6h.yml')

with open(config_name, mode='r') as config_file:
    conf = yaml.safe_load(config_file)

In [5]:
# Number of vertical levels; the loading cell reads one mean/std file per
# level index in range(N_levels) for each upper-air variable.
N_levels = 16
# Root directory of the CESM-grid data. Paths are built from it by plain
# string concatenation, so the trailing '/' is required.
base_dir = '/glade/derecho/scratch/ksha/CREDIT_data/ERA5_mlevel_cesm_stage1/'

In [6]:
# get variable names
# NOTE(review): the [:-5] slice assumes the last five keys of conf['residual']
# are non-variable settings (such as 'save_loc' and 'prefix') -- confirm
# against the YAML whenever the config changes.
varnames = list(conf['residual'].keys())
varnames = varnames[:-5] # remove save_loc and others

varname_upper = ['U', 'V', 'T', 'Q']
# Everything that is not an upper-air variable is read from a single file.
# Filtering the list (instead of taking a set difference) keeps the config
# order, so the variable order is the same in every kernel session.
varname_surf = [name for name in varnames if name not in varname_upper]

# collect computed mean and variance values
# See "qsub_STEP01_compute_mean_std.ipynb"
MEAN_values = {}
STD_values = {}

for varname in varname_surf:
    # one file per variable; entry 0 goes to MEAN_values, entry 1 to STD_values
    save_name = conf['residual']['save_loc'] + '{}_mean_std_{}.npy'.format(conf['residual']['prefix'], varname)
    mean_std = np.load(save_name)
    MEAN_values[varname] = mean_std[0]
    STD_values[varname] = mean_std[1]

for varname in varname_upper:

    # -------------------------------------------- #
    # allocate all levels, NaN-filled (row 0 -> MEAN_values, row 1 -> STD_values)
    mean_std_all_levels = np.full((2, N_levels), np.nan)

    for i_level in range(N_levels):
        # one file per (variable, level index)
        save_name = conf['residual']['save_loc'] + '{}_level{}_mean_std_{}.npy'.format(
            conf['residual']['prefix'], i_level, varname)
        mean_std = np.load(save_name)
        mean_std_all_levels[:, i_level] = mean_std

    # -------------------------------------------- #
    # save
    MEAN_values[varname] = np.copy(mean_std_all_levels[0, :])
    STD_values[varname] = np.copy(mean_std_all_levels[1, :])

In [7]:
# Split the stored values into the single-file (surface) group and the
# per-level (upper-air) group. Values are looked up by variable name rather
# than by slicing list(STD_values.values()) by position, so the split does not
# depend on the order in which STD_values happened to be filled.
std_val_all = list(STD_values.values())
std_val_surf = np.array([STD_values[name] for name in varname_surf])
std_val_upper = [STD_values[name] for name in varname_upper]

In [8]:
# Pool the surface values and every upper-air level into one array, then use
# the geometric mean of their square roots as the common scaling factor.
std_concat = np.concatenate([std_val_surf, *std_val_upper])
std_g = gmean(np.sqrt(std_concat))

In [9]:
# Open the 1979 store as a template; the next cell reads its 'level' coordinate.
ds_example = xr.open_zarr(base_dir+'all_in_one/ERA5_mlevel_cesm_6h_lev16_1979.zarr')

In [10]:
# ------------------------------------------------------- #
# Build an xr.Dataset holding the scaled coefficient of every variable.
# The level coordinate is copied from the example store.
level = np.array(ds_example['level'])

ds_std_6h = xr.Dataset(coords={"level": level})

for varname, stored_value in STD_values.items():
    # <--- var to std and divided by std_g
    scaled = np.sqrt(stored_value) / std_g
    if np.ndim(scaled) == 1:
        # 1-D values: attach the level dimension and coordinate
        entry = xr.DataArray(scaled, dims=["level",], coords={"level": level}, name=varname)
    else:
        # anything else (e.g. a 0-d value) is stored without dims/coords
        entry = xr.DataArray(scaled, name=varname)
    ds_std_6h[varname] = entry

In [12]:
# Write the scaled coefficients to NetCDF; the comparison cell reads this file back.
ds_std_6h.to_netcdf(base_dir+'mean_std/residual_6h_1979_2018_cesm.nc')

In [14]:
# ------------------------------------------------------- #
# Compare the CESM-grid coefficients written above with the 1-deg ones
base_dir_1deg = '/glade/derecho/scratch/ksha/CREDIT_data/ERA5_mlevel_1deg_stage1/'

# Both files are read fully into memory and closed straight away. A lazily
# opened handle left on either file can block re-running the to_netcdf cell
# that overwrites it.
with xr.open_dataset(base_dir+'mean_std/residual_6h_1979_2018_cesm.nc') as ds_cesm:
    STD_conserve = ds_cesm.load()
with xr.open_dataset(base_dir_1deg+'mean_std/residual_6h_1979_2018_1deg.nc') as ds_1deg:
    STD_bilinear = ds_1deg.load()

# Print the CESM-grid values first, then the 1-deg values, for every variable.
for varname in varnames:
    print('=============== {} ================='.format(varname))
    print(np.array(STD_conserve[varname]))
    print(np.array(STD_bilinear[varname]))

[2.23233559 0.53916113 0.62872641 0.56179554 1.15607257 1.57369515
 1.7812335  1.68544535 1.61390932 1.47717657 1.40775172 1.27178843
 0.70755711 0.47870039 0.47093225 0.4645251 ]
[2.22877341 0.53753047 0.62650154 0.56019855 1.15624953 1.5771914
 1.78807265 1.68954145 1.61768614 1.480396   1.41090533 1.27505268
 0.71039864 0.47997947 0.47196743 0.46551926]
1.553384498872459
1.5569376753269437
0.10960609446584396
0.10954554882597509
[1.48702112 0.63378785 0.54322846 0.49421725 0.35959012 0.63005526
 0.67294766 0.40261723 0.40409038 0.38143488 0.35236627 0.35051729
 0.3517611  0.36055843 0.40887998 0.44184623]
[1.48663303 0.63306474 0.54263562 0.49398754 0.35977363 0.63002081
 0.67261589 0.40241168 0.40389084 0.38125369 0.35218385 0.35031988
 0.35146155 0.36032256 0.40865243 0.44163144]
0.44507665583932676
0.4449076539389654
[0.97186325 0.63945121 0.63256742 0.78131413 0.85593403 0.85756702
 1.26300068 1.45775784 1.40504405 1.36026356 1.37651835 1.45357922
 1.61786986 1.6948886  1.660986