# Compute residual norm coefficients

This notebook computes the residual norm coefficients as part of the variable weights.

In [1]:
import os
import yaml
import copy
import numpy as np
import xarray as xr

In [2]:
from scipy.stats import gmean

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

## Main routine

In [7]:
# Read the preprocessing configuration (data_preprocessing/config); the
# 'residual' section of it is used by the cells below.
config_name = os.path.realpath('data_config_6h.yml')
with open(config_name, mode='r') as config_file:
    conf = yaml.safe_load(config_file)


In [None]:
# Level information for the upper-air variables.
# N_levels is the number of per-level statistic files loaded for each upper-air
# variable; `level` is the level coordinate read from an example dataset and is
# later used as the "level" coordinate of the output Dataset.
N_levels = 13
base_dir = '/glade/derecho/scratch/ksha/CREDIT_data/ERA5_plevel_1deg/'
ds_example = xr.open_zarr(base_dir+'upper_subset/ERA5_subset_1deg_6h_1979_conserve.zarr')
level = np.array(ds_example['level'])

# Fail early with a clear message if the hard-coded count and the dataset
# disagree; otherwise the mismatch only shows up when the per-level arrays are
# attached to the "level" coordinate.
assert level.size == N_levels, (
    'N_levels={} but the example dataset has {} levels'.format(N_levels, level.size)
)

In [None]:
# Variable names are the keys of conf['residual'].
varnames = list(conf['residual'].keys())
# Drop the last five keys, which are settings rather than variables
# (save_loc and others).
# NOTE(review): this relies on the key order of the YAML file — confirm that
# exactly five non-variable keys sit at the end of the 'residual' section.
varnames = varnames[:-5]

# Upper-air variables (one value per level); everything else is a surface variable.
varname_upper = ['U', 'V', 'T', 'Q', 'Z', 'specific_total_water']

# Keep the config order instead of using a set difference: iterating a set of
# strings can yield a different order in every kernel session, which made the
# load order and the variable order of the output Dataset non-reproducible.
varname_surf = [name for name in varnames if name not in varname_upper]

In [8]:
# Gather the statistics saved by "qsub_STEP01_compute_mean_std.ipynb".
# Each file holds a pair: element 0 goes to MEAN_values, element 1 to STD_values.
# Note: the original comment calls these "mean and variance values", and a
# later cell takes np.sqrt of STD_values to obtain standard deviations.
MEAN_values = {}
STD_values = {}

save_loc = conf['residual']['save_loc']
prefix = conf['residual']['prefix']

# Surface variables: one file per variable.
for varname in varname_surf:
    save_name = save_loc + '{}_mean_std_{}.npy'.format(prefix, varname)
    mean_std = np.load(save_name)
    MEAN_values[varname], STD_values[varname] = mean_std[0], mean_std[1]

# Upper-air variables: one file per (variable, level index).
for varname in varname_upper:
    # Row 0 collects element 0 of every level file, row 1 collects element 1;
    # NaN-filled so an unfilled slot would be visible.
    mean_std_all_levels = np.full((2, N_levels), np.nan)

    for i_level in range(N_levels):
        save_name = save_loc + '{}_level{}_mean_std_{}.npy'.format(prefix, i_level, varname)
        mean_std = np.load(save_name)
        mean_std_all_levels[:, i_level] = mean_std

    MEAN_values[varname] = mean_std_all_levels[0].copy()
    STD_values[varname] = mean_std_all_levels[1].copy()

# Some variables are not used in the paper; remove them from both dictionaries.
keys_to_drop = ['TCC', 'SKT', 'SP', 'Q', 'land_sea_CI_mask']
for unused_key in keys_to_drop:
    MEAN_values.pop(unused_key, None)
    STD_values.pop(unused_key, None)

In [9]:
# Separate surface values (one entry per variable) from upper-air values (one
# array per variable).
# The split is done by variable name rather than by slicing off "the last five"
# entries, so it stays correct when keys_to_drop or varname_upper changes.
std_val_surf = np.array(
    [value for name, value in STD_values.items() if name not in varname_upper]
)
std_val_upper = [value for name, value in STD_values.items() if name in varname_upper]

# Combine everything into one flat array: surface entries first, then the
# per-level entries of each upper-air variable.
std_concat = np.concatenate([std_val_surf] + std_val_upper)

# Geometric mean of the square-rooted values (not used).
std_g = gmean(np.sqrt(std_concat))

### Save residual coefficients as a file

In [11]:
# ------------------------------------------------------- #
# Build an xr.Dataset with one variable per entry of STD_values.
ds_std_6h = xr.Dataset(coords={"level": level})

for varname, data in STD_values.items():
    # Variance -> standard deviation. The division by std_g is disabled.
    data = np.sqrt(data)

    # 1-D entries get the "level" dimension and coordinate; everything else is
    # stored without dimensions or coordinates.
    if data.ndim == 1:
        level_kwargs = {"dims": ["level"], "coords": {"level": level}}
    else:
        level_kwargs = {}

    data_array = xr.DataArray(data, name=varname, **level_kwargs)
    ds_std_6h[varname] = data_array

In [12]:
# Saving is currently disabled (the call below is commented out). Uncomment it
# to write ds_std_6h to a NetCDF file under base_dir + 'mean_std/'.
# NOTE(review): the file name here ('residual_original_...') differs from the
# 'residual_6h_1979_2019_1deg_project2.nc' file read by the comparison cell —
# confirm which name is intended.
# ds_std_6h.to_netcdf(base_dir+'mean_std/residual_original_6h_1979_2019_1deg_project2.nc')

### Comparing with the old residual norm

In [11]:
# ------------------------------------------------------- #
# Compare the new residual coefficients with the older "conserve" ones.
std_new = xr.open_dataset(base_dir+'mean_std/residual_6h_1979_2019_1deg_project2.nc')
std_conserve = xr.open_dataset(base_dir+'mean_std/residual_6h_1979_2019_conserve_1deg.nc')

for varname in list(std_conserve.keys()):
    print('=============== {} ================='.format(varname))

    # Only a variable missing from the new file is an expected condition.
    # Catch exactly that instead of a bare `except`, which would also hide
    # real errors and swallow KeyboardInterrupt.
    try:
        new_values = np.array(std_new[varname])
    except KeyError:
        print('(not present in the new file; skipped)')
        continue

    print(new_values)
    print(np.array(std_conserve[varname]))

2.306996458279529
2.756990226781677
5.259948532947676
6.285933663515951
0.48636395589003817
0.5812322200302392
2.603373302474922
3.1111771870427525
5.274119616967553
6.302868904142561
4.24858429119819
5.077296641030351
0.8125157492432455
0.9710019153823931
3.69710900263375
4.418252724674436
1.5778630952817745
1.8856349420386707
2.3033441023752
2.752625456520836
5.2281697471186686
6.2479562311557375
[0.9236332  0.70619558 0.79551732 0.94796135 1.18434099 1.33943807
 1.39364216 1.3264029  1.2878272  1.30712027 1.46732084 1.57984744
 1.55446726]
[1.10379351 0.9926741  0.91675773 0.8195625  0.7896874  0.73181563
 0.69538035 0.72879487 0.84394335 0.94066804 0.96333641 0.94496085
 0.95068783 1.01350351 1.13286701 1.28118643 1.41535395 1.60070366
 1.67241509 1.66548059 1.62708905 1.58512589 1.55642738 1.53902578
 1.54016888 1.56208207 1.60793256 1.63801339 1.67344779 1.7131567
 1.75353075 1.79639599 1.84374096 1.88800635 1.90963237 1.89210613
 1.85767561]
[2.70923704 1.6855055  1.51498717 1.6