# Create residual norm files for CREDIT

In [1]:
import os
import yaml
import copy
import numpy as np
import xarray as xr

In [2]:
from scipy.stats import gmean

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

## File creation

### 6-hourly residual coefficients

In [4]:
# Load the variable information kept in data_preprocessing/config.
# The relative file name is resolved against the current working directory.
config_name = os.path.realpath('data_config_6h.yml')

# safe_load parses plain YAML only (no arbitrary Python object construction)
with open(config_name) as stream:
    conf = yaml.safe_load(stream)

In [5]:
# number of per-level statistic files read for each upper-air variable
N_levels = 37
# root directory used below for the example zarr store and the mean_std/ outputs
base_dir = '/glade/derecho/scratch/ksha/CREDIT_data/ERA5_plevel_1deg/'

In [6]:
# get variable names
varnames = list(conf['residual'].keys())
varnames = varnames[:-5] # remove save_loc and others

# variables that are stored per level (one statistics file per level)
varname_upper = ['U', 'V', 'T', 'Q', 'Z', 'specific_total_water']

# Everything else is treated as a surface variable. Filter the config list
# instead of taking a set difference: set iteration order depends on string
# hashing, so it can differ between kernel sessions and would reorder the
# variables collected below and written to the output file.
varname_surf = [name for name in varnames if name not in varname_upper]

In [7]:
# Gather the per-variable statistics written by "qsub_STEP01_compute_mean_std.ipynb".
# Entry 0 of each file goes to MEAN_values and entry 1 to STD_values
# (entry 1 is square-rooted downstream, i.e. it is handled as a variance).
MEAN_values = {}
STD_values = {}

# surface variables: a single file per variable
for varname in varname_surf:
    save_name = conf['residual']['save_loc'] + '{}_mean_std_{}.npy'.format(
        conf['residual']['prefix'], varname
    )
    mean_std = np.load(save_name)
    MEAN_values[varname], STD_values[varname] = mean_std[0], mean_std[1]

# upper-air variables: one file per level, stacked into a (2, N_levels) table
for varname in varname_upper:

    # start from all-NaN so any column that is not filled remains visible
    mean_std_all_levels = np.full((2, N_levels), np.nan)

    for i_level in range(N_levels):
        save_name = conf['residual']['save_loc'] + '{}_level{}_mean_std_{}.npy'.format(
            conf['residual']['prefix'], i_level, varname
        )
        mean_std = np.load(save_name)
        mean_std_all_levels[:, i_level] = mean_std

    # store independent copies of the two rows
    MEAN_values[varname] = mean_std_all_levels[0, :].copy()
    STD_values[varname] = mean_std_all_levels[1, :].copy()

# discard variables that should not appear in the output
keys_to_drop = ['VAR_10U', 'VAR_10V', 'SP', 'Q', 'Q500', 'land_sea_CI_mask']
MEAN_values = {k: MEAN_values[k] for k in MEAN_values if k not in keys_to_drop}
STD_values = {k: STD_values[k] for k in STD_values if k not in keys_to_drop}

In [8]:
# Split the collected values into a surface group and an upper-air group.
# The split is done by variable name rather than by a fixed tail length, so it
# stays correct if keys_to_drop or varname_upper is edited.
std_val_all = list(STD_values.values())

# surface entries are packed into one array (presumably one scalar each -- TODO confirm)
std_val_surf = np.array(
    [value for name, value in STD_values.items() if name not in varname_upper]
)
# upper-air entries stay as a list of per-level arrays
std_val_upper = [value for name, value in STD_values.items() if name in varname_upper]

In [9]:
# Pool the surface values and every per-level profile into one flat array,
# take the square root, and reduce to a single geometric-mean scale factor.
std_concat = np.concatenate([std_val_surf, *std_val_upper])
std_g = gmean(np.sqrt(std_concat))

In [10]:
# Open one existing store only to borrow its "level" coordinate values
ds_example = xr.open_zarr(
    base_dir + 'all_in_one/ERA5_plevel_1deg_6h_1979_conserve.zarr'
)
level = np.array(ds_example['level'].values)

In [11]:
# ------------------------------------------------------- #
# Build the output dataset: one normalized std entry per variable
ds_std_6h = xr.Dataset(coords={"level": level})

for varname in STD_values:
    # <--- var to std and divided by std_g
    data = np.sqrt(STD_values[varname]) / std_g

    if data.ndim != 1:
        # non-profile entries are stored without named dims or coords
        data_array = xr.DataArray(data, name=varname)
    else:
        # 1-D entries are attached to the "level" coordinate
        data_array = xr.DataArray(
            data,
            dims=["level"],
            coords={"level": level},
            name=varname,
        )

    ds_std_6h[varname] = data_array

In [12]:
# write the normalized 6-hourly residual values to NetCDF under mean_std/
ds_std_6h.to_netcdf(
    base_dir + 'mean_std/residual_6h_1979_2019_conserve_1deg.nc'
)

In [14]:
# ------------------------------------------------------- #
# Compare with my old ones:
# for every variable in the "conserve" file, print its values followed by the
# values stored under the same name in the "bilinear" file.
std_conserve = xr.open_dataset(
    base_dir + 'mean_std/residual_6h_1979_2019_conserve_1deg.nc'
)
std_bilinear = xr.open_dataset(
    base_dir + 'mean_std/residual_6h_1979_2019_bilinear_1deg.nc'
)

for varname in std_conserve:
    print(f'=============== {varname} =================')
    print(std_conserve[varname].values)
    print(std_bilinear[varname].values)

0.9020328939906355
0.8889482229414569
2.756990226781677
2.7338782622675653
6.285933663515951
6.097818137560312
0.5812322200302392
0.5771408616318606
3.1111771870427525
3.082331817881059
6.302868904142561
6.115183631423283
5.077296641030351
4.921228881376689
0.9710019153823931
0.947887088931082
3.451359736171103
3.6788048004145764
4.418252724674436
4.625040749189226
1.8856349420386707
1.9081347668682982
2.752625456520836
2.729696891949517
6.2479562311557375
6.060059077019095
[1.10379351 0.9926741  0.91675773 0.8195625  0.7896874  0.73181563
 0.69538035 0.72879487 0.84394335 0.94066804 0.96333641 0.94496085
 0.95068783 1.01350351 1.13286701 1.28118643 1.41535395 1.60070366
 1.67241509 1.66548059 1.62708905 1.58512589 1.55642738 1.53902578
 1.54016888 1.56208207 1.60793256 1.63801339 1.67344779 1.7131567
 1.75353075 1.79639599 1.84374096 1.88800635 1.90963237 1.89210613
 1.85767561]
[1.07412955 0.96714283 0.89446095 0.8002246  0.7720293  0.7161376
 0.68204999 0.71602873 0.83159014 0.92982

In [21]:
# ------------------------------------------------------- #
# Print every variable stored in the "full" residual file for inspection
new_std = xr.open_dataset(
    '/glade/derecho/scratch/ksha/CREDIT_data/ERA5_plevel_1deg/mean_std/residual_6h_1979_2019_full_1deg.nc'
)

for varname in new_std:
    print(f'=============== {varname} =================')
    print(new_std[varname].values)

2.624155534902845
2.963156138014652
5.862051300897985
4.44621756977132
0.85457781277974
5.878745373038826
2.628175236172955
1.8343583950970768
0.5548262120004838
3.536567011270057
0.9112378587590986
5.825752161603732
4.730953845342623
[1.03259926 0.9297491  0.85987739 0.76928461 0.74217946 0.68844876
 0.65567912 0.68834411 0.79943743 0.89386972 0.91926509 0.90307095
 0.90904464 0.96740824 1.07955553 1.22024454 1.34835542 1.52910728
 1.60346896 1.60110295 1.5670054  1.52871495 1.50382798 1.48949051
 1.49454591 1.51930797 1.5670986  1.59765022 1.63321122 1.67236384
 1.71087802 1.75005903 1.79291379 1.83467147 1.8565739  1.84238302
 1.8133912 ]
[3.051173   2.61485035 2.33264689 2.04071048 1.95104031 1.80858624
 1.7635113  1.81439941 1.92224485 1.9511978  1.84078104 1.75870506
 1.72329362 1.76788551 1.88872464 2.04085552 2.17430738 2.36250987
 2.44921065 2.4577648  2.42661696 2.38074107 2.34196043 2.30953538
 2.29564204 2.31714499 2.36234664 2.39134694 2.42675879 2.46684371
 2.50545559 2.5