# Create z-score files for CREDIT

In [1]:
import os
import yaml
import numpy as np
import xarray as xr

## File creation

In [2]:
# Read the data-preprocessing configuration. Its 'zscore' section provides the
# variable names and the location/prefix of the pre-computed statistics files.
config_name = os.path.realpath('data_config_6h.yml')

with open(config_name, 'r') as config_file:
    # safe_load only builds plain Python objects from the YAML content
    conf = yaml.safe_load(config_file)

In [3]:
# Number of vertical levels: sizes the (2, N_levels) statistics arrays and sets
# how many '..._level{i}_...' files are read for each variable in varname_upper.
N_levels = 37
# Root data directory. The example zarr store and the 'mean_std/' files are
# addressed by plain string concatenation, so the trailing slash is required.
base_dir = '/glade/derecho/scratch/ksha/CREDIT_data/ERA5_plevel_1deg/'

In [4]:
# Variable names are the keys of the 'zscore' section of the config.
varnames = list(conf['zscore'].keys())
# The trailing three keys are settings, not variables.
# NOTE(review): this relies on the key order of the YAML file -- confirm that
# the non-variable keys (e.g. 'save_loc', 'prefix') always come last.
varnames = varnames[:-3] # remove save_loc and others

# Variables whose statistics are stored per level (one file per level).
varname_upper = ['U', 'V', 'T', 'Q', 'Z', 'specific_total_water']

# Every other variable has a single statistics file.
# A list comprehension is used instead of a set difference: set iteration order
# is arbitrary (and can change between runs with string hash randomization),
# which made the variable order of the output files non-reproducible. This keeps
# the order of the config file.
varname_surf = [name for name in varnames if name not in varname_upper]

# Gather the pre-computed statistics of every variable, keyed by variable name.
# The files are produced by "qsub_STEP01_compute_mean_std.ipynb".
MEAN_values = {}
STD_values = {}

stats_dir = conf['zscore']['save_loc']
stats_prefix = conf['zscore']['prefix']

for varname in varname_surf:
    # one file per variable; element 0 goes to MEAN_values, element 1 to STD_values
    stats_file = stats_dir + '{}_mean_std_{}.npy'.format(stats_prefix, varname)
    surf_stats = np.load(stats_file)
    MEAN_values[varname] = surf_stats[0]
    STD_values[varname] = surf_stats[1]

for varname in varname_upper:

    # NaN-filled (2, N_levels) buffer: row 0 -> MEAN_values, row 1 -> STD_values.
    # Any level that is not filled below would stay NaN.
    level_stats = np.full((2, N_levels), np.nan)

    # one statistics file per level index
    for i_level in range(N_levels):
        stats_file = conf['zscore']['save_loc'] + '{}_level{}_mean_std_{}.npy'.format(conf['zscore']['prefix'], i_level, varname)
        level_stats[:, i_level] = np.load(stats_file)

    # store independent copies of each row so the buffer is not shared
    MEAN_values[varname] = level_stats[0, :].copy()
    STD_values[varname] = level_stats[1, :].copy()

In [5]:
# Open an existing zarr store as a reference dataset; in the visible cells only
# its 'level' coordinate is read from it.
ds_example = xr.open_zarr(base_dir+'all_in_one/ERA5_plevel_1deg_6h_1979_conserve.zarr')

In [6]:
# ------------------------------------------------------- #
# Build the xr.Dataset that holds the mean of every variable

# Level coordinate, taken from the example dataset
level = np.array(ds_example['level'])

# Start from an empty dataset that only carries the level coordinate
ds_mean_6h = xr.Dataset(coords={"level": level})

for varname, data in MEAN_values.items():
    if data.ndim == 1:
        # 1-D statistics are indexed by level
        extra_kwargs = {"dims": ["level"], "coords": {"level": level}}
    else:
        # anything else is stored without explicit dims/coords
        extra_kwargs = {}
    ds_mean_6h[varname] = xr.DataArray(data, name=varname, **extra_kwargs)

In [7]:
# ------------------------------------------------------- #
# Build the xr.Dataset that holds the std of every variable

# Shares the level coordinate created for the mean dataset
ds_std_6h = xr.Dataset(coords={"level": level})

for varname, raw_stat in STD_values.items():
    # The square root of the stored statistic is what gets written out.
    # NOTE(review): presumably the stored values are variances -- confirm
    # against the notebook that computes them.
    std_data = np.sqrt(raw_stat)
    if std_data.ndim == 1:
        # 1-D statistics are indexed by level
        extra_kwargs = {"dims": ["level"], "coords": {"level": level}}
    else:
        # anything else is stored without explicit dims/coords
        extra_kwargs = {}
    ds_std_6h[varname] = xr.DataArray(std_data, name=varname, **extra_kwargs)

In [8]:
# Add a scalar mean of 0.0 for the land-sea mask variable. The matching std
# assignment below is commented out, so this cell adds no std entry for it.
# NOTE(review): presumably the mask should pass through z-scoring unchanged
# (mean 0, std 1) -- confirm whether the std file also needs the 1.0 entry.
ds_mean_6h['land_sea_CI_mask'] = 0.0
# ds_std_6h['land_sea_CI_mask'] = 1.0

In [9]:
# ------------------------------------------------------- #
# Save both datasets to netCDF (paths are relative to base_dir)
for dataset, rel_path in (
    (ds_mean_6h, 'mean_std/mean_6h_1979_2019_conserve_1deg.nc'),
    (ds_std_6h, 'mean_std/std_6h_1979_2019_conserve_1deg.nc'),
):
    dataset.to_netcdf(base_dir + rel_path)

In [11]:
# ------------------------------------------------------- #
# Sanity check: print the new std values next to the older bilinear ones
STD_conserve = xr.open_dataset(base_dir+'mean_std/std_6h_1979_2019_conserve_1deg.nc')
STD_bilinear = xr.open_dataset(base_dir+'mean_std/std_6h_1979_2019_bilinear_1deg.nc')

for varname in varnames:
    print('=============== {} ================='.format(varname))
    # conserve first, bilinear second
    for std_dataset in (STD_conserve, STD_bilinear):
        print(np.array(std_dataset[varname]))

1330.8846696194373
1332.1479128381566
22.404838153788788
22.447001334275868
9597.442621657625
9656.6245797302
0.34918409441572806
0.3658749665813759
5.487526636552918
5.52748270314236
4.71086288032973
4.744373030500121
21.373759417572977
21.40206776071968
0.0006418363022945283
0.0006502217778641388
1604157.7576941664
1625160.551704857
4214109.662542443
4226444.322152559
666310.6177924678
677867.5588998266
917596.9043234516
942432.9995989761
7716263.343614088
7716422.875064548
5830393.034174383
5838101.391376633
1048153.7804762697
1054880.054276175
0.0017714241278640804
0.001961351589872363
0.6015238294544372
0.6121059612021218
[1.47448629e-07 2.18486793e-07 2.69306473e-07 3.18866754e-07
 3.39629482e-07 3.44408546e-07 3.02537166e-07 3.06876374e-07
 3.60947701e-07 4.06882702e-07 5.69227151e-07 1.18649061e-06
 3.76527269e-06 1.00445285e-05 2.24325240e-05 4.32211514e-05
 7.34969316e-05 1.66583928e-04 3.09963038e-04 5.04110787e-04
 7.55147609e-04 1.07678579e-03 1.45225272e-03 1.76289549e-03