# Create z-score files for CREDIT

In [None]:
import os
import yaml
import numpy as np
import xarray as xr

## File creation

### 6-hourly mean and std files

In [None]:
# Read the preprocessing settings (data_preprocessing/config) that list the
# z-score variables. The relative file name is resolved to a canonical
# absolute path before it is opened.
config_name = os.path.realpath('data_config_6h.yml')

with open(config_name, mode='r') as config_file:
    conf = yaml.safe_load(config_file)

In [None]:
# get variable names
# NOTE(review): this assumes the last three keys of conf['zscore'] are
# settings rather than variables (save_loc, prefix, ...) — confirm against
# the YAML layout, since the slice depends on key order.
varnames = list(conf['zscore'].keys())
varnames = varnames[:-3] # remove save_loc and others

# collect computed mean and variance values
# See "qsub_STEP01_compute_mean_std.ipynb"
# Each file holds the mean at index 0 and the spread statistic at index 1.
# 2-D arrays are split row-wise (one value per column), 1-D arrays are split
# into two scalars.
# NOTE(review): the second entry is presumably a variance (a square root is
# applied to it later in this notebook) — confirm against the STEP01 notebook.
MEAN_values = {}
STD_values = {}
for varname in varnames:
    save_name = conf['zscore']['save_loc'] + '{}_mean_std_{}.npy'.format(conf['zscore']['prefix'], varname)
    mean_std = np.load(save_name)
    if mean_std.ndim == 2:
        MEAN_values[varname] = mean_std[0, :]
        STD_values[varname] = mean_std[1, :]
    elif mean_std.ndim == 1:
        MEAN_values[varname] = mean_std[0]
        STD_values[varname] = mean_std[1]
    else:
        # A bare `raise` here has no active exception to re-raise and only
        # produces an uninformative RuntimeError; report the offending file.
        raise ValueError(
            'Expected a 1-D or 2-D mean/std array in {}, got shape {}'.format(
                save_name, mean_std.shape
            )
        )

In [None]:
# ------------------------------------------------------- #
# Assemble the mean values into a single xr.Dataset

# Values of the "level" coordinate shared by all per-level variables
level = np.array([10, 30, 40, 50, 60, 70, 80, 90, 95, 100, 105, 110, 120, 130, 136])

# Start from an empty dataset that only carries the level coordinate
ds_mean_6h = xr.Dataset(coords={"level": level})

for varname, data in MEAN_values.items():
    if len(data.shape) != 1:
        # anything that is not 1-D is stored without named dims or coords
        data_array = xr.DataArray(data, name=varname)
    else:
        # 1-D entries are attached to the level coordinate
        data_array = xr.DataArray(
            data, dims=["level"], coords={"level": level}, name=varname
        )
    ds_mean_6h[varname] = data_array

In [None]:
# ------------------------------------------------------- #
# Assemble the std values into a single xr.Dataset

# Reuses the `level` coordinate array defined for the mean dataset
ds_std_6h = xr.Dataset(coords={"level": level})

for varname, data in STD_values.items():
    # the stored statistic is square-rooted before it goes into the dataset
    data = np.sqrt(data)
    if len(data.shape) != 1:
        # anything that is not 1-D is stored without named dims or coords
        data_array = xr.DataArray(data, name=varname)
    else:
        # 1-D entries are attached to the level coordinate
        data_array = xr.DataArray(
            data, dims=["level"], coords={"level": level}, name=varname
        )
    ds_std_6h[varname] = data_array

In [None]:
# ------------------------------------------------------- #
# Save to netCDF
# ds_mean_6h.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/mean_6h_0.25deg.nc')
# ds_std_6h.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/std_6h_0.25deg.nc')

In [None]:
# ------------------------------------------------------- #
# Compare with my old ones
# TEST_std = xr.open_dataset('/glade/campaign/cisl/aiml/ksha/CREDIT/ERA5_std_6h.nc')
# NEW_std = xr.open_dataset('/glade/campaign/cisl/aiml/ksha/CREDIT/std_6h_0.25deg.nc')

# for varname in varnames:
#     print('=============== {} ================='.format(varname))
#     print(np.array(TEST_std[varname]))
#     print(np.array(NEW_std[varname]))

## Add new variables to existing files 

In [None]:
# load old files
old_mean = '/glade/derecho/scratch/dgagne/credit_scalers/All_1979-2014_staged.mean.Lev.SLO.nc'
old_std = '/glade/derecho/scratch/dgagne/credit_scalers/All_1979-2014_staged.std.Lev.SLO.nc'

# open old files
xr_old_mean = xr.open_dataset(old_mean)
xr_old_std = xr.open_dataset(old_std)

# create new files from old + assign variable
# `assign` returns new datasets with an extra scalar variable `tsi`; the old
# datasets are left unchanged.
# NOTE(review): the two constants are presumably the precomputed mean and std
# of `tsi` — their provenance and units are not visible here, confirm.
xr_new_mean = xr_old_mean.assign(tsi=6430203.22586827)
xr_new_std = xr_old_std.assign(tsi=3972698.08699589)

# Harmonize the vertical coordinate name to 'level'. xarray raises ValueError
# when 'levels' does not exist, which means there is nothing to rename. Only
# that error is caught, so unrelated failures (and KeyboardInterrupt) are no
# longer swallowed.
try:
    xr_new_mean = xr_new_mean.rename({'levels': 'level'})
    xr_new_std = xr_new_std.rename({'levels': 'level'})
except ValueError:
    print('Coordinate name good')

# save
# xr_new_mean.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/mean_6h_0.25deg.nc')
# xr_new_std.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/std_6h_0.25deg.nc')

## Change values for existing variables

In [None]:
# Open the existing mean scaler file from the EXTEND directory
old_mean = '/glade/campaign/cisl/aiml/ksha/CREDIT/EXTEND/All_2010_staged.mean.Lev.SLO.nc'
xr_old_mean = xr.open_dataset(old_mean)

# Open the matching std scaler file
old_std = '/glade/campaign/cisl/aiml/ksha/CREDIT/EXTEND/All_2010_staged.std.Lev.SLO.nc'
xr_old_std = xr.open_dataset(old_std)

In [None]:
# Start the new datasets as copies of the old ones (copy() with its default
# arguments), so that variables can be reassigned on the new objects.
xr_new_mean, xr_new_std = xr_old_mean.copy(), xr_old_std.copy()

In [None]:
# Build identity scalers: every data variable gets mean 0 and std 1, with the
# same dims and coordinates as the old variable.
# zeros_like / ones_like replace (x - x) and (x / x). The arithmetic forms
# propagate NaN from the input and yield NaN where the old std is 0, so the
# resulting files would not be guaranteed to be all zeros / all ones.
varnames = list(xr_old_mean.keys())

for var in varnames:
    xr_new_mean[var] = xr.zeros_like(xr_old_mean[var])
    xr_new_std[var] = xr.ones_like(xr_old_std[var])

In [None]:
# Write both scaler datasets to their netCDF targets (mean first, then std)
for dataset, target in (
    (xr_new_mean, '/glade/campaign/cisl/aiml/ksha/CREDIT/mean_zero.nc'),
    (xr_new_std, '/glade/campaign/cisl/aiml/ksha/CREDIT/std_one.nc'),
):
    dataset.to_netcdf(target)