# Create z-score files for CREDIT

In [1]:
import os
import yaml
import numpy as np
import xarray as xr

## File creation

### 6 hourly mean std files

In [2]:
# Locate the 6-hourly preprocessing config (resolved to an absolute path)
# and parse it into the `conf` dictionary used by the cells below.
config_name = os.path.realpath('data_config_6h.yml')

with open(config_name) as stream:
    conf = yaml.safe_load(stream)

In [3]:
# Number of vertical levels per upper-air variable; it sizes the per-level
# arrays and the per-level file loop in the collection cell below.
# It has to equal the length of the `level` coordinate array used when the
# datasets are built (16 entries there).
N_levels = 16

In [5]:
# get variable names
varnames = list(conf['zscore'].keys())
# drop the trailing three non-variable keys (save_loc and others);
# NOTE: this relies on those keys being the last three entries of conf['zscore']
varnames = varnames[:-3]

# upper-air variables are stored as one file per level,
# every other variable as a single file
varname_upper = ['U', 'V', 'T', 'Q']
# Keep the config order. A set difference iterates in an arbitrary,
# hash-seed-dependent order, which would change the variable order of the
# resulting datasets from one run to the next.
varname_surf = [name for name in varnames if name not in varname_upper]

# collect computed mean and variance values
# See "qsub_STEP01_compute_mean_std.ipynb"
MEAN_values = {}
STD_values = {}

for varname in varname_surf:
    save_name = conf['zscore']['save_loc'] + '{}_mean_std_{}.npy'.format(conf['zscore']['prefix'], varname)
    mean_std = np.load(save_name)
    # index 0 -> mean, index 1 -> second statistic (square-rooted later to get std)
    MEAN_values[varname] = mean_std[0]
    STD_values[varname] = mean_std[1]

for varname in varname_upper:

    # -------------------------------------------- #
    # allocate all levels, NaN-filled so a level that was never written stays visible
    mean_std_all_levels = np.full((2, N_levels), np.nan)

    for i_level in range(N_levels):
        save_name = conf['zscore']['save_loc'] + '{}_level{}_mean_std_{}.npy'.format(conf['zscore']['prefix'], i_level, varname)
        mean_std = np.load(save_name)
        mean_std_all_levels[:, i_level] = mean_std

    # -------------------------------------------- #
    # save (copies, so the per-variable arrays do not share the scratch buffer)
    MEAN_values[varname] = np.copy(mean_std_all_levels[0, :])
    STD_values[varname] = np.copy(mean_std_all_levels[1, :])

In [6]:
# ------------------------------------------------------- #
# assemble the mean values into one xr.Dataset

# values of the "level" coordinate
level = np.array([ 10,  30,  40,  50,  60,  70,  80,  90,  95, 100, 105, 110, 120, 130, 136, 137])

# start from a dataset that only carries the level coordinate
ds_mean_6h = xr.Dataset(coords={"level": level})

for varname, data in MEAN_values.items():
    # 1-D entries are per-level profiles and get the level dimension/coordinate;
    # anything else is stored as-is without named dimensions
    level_kwargs = {"dims": ["level"], "coords": {"level": level}} if data.ndim == 1 else {}
    data_array = xr.DataArray(data, name=varname, **level_kwargs)
    ds_mean_6h[varname] = data_array

In [7]:
# ------------------------------------------------------- #
# assemble the std values into one xr.Dataset

# reuse the level coordinate defined for the mean dataset
ds_std_6h = xr.Dataset(coords={"level": level})

for varname, data in STD_values.items():
    # the collected values are square-rooted before storing
    # (the collection cell describes them as variances)
    std = np.sqrt(data)
    # 1-D entries are per-level profiles and get the level dimension/coordinate
    level_kwargs = {"dims": ["level"], "coords": {"level": level}} if std.ndim == 1 else {}
    data_array = xr.DataArray(std, name=varname, **level_kwargs)
    ds_std_6h[varname] = data_array

In [8]:
# ------------------------------------------------------- #
# Save to netCDF
# Writes the 6-hourly mean and std datasets built above to fixed output paths.
ds_mean_6h.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/mean_6h_1979_2018_16lev_0.25deg.nc')
ds_std_6h.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/std_6h_1979_2018_16lev_0.25deg.nc')

In [9]:
# ------------------------------------------------------- #
# Visual check: print the previous std file next to the newly written one
TEST_std = xr.open_dataset('/glade/campaign/cisl/aiml/ksha/CREDIT/ERA5_std_6h.nc')
NEW_std = xr.open_dataset('/glade/campaign/cisl/aiml/ksha/CREDIT/std_6h_1979_2018_16lev_0.25deg.nc')

for varname in varnames:
    header = '=============== {} ================='.format(varname)
    print(header)
    # old values first, then the new ones
    for dataset in (TEST_std, NEW_std):
        print(np.array(dataset[varname]))

[40.25234767 25.24087972 19.68657544 14.46772809 13.46194334 16.57442082
 17.87593007 14.55545107 12.45376369 10.66571396  9.40403755  8.63441766
  8.32003227  7.53983843  6.12131051]
[40.21485329 25.27922629 19.71381648 14.48358737 13.48588523 16.58891951
 17.88966184 14.56381561 12.46334059 10.6776952   9.41726967  8.64687776
  8.33099297  7.54898916  6.12810154  5.51947415]
[14.0958925  10.58697935  8.26052837  6.86788885  7.4285101  10.16234846
 13.44694114 11.43703369  9.63471779  8.14337788  7.13661052  6.58880531
  6.7778161   6.61265801  5.29263089]
[14.10785166 10.69972115  8.34700692  6.91838006  7.45253687 10.17627158
 13.45741855 11.4414309   9.64035721  8.15036388  7.14415171  6.5955258
  6.78472338  6.61929494  5.2975703   4.73462807]
[ 8.35982194 12.55240563 10.72877453 10.48790508 12.60196446  8.17566597
  9.06483897 13.55759473 14.35155307 14.7407884  15.26683939 15.79633956
 16.973469   19.19026079 20.6964735 ]
[ 8.28246086 12.54296629 10.73224169 10.51179137 12.60151

### hourly mean std files

In [2]:
# Locate the hourly preprocessing config (resolved to an absolute path)
# and parse it into the `conf` dictionary used by the cells below.
config_name = os.path.realpath('data_config_1h.yml')

with open(config_name) as stream:
    conf = yaml.safe_load(stream)

In [3]:
# Number of vertical levels per upper-air variable; it sizes the per-level
# arrays and the per-level file loop in the collection cell below.
# It has to equal the length of the `level` coordinate array used when the
# datasets are built (16 entries there).
N_levels = 16

In [4]:
# get variable names
varnames = list(conf['zscore'].keys())
# drop the trailing three non-variable keys (save_loc and others);
# NOTE: this relies on those keys being the last three entries of conf['zscore']
varnames = varnames[:-3]

# upper-air variables are stored as one file per level,
# every other variable as a single file
varname_upper = ['U', 'V', 'T', 'Q']
# Keep the config order. A set difference iterates in an arbitrary,
# hash-seed-dependent order, which would change the variable order of the
# resulting datasets from one run to the next.
varname_surf = [name for name in varnames if name not in varname_upper]

# collect computed mean and variance values
# See "qsub_STEP01_compute_mean_std.ipynb"
MEAN_values = {}
STD_values = {}

for varname in varname_surf:
    save_name = conf['zscore']['save_loc'] + '{}_mean_std_{}.npy'.format(conf['zscore']['prefix'], varname)
    mean_std = np.load(save_name)
    # index 0 -> mean, index 1 -> second statistic (square-rooted later to get std)
    MEAN_values[varname] = mean_std[0]
    STD_values[varname] = mean_std[1]

for varname in varname_upper:

    # -------------------------------------------- #
    # allocate all levels, NaN-filled so a level that was never written stays visible
    mean_std_all_levels = np.full((2, N_levels), np.nan)

    for i_level in range(N_levels):
        save_name = conf['zscore']['save_loc'] + '{}_level{}_mean_std_{}.npy'.format(conf['zscore']['prefix'], i_level, varname)
        mean_std = np.load(save_name)
        mean_std_all_levels[:, i_level] = mean_std

    # -------------------------------------------- #
    # save (copies, so the per-variable arrays do not share the scratch buffer)
    MEAN_values[varname] = np.copy(mean_std_all_levels[0, :])
    STD_values[varname] = np.copy(mean_std_all_levels[1, :])

In [5]:
# ------------------------------------------------------- #
# assemble the mean values into one xr.Dataset
# NOTE: the variable is still called ds_mean_6h, but in this section it holds
# the statistics loaded via data_config_1h.yml; the save cell reads this name.

# values of the "level" coordinate
level = np.array([ 10,  30,  40,  50,  60,  70,  80,  90,  95, 100, 105, 110, 120, 130, 136, 137])

# start from a dataset that only carries the level coordinate
ds_mean_6h = xr.Dataset(coords={"level": level})

for varname, data in MEAN_values.items():
    # 1-D entries are per-level profiles and get the level dimension/coordinate;
    # anything else is stored as-is without named dimensions
    level_kwargs = {"dims": ["level"], "coords": {"level": level}} if data.ndim == 1 else {}
    data_array = xr.DataArray(data, name=varname, **level_kwargs)
    ds_mean_6h[varname] = data_array

In [6]:
# ------------------------------------------------------- #
# assemble the std values into one xr.Dataset
# NOTE: the variable is still called ds_std_6h, but in this section it holds
# the statistics loaded via data_config_1h.yml; the save cell reads this name.

# reuse the level coordinate defined for the mean dataset
ds_std_6h = xr.Dataset(coords={"level": level})

for varname, data in STD_values.items():
    # the collected values are square-rooted before storing
    # (the collection cell describes them as variances)
    std = np.sqrt(data)
    # 1-D entries are per-level profiles and get the level dimension/coordinate
    level_kwargs = {"dims": ["level"], "coords": {"level": level}} if std.ndim == 1 else {}
    data_array = xr.DataArray(std, name=varname, **level_kwargs)
    ds_std_6h[varname] = data_array

In [7]:
# ------------------------------------------------------- #
# Save to netCDF
# Writes the hourly mean and std datasets to fixed output paths.
# (The dataset variables keep their "_6h" names from the 6-hourly section.)
ds_mean_6h.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/mean_1h_1979_2018_16lev_0.25deg.nc')
ds_std_6h.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/std_1h_1979_2018_16lev_0.25deg.nc')

In [8]:
# ------------------------------------------------------- #
# Visual check: print the previous std file next to the newly written one
TEST_std = xr.open_dataset('/glade/campaign/cisl/aiml/ksha/CREDIT/ERA5_std_1h.nc')
NEW_std = xr.open_dataset('/glade/campaign/cisl/aiml/ksha/CREDIT/std_1h_1979_2018_16lev_0.25deg.nc')

for varname in varnames:
    header = '=============== {} ================='.format(varname)
    print(header)
    # old values first, then the new ones
    for dataset in (TEST_std, NEW_std):
        print(np.array(dataset[varname]))

[40.2114419  25.30733029 19.72293431 14.46480427 13.46300897 16.57334659
 17.87322625 14.55331681 12.45124576 10.66328941  9.40176548  8.63292531
  8.32383704  7.54299004  6.12433374]
[40.21049709 25.2798485  19.71400057 14.48428308 13.48577109 16.58890918
 17.88740389 14.56269018 12.46127708 10.6746157   9.41358712  8.64394196
  8.33355144  7.55061439  6.13005239  5.52133267]
[14.11541539 10.66165703  8.3133328   6.8962488   7.43961874 10.1677194
 13.44958957 11.43840746  9.63494636  8.14215549  7.1344495   6.58715016
  6.77985707  6.61466834  5.29455845]
[14.10112359 10.69938842  8.34745048  6.919396    7.45394486 10.17814471
 13.45693799 11.44162342  9.63946818  8.14773921  7.14047788  6.59255576
  6.78514191  6.61942895  5.29818956  4.73515897]
[ 8.32816028 12.54452925 10.72980058 10.49247162 12.60096061  8.17539356
  9.06981504 13.55772756 14.35015711 14.73956924 15.26662261 15.79610131
 16.96981059 19.18755154 20.69553482]
[ 8.28036718 12.54196664 10.73134699 10.51129095 12.60088

## Old blocks

### Add new variables to existing files 

In [60]:
# load old files
old_mean = '/glade/derecho/scratch/dgagne/credit_scalers/All_1979-2014_staged.mean.Lev.SLO.nc'
old_std = '/glade/derecho/scratch/dgagne/credit_scalers/All_1979-2014_staged.std.Lev.SLO.nc'

# open old files
xr_old_mean = xr.open_dataset(old_mean)
xr_old_std = xr.open_dataset(old_std)

# create new files from old + assign variable
# (tsi mean / std constants are hard-coded here; their provenance is not
#  recorded in this notebook)
xr_new_mean = xr_old_mean.assign(tsi=6430203.22586827)
xr_new_std = xr_old_std.assign(tsi=3972698.08699589)


def _rename_levels(ds):
    """Return `ds` with 'levels' renamed to 'level'; return it unchanged if 'levels' is absent.

    An explicit presence check replaces the former bare `except:`, so that
    unrelated failures (I/O errors, a name clash with an existing 'level',
    KeyboardInterrupt, ...) are no longer silently swallowed.
    """
    if 'levels' in ds.dims or 'levels' in ds.variables:
        return ds.rename({'levels': 'level'})
    print('Coordinate name good')
    return ds


# handle each dataset on its own so one cannot end up renamed while the other is not
xr_new_mean = _rename_levels(xr_new_mean)
xr_new_std = _rename_levels(xr_new_std)

# save
# xr_new_mean.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/mean_6h_0.25deg.nc')
# xr_new_std.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/std_6h_0.25deg.nc')

### Change values for existing variables

In [8]:
# existing mean file: path, then open it
old_mean = '/glade/campaign/cisl/aiml/ksha/CREDIT/EXTEND/All_2010_staged.mean.Lev.SLO.nc'
xr_old_mean = xr.open_dataset(old_mean)

# existing std file: path, then open it
old_std = '/glade/campaign/cisl/aiml/ksha/CREDIT/EXTEND/All_2010_staged.std.Lev.SLO.nc'
xr_old_std = xr.open_dataset(old_std)

In [9]:
# Work on copies so the opened "old" datasets keep their original variables
# when the new ones get their variables replaced.
xr_new_mean, xr_new_std = xr_old_mean.copy(), xr_old_std.copy()

In [10]:
varnames = list(xr_old_mean.keys())

# Replace every variable with an identity scaler: mean 0 and std 1, keeping
# each variable's shape, dimensions and coordinates.
# zeros_like / ones_like are used instead of `x - x` and `x / x`, because the
# arithmetic forms yield NaN wherever the old value is NaN/inf or the old std is 0.
for var in varnames:
    xr_new_mean[var] = xr.zeros_like(xr_old_mean[var])
    xr_new_std[var] = xr.ones_like(xr_old_std[var])

In [13]:
# Write the modified mean / std datasets produced by the previous cell to netCDF.
xr_new_mean.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/mean_zero.nc')
xr_new_std.to_netcdf('/glade/campaign/cisl/aiml/ksha/CREDIT/std_one.nc')