In [1]:
import sys
from glob import glob

import time
import h5py
import zarr
import numpy as np
import pandas as pd

In [2]:
# Put the project root and its libs/ directory at the front of the import path
# so the two project modules below can be imported.
sys.path.insert(0, '/glade/u/home/ksha/NCAR/')
sys.path.insert(0, '/glade/u/home/ksha/NCAR/libs/')

# NOTE(review): the wildcard import hides where names come from. `save_dir` and
# `save_dir_scratch`, used by later cells, are presumably defined in namelist — confirm.
from namelist import *
import data_utils as du

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
from datetime import datetime, timedelta

In [5]:
import subprocess

In [6]:
# Lead value used to pick the input files ('HRRR_{:02}_v3.zarr', 'SPC_to_lead{}.hdf')
# and embedded in every output file name.
# Units are not stated in this notebook — presumably forecast hours; TODO confirm.
lead = 9

### Import HRRR v3

In [7]:
# Load the HRRR v3 array for the selected lead and report the wall-clock time.
# The recorded run below took about 30 minutes, so avoid re-running this cell needlessly.
start_time = time.time()

zarr_path = save_dir_scratch+'HRRR_{:02}_v3.zarr'.format(lead)
HRRRv3_lead = zarr.load(zarr_path)

elapsed = time.time() - start_time
print("--- %s seconds ---" % elapsed)

--- 1815.733027935028 seconds ---


In [9]:
# Read the full 'record_v3' dataset for this lead into memory.
spc_path = save_dir_scratch+'SPC_to_lead{}.hdf'.format(lead)
with h5py.File(spc_path, 'r') as spc_file:
    record_v3 = spc_file['record_v3'][...]

In [36]:
# Read the longitude/latitude arrays stored under the *_3km and *_72km keys
# of the domain file.
domain_path = save_dir+'HRRR_domain.hdf'
with h5py.File(domain_path, 'r') as domain_file:
    lon_3km, lat_3km, lon_72km, lat_72km = [
        domain_file[key][...]
        for key in ('lon_3km', 'lat_3km', 'lon_72km', 'lat_72km')
    ]

In [56]:
# ---------------------------------------------------------------------------
# Negative-sample generation.
#
# For every record index i that holds an observation (non-NaN lon/lat), walk
# forward over the consecutive following indices k that hold NO observation
# and, for each of them, cut input patches out of HRRRv3_lead[k] around the
# grid point of observation i. Each patch is normalized and written to
# batch_dir_neg as its own .npy file.
# ---------------------------------------------------------------------------
batch_dir_neg = '/glade/scratch/ksha/DATA/NCAR_batch_neg/'

# Indices into the last axis of HRRRv3_lead for the predictors that are kept.
features_pick = [10, 13, 18, 19, 20]

# Fixed normalization constants, applied to output channels 0, 2, 3 and 4
# (channel 1 is log-transformed instead).
mean_dp = 280
std_dp = 12

mean_srh = 41
std_srh = 80

mean_ushear = 3
std_ushear = 5

mean_vshear = 0.2
std_vshear = 5

grid_shape = record_v3.shape

L_vars = len(features_pick)

# A target box of target_size grid points sits inside an input patch of
# input_size grid points; half_margin is the padding on each side.
target_size = 24
input_size = 128

half_margin = int((input_size - target_size) / 2)

# Extent of axes 1 and 2 of HRRRv3_lead; used to reject patches that would
# run off the grid.
grid_shape_input = (1059, 1799)

# Placeholders are (running sample counter, integer magnitude of observation i).
prefix_train = 'TRAIN_neg_{}_mag{}_lead'+str(lead)+'.npy'
prefix_valid = 'VALID_neg_{}_mag{}_lead'+str(lead)+'.npy'

# Record indices below L_train are written with the TRAIN prefix, the rest
# with the VALID prefix.
L_train = np.min([675, grid_shape[0]])
L_valid = grid_shape[0]-L_train

# Reusable output buffer; every channel is overwritten before each save.
out_slice = np.empty((1, input_size, input_size, L_vars))

count = 0

# The last index is excluded because each i needs at least one successor k.
for i in range(grid_shape[0]-1):

    # NOTE(review): the prefix follows i, but the data are read at index k,
    # which can be >= L_train even when i < L_train. If the split is meant to
    # be strictly by index, this leaks validation-range data into TRAIN files
    # — confirm the intended behavior.
    prefix = prefix_train if i < L_train else prefix_valid

    # Columns 0/1/2 of the record are read as longitude, latitude, magnitude.
    lon_temp = record_v3[i, 0]
    lat_temp = record_v3[i, 1]
    mag_temp = record_v3[i, 2]

    # NaN in either coordinate makes the sum NaN, i.e. "no observation here".
    flag_obs = lon_temp + lat_temp
    if np.isnan(flag_obs):
        continue

    # Grid indices of the observation on the 3-km grid.
    indx_3km, indy_3km = du.grid_search(lon_3km, lat_3km, np.array(lon_temp)[None], np.array(lat_temp)[None])
    indx_3km = indx_3km[0]
    indy_3km = indy_3km[0]

    # The candidate windows depend only on the observation location, not on
    # k, so they are computed once per i. The target box is shifted in steps
    # of 4 grid points along both axes (6 x 6 candidates); windows that would
    # extend past the grid are dropped.
    windows = []
    for augx in range(0, target_size, 4):
        for augy in range(0, target_size, 4):

            x_edge_left = indx_3km - augx - half_margin
            x_edge_right = indx_3km + (target_size - augx) + half_margin

            y_edge_bottom = indy_3km - augy - half_margin
            y_edge_top = indy_3km + (target_size - augy) + half_margin

            if x_edge_left >= 0 and y_edge_bottom >= 0 and x_edge_right <= grid_shape_input[0] and y_edge_top <= grid_shape_input[1]:
                windows.append((x_edge_left, x_edge_right, y_edge_bottom, y_edge_top))

    # Scan forward while the following records are empty; stop at the first
    # record that holds an observation or at the end of the record.
    k = i + 1
    while k < grid_shape[0]:

        lon_temp_neg = record_v3[k, 0]
        lat_temp_neg = record_v3[k, 1]

        flag_obs_neg = lon_temp_neg + lat_temp_neg
        if not np.isnan(flag_obs_neg):
            break

        for x_edge_left, x_edge_right, y_edge_bottom, y_edge_top in windows:

            for v, ind_var in enumerate(features_pick):
                out_slice[..., v] = HRRRv3_lead[k, x_edge_left:x_edge_right, y_edge_bottom:y_edge_top, ind_var]

            # ----- Normalization ----- #
            out_slice[..., 0] = (out_slice[..., 0] - mean_dp) / std_dp
            out_slice[..., 1] = np.log(out_slice[..., 1]+1)
            out_slice[..., 2] = (out_slice[..., 2] - mean_srh) / std_srh
            out_slice[..., 3] = (out_slice[..., 3] - mean_ushear) / std_ushear
            out_slice[..., 4] = (out_slice[..., 4] - mean_vshear) / std_vshear
            # ------------------------- #

            # NOTE(review): int(mag_temp) raises if the magnitude is NaN while
            # lon/lat are valid — assumed not to happen; verify against the data.
            save_name = batch_dir_neg+prefix.format(count, int(mag_temp))
            print(save_name)
            np.save(save_name, out_slice)

            count += 1

        k += 1



In [59]:
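# NOTE: everything in this cell is a disabled copy of the sample-generation loop in the previous cell.
# count = 0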

# for i in range(grid_shape[0]-1):

#     if i < L_train:
#         prefix = prefix_train
#     else:
#         prefix = prefix_valid

#     lon_temp = record_v3[i, 0]
#     lat_temp = record_v3[i, 1]
#     mag_temp = record_v3[i, 2]

#     flag_obs = lon_temp + lat_temp

#     if np.logical_not(np.isnan(flag_obs)):

#         k = i + 1

#         lon_temp_neg = record_v3[k, 0]
#         lat_temp_neg = record_v3[k, 1]

#         flag_obs_neg = lon_temp_neg + lat_temp_neg
        
#         indx_3km, indy_3km = du.grid_search(lon_3km, lat_3km, np.array(lon_temp)[None], np.array(lat_temp)[None])
#         indx_3km = indx_3km[0]
#         indy_3km = indy_3km[0]
        
#         while np.isnan(flag_obs_neg) and k < grid_shape[0]:
#             #print(k)
            
#             for augx in range(0, target_size, 4):
#                 for augy in range(0, target_size, 4):

#                     x_margin_left = augx
#                     x_margin_right = target_size - augx

#                     y_margin_bottom = augy
#                     y_margin_top = target_size - augy

#                     x_edge_left = indx_3km - x_margin_left - half_margin
#                     x_edge_right = indx_3km + x_margin_right + half_margin

#                     y_edge_bottom = indy_3km - y_margin_bottom - half_margin
#                     y_edge_top = indy_3km + y_margin_top + half_margin
                    

#                     if x_edge_left >= 0 and y_edge_bottom >= 0 and x_edge_right <= grid_shape_input[0] and y_edge_top <= grid_shape_input[1]:
#                         for v, ind_var in enumerate(features_pick):
#                             out_slice[..., v] = HRRRv3_lead[k, x_edge_left:x_edge_right, y_edge_bottom:y_edge_top, ind_var]

#                         # ----- Normalization ----- #
#                         out_slice[..., 0] = (out_slice[..., 0] - mean_dp) / std_dp
#                         out_slice[..., 1] = np.log(out_slice[..., 1]+1)
#                         out_slice[..., 2] = (out_slice[..., 2] - mean_srh) / std_srh
#                         out_slice[..., 3] = (out_slice[..., 3] - mean_ushear) / std_ushear
#                         out_slice[..., 4] = (out_slice[..., 4] - mean_vshear) / std_vshear
#                         # ------------------------- #

#                         save_name = batch_dir_neg+prefix.format(count, int(mag_temp))
#                         print(save_name)
#                         np.save(save_name, out_slice)

#                         count += 1

#             k += 1
#             if k < grid_shape[0]:
#                 lon_temp_neg = record_v3[k, 0]
#                 lat_temp_neg = record_v3[k, 1]

#                 flag_obs_neg = lon_temp_neg + lat_temp_neg
#             else:
#                 flag_obs_neg = 999


In [34]:
# Scratch check: is the scan index k still a valid index along the first axis of record_v3?
k < grid_shape[0]

True

### Import tornado observations

In [16]:
# Read the 'record_v3' dataset for this lead (same file and dataset as the
# earlier cell that populates record_v3).
record_path = save_dir_scratch+'SPC_to_lead{}.hdf'.format(lead)
with h5py.File(record_path, 'r') as record_file:
    record_v3 = record_file['record_v3'][...]

In [9]:
# Inspect the array dimensions; the last axis is the one indexed by features_pick.
HRRRv3_lead.shape

(872, 1059, 1799, 23)

### Import domain info

In [52]:
# Read the four coordinate arrays from the domain file, one dataset per key.
grid_file_path = save_dir+'HRRR_domain.hdf'
with h5py.File(grid_file_path, 'r') as grid_file:
    lon_3km, lat_3km = grid_file['lon_3km'][...], grid_file['lat_3km'][...]
    lon_72km, lat_72km = grid_file['lon_72km'][...], grid_file['lat_72km'][...]

### Predictors

In [88]:
# Descriptive labels for the predictor fields, one per position (23 entries,
# matching the length of the last axis of HRRRv3_lead shown above).
# features_pick indexes into this list to report which predictors are used.
names = [
    'Maximum/Composite radar reflectivity:dB (instant):lambert:atmosphere:level 0',
    'MSLP (MAPS System Reduction):Pa (instant):lambert:meanSea:level 0',
    '1016:198:198 (max):lambert:heightAboveGround:level 1000 m',
    '1018:199:199 (max):lambert:heightAboveGroundLayer:levels 5000-2000 m',
    '1020:199:199 (max):lambert:heightAboveGroundLayer:levels 2000-0 m',
    '1022:199:199 (max):lambert:heightAboveGroundLayer:levels 3000-0 m',
    'Vorticity (relative):s**-1 (max):lambert:heightAboveGroundLayer:levels 2000-0 m',
    'Vorticity (relative):s**-1 (max):lambert:heightAboveGroundLayer:levels 1000-0 m',
    '1028:74:74 (max):lambert:atmosphereSingleLayer:level 0 considered as a single layer',
    '2 metre temperature:K (instant):lambert:heightAboveGround:level 2 m',
    '2 metre dewpoint temperature:K (instant):lambert:heightAboveGround:level 2 m',
    '10 metre U wind component:m s**-1 (instant):lambert:heightAboveGround:level 10 m',
    '10 metre V wind component:m s**-1 (instant):lambert:heightAboveGround:level 10 m',
    '10 metre wind speed:m s**-1 (max):lambert:heightAboveGround:level 10 m',
    'Total Precipitation:kg m**-2 (accum):lambert:surface:level 0',
    'Convective available potential energy:J kg**-1 (instant):lambert:surface:level 0',
    'Convective inhibition:J kg**-1 (instant):lambert:surface:level 0',
    'Storm relative helicity:J kg**-1 (instant):lambert:heightAboveGroundLayer:levels 3000-0 m',
    'Storm relative helicity:J kg**-1 (instant):lambert:heightAboveGroundLayer:levels 1000-0 m',
    'Vertical u-component shear:s**-1 (instant):lambert:heightAboveGroundLayer:levels 0-1000 m',
    'Vertical v-component shear:s**-1 (instant):lambert:heightAboveGroundLayer:levels 0-1000 m',
    'Vertical u-component shear:s**-1 (instant):lambert:heightAboveGroundLayer:levels 0-6000 m',
    'Vertical v-component shear:s**-1 (instant):lambert:heightAboveGroundLayer:levels 0-6000 m'
]

In [89]:
# Positions (within `names`) of the predictors that are kept.
features_pick = [10, 13, 18, 19, 20]

# List the chosen predictors, one tab-indented label per line.
print("the following predictors are selected:\n")
print('\n'.join('\t'+names[ind] for ind in features_pick))

the following predictors are selected:

	2 metre dewpoint temperature:K (instant):lambert:heightAboveGround:level 2 m
	10 metre wind speed:m s**-1 (max):lambert:heightAboveGround:level 10 m
	Storm relative helicity:J kg**-1 (instant):lambert:heightAboveGroundLayer:levels 1000-0 m
	Vertical u-component shear:s**-1 (instant):lambert:heightAboveGroundLayer:levels 0-1000 m
	Vertical v-component shear:s**-1 (instant):lambert:heightAboveGroundLayer:levels 0-1000 m


In [90]:
# Fixed (mean, std) pairs used to standardize predictor channels:
# dp -> channel 0, srh -> channel 2, ushear -> channel 3, vshear -> channel 4.
mean_dp, std_dp = 280, 12
mean_srh, std_srh = 41, 80
mean_ushear, std_ushear = 3, 5
mean_vshear, std_vshear = 0.2, 5


In [91]:
# Shape of the observation record and the sizes derived from it.
grid_shape = record_v3.shape

# Number of complete groups of three along the last axis, times the length
# of the first axis.
L_max = (grid_shape[-1] // 3) * grid_shape[0]

# One output channel per selected predictor.
L_vars = len(features_pick)

In [92]:
# Patch geometry: a target box of `target_size` grid points is centered in an
# input patch of `input_size` grid points, leaving `half_margin` on each side.
input_size = 128
target_size = 24
half_margin = (input_size - target_size) // 2

# Extent of axes 1 and 2 of HRRRv3_lead.
grid_shape_input = (1059, 1799)

In [115]:
# Output location and file-name templates for the positive samples.
# The two '{}' placeholders are filled with str.format(); the lead value is
# baked into the template.
batch_dir = '/glade/scratch/ksha/DATA/NCAR_batch/'
prefix_train = 'TRAIN_pos_{}_mag{}_lead'+str(lead)+'.npy'
# Fixed: this line previously reassigned prefix_train, which left the TRAIN
# template overwritten by the VALID one and prefix_valid undefined in this cell.
prefix_valid = 'VALID_pos_{}_mag{}_lead'+str(lead)+'.npy'

In [119]:
# Hard-coded train/validation split sizes; 800 + 72 = 872, the first-axis
# length reported for HRRRv3_lead above.
L_train, L_valid = 800, 72

In [123]:
# Scratch check: cap the training length of 800 at the number of records.
np.min([800, grid_shape[0]])

800