In [1]:
import sys
import h5py
from glob import glob
from datetime import datetime, timedelta

import pandas as pd
import numpy as np
import netCDF4 as nc

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Put the project checkout and its libs/ folder at the front of sys.path so the
# project modules imported below can be resolved.
sys.path.insert(0, '/glade/u/home/ksha/NCAR/')
sys.path.insert(0, '/glade/u/home/ksha/NCAR/libs/')

# NOTE(review): later cells use save_dir, report_dir and save_dir_scratch without
# defining them in the visible cells; presumably they come from this star
# import -- confirm, and consider importing them by name.
from namelist import *
import data_utils as du
import graph_utils as gu

In [4]:
# Root directory that is globbed for per-date NetCDF files ('*/*00_00_00.nc').
base_dir = '/glade/p/mmm/parc/sobash/NSC/3KM_WRF_POST_12sec_ts/'


**Get NCAR-500 dates**

In [5]:
# Every matched file name ends with a fixed-width timestamp of this form.
# Slicing it from the END of the path (instead of a hard-coded absolute offset)
# keeps the parsing valid when base_dir is changed to a path of another length.
timestamp_format = '%Y-%m-%d_%H_00_00.nc'
timestamp_width = len('YYYY-MM-DD_HH_00_00.nc')

# Sorted so that date_list and nc_files share the same, reproducible order.
nc_files = sorted(glob(base_dir+'*/*00_00_00.nc'))

# One datetime per file, parsed from the trailing timestamp of its name.
date_list = [
    datetime.strptime(name[-timestamp_width:], timestamp_format)
    for name in nc_files
]

In [6]:
# Spot-check one of the parsed dates.
date_list[1]

datetime.datetime(2010, 10, 25, 0, 0)

**Get local reports on the corresponding dates**

In [7]:
from scipy.spatial import cKDTree

In [8]:
# Read the coarse and 3-km longitude/latitude grids from the domain file.
# NOTE(review): the variables are named *_72km but are read from the '*_80km'
# datasets -- confirm which grid spacing is actually intended.
with h5py.File(save_dir+'HRRR_domain.hdf', 'r') as h5io:
    lon_72km, lat_72km = h5io['lon_80km'][...], h5io['lat_80km'][...]
    lon_3km, lat_3km = h5io['lon_3km'][...], h5io['lat_3km'][...]

In [9]:
# Shape of the coarse grid; used to turn flat KD-tree indices back into 2-D indices.
grid_shape = lon_72km.shape

# KD-tree over the flattened (lon, lat) pairs of the coarse grid for
# nearest-grid-point lookups.
grid_points = np.column_stack((lon_72km.ravel(), lat_72km.ravel()))
gridTree = cKDTree(grid_points)

In [10]:
# First parsed date; not referenced again in the visible cells.
base_day = date_list[0]

In [11]:
# Positions of the columns kept from each raw report CSV, and the names given to them.
preserve_inds = [4, 5, 6, 15, 16]
column_names = ['date', 'time', 'tz', 'slat', 'slon']

# Report categories; their order defines the last axis of the output grid
# (0 = tornado, 1 = wind, 2 = hail).
report_kinds = ['torn', 'wind', 'hail']

# Hours of the day (UTC) for which one output file is written.
lead_hours = list(range(24))

L_v3 = len(date_list)


def _report_time_to_utc(date_str, time_str, tz_code):
    """Parse one report timestamp and shift it to UTC.

    Parameters
    ----------
    date_str, time_str : str
        Date ('%Y-%m-%d') and time ('%H:%M:%S') fields of a report.
    tz_code : int
        Time-zone code of the report; 3 is treated as CST, 9 as GMT.

    Returns
    -------
    datetime or None
        The UTC time, or None when the fields cannot be parsed (typically a
        "?") or the time-zone code is neither 3 nor 9.
    """
    try:
        report_time = datetime.strptime(date_str + '|' + time_str, '%Y-%m-%d|%H:%M:%S')
    except (TypeError, ValueError):
        # non-string field (e.g. NaN) or a string that is not a valid timestamp
        return None
    if tz_code == 3:
        return report_time + timedelta(hours=6)  # "3" means CST
    if tz_code == 9:
        return report_time  # "9" means GMT
    return None  # unknown time zone


def _load_reports(csv_path):
    """Read one report CSV and keep only the date/time/tz/slat/slon columns."""
    df = pd.read_csv(csv_path).iloc[:, preserve_inds]
    df.columns = column_names
    return df


# Position(s) of every model date inside date_list, so a report day can be
# matched with one dictionary lookup instead of scanning the whole list.
date_to_inds = {}
for dt_ind, dt_model in enumerate(date_list):
    date_to_inds.setdefault(dt_model, []).append(dt_ind)

# ---------- Pass 1: read and grid every report once ---------- #
# hits_by_lead[hour] collects (date index, grid x index, grid y index, category)
hits_by_lead = {lead: [] for lead in lead_hours}

for y in range(2010, 2022):
    year = str(y)

    for c, kind in enumerate(report_kinds):

        # Raw report file of this year and category
        file_report = sorted(glob(report_dir+'{}_{}.csv'.format(year, kind)))[0]
        temp_array = _load_reports(file_report).values

        # UTC time of every row (None where it cannot be determined)
        utc_times = [_report_time_to_utc(row[0], row[1], row[2]) for row in temp_array]
        has_time = np.array([t is not None for t in utc_times], dtype=bool)

        slon = temp_array[:, 4]
        slat = temp_array[:, 3]

        # Keep rows that pass the location filter AND have a usable time.
        # Row indices are carried along so that each time stays paired with
        # its own location after filtering.
        flag_pick = np.logical_and(slon < -20, slat > 5)
        row_inds = np.flatnonzero(np.logical_and(flag_pick, has_time))

        if len(row_inds) == 0:
            continue

        # Nearest grid point of every kept report
        points = np.column_stack((slon[row_inds], slat[row_inds])).astype(float)
        _, flat_inds = gridTree.query(points)
        indx, indy = np.unravel_index(flat_inds, grid_shape)

        for k, row in enumerate(row_inds):
            utc_time = utc_times[row]

            lead_hits = hits_by_lead.get(utc_time.hour)
            if lead_hits is None:
                continue

            # A report belongs to a model date when its UTC calendar day equals that date
            report_day = datetime(utc_time.year, utc_time.month, utc_time.day)
            for dt_ind in date_to_inds.get(report_day, ()):
                lead_hits.append((dt_ind, indx[k], indy[k], c))

# ---------- Pass 2: one gridded file per lead hour ---------- #
for lead in lead_hours:

    print('========== Processing lead time = {} =========='.format(lead))

    # Zero-initialised: grid cells without any report must read 0.0
    # (np.empty would leave arbitrary values in them).
    torn_grid_v3 = np.zeros((L_v3,)+lon_72km.shape+(3,))

    for dt_ind, ix, iy, c in hits_by_lead[lead]:
        torn_grid_v3[dt_ind, ix, iy, c] = 1.0

    tuple_save = (torn_grid_v3, )
    label_save = ['record_v3',]
    du.save_hdf5(tuple_save, label_save, save_dir_scratch, 'SPC_x_NCAR500_to_lead{}_72km_all.hdf'.format(lead))

Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead0_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead1_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead2_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead3_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead4_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead5_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead6_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead7_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead8_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead9_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead10_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead11_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead12_72km_all.hdf
Save to /glade/scratch/ksha/DRIVE/SPC_x_NCAR500_to_lead13_72km_all.hdf
Save to /glade/s

In [26]:
# Take the first index along the leading axis of the latitude/longitude
# variables stored in the first model file.
with nc.Dataset(nc_files[0], 'r') as ncio:
    XLAT, XLON = (np.array(ncio[key][0, ...]) for key in ('XLAT', 'XLONG'))

In [16]:
# Read the 3-km longitude/latitude grids and the matching land mask from the domain file.
with h5py.File(save_dir+'HRRR_domain.hdf', 'r') as h5io:
    lon_3km, lat_3km, land_mask_3km = (
        h5io[key][...] for key in ('lon_3km', 'lat_3km', 'land_mask_3km')
    )

In [37]:
# Interpolate the 3-km land mask onto the XLON/XLAT grid with method='linear'.
# NOTE(review): du.interp2d_wraper is defined outside this notebook; the argument
# order is assumed to be (source lon, source lat, source field, target lon,
# target lat) -- confirm.
land_mask_ncar = du.interp2d_wraper(lon_3km, lat_3km, land_mask_3km, XLON, XLAT, method='linear')

In [38]:
# Turn the interpolated mask into a boolean one by thresholding at 0.5
# (presumably the linear interpolation yields fractional values -- confirm).
land_mask_ncar = land_mask_ncar > 0.5

In [41]:
# save_dir = '/glade/work/ksha/NCAR/'

# tuple_save = (XLON, XLAT, land_mask_ncar)

# label_save = ['lon_ncar', 'lat_ncar', 'land_mask_ncar']

# du.save_hdf5(tuple_save, label_save, save_dir, 'NCAR_domain.hdf')

In [18]:
# Read the 3-km and 80-km coordinate grids together with both land masks.
with h5py.File(save_dir+'HRRR_domain.hdf', 'r') as h5io:
    lon_3km, lat_3km = h5io['lon_3km'][...], h5io['lat_3km'][...]
    lon_80km, lat_80km = h5io['lon_80km'][...], h5io['lat_80km'][...]
    land_mask_80km, land_mask_3km = h5io['land_mask_80km'][...], h5io['land_mask_3km'][...]

In [22]:
# Names of the variables to read from the model NetCDF files.
var_list = [
    'REFL_COM', 'PSFC', 'UP_HELI_MAX', 'UP_HELI_MAX03', 'GRPL_MAX',
    'T2', 'TD2', 'WSPD10MAX', 'PREC_ACC_NC', 'SBCAPE',
    'SBCINH', 'SRH03', 'SRH01', 'USHR6', 'VSHR6',
]

In [23]:
# For every name in var_list, take the first index along the leading axis of
# that variable in the first model file and store it under the variable name.
with nc.Dataset(nc_files[0], 'r') as ncio:
    dict_temp = {var: ncio[var][0, ...] for var in var_list}
