# Dynamical data preprocessing for operational forecast

Requirements: Python 2, PyNIO, numpy, scipy, pandas

Steps:
0. Purge the `./dynamical_data` folder. Delete old versions of the output CSV files if needed.
1. Download from [NCEP FNL](https://rda.ucar.edu/datasets/ds083.2/index.html#!description) the following within the 24-hour period you want:   
  - 500 hPa geopotential height (geop)
  - 850 hPa absolute vorticity (vort)
  - 1000, 850, 500, 200 hPa u and v winds (wind)
  - 750-800 and 300-500 hPa relative humidity (humid)
  - surface and 200 hPa temperature (surface and temp)
  - sigma level 0.995 potential temperature (pott)
  - over the area bounded by 53E, 8S and 164W, 55N.
  
2. Extract the downloaded gzip files to obtain GRIB2 files. Place them inside `./dynamical_data`, sort them into different folders, and note their file names.
3. Proceed to the following.

In [1]:
import numpy as np
import pandas as pd
import Nio
import datetime
import os
import math
import csv

def save_dict_to_csv(data, filename):
    '''
    Takes in dictionary and a full filename (with extensions) and writes the dictionary to the specified file as a CSV (no header).
    '''
    with open(filename, 'wb') as f:
        w = csv.writer(f)
        w.writerows(data.items())

Step 4: Provide some information about the TC's wherabouts in lat/lon. Also add the date/time, that will be necessary.

In [2]:
# information to provide
YY, MM, DD, HH = 2021, 10, 11, 18

LAT00, LON00 = 18.8, 120.5
LAT06, LON06 = 18.9, 121.6
LAT12, LON12 = 18.9, 123.1
LAT18, LON18 = 18.5, 124.2
LAT24, LON24 = 18.8, 124.9

lat = [LAT00,LAT06,LAT12,LAT18,LAT24]
lon = [LON00,LON06,LON12,LON18,LON24]
timestamp = datetime.datetime(YY, MM, DD, HH)
delta_fixee = datetime.timedelta(hours=6)
extension = "grib2"

Step 5: Cross your fingers and wait for it to finish running.

In [3]:
# Geopotential height
# bookkeeping
wnpsh_area_indices = dict()
wnpsh_intensity_indices = dict()
wnpsh_extension_indices = dict()
westerly_indices = dict()

# constants
var_name = "HGT_P0_L100_GLL0"
latlon_suffix = 0

data_time = timestamp - datetime.timedelta(hours=24)
while data_time <= timestamp:    
    # generate target file name
    time_string = data_time.strftime("%Y%m%d_%H_%M")
    filename = "./dynamical_data/geop/fnl_{0}.{1}".format(time_string, extension)
    
    geop = Nio.open_file(filename, mode='r')

    wnpsh = geop.variables[var_name]["lat_{0}|10:60 lon_{0}|100:180".format(latlon_suffix)]
    area_index = np.count_nonzero(wnpsh > 5870.0)
    intensity_index = 0 if area_index == 0 else np.average(wnpsh, weights=(wnpsh > 5870.0))    

    everything = geop.variables[var_name].get_value()
    extension_index = 0 if area_index == 0 else min(np.argwhere(np.any(everything > 5870.0, axis=0))) + 53
    extension_index = int(extension_index)

    westerly = geop.variables[var_name]["lat_{0}|35,55 lon_{0}|100:180".format(latlon_suffix)]
    westerly_index = np.average(westerly[0] - westerly[1])
    
    geop.close()
    
    wnpsh_area_indices[time_string] = area_index
    wnpsh_intensity_indices[time_string] = intensity_index
    wnpsh_extension_indices[time_string] = extension_index
    westerly_indices[time_string] = westerly_index

    data_time += delta_fixee
    
print wnpsh_area_indices
print wnpsh_intensity_indices
print wnpsh_extension_indices
print westerly_indices

{'20211011_00_00': 22, '20211011_06_00': 0, '20211011_12_00': 162, '20211010_18_00': 0, '20211011_18_00': 3}
{'20211011_00_00': 5871.817, '20211011_06_00': 0, '20211011_12_00': 5871.3228, '20211010_18_00': 0, '20211011_18_00': 5870.89}
{'20211011_00_00': 53, '20211011_06_00': 0, '20211011_12_00': 53, '20211010_18_00': 0, '20211011_18_00': 53}
{'20211011_00_00': 477.07355, '20211011_06_00': 476.99432, '20211011_12_00': 484.0822, '20211010_18_00': 469.44098, '20211011_18_00': 483.59033}


In [7]:
# winds
hk_u_winds = dict()
hk_v_winds = dict()
easm_indices = dict()

u200_values = dict()
u500_values = dict()
v500_values = dict()
ulvws_values = dict() # Upper-Lower levels Vertical Wind Shear magnitudes
mlvws_values = dict() # Middle-Lower levels Vertical Wind Shear magnitudes

# constants
u_var_name = "UGRD_P0_L100_GLL0"
v_var_name = "VGRD_P0_L100_GLL0"
latlon_suffix = 0

data_time = timestamp - datetime.timedelta(hours=24)
time_step = 0
while data_time <= timestamp: 
    time_string = data_time.strftime("%Y%m%d_%H_%M")
    filename = "./dynamical_data/wind/fnl_{0}.{1}".format(time_string, extension)
    
    wind = Nio.open_file(filename, mode='r')

    # hong kong u wind
    hk_u = wind.variables[u_var_name]["lat_{0}|22.30i lon_{0}|114.17i lv_ISBL0|1000".format(latlon_suffix)]

    # hong kong v wind
    hk_v = wind.variables[v_var_name]["lat_{0}|22.30i lon_{0}|114.17i lv_ISBL0|1000".format(latlon_suffix)]

    # easm index
    u850_1 = wind.variables[u_var_name]["lv_ISBL0|850 lon_{0}|90:130".format(latlon_suffix)][(8+5):(8+15),:]
    u850_2 = wind.variables[u_var_name]["lv_ISBL0|850 lon_{0}|110:140".format(latlon_suffix)][(8+23):(8+33),:]
    easm_idx = np.average(u850_1) - np.average(u850_2)

    wind.close()
    
    # bookkeeping
    hk_u_winds[time_string] = hk_u
    hk_v_winds[time_string] = hk_v
    easm_indices[time_string] = easm_idx
    
    wind = Nio.open_file(filename, mode='r')

    # identify TC center
    center_lat, center_lon = int(round(lat[time_step])), lon[time_step]
    # negative longitudes do not go well with the addressing
    if lon[time_step] > 0:
        center_lon = int(round(lon[time_step]))
    elif int(round(lon[time_step])) == -180:
        center_lon = 180
    else:
        center_lon = int((round(lon[time_step])) + 360) % 180
       
    # U200 
    u200 = wind.variables[u_var_name]["lv_ISBL0|200"][(center_lat-6+8):(center_lat+6+8),(center_lon-6-53):(center_lon+6-53)]
    u200_avg = np.average(u200)
    u200_values[time_string] = u200_avg

    # U500 and V500
    u500 = wind.variables[u_var_name]["lv_ISBL0|500"][(center_lat-6+8):(center_lat+6+8),(center_lon-6-53):(center_lon+6-53)]
    u500_avg = np.average(u500)
    u500_values[time_string] = u500_avg
    v500 = wind.variables[v_var_name]["lv_ISBL0|500"][(center_lat-6+8):(center_lat+6+8),(center_lon-6-53):(center_lon+6-53)]
    v500_avg = np.average(v500)
    v500_values[time_string] = v500_avg
    
    # vertical wind shear
    # get V200, U850 and V850 first
    v200 = wind.variables[v_var_name]["lv_ISBL0|200"][(center_lat-6+8):(center_lat+6+8),(center_lon-6-53):(center_lon+6-53)]
    u850 = wind.variables[u_var_name]["lv_ISBL0|850"][(center_lat-6+8):(center_lat+6+8),(center_lon-6-53):(center_lon+6-53)]
    v850 = wind.variables[v_var_name]["lv_ISBL0|850"][(center_lat-6+8):(center_lat+6+8),(center_lon-6-53):(center_lon+6-53)]
    # calculate averages
    v200_avg = np.average(v200)
    u850_avg = np.average(u850)
    v850_avg = np.average(v850)
    # upper-lower wind shear
    hi_low_shear_u = u200_avg - u850_avg
    hi_low_shear_v = v200_avg - v850_avg
    hi_low_shear = math.sqrt(hi_low_shear_u ** 2 + hi_low_shear_v ** 2) # magnitude
    ulvws_values[time_string] = hi_low_shear
    # mid-lower wind shear
    mid_low_shear_u = u500_avg - u850_avg
    mid_low_shear_v = v500_avg - v850_avg
    mid_low_shear = math.sqrt(mid_low_shear_u ** 2 + mid_low_shear_v ** 2) # magnitude
    mlvws_values[time_string] = mid_low_shear

    wind.close()    

    time_step += 1
    data_time += delta_fixee
    
print hk_u_winds
print hk_v_winds
print easm_indices
print u200_values
print u500_values
print v500_values
print ulvws_values
print mlvws_values

{'20211011_00_00': masked_array(data=-9.11329997,
             mask=False,
       fill_value=array([1.00000002e+20])), '20211011_06_00': masked_array(data=-3.84540007,
             mask=False,
       fill_value=array([1.00000002e+20])), '20211011_12_00': masked_array(data=-3.02400012,
             mask=False,
       fill_value=array([1.00000002e+20])), '20211010_18_00': masked_array(data=-7.42980001,
             mask=False,
       fill_value=array([1.00000002e+20])), '20211011_18_00': masked_array(data=-10.4715005,
             mask=False,
       fill_value=array([1.00000002e+20]))}
{'20211011_00_00': masked_array(data=-4.51670005,
             mask=False,
       fill_value=array([1.00000002e+20])), '20211011_06_00': masked_array(data=5.10499998,
             mask=False,
       fill_value=array([1.00000002e+20])), '20211011_12_00': masked_array(data=2.87040009,
             mask=False,
       fill_value=array([1.00000002e+20])), '20211010_18_00': masked_array(data=-10.99580019,
      

In [8]:
# relative humidity
lo_humid_values = dict()
hi_humid_values = dict()

# constants
var_name = "RH_P0_L100_GLL0"
latlon_suffix = 0

data_time = timestamp - datetime.timedelta(hours=24)
time_step = 0
while data_time <= timestamp: 
    # identify TC center
    center_lat, center_lon = int(round(lat[time_step])), lon[time_step]
    # negative longitudes do not go well with the addressing
    if lon[time_step] > 0:
        center_lon = int(round(lon[time_step]))
    elif int(round(lon[time_step])) == -180:
        center_lon = 180
    else:
        center_lon = int((round(lon[time_step])) + 360) % 180
        
    time_string = data_time.strftime("%Y%m%d_%H_%M")
    filename = "./dynamical_data/humid/fnl_{0}.{1}".format(time_string, extension)
    
    humid = Nio.open_file(filename, mode='r')

    # 300mb through 500mb
    values = list()
    for j in range(300,550,50):
        grid = humid.variables[var_name]["lv_ISBL0|{0}".format(j)][(center_lat-6+8):(center_lat+6+8),(center_lon-6-53):(center_lon+6-53)]
        values.append(grid)
    values = np.stack(values)
    hi_humid = np.average(values)
    hi_humid_values[time_string] = hi_humid

    # 750-800mb
    values = list()
    for j in range(750,800,50):
        grid = humid.variables[var_name]["lv_ISBL0|{0}".format(j)][(center_lat-6+8):(center_lat+6+8),(center_lon-6-53):(center_lon+6-53)]
        values.append(grid)
    values = np.stack(values)
    lo_humid_values[time_string] = np.average(values)

    humid.close()
        
    time_step += 1
    data_time += delta_fixee
    
print lo_humid_values
print hi_humid_values

{'20211011_00_00': 53.655556, '20211011_06_00': 55.403473, '20211011_12_00': 57.87431, '20211010_18_00': 60.445835, '20211011_18_00': 62.506943}
{'20211011_00_00': 53.655556, '20211011_06_00': 55.403473, '20211011_12_00': 57.874306, '20211010_18_00': 60.445835, '20211011_18_00': 62.506943}


In [9]:
# temperature
surface_temp_values = dict()
temp200_values = dict()

# constants
sfc_var_name = "TMP_P0_L1_GLL0"
upper_var_name = "TMP_P0_L100_GLL0"
latlon_suffix = 0

data_time = timestamp - datetime.timedelta(hours=24)
time_step = 0
while data_time <= timestamp: 
    # identify TC center
    center_lat, center_lon = int(round(lat[time_step])), lon[time_step]
    # negative longitudes do not go well with the addressing
    if lon[time_step] > 0:
        center_lon = int(round(lon[time_step]))
    elif int(round(lon[time_step])) == -180:
        center_lon = 180
    else:
        center_lon = int((round(lon[time_step])) + 360) % 180
        
    time_string = data_time.strftime("%Y%m%d_%H_%M")
    filename = "./dynamical_data/surface/fnl_{0}.{1}".format(time_string, extension)
    
    surface = Nio.open_file(filename, mode='r')
    values = surface.variables[sfc_var_name].get_value()[(center_lat-1+8):(center_lat+1+8),(center_lon-1-53):(center_lon+1-53)]
    surface_temp = np.average(values)
    surface_temp_values[time_string] = surface_temp
    surface.close()
    
    filename = "./dynamical_data/temp/fnl_{0}.{1}".format(time_string, extension)
    temp = Nio.open_file(filename, mode='r')
    values = temp.variables[upper_var_name]["lv_ISBL0|200"][(center_lat-6+8):(center_lat+6+8),(center_lon-6-53):(center_lon+6-53)]
    temp_200 = np.average(values)
    temp200_values[time_string] = temp_200
    temp.close()
    
    time_step += 1
    data_time += delta_fixee
    
print surface_temp_values
print temp200_values

{'20211011_00_00': 298.65, '20211011_06_00': 300.52502, '20211011_12_00': 300.8, '20211010_18_00': 294.875, '20211011_18_00': 300.825}
{'20211011_00_00': 222.49818, '20211011_06_00': 222.86102, '20211011_12_00': 223.54416, '20211010_18_00': 222.43394, '20211011_18_00': 223.52556}


In [10]:
# vorticity
vort_values = dict()

# constants
var_name = "ABSV_P0_L100_GLL0"
latlon_suffix = 0

data_time = timestamp - datetime.timedelta(hours=24)
time_step = 0
while data_time <= timestamp: 
    # identify TC center
    center_lat, center_lon = int(round(lat[time_step])), lon[time_step]
    # negative longitudes do not go well with the addressing
    if lon[time_step] > 0:
        center_lon = int(round(lon[time_step]))
    elif int(round(lon[time_step])) == -180:
        center_lon = 180
    else:
        center_lon = int((round(lon[time_step])) + 360) % 180
        
    time_string = data_time.strftime("%Y%m%d_%H_%M")
    filename = "./dynamical_data/vort/fnl_{0}.{1}".format(time_string, extension)
    
    vort = Nio.open_file(filename, mode='r')
    values = vort.variables[var_name].get_value()[(center_lat-1+8):(center_lat+1+8),(center_lon-1-53):(center_lon+1-53)]
    vort.close()
    vort850 = np.average(values)
    vort_values[time_string] = vort850
    
    time_step += 1
    data_time += delta_fixee
    
print vort_values

{'20211011_00_00': 5.8499998e-05, '20211011_06_00': 7.675e-05, '20211011_12_00': 6.025e-05, '20211010_18_00': 3.7749996e-05, '20211011_18_00': 5.7e-05}


In [11]:
# potential temp.
pott_values = dict()

# constants
var_name = "POT_P0_L104_GLL0"
latlon_suffix = 0

data_time = timestamp - datetime.timedelta(hours=24)
time_step = 0
while data_time <= timestamp: 
    # identify TC center
    center_lat, center_lon = int(round(lat[time_step])), lon[time_step]
    # negative longitudes do not go well with the addressing
    if lon[time_step] > 0:
        center_lon = int(round(lon[time_step]))
    elif int(round(lon[time_step])) == -180:
        center_lon = 180
    else:
        center_lon = int((round(lon[time_step])) + 360) % 180
        
    time_string = data_time.strftime("%Y%m%d_%H_%M")
    filename = "./dynamical_data/pott/fnl_{0}.{1}".format(time_string, extension)
    
    pott = Nio.open_file(filename, mode='r')
    values = pott.variables[var_name].get_value()[(center_lat-1+8):(center_lat+1+8),(center_lon-1-53):(center_lon+1-53)]
    pott.close()
    pott_avg = np.average(values)
    pott_values[time_string] = pott_avg
    
    time_step += 1
    data_time += delta_fixee
    
print pott_values

{'20211011_00_00': 297.685, '20211011_06_00': 298.5525, '20211011_12_00': 298.6175, '20211010_18_00': 296.97998, '20211011_18_00': 299.2825}


Step 6: Finally, save them to file.

In [12]:
save_dict_to_csv(pott_values, "pott.csv")
save_dict_to_csv(wnpsh_area_indices, "wnpsh_area_indices.csv")
save_dict_to_csv(wnpsh_intensity_indices, "wnpsh_intensity_indices.csv")
save_dict_to_csv(wnpsh_extension_indices, "wnpsh_extension_indices.csv")
save_dict_to_csv(westerly_indices, "westerly_indices.csv")
save_dict_to_csv(vort_values, "vort850.csv")
save_dict_to_csv(surface_temp_values, "temp_surface.csv")
save_dict_to_csv(temp200_values, "temp200.csv")
save_dict_to_csv(lo_humid_values, "lo_humid.csv")
save_dict_to_csv(hi_humid_values, "hi_humid.csv")
save_dict_to_csv(hk_u_winds, "hk_u_winds.csv")
save_dict_to_csv(hk_v_winds, "hk_v_winds.csv")
save_dict_to_csv(easm_indices, "easm_indices.csv")
save_dict_to_csv(u200_values, "u200.csv")
save_dict_to_csv(u500_values, "u500.csv")
save_dict_to_csv(v500_values, "v500.csv")
save_dict_to_csv(ulvws_values, "ulvws.csv")
save_dict_to_csv(mlvws_values, "mlvws.csv")