# Hurricane Data to SQL database


Load the necessary packages.

In [None]:
import sqlite3
import pandas as pd

import geopandas as gpd
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import sys
sys.path.append('..')
from functions import *

Load the necessary dataset.

# Hurricane Risk Score Index

In [None]:
# Read the NRI county shapefile and keep only the mainland counties that
# have a usable hurricane risk score.
# NOTE: according to the original author this cell takes around 5 minutes.
shapefile_path = "../../shapefile/NRI_Shapefile_Counties/NRI_Shapefile_Counties.shp"
print('loading')
gdf = gpd.read_file(shapefile_path)
print('loaded')

# Exclude Alaska and the islands/territories; .copy() detaches the result
# from the full GeoDataFrame.
mainland_usa_gdf = gdf[
    ~gdf['STATE'].isin([
        'Alaska',
        'Hawaii',
        'Puerto Rico',
        'Guam',
        'American Samoa',
        'Northern Mariana Islands',
        'Virgin Islands',
    ])
].copy()

# Drop rows whose HRCN_RISKS is -9999. This appears to be a missing-value
# sentinel (an earlier draft replaced it with NaN instead of dropping it).
_has_score = mainland_usa_gdf['HRCN_RISKS'].ne(-9999)
mainland_usa_gdf = mainland_usa_gdf[_has_score]

In [None]:
# Standardise HRCN_EALS (z-score), then cut the standardised values into
# five quantile-based bins so every county lands in one of five categories.
_eals = mainland_usa_gdf['HRCN_EALS']
mainland_usa_gdf['HRCN_EALS_Norm'] = (_eals - _eals.mean()) / _eals.std()
mainland_usa_gdf['HRCN_RISK_CATEGORY_QUANTILE'] = pd.qcut(
    mainland_usa_gdf['HRCN_EALS_Norm'],
    q=5,
    labels=[
        'Very Low',
        'Relatively Low',
        'Relatively Moderate',
        'Relatively High',
        'Very High',
    ],
)

# One colour per category, ordered from 'Very Low' to 'Very High'.
colors = ['#08306b', '#4292c6', '#fdae6b', '#f16913', '#67000d']
custom_cmap = ListedColormap(colors)  # type: ignore

# Draw the county map coloured by risk category.
fig, ax = plt.subplots(figsize=(15, 25))
plt.grid(False)
mainland_usa_gdf.plot(
    column='HRCN_RISK_CATEGORY_QUANTILE',
    cmap=custom_cmap,
    linewidth=0.8,
    ax=ax,
    edgecolor='0.8',
    legend=True,
)
ax.set_title(
    'Hurricane Risk Heatmap (USA)',
    fontdict={'fontsize': '25', 'fontweight': '3'},
)

# Strip the axis ticks and the surrounding frame so only the map remains.
ax.set_xticks([])
ax.set_yticks([])
for _side in ('top', 'right', 'bottom', 'left'):
    ax.spines[_side].set_visible(False)

# Save before show(): the figure is written to disk, then displayed.
plt.savefig('../../Plots/hurricane_risk_heatmap.png')
plt.show()

## Subset data to HRCN and County FIPS

In [None]:
# List every column whose name contains 'HRCN'.
_hrcn_index = mainland_usa_gdf.columns[mainland_usa_gdf.columns.str.contains('HRCN')]
display(_hrcn_index)

# Keep only the state name, the county FIPS code and the HRCN columns.
hrcn_columns = _hrcn_index.tolist()
selected_columns = ['STATE', 'STCOFIPS', *hrcn_columns]
mainland_usa_gdf = mainland_usa_gdf[selected_columns]
display(mainland_usa_gdf)

# Export the subset as CSV (the index is written as the first column).
mainland_usa_gdf.to_csv('../../Data/mainland_usa_gdf_HRCN.csv')

In [None]:
# Column names of the HRCN subset as a plain Python list.
listcols = mainland_usa_gdf.columns.tolist()

## Add 3ZIP Column

In [None]:
# Make the parent directory importable and bind the project-local
# `functions` module to the alias `f`.
sys.path.append('..')
import functions as f

# ZIP/county lookup table; the key columns are read as strings
# (presumably to keep leading zeros - confirm against the workbook).
zip_county = pd.read_excel(
    '../../Data/ZIP_COUNTY_062023.xlsx',
    dtype={'3ZIP': str, 'COUNTY': str, 'ZIP': str},
)

# Derive the 3ZIP value for each county FIPS code via f.county_to_zip.
# NOTE(review): the helper's return shape is not visible here - check functions.py.
mainland_usa_gdf['3ZIP'] = mainland_usa_gdf['STCOFIPS'].apply(
    lambda fips: f.county_to_zip(fips, zip_data=zip_county)
)


In [None]:
# Preview the first rows of the table after the 3ZIP column was added.
mainland_usa_gdf.head()

In [None]:
# Persist the HRCN subset (including the 3ZIP column) as a pickle file.
mainland_usa_gdf.to_pickle(
    path='../../Data/pickle/mainland_usa_gdf_HRCN.pkl',
)

## Open SQL connection

In [None]:
# Relative path of the SQLite database file used by this project.
db_path = "../../Database/thesis_database.db"
# Open the connection that the following cells write through and then close.
conn = sqlite3.connect(db_path)

### Write the dataset to the SQL database

In [None]:
# Write the HRCN subset to the `hrcn_risk_data` table, replacing any existing
# table of that name; the DataFrame index is not stored.
mainland_usa_gdf.to_sql(
    name='hrcn_risk_data',
    con=conn,
    if_exists='replace',
    index=False,
)

## Close SQL connection

In [None]:
# Release the SQLite connection opened earlier in the notebook.
conn.close()