# Mortgage file to SQL database

In [2]:
import sqlite3
import pandas as pd
import geopandas as gpd
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

# Atlantic Hurricane Dataset from 1979
https://www.nhc.noaa.gov/data/#hurdat

In [3]:
#Import data from url
hurricane_data = []
with open("../../Data/nhc.noaa.gov_data_hurdat_hurdat2-1851-2020-052921.txt", "r") as f:
    lines = f.readlines()
    storm_data = None
    for line in lines:
        parts = line.split(",")
        
        if len(parts) == 4 and int(parts[0][-4:]) >= 1979:  # Header Line
            #Get last 4 digits from first entry
            storm_data = {"id": parts[0].strip(), "name": parts[1].strip(), "entries": int(parts[2]), "year": int(parts[0][-4:])}
        else:  # Data Line
            hurricane_data.append({**storm_data, **{
                "date": parts[0].strip(),
                "time": parts[1].strip(),
                "record_id": parts[2].strip(),
                "status": parts[3].strip(),
                "latitude": parts[4].strip(),
                "longitude": parts[5].strip(),
                "max_wind": int(parts[6].strip()),
                "min_pressure": int(parts[7].strip()) if parts[12].strip() != "" else None
            }})

# Create DataFrame from parsed data
hurricane_df = pd.DataFrame(hurricane_data)
#convert date to datetime format
hurricane_df['date'] = pd.to_datetime(hurricane_df['date'], format='%Y%m%d')
#add month column to hurrican_df
hurricane_df['month'] = hurricane_df['date'].dt.month
#Save as csv
#hurricane_df.to_csv('../Data/hurricane_df.csv')

#describe data and make entries column the unique count of id
display(hurricane_df.describe())
display(hurricane_df)

Unnamed: 0,entries,year,date,max_wind,min_pressure,month
count,19180.0,19180.0,19180,19180.0,19180.0,19180.0
mean,40.35391,2001.466632,2002-02-23 12:13:21.835245184,48.399791,881.66074,8.678311
min,2.0,1979.0,1979-06-11 00:00:00,-99.0,-999.0,1.0
25%,24.0,1991.0,1991-10-30 00:00:00,30.0,982.0,8.0
50%,38.0,2003.0,2003-08-27 12:00:00,40.0,999.0,9.0
75%,55.0,2012.0,2012-05-26 00:00:00,60.0,1006.0,9.0
max,96.0,2020.0,2020-11-18 00:00:00,165.0,1024.0,12.0
std,20.312358,12.104994,,25.734435,459.088771,1.400565


Unnamed: 0,id,name,entries,year,date,time,record_id,status,latitude,longitude,max_wind,min_pressure,month
0,AL011979,UNNAMED,22,1979,1979-06-11,1200,,TD,17.4N,81.1W,20,-999,6
1,AL011979,UNNAMED,22,1979,1979-06-11,1800,,TD,17.7N,80.8W,25,-999,6
2,AL011979,UNNAMED,22,1979,1979-06-12,0000,,TD,18.1N,80.2W,25,-999,6
3,AL011979,UNNAMED,22,1979,1979-06-12,0600,,TD,18.4N,79.9W,25,-999,6
4,AL011979,UNNAMED,22,1979,1979-06-12,1200,,TD,19.0N,79.6W,25,-999,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19175,AL312020,IOTA,26,2020,2020-11-17,1200,,HU,13.7N,84.7W,75,965,11
19176,AL312020,IOTA,26,2020,2020-11-17,1800,,TS,13.7N,85.7W,55,988,11
19177,AL312020,IOTA,26,2020,2020-11-18,0000,,TS,13.8N,86.7W,40,1000,11
19178,AL312020,IOTA,26,2020,2020-11-18,0600,,TS,13.8N,87.8W,35,1005,11


In [None]:
hurricane_df_agg = hurricane_df.groupby(['year', 'month']).agg({'id': 'nunique', 'max_wind': 'median', 'min_pressure': 'median', 'status': 'count'}).reset_index()
display(hurricane_df_agg.head())