In [163]:
import numpy as np
import pandas as pd

In [164]:
# Read the disaster records from 'disasters_cleaned_alt.csv' (path is relative
# to the notebook's working directory) and show the frame as the cell output.
# The rendered output lists the columns year, state and incident_type.
disasters = pd.read_csv('disasters_cleaned_alt.csv')
disasters

Unnamed: 0,year,state,incident_type
0,1953,GA,Tornado
1,1953,TX,Tornado
2,1953,LA,Flood
3,1953,MI,Tornado
4,1953,MT,Flood
...,...,...,...
64087,2023,ME,Severe Storm
64088,2023,MS,Severe Storm
64089,2023,MS,Severe Storm
64090,2023,MS,Severe Storm


In [165]:
# Restrict the data to records from 1960 onwards; earlier rows are discarded.
disasters = disasters.loc[disasters['year'].ge(1960)]

In [166]:
# Build a decade label such as "1960's" for every row by flooring the year to
# a multiple of ten, and put it in front of the existing columns.
disasters.insert(0, 'decade', (disasters['year'] // 10 * 10).astype(str) + "'s")
disasters

Unnamed: 0,decade,year,state,incident_type
93,1960's,1960,OK,Flood
94,1960's,1960,HI,Earthquake
95,1960's,1960,FL,Other
96,1960's,1960,NE,Flood
97,1960's,1960,SD,Flood
...,...,...,...,...
64087,2020's,2023,ME,Severe Storm
64088,2020's,2023,MS,Severe Storm
64089,2020's,2023,MS,Severe Storm
64090,2020's,2023,MS,Severe Storm


In [167]:
# Discard every row that has a missing value in any column, then remove
# 'year'; the remaining cells work with the 'decade' label instead.
disasters = disasters.dropna().drop(columns='year')

In [168]:
# Make 'decade' the row index; the previous row labels are replaced by it.
disasters = disasters.set_index('decade')

In [169]:
# Move the index back into an ordinary column, which gives the rows a fresh
# default 0..n-1 integer index. The column-wise concat with the one-hot frame
# further down matches rows by these labels.
disasters = disasters.reset_index()

In [170]:

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# One-hot encode 'incident_type': one 0.0/1.0 indicator column per distinct
# incident type. The encoder is left on its default (sparse) output and the
# result is densified with .toarray(): the keyword that used to request dense
# output was renamed from `sparse` to `sparse_output`, and the old spelling was
# removed in scikit-learn 1.4, so neither spelling works on every version.
encoder = OneHotEncoder()
encoded_disaster_types = encoder.fit_transform(disasters[['incident_type']]).toarray()

# Name each column after its category (taken directly from the fitted encoder
# rather than by stripping a prefix from generated feature names) and reuse the
# row labels of `disasters`, so that a later column-wise concat lines the two
# frames up row for row whatever index `disasters` currently carries.
types_encoded = pd.DataFrame(
    encoded_disaster_types,
    columns=encoder.categories_[0],
    index=disasters.index,
)



In [171]:
# The incident type has been encoded into `types_encoded`, so the raw text
# column is removed from the key frame.
disasters = disasters.drop(columns='incident_type')

In [172]:
# Display the frame after 'incident_type' was dropped; the rendered output
# shows only the decade and state columns.
disasters

Unnamed: 0,decade,state
0,1960's,OK
1,1960's,HI
2,1960's,FL
3,1960's,NE
4,1960's,SD
...,...,...
63994,2020's,ME
63995,2020's,MS
63996,2020's,MS
63997,2020's,MS


In [173]:
# Put the one-hot indicator columns next to decade/state. Rows are matched by
# index label, not by position.
disasters_onehot = pd.concat((disasters, types_encoded), axis='columns')

In [175]:
# One boolean per row: True when any column of that row is missing.
rows_with_na = disasters_onehot.isnull().any(axis='columns')

# Row labels of the flagged rows; an empty result means no row has gaps.
indices_with_na = disasters_onehot.index[rows_with_na.to_numpy()]

print(indices_with_na.to_numpy())

[]


In [180]:
# Display the combined frame: decade, state, then one 0.0/1.0 indicator column
# per incident type (see the rendered output).
disasters_onehot

Unnamed: 0,decade,state,Biological,Chemical,Coastal Storm,Dam/Levee Break,Drought,Earthquake,Fire,Fishing Losses,...,Severe Storm,Snowstorm,Terrorist,Tornado,Toxic Substances,Tropical Storm,Tsunami,Typhoon,Volcanic Eruption,Winter Storm
0,1960's,OK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1960's,HI,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1960's,FL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1960's,NE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1960's,SD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63994,2020's,ME,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
63995,2020's,MS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
63996,2020's,MS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
63997,2020's,MS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [183]:
# Summing the indicator columns within each (decade, state) group gives the
# number of incidents of every type; the group keys stay ordinary columns.
disasters_breakdown = disasters_onehot.groupby(['decade', 'state'], as_index=False).sum()

In [188]:
# Display the per-(decade, state) incident counts produced by the groupby sum.
disasters_breakdown

Unnamed: 0,decade,state,Biological,Chemical,Coastal Storm,Dam/Levee Break,Drought,Earthquake,Fire,Fishing Losses,...,Severe Storm,Snowstorm,Terrorist,Tornado,Toxic Substances,Tropical Storm,Tsunami,Typhoon,Volcanic Eruption,Winter Storm
0,1960's,AK,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1960's,AL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1960's,AR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,29.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1960's,AS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0
4,1960's,AZ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,2020's,VT,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
384,2020's,WA,107.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,...,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
385,2020's,WI,167.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
386,2020's,WV,110.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [190]:
# Two-letter codes of the 50 U.S. states. Codes that are not listed here
# (for example "AS", which appears in the data) are absent on purpose or by
# omission -- the list itself is the only source of truth for later filtering.
states = """
    AL AK AZ AR CA CO CT DE FL GA
    HI ID IL IN IA KS KY LA ME MD
    MA MI MN MS MO MT NE NV NH NJ
    NM NY NC ND OH OK OR PA RI SC
    SD TN TX UT VT VA WA WV WI WY
""".split()

# Same codes as a NumPy string array.
states_arr = np.asarray(states)

In [199]:
# Keep only rows whose state code is in the 50-state list. The explicit copy
# makes the result an independent frame, so the column assignment below does
# not act on a slice of the previous frame (SettingWithCopyWarning).
disasters_breakdown = disasters_breakdown[disasters_breakdown['state'].isin(states_arr)].copy()

# The incident-count columns are the numeric ones ('decade' and 'state' hold
# text). A 'Total' column left over from an earlier run of this cell is
# excluded, so re-running the cell cannot fold the old total into the new one.
count_cols = (
    disasters_breakdown
    .select_dtypes(include='number')
    .columns
    .drop('Total', errors='ignore')
)

# The grouped sums are whole-number floats; store them as integers. Only the
# count columns are cast, so no blanket try/except is needed and a genuine
# conversion error is reported instead of being silently ignored.
disasters_breakdown = disasters_breakdown.astype({col: int for col in count_cols})

# Total incidents per (decade, state) row across all incident types.
disasters_breakdown['Total'] = disasters_breakdown[count_cols].sum(axis=1)
disasters_breakdown

Unnamed: 0,decade,state,Biological,Chemical,Coastal Storm,Dam/Levee Break,Drought,Earthquake,Fire,Fishing Losses,...,Snowstorm,Terrorist,Tornado,Toxic Substances,Tropical Storm,Tsunami,Typhoon,Volcanic Eruption,Winter Storm,Total
0,1960's,AK,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,2
1,1960's,AL,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,1960's,AR,0,0,0,0,0,0,0,0,...,0,0,29,0,0,0,0,0,0,68
4,1960's,AZ,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
5,1960's,CA,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,2020's,VT,28,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,60
384,2020's,WA,107,0,0,0,0,0,40,0,...,0,0,0,0,0,0,0,0,0,225
385,2020's,WI,167,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,170
386,2020's,WV,110,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,124


In [202]:
# Total number of incidents per decade, shown as a one-column frame.
disasters_breakdown.groupby('decade')[['Total']].sum()

Unnamed: 0_level_0,Total
decade,Unnamed: 1_level_1
1960's,1600
1970's,5227
1980's,1960
1990's,9795
2000's,17351
2010's,13173
2020's,12561


In [None]:
# Write the per-(decade, state) breakdown to 'disasters_by_decade.csv' in the
# working directory; index=False keeps the row labels out of the file.
disasters_breakdown.to_csv('disasters_by_decade.csv', index=False)