In [208]:
import numpy as np
import pandas as pd

In [209]:
disasters = pd.read_csv('disasters_cleaned_alt.csv')
disasters

Unnamed: 0,year,state,incident_type
0,1953,GA,Tornado
1,1953,TX,Tornado
2,1953,LA,Flood
3,1953,MI,Tornado
4,1953,MT,Flood
...,...,...,...
64087,2023,ME,Severe Storm
64088,2023,MS,Severe Storm
64089,2023,MS,Severe Storm
64090,2023,MS,Severe Storm


In [210]:
disasters = disasters[disasters['year'] >= 1960]

In [211]:
disasters.insert(0, 'decade', list(map(lambda x: int(x//10)*10, disasters['year'])))
disasters

Unnamed: 0,decade,year,state,incident_type
93,1960,1960,OK,Flood
94,1960,1960,HI,Earthquake
95,1960,1960,FL,Other
96,1960,1960,NE,Flood
97,1960,1960,SD,Flood
...,...,...,...,...
64087,2020,2023,ME,Severe Storm
64088,2020,2023,MS,Severe Storm
64089,2020,2023,MS,Severe Storm
64090,2020,2023,MS,Severe Storm


In [212]:
disasters = disasters[~disasters.isna().any(axis=1)].drop('year', axis=1)

In [213]:
disasters.set_index('decade', inplace=True)

In [214]:
disasters.reset_index(inplace=True)

In [215]:

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
#import category_encoders as ce

encoder = OneHotEncoder(sparse=False)
encoded_disaster_types = encoder.fit_transform(disasters[['incident_type']])

types_encoded =pd.DataFrame(encoded_disaster_types, columns=[col.replace('incident_type_', '') for col in encoder.get_feature_names_out(['incident_type'])])



In [216]:
disasters.drop(columns=['incident_type'], axis=1, inplace=True)

In [217]:
disasters

Unnamed: 0,decade,state
0,1960,OK
1,1960,HI
2,1960,FL
3,1960,NE
4,1960,SD
...,...,...
63994,2020,ME
63995,2020,MS
63996,2020,MS
63997,2020,MS


In [218]:
disasters_onehot = pd.concat([disasters, types_encoded], axis=1)

In [219]:
rows_with_na = disasters_onehot.isna().any(axis=1)

# Get indices of rows with NA entries
indices_with_na = rows_with_na[rows_with_na].index

print(indices_with_na.to_numpy())

[]


In [220]:
disasters_onehot

Unnamed: 0,decade,state,Biological,Chemical,Coastal Storm,Dam/Levee Break,Drought,Earthquake,Fire,Fishing Losses,...,Severe Storm,Snowstorm,Terrorist,Tornado,Toxic Substances,Tropical Storm,Tsunami,Typhoon,Volcanic Eruption,Winter Storm
0,1960,OK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1960,HI,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1960,FL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1960,NE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1960,SD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63994,2020,ME,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
63995,2020,MS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
63996,2020,MS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
63997,2020,MS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [221]:
disasters_breakdown = disasters_onehot.groupby(['decade', 'state']).sum().reset_index()

In [222]:
disasters_breakdown

Unnamed: 0,decade,state,Biological,Chemical,Coastal Storm,Dam/Levee Break,Drought,Earthquake,Fire,Fishing Losses,...,Severe Storm,Snowstorm,Terrorist,Tornado,Toxic Substances,Tropical Storm,Tsunami,Typhoon,Volcanic Eruption,Winter Storm
0,1960,AK,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1960,AL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1960,AR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,29.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1960,AS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0
4,1960,AZ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,2020,VT,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
384,2020,WA,107.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,...,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
385,2020,WI,167.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
386,2020,WV,110.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [223]:
states = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"
]

states_arr = np.array(states)

In [224]:
disasters_breakdown = disasters_breakdown[disasters_breakdown['state'].isin(states_arr)]


for col in disasters_breakdown.columns:
    try:
        disasters_breakdown[col] = disasters_breakdown[col].astype(int)
    except:
        pass

disasters_breakdown['Total'] = disasters_breakdown.select_dtypes(include=['int']).sum(axis=1)
disasters_breakdown

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  disasters_breakdown[col] = disasters_breakdown[col].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  disasters_breakdown[col] = disasters_breakdown[col].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  disasters_breakdown[col] = disasters_breakdown[col].astype(int)
A value is tr

Unnamed: 0,decade,state,Biological,Chemical,Coastal Storm,Dam/Levee Break,Drought,Earthquake,Fire,Fishing Losses,...,Snowstorm,Terrorist,Tornado,Toxic Substances,Tropical Storm,Tsunami,Typhoon,Volcanic Eruption,Winter Storm,Total
0,1960,AK,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1962
1,1960,AL,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1961
2,1960,AR,0,0,0,0,0,0,0,0,...,0,0,29,0,0,0,0,0,0,2028
4,1960,AZ,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1965
5,1960,CA,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,2048
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,2020,VT,28,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2080
384,2020,WA,107,0,0,0,0,0,40,0,...,0,0,0,0,0,0,0,0,0,2245
385,2020,WI,167,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2190
386,2020,WV,110,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2144


In [225]:
disasters_breakdown.groupby('decade')['Total'].sum().to_frame()

Unnamed: 0_level_0,Total
decade,Unnamed: 1_level_1
1960,85880
1970,101757
1980,98980
1990,109295
2000,117351
2010,113673
2020,113561


In [226]:
disasters_breakdown.to_csv('disasters_by_decade.csv', index=False)