In [64]:
# Import dependencies
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm

In [117]:
# Prepare the dataset for analysis
# Read in csv file
file_path = "Conditions_Contributing_to_COVID-19_Deaths__by_State_and_Age__Provisional_2020-2023.csv"
inital_df = pd.read_csv(file_path)

# Drop the unwanted columns.
inital_df = inital_df.drop(columns=['Data As Of', 'Start Date', 'End Date', 'ICD10_codes', 'Number of Mentions', 'Flag'])

# Only keep the rows where the Group is "By Month", and delete the row where Age Group is "All Ages"
inital_df = inital_df.loc[inital_df['Group'] == 'By Month']
clean_df = inital_df.loc[inital_df['Age Group'] != 'All Ages']

# Drop the NaN values
clean_df = clean_df.dropna()

# Convert Year and Month columns to integers
clean_df.loc[:, 'Year'] = clean_df['Year'].astype(int)
clean_df.loc[:, 'Month'] = clean_df['Month'].astype(int)
clean_df.loc[:, 'COVID-19 Deaths'] = clean_df['COVID-19 Deaths'].astype(int)

clean_df.head()

Unnamed: 0,Group,Year,Month,State,Condition Group,Condition,Age Group,COVID-19 Deaths
62100,By Month,2020,1,United States,Respiratory diseases,Influenza and pneumonia,0-24,0
62101,By Month,2020,2,United States,Respiratory diseases,Influenza and pneumonia,0-24,0
62102,By Month,2020,3,United States,Respiratory diseases,Influenza and pneumonia,0-24,9
62103,By Month,2020,4,United States,Respiratory diseases,Influenza and pneumonia,0-24,27
62104,By Month,2020,5,United States,Respiratory diseases,Influenza and pneumonia,0-24,19


In [129]:
# Group Covid-19 deaths by year and month while keeping months in order
grouped_df = clean_df.groupby(['Year','State', 'Month'])['COVID-19 Deaths'].sum().reset_index()
grouped_df.sort_values(by=['Year', 'Month'])
grouped_df.head()

# Group into seasons by defining a function to return the season based on the month
def get_season(month):
    if month in [12, 1, 2]:
        return "Winter"
    elif month in [3, 4, 5]:
        return "Spring"
    elif month in [6, 7, 8]:
        return "Summer"
    else:
        return "Fall"
# Apply the function to the dataframe and create a new column named "Season"    
grouped_df['Season'] = grouped_df['Month'].apply(get_season)

grouped_df


Unnamed: 0,Year,State,Month,COVID-19 Deaths,Season
0,2020,Alabama,1,0,Winter
1,2020,Alabama,2,0,Winter
2,2020,Alabama,3,24,Spring
3,2020,Alabama,4,851,Spring
4,2020,Alabama,5,1131,Spring
...,...,...,...,...,...
2101,2023,Wisconsin,2,370,Winter
2102,2023,Wisconsin,3,50,Spring
2103,2023,Wyoming,1,0,Winter
2104,2023,Wyoming,2,0,Winter


In [130]:
covid_2020 = grouped_df.loc[grouped_df['Year'] == 2020]
covid_2021 = grouped_df.loc[grouped_df['Year'] == 2021]
covid_2022 = grouped_df.loc[grouped_df['Year'] == 2022]
covid_2023 = grouped_df.loc[grouped_df['Year'] == 2023]

covid_years = [covid_2020, covid_2021, covid_2022, covid_2023]

In [131]:
covid_2020

Unnamed: 0,Year,State,Month,COVID-19 Deaths,Season
0,2020,Alabama,1,0,Winter
1,2020,Alabama,2,0,Winter
2,2020,Alabama,3,24,Spring
3,2020,Alabama,4,851,Spring
4,2020,Alabama,5,1131,Spring
...,...,...,...,...,...
643,2020,Wyoming,8,0,Summer
644,2020,Wyoming,9,0,Fall
645,2020,Wyoming,10,108,Fall
646,2020,Wyoming,11,409,Fall


In [114]:
# Rename values in the Month column
rename_month = {'1': 'January', '2': 'February',
                '3': 'March', '4': 'April',
                '5': 'May', '6': 'June',
                '7': 'July', '8': 'August',
                '9': 'September', '10': 'October',
                '11': 'November', '12': 'December'}

for index, row in clean_df.iterrows():
    clean_df.loc[index, 'Month'] = rename_month[str(row['Month'])]


clean_df.head()

AttributeError: 'list' object has no attribute 'loc'