In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import scipy.stats as st

In [None]:
# Read the provisional COVID-19 deaths CSV and, in the same step, discard the
# four bookkeeping columns that the rest of the notebook never references.
health_df = pd.read_csv(
    "Resources/Provisional_COVID-19_Deaths_by_Sex_and_Age.csv"
).drop(columns=["Data As Of", "Start Date", "End Date", "Footnote"])
health_df.head()

In [None]:
# Drop every row whose "Age Group" is one of the five brackets listed here;
# rows with any other age bracket are kept untouched.
# NOTE(review): presumably these brackets are removed because they overlap the
# ten-year brackets that remain -- confirm against the dataset documentation.
excluded_age_groups = [
    "40-49 years",
    "30-39 years",
    "18-29 years",
    "0-17 years",
    "50-64 years",
]
age_adjusted_df = health_df.loc[~health_df["Age Group"].isin(excluded_age_groups), :]

In [None]:
#This data frame shows deaths regardless of Year and Month ("By Total" rows)
#This data frame can be filtered by State, Sex, and Age Group
#State: You can choose any of the 50 states, Puerto Rico, or the United States as a whole
#Sex: Male, Female, All Sexes
#Age Group:
    #All ages
    #Under 1 year
    #1-4 years old
    #5-14 years old
    #15-24 years old
    #25-34 years old
    #35-44 years old
    #45-54 years old
    #55-64 years old
    #65-74 years old
    #75-84 years old
    #85 years and over
#The row mask is taken from age_adjusted_df's own "Group" column. Building it
#from health_df (a different, larger frame) only works through implicit index
#alignment and breaks as soon as the two frames stop sharing an index.
#Year, Month and Group are dropped afterwards: they carry no information once
#only the "By Total" rows remain.
total_df = (
    age_adjusted_df
    .loc[age_adjusted_df["Group"] == "By Total", :]
    .drop(columns=["Year", "Month", "Group"])
)
total_df.head()

In [None]:
#This data frame shows deaths by year, regardless of Month ("By Year" rows).
#You can pick 2020, 2021, or 2022
#This data frame can be filtered by State, Sex, and Age Group
#State: You can choose any of the 50 states, Puerto Rico, or the United States as a whole
#Sex: Male, Female, All Sexes
#Age Group:
    #All ages
    #Under 1 year
    #1-4 years old
    #5-14 years old
    #15-24 years old
    #25-34 years old
    #35-44 years old
    #45-54 years old
    #55-64 years old
    #65-74 years old
    #75-84 years old
    #85 years and over
#The row mask is taken from age_adjusted_df's own "Group" column. Building it
#from health_df (a different, larger frame) only works through implicit index
#alignment and breaks as soon as the two frames stop sharing an index.
#Month and Group are dropped afterwards: they carry no information once only
#the "By Year" rows remain.
year_df = (
    age_adjusted_df
    .loc[age_adjusted_df["Group"] == "By Year", :]
    .drop(columns=["Month", "Group"])
)
year_df.head()

In [None]:
#This data frame shows deaths by Month ("By Month" rows).
#You can pick any month numerically (ex. January = 1.0)
#This data frame can be filtered by Year, State, Sex, and Age Group
#Year: You can pick 2020, 2021, or 2022
#State: You can choose any of the 50 states, Puerto Rico, or the United States as a whole
#Sex: Male, Female, All Sexes
#Age Group:
    #All ages
    #Under 1 year
    #1-4 years old
    #5-14 years old
    #15-24 years old
    #25-34 years old
    #35-44 years old
    #45-54 years old
    #55-64 years old
    #65-74 years old
    #75-84 years old
    #85 years and over
#The row mask is taken from age_adjusted_df's own "Group" column. Building it
#from health_df (a different, larger frame) only works through implicit index
#alignment and breaks as soon as the two frames stop sharing an index.
#Group is dropped afterwards: it is constant once only "By Month" rows remain.
month_df = (
    age_adjusted_df
    .loc[age_adjusted_df["Group"] == "By Month", :]
    .drop(columns="Group")
)
month_df.head()

In [None]:
#Flu deaths by state/age
# The deaths are SUMMED: .count() only tallies how many non-null rows each
# group has, which is a row count and not a death count.
# Only the "All Sexes" rows are used so that the Male and Female rows are not
# added on top of their own combined total. The label is compared
# case-insensitively because its exact casing is known here only from the
# notebook's comments, not from the data itself.
# The yearly rows are added together, so each figure spans every year present
# in year_df. Missing values are skipped by sum(), i.e. treated as zero.
all_sexes_rows = year_df.loc[year_df["Sex"].str.lower() == "all sexes"]
x = (
    all_sexes_rows
    .groupby(["State", "Age Group"])["Influenza Deaths"]
    .sum()
    .reset_index()
)

x

In [None]:
# Monthly rows for calendar year 2021 only.
# .copy() makes df_2021 an independent frame: a later cell adds a "Season"
# column to it, and doing that on a bare slice of month_df triggers pandas'
# SettingWithCopyWarning.
df_2021 = month_df.loc[month_df["Year"] == 2021].copy()
df_2021.head()

In [None]:
#influenza deaths in 2021
# Each state's deaths are SUMMED over the 2021 monthly rows. .count() only
# tallies non-null rows, so the chart it produced showed row counts rather
# than the "Number of Influenza Deaths" its title promises.
# Only the "All Sexes" / "All ages" rows are used so that no death is counted
# again through a per-sex or per-age-bracket row. The labels are compared
# case-insensitively because their exact casing is known here only from the
# notebook's comments. The national "United States" row is left out so the
# chart compares states only. Missing values are skipped by sum().
state_totals_2021 = df_2021.loc[
    (df_2021["Sex"].str.lower() == "all sexes")
    & (df_2021["Age Group"].str.lower() == "all ages")
    & (df_2021["State"] != "United States")
]
i = state_totals_2021.groupby("State")["Influenza Deaths"].sum().reset_index()
ax = i.sort_values("Influenza Deaths", ascending=True).plot.barh(
    x="State",
    title="Number of Influenza Deaths by State in 2021",
    figsize=(17, 10),
)
# Save through the Axes' own figure rather than whatever pyplot considers current.
ax.figure.savefig("Resources/Images/InfluenzdeathsByState.png")

In [None]:
#Pneumonia Deaths in 2021
# Each state's deaths are SUMMED over the 2021 monthly rows. .count() only
# tallies non-null rows, so the chart it produced showed row counts rather
# than the "Number of Pneumonia Deaths" its title promises.
# Only the "All Sexes" / "All ages" rows are used so that no death is counted
# again through a per-sex or per-age-bracket row. The labels are compared
# case-insensitively because their exact casing is known here only from the
# notebook's comments. The national "United States" row is left out so the
# chart compares states only. Missing values are skipped by sum().
state_totals_2021 = df_2021.loc[
    (df_2021["Sex"].str.lower() == "all sexes")
    & (df_2021["Age Group"].str.lower() == "all ages")
    & (df_2021["State"] != "United States")
]
i = state_totals_2021.groupby("State")["Pneumonia Deaths"].sum().reset_index()
ax = i.sort_values("Pneumonia Deaths", ascending=True).plot.barh(
    x="State",
    title="Number of Pneumonia Deaths by State in 2021",
    figsize=(17, 10),
)
# Save through the Axes' own figure rather than whatever pyplot considers current.
ax.figure.savefig("Resources/Images/PneumoniadeathsByState.png")

In [None]:
#Which months have the most deaths for Covid
# Deaths are SUMMED per calendar month; .count() only tallied how many
# non-null rows each month had, which says nothing about the death toll.
# Only the national "United States" / "All Sexes" / "All ages" rows are used,
# so no death is counted again through a state, per-sex or per-age row. The
# Sex and Age Group labels are compared case-insensitively because their exact
# casing is known here only from the notebook's comments.
# month_df spans several years, so each month's figure is the total over all
# of them. Missing values are skipped by sum().
national_monthly = month_df.loc[
    (month_df["State"] == "United States")
    & (month_df["Sex"].str.lower() == "all sexes")
    & (month_df["Age Group"].str.lower() == "all ages")
]

y = national_monthly.groupby("Month")["COVID-19 Deaths"].sum().reset_index()
y

In [None]:
# Show the monthly frame as the cell output for inspection.
month_df

In [None]:
# Bar chart of the per-month COVID-19 figures held in y, one bar per Month.
y.plot(
    kind="bar",
    x="Month",
    title="Number of Covid Deaths by Month",
    ylabel="Number of Deaths",
)




In [None]:
# Label each 2021 row with its season, using three-month seasons:
#   Jan-Mar: Winter, Apr-Jun: Spring, Jul-Sep: Summer, Oct-Dec: Fall
# pd.cut builds right-closed intervals (lo, hi], so the edges have to be
# 0/3/6/9/12. The previous edges 0/4/7/10/12 put April into Winter, July into
# Spring and October into Summer, leaving Fall with only two months.
bins=[0.0,3.0,6.0,9.0,12.0]
labels=["Winter","Spring","Summer","Fall"]
# .assign returns a new frame, so the column is never written into a slice of
# month_df (no SettingWithCopyWarning) and the cell can be re-run safely.
df_2021 = df_2021.assign(Season=pd.cut(df_2021['Month'], bins=bins, labels=labels))

In [None]:
# Preview the first rows of df_2021 as the cell output.
df_2021.head()

In [None]:
# COVID-19 deaths per season in 2021.
# Deaths are SUMMED; .count() only tallied how many non-null rows fell into
# each season, which is not a death toll.
# Only the national "United States" / "All Sexes" / "All ages" rows are used,
# so no death is counted again through a state, per-sex or per-age row. The
# Sex and Age Group labels are compared case-insensitively because their exact
# casing is known here only from the notebook's comments.
national_2021 = df_2021.loc[
    (df_2021["State"] == "United States")
    & (df_2021["Sex"].str.lower() == "all sexes")
    & (df_2021["Age Group"].str.lower() == "all ages")
]
# observed=False keeps all four season categories in the result and makes the
# grouping behaviour for the categorical "Season" column explicit.
df_binned_season = (
    national_2021
    .groupby("Season", observed=False)["COVID-19 Deaths"]
    .sum()
    .reset_index()
)
df_binned_season

In [None]:
# Seasonal bar chart built from df_binned_season.
# Intended season mapping:
#   January-March: Winter, April-June: Spring,
#   July-September: Summer, October-December: Fall
season_ax = sns.barplot(
    data=df_binned_season,
    x="Season",
    y="COVID-19 Deaths",
    palette="hls",
)
season_ax.set(title="COVID-19 Deaths per Season (2021)")
plt.savefig("Resources/Images/CovidDeathsperSeason.png")



In [None]:
#Correlation Graph
# Pearson correlation between the month number and the COVID-19 death counts.
# The computation used to be commented out because it raised errors.
# NOTE(review): the errors were presumably caused by missing values in
# "COVID-19 Deaths" -- confirm. Rows with a missing value in either column are
# dropped before calling pearsonr, which needs two complete numeric arrays.
corr_rows = month_df[["Month", "COVID-19 Deaths"]].dropna()
correlation = st.pearsonr(corr_rows["Month"], corr_rows["COVID-19 Deaths"])

fig1, ax1 = plt.subplots(figsize=(8, 5))
# Draw on ax1 explicitly instead of relying on it being the current Axes.
sns.scatterplot(x = "Month",
            y = "COVID-19 Deaths",
            data = month_df,
            ax = ax1)
print(f"The correlation between Months and the COVID-19-Deaths is {round(correlation[0],2)}")
