In [None]:
# Headline totals are read from the first row of total_df.
# NOTE(review): presumably row 0 is the national "All Sexes / All Ages" record — confirm.
total_deaths = total_df.iloc[0, :]

# Display label -> source column holding the raw death count.
disease_columns = {
    "COVID-19": "COVID-19 Deaths",
    "Pneumonia": "Pneumonia Deaths",
    "Influenza": "Influenza Deaths",
}

# One row per disease, with the raw counts rescaled to thousands.
total_deaths_df = pd.DataFrame(
    {
        "Disease Type": list(disease_columns),
        "Total Deaths (by thousands)": [
            total_deaths[column] / 1000 for column in disease_columns.values()
        ],
    },
    index=[0, 1, 2],
)

In [None]:
# Bar chart of total deaths per disease, saved to Resources/Images/TotalDeaths.png.

# Set the axes font sizes BEFORE the figure is created: these rcParams are
# picked up when the axes, its labels and its title are built, so calling
# plt.rc() after plotting (as this cell used to) left this chart at the
# defaults and only changed the charts drawn by later cells.
plt.rc('axes', labelsize=16, titlesize=16)

f, ax = plt.subplots(figsize=(8, 6))
# Draw on the axes created above rather than relying on the implicit current axes.
sns.barplot(data=total_deaths_df, x="Disease Type", y="Total Deaths (by thousands)",
            ci=None, palette="rocket", ax=ax)
ax.set_title("Total Deaths by Disease from 2020-2022")
# Leave 100 (thousand) of headroom above the tallest bar.
ax.set_ylim(0, total_deaths_df["Total Deaths (by thousands)"].max() + 100)
f.savefig("Resources/Images/TotalDeaths.png")
plt.show()

#This graph shows the total deaths by Disease from the start of 2020.
#There were significantly more COVID-19 and Pneumonia deaths than Influenza deaths.

In [None]:
# Build a long-format table of national deaths per disease and year:
# one row per (disease, year), values expressed in thousands.

# Years to chart, kept as floats as in the comparison against year_df["Year"].
year = [2020.0, 2021.0, 2022.0]
diseases = ["COVID-19", "Pneumonia", "Influenza"]
covid, pneumonia, influenza = [], [], []

# The national, all-sexes, all-ages filter does not depend on the year,
# so apply it once outside the loop.
national_rows = year_df.loc[(year_df["State"] == "United States") &
                            (year_df["Sex"] == "All Sexes") &
                            (year_df["Age Group"] == "All Ages")]

for x in year:
    deaths = national_rows.loc[national_rows["Year"] == x]
    # .iloc[0] extracts the scalar count so plain floats (not one-element
    # Series) are stored.
    # NOTE(review): assumes exactly one matching row per year — confirm.
    covid.append(deaths["COVID-19 Deaths"].iloc[0] / 1000)
    pneumonia.append(deaths["Pneumonia Deaths"].iloc[0] / 1000)
    influenza.append(deaths["Influenza Deaths"].iloc[0] / 1000)

# Rows are grouped by disease (COVID-19, Pneumonia, Influenza) and each group
# runs through the years in order, so the year labels must cycle
# 2020, 2021, 2022 within every group. The previous labels
# (2020 x3, 2021 x3, 2022 x3) tagged each value with the wrong year.
year_labels = [str(int(value)) for value in year]
yearly_deaths_df = pd.DataFrame({
    "Year": year_labels * len(diseases),
    "Disease Type": [disease for disease in diseases for _ in year],
    "Total Deaths (by thousands)": covid + pneumonia + influenza,
})

In [None]:
# Line chart of yearly deaths: one line per disease across 2020-2022,
# saved to Resources/Images/TotalDeathsByYear.png.
x_values = ["2020", "2021", "2022"]
deaths_column = "Total Deaths (by thousands)"
disease_type = yearly_deaths_df["Disease Type"]

# Slice the long table into one frame per disease.
y_covid = yearly_deaths_df[disease_type == "COVID-19"]
y_pneumonia = yearly_deaths_df[disease_type == "Pneumonia"]
y_influenza = yearly_deaths_df[disease_type == "Influenza"]

# plt.plot returns a one-element list; unpack it to keep each line handle.
[covid_handle] = plt.plot(x_values, y_covid[deaths_column],
                          label="COVID-19", marker="o")
[pneumonia_handle] = plt.plot(x_values, y_pneumonia[deaths_column],
                              label="Pneumonia", marker="o")
[influenza_handle] = plt.plot(x_values, y_influenza[deaths_column],
                              label="Influenza", marker="o")

plt.title("Total Deaths by Year")
plt.xlabel("Year")
plt.ylabel(deaths_column)
plt.legend()
plt.savefig("Resources/Images/TotalDeathsByYear.png")
plt.show()

#This graph shows that COVID-19 and Pneumonia deaths dropped significantly in 2022 and the 
#number of Influenza deaths has stayed steady

In [None]:
# Build a long-format table of national deaths per age group and disease:
# three rows (COVID-19, Pneumonia, Influenza) per age group, in thousands.
age_groups = total_df["Age Group"].unique()
covid_age = []
pneumonia_age = []
influenza_age = []

# The national, all-sexes filter is the same for every age group, so apply it once.
national_totals = total_df.loc[(total_df["State"] == "United States") &
                               (total_df["Sex"] == "All Sexes")]

# Collect plain records and build the frame in one go. This replaces a
# pre-allocated 36-row frame filled cell by cell, so the row count is no
# longer hard-coded and the value column is numeric rather than object.
age_records = []
for age_group in age_groups:
    deaths_age = national_totals.loc[national_totals["Age Group"] == age_group]
    # .iloc[0] extracts the scalar count from the single matching row.
    # NOTE(review): assumes exactly one national row per age group — confirm.
    covid_thousands = deaths_age["COVID-19 Deaths"].iloc[0] / 1000
    pneumonia_thousands = deaths_age["Pneumonia Deaths"].iloc[0] / 1000
    influenza_thousands = deaths_age["Influenza Deaths"].iloc[0] / 1000

    covid_age.append(covid_thousands)
    pneumonia_age.append(pneumonia_thousands)
    influenza_age.append(influenza_thousands)

    for disease, thousands in (("COVID-19", covid_thousands),
                               ("Pneumonia", pneumonia_thousands),
                               ("Influenza", influenza_thousands)):
        age_records.append({"Age Group": age_group,
                            "Disease Type": disease,
                            "Total Deaths (by thousands)": thousands})

age_deaths_df = pd.DataFrame(
    age_records,
    columns=["Age Group", "Disease Type", "Total Deaths (by thousands)"],
)
# Drop the aggregate rows so only the individual age bands remain.
age_deaths_df = age_deaths_df.loc[age_deaths_df["Age Group"] != "All Ages"]

In [None]:
# Grouped bar chart of deaths per age group, one bar per disease,
# saved to Resources/Images/TotalDeathsByAge.png.
f, ax = plt.subplots(figsize=(8, 6))
# Draw on the axes created above rather than relying on the implicit current axes.
sns.barplot(data=age_deaths_df, x="Age Group", y="Total Deaths (by thousands)",
            hue="Disease Type", ci=None, palette="rocket", ax=ax)
# Rotate the tick labels seaborn generated from the "Age Group" column.
# Overwriting them with age_groups[1:12] would silently mislabel the bars if
# "All Ages" were not the first group or the number of groups changed.
ax.tick_params(axis="x", labelrotation=90)
ax.set_title("Total Deaths by Age Group")
ax.legend(loc="upper center")
f.savefig("Resources/Images/TotalDeathsByAge.png")
plt.show()

#This graph shows that deaths caused by these diseases occurred mostly in people who were 45+ years old 

In [None]:
# COVID-19 rows only, prepared for the age-vs-deaths regression.
# .copy() makes this an independent frame, so the column assignments below do
# not write into a slice of age_deaths_df (which triggers SettingWithCopyWarning).
covid_age_deaths_df = age_deaths_df.loc[age_deaths_df["Disease Type"] == "COVID-19"].copy()

# Ordinal position of each age group (1 = first group), sized from the data
# instead of a hard-coded range(1, 12).
covid_age_deaths_df["Age"] = range(1, len(covid_age_deaths_df) + 1)

# Make the column numeric, but keep the fractional thousands: casting to
# int64 truncated every value (e.g. 0.9 thousand became 0) before the fit.
covid_age_deaths_df['Total Deaths (by thousands)'] = (
    covid_age_deaths_df['Total Deaths (by thousands)'].astype('float64')
)

In [None]:
# Linear regression of COVID-19 deaths (in thousands) against the ordinal
# age-group position, drawn as a scatter plot with the fitted line and saved
# to Resources/Images/AgeVsDeaths.png.
age_positions = covid_age_deaths_df["Age"].astype(float)
covid_deaths = covid_age_deaths_df["Total Deaths (by thousands)"].astype(float)

(slope, intercept, rvalue, pvalue, stderr) = linregress(age_positions, covid_deaths)
regress_values = age_positions * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

fig, ax = plt.subplots()
ax.scatter(age_positions, covid_deaths)
ax.plot(age_positions, regress_values, "r-")
# Anchor the equation in axes-fraction coordinates so it is always inside the
# plot. The previous data-coordinate anchor (200, 50) lay far outside the
# x-range of the age positions, so the text was never visible.
ax.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

ax.set_title("Age vs COVID-19 Deaths")
ax.set_xlabel("Age")
ax.set_ylabel("Total Deaths (by thousands)")
# Label each tick with the age group stored on the same row, which keeps
# labels and points aligned without re-deriving the order from age_groups.
ax.set_xticks(covid_age_deaths_df["Age"].tolist())
ax.set_xticklabels(covid_age_deaths_df["Age Group"].tolist(), rotation='vertical')

# rvalue**2 is the coefficient of determination, so label it as r-squared.
print(f"The r-squared is: {rvalue**2}")
fig.savefig("Resources/Images/AgeVsDeaths.png")
plt.show()

#There is a strong correlation between age and the number of deaths related to COVID-19. (This regression uses COVID-19 deaths only; Pneumonia and Influenza were not fitted here.)