## Flu and Variant-Specific Booster Uptake

In this notebook, we examine the availability of flu clinics and compare it to COVID variant-specific booster uptake.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
## Two-letter postal codes of the states included in the analysis.
## HAWAII ("HI") EXCLUDED DUE TO LACK OF BOOSTER DATA

state_list = (
    "AL AK AZ AR CA CO CT DE FL GA "
    "ID IL IN IA KS KY LA ME MD MA "
    "MI MN MS MO MT NE NV NH NJ NM "
    "NY NC ND OH OK OR PA RI SC SD "
    "TN TX UT VT VA WA WV WI WY"
).split()

#### Cleaning and filtering COVID vaccine uptake dataset

In [None]:
## Load the CDC COVID vaccination file, reading only the columns used below

col_list_covid = ["Date", "Recip_State", "Booster_Doses", "Census2019", "FIPS"]
df_covid_raw = pd.read_csv(
    filepath_or_buffer="../../data/cdc/COVID-19_Vaccinations_in_the_United_States_County.csv",
    usecols=col_list_covid,
)

In [None]:
## Make FIPS numeric; anything that cannot be parsed becomes NaN
df_covid_raw = df_covid_raw.assign(
    FIPS=pd.to_numeric(df_covid_raw["FIPS"], errors="coerce")
)

In [None]:
## Give the CDC columns the short names used in the rest of the notebook

col_names_covid = dict(
    Recip_State="State",
    Booster_Doses="Booster",
    Census2019="Population",
    FIPS="FIPS",
)

df_covid_raw = df_covid_raw.rename(columns=col_names_covid)

In [None]:
## Keep only the 09/28/2022 snapshot, which this notebook treats as the most recent data

is_snapshot = df_covid_raw["Date"] == "09/28/2022"
df_covid_date = df_covid_raw.loc[is_snapshot]

In [None]:
## Inspect the rows kept for the 09/28/2022 snapshot
df_covid_date

In [None]:
## Group by county FIPS: keep the first State label and sum the raw booster
## and population values for each FIPS code

df_covid_agg = df_covid_date.groupby("FIPS", as_index=False).agg(
    State=("State", "first"),
    Booster=("Booster", "sum"),
    Population=("Population", "sum"),
)

In [None]:
## Booster doses as a percentage of total population, per FIPS code
### May want to consider using eligible population rather than total population

df_covid_agg["Boosted_Pct"] = (
    df_covid_agg["Booster"] / df_covid_agg["Population"]
) * 100

In [None]:
## State-level totals for the bar chart: sum boosters and population over each state's rows
df_covid_graph = df_covid_date.groupby("State", as_index=False).agg(
    Booster=("Booster", "sum"),
    Population=("Population", "sum"),
)

## Booster doses as a percentage of each state's total population
df_covid_graph["Boosted_Pct"] = (
    df_covid_graph["Booster"] / df_covid_graph["Population"]
) * 100

In [None]:
## Restrict the chart data to the states listed in state_list
in_state_list = df_covid_graph["State"].isin(state_list)
df_covid_graph_final = df_covid_graph.loc[in_state_list]

In [None]:
import matplotlib.ticker as mtick

## Horizontal bar chart of booster share by state, ordered from highest to lowest share
state_order = df_covid_graph_final.sort_values("Boosted_Pct", ascending=False)["State"]

fig, ax = plt.subplots(figsize=(10, 10), dpi=100)

sns.barplot(
    x="Boosted_Pct",
    y="State",
    data=df_covid_graph_final,
    order=state_order,
    color="Teal",
    ax=ax,
)

ax.set(xlabel="Percentage of population with booster", ylabel="State")
ax.xaxis.set_major_formatter(mtick.PercentFormatter())
## Write the rounded percentage next to every bar
for container in ax.containers:
    ax.bar_label(container, fmt="%.f%%")

#### Cleaning and filtering flu locations dataset

In [None]:
## Load the Vaccines.gov flu provider locations, reading only the columns used below

col_list = ["provider_location_guid", "loc_admin_state", "loc_admin_zip"]
df_flu_raw = pd.read_csv(
    filepath_or_buffer="../../data/cdc/Vaccines.gov__Flu_vaccinating_provider_locations.csv",
    usecols=col_list,
)

In [None]:
## Give the flu location columns the short names used in the rest of the notebook

col_names_flu = dict(
    provider_location_guid="ID",
    loc_admin_state="State",
    loc_admin_zip="Zip",
)

df_flu_raw = df_flu_raw.rename(columns=col_names_flu)

In [None]:
## Keep only the part of the ZIP before any "-" (drops a ZIP+4 suffix).
## The column is cast to str first: `.str` methods return NaN for non-string
## elements, so a ZIP that read_csv parsed as a number would otherwise be lost.
## Missing values become the text "nan", which the numeric conversion in the
## next cell coerces back to NaN.
df_flu_raw["Zip"] = df_flu_raw["Zip"].astype(str).str.split("-").str[0]

In [None]:
## Make Zip numeric; anything that cannot be parsed becomes NaN
df_flu_raw = df_flu_raw.assign(
    Zip=pd.to_numeric(df_flu_raw["Zip"], errors="coerce")
)

In [None]:
## Keep one row per provider location ID (the first occurrence is retained)

df_flu_raw = df_flu_raw.drop_duplicates(subset="ID", keep="first")

In [None]:
## Inspect the de-duplicated flu locations
df_flu_raw

#### Add crosswalk file

In [None]:
## Load the ZIP-to-county crosswalk, reading only the two code columns
col_list = ["zip", "county"]
df_cross = pd.read_csv(
    filepath_or_buffer="../../data/census/ZIP_COUNTY_122021.csv",
    usecols=col_list,
)

In [None]:
## Rename the crosswalk columns to match the keys used in the other frames

col_names_cross = dict(zip="Zip", county="FIPS")

df_cross = df_cross.rename(columns=col_names_cross)

In [None]:
## Show the first crosswalk row to check the renamed columns
df_cross.head(1)

In [None]:
crosswalk = {k: g["FIPS"].tolist() for k, g in df_cross.groupby("Zip")}

In [None]:
df_flu_raw.replace({"Zip": crosswalk}, inplace=True)

In [None]:
## The Zip column now holds county codes, so expose it under the name FIPS
## in a separate frame and leave df_flu_raw as it is
col_names_flu_final = dict(Zip="FIPS")

df_flu_final = df_flu_raw.rename(columns=col_names_flu_final)

In [None]:
## Inspect the flu locations keyed by FIPS
df_flu_final

In [None]:
## Count flu shot locations per FIPS code, keeping the first State label seen for each

df_flu_agg = df_flu_final.groupby("FIPS", as_index=False).agg(
    State=("State", "first"),
    ID=("ID", "count"),
)

In [None]:
## Inspect the per-FIPS location counts (the count is stored in the ID column)
df_flu_agg

#### Analyze flu and booster interaction

In [None]:
## Combine COVID vaccine dataset and flu shot locations dataset
## The join is keyed on FIPS. Both frames were built with as_index=False and so
## carry their own 0..n-1 row index; a column-wise concat would pair each flu
## count with whichever county shares its row number, not with the same county.

df_final = df_covid_agg.merge(df_flu_agg[["FIPS", "ID"]], on="FIPS", how="left")

## A county with no row in df_flu_agg has no listed flu location: count it as 0
df_final["ID"] = df_final["ID"].fillna(0)

In [None]:
## Inspect the combined booster / flu location frame
df_final

In [None]:
## Scatter plot with fitted regression line: flu location count vs. booster share
fig, ax = plt.subplots(figsize=(10, 5), dpi=100)

sns.regplot(data=df_final, x="ID", y="Boosted_Pct", y_partial=None, ax=ax)

## df_final holds one row per county FIPS code, so the y-axis is labelled per
## county (matching the later vote-adjusted plot) rather than per state
ax.set(
    xlabel="Number of flu clinics",
    ylabel="Percentage of county with booster",
    title="Relationship between number of flu clinics and booster uptake",
)

#### Analyze potential confounding variables

In [None]:
## Load the presidential election results, reading only the columns used below

col_list_voting = ["year", "state_po", "candidatevotes", "totalvotes", "party_simplified"]
df_voting_raw = pd.read_csv(
    filepath_or_buffer="../../data/politics/1976-2020-president.csv",
    usecols=col_list_voting,
)

In [None]:
## Give the election columns readable names

col_names_voting = dict(
    year="Year",
    state_po="State",
    candidatevotes="Candidate Votes",
    totalvotes="Total Votes",
    party_simplified="Party",
)

df_voting_raw = df_voting_raw.rename(columns=col_names_voting)

In [None]:
## Keep only the 2020 election rows
is_2020 = df_voting_raw["Year"] == 2020
df_voting_date = df_voting_raw.loc[is_2020]

In [None]:
## Party labels whose rows are kept when computing vote share
party_list = ["DEMOCRAT"]

In [None]:
## Keep only rows whose Party is in party_list
is_selected_party = df_voting_date["Party"].isin(party_list)
df_voting_party = df_voting_date.loc[is_selected_party]

In [None]:
## Keep only the states in state_list; reset_index() moves the old row labels
## into an "index" column and renumbers the rows from 0
in_state_list = df_voting_party["State"].isin(state_list)
df_voting_state = df_voting_party.loc[in_state_list].reset_index()

In [None]:
## Candidate votes as a percentage of total votes
df_voting_state["Vote_Pct"] = (
    df_voting_state["Candidate Votes"] / df_voting_state["Total Votes"]
) * 100

In [None]:
## Attach each state's vote share to the booster / flu location rows (join on State)
df_final_one = df_final.merge(df_voting_state, on="State")

In [None]:
## Show the first rows of the frame with vote share attached
df_final_one.head()

In [None]:
## Flu location count vs. booster share, with Vote_Pct passed as y_partial so
## that it is regressed out of the y variable before plotting
fig, ax = plt.subplots(figsize=(10, 5), dpi=100)

sns.regplot(
    x="ID",
    y="Boosted_Pct",
    data=df_final_one,
    y_partial="Vote_Pct",
    color="Teal",
    ax=ax,
)

ax.set(ylabel="Percentage of county with booster", xlabel="Number of flu clinics")

In [None]:
import statsmodels.formula.api as sm

## Ordinary least squares: booster share explained by flu location count and vote share
result = sm.ols(
    data=df_final_one,
    formula="Boosted_Pct ~ ID + Vote_Pct",
).fit()

print(result.summary())