In [1]:
import requests

In [2]:
import geopandas as gpd
import pandas as pd

## Get State Data

In [3]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [4]:
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get COVID Data

In [5]:
url = "https://jhucoronavirus.azureedge.net/jhucoronavirus/state_vaccination_rates.dev.json"
r = requests.get(url)

In [6]:
list_of_vaccinations = r.json()

In [7]:
formatted_list = []
for vaccine_dict in list_of_vaccinations:
    vaccine_data = vaccine_dict["data"].copy()
    vaccine_data.update({"NAME": vaccine_dict["state"]})
    formatted_list.append(vaccine_data)

In [8]:
vaccine_df = pd.DataFrame(formatted_list)

## Get Population Data

In [9]:
state_populations = pd.read_excel(
    "data/NST-EST2023-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [10]:
state_populations_df = state_populations["NST-EST2023-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

## Merge Data

In [16]:
states_with_population_df = states_df.merge(state_populations_df, on="NAME", how="left")

In [18]:
states_with_population_df = states_with_population_df.merge(
    vaccine_df, on="NAME", how="left"
)
states_with_population_df = states_with_population_df[
    [
        "doses_admin",
        "raw_full_vac",
        "percent_full_vac",
        "per100k_full_vac",
        "NAME",
        "POPULATION",
        "geometry",
    ]
]

In [19]:
states_with_population_df.iloc[37]["POPULATION"] = 43914
states_with_population_df.iloc[44]["POPULATION"] = 172952

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  states_with_population_df.iloc[37]['POPULATION'] = 43914
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  states_with_population_df.iloc[44]['POPULATION'] = 172952


In [20]:
states_with_population_df = states_with_population_df.dropna()

In [21]:
states_with_population_df["per_1000"] = states_with_population_df["raw_full_vac"] / (
    states_with_population_df["POPULATION"] / 1000
)
states_with_population_df["per_10k"] = states_with_population_df["raw_full_vac"] / (
    states_with_population_df["POPULATION"] / 10_000
)
states_with_population_df["per_100k"] = states_with_population_df["raw_full_vac"] / (
    states_with_population_df["POPULATION"] / 100000
)
states_with_population_df["per_500k"] = states_with_population_df["raw_full_vac"] / (
    states_with_population_df["POPULATION"] / 500_000
)
states_with_population_df["per_1m"] = states_with_population_df["raw_full_vac"] / (
    states_with_population_df["POPULATION"] / 1_000_000
)
states_with_population_df["per_capita"] = (
    states_with_population_df["raw_full_vac"] / states_with_population_df["POPULATION"]
)

In [22]:
states_with_population_df = states_with_population_df.to_crs(9311)

In [23]:
states_with_population_df.to_file("data/covid_vacines.gpkg")