In [10]:
from abc import ABC, abstractmethod
from typing import Iterable, Mapping, Sequence

import pandas as pd

from nepal.datasets import Dataset, NYTimes, PopulationDensity, GovernmentResponse, Vaccinations

In [2]:
# Load the government-response dataset via the project's `GovernmentResponse`
# loader. The next cell filters this frame on its "CountryCode" and
# "RegionCode" columns.
df = GovernmentResponse().load()

In [8]:
# Keep only the rows for the USA that carry a non-null RegionCode
# (the sample output below shows state-level rows such as US_AK ... US_WY).
df_usa = df.loc[df["CountryCode"].eq("USA") & df["RegionCode"].notna()]

Unnamed: 0,CountryName,CountryCode,RegionName,RegionCode,Jurisdiction,Date,C1_combined_numeric,C1_combined,C2_combined_numeric,C2_combined,...,StringencyIndex,StringencyIndexForDisplay,StringencyLegacyIndex,StringencyLegacyIndexForDisplay,GovernmentResponseIndex,GovernmentResponseIndexForDisplay,ContainmentHealthIndex,ContainmentHealthIndexForDisplay,EconomicSupportIndex,EconomicSupportIndexForDisplay
243474,United States,USA,Alaska,US_AK,STATE_TOTAL,2020-01-01,0.0,0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
243475,United States,USA,Alaska,US_AK,STATE_TOTAL,2020-01-02,0.0,0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
243476,United States,USA,Alaska,US_AK,STATE_TOTAL,2020-01-03,0.0,0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
243477,United States,USA,Alaska,US_AK,STATE_TOTAL,2020-01-04,0.0,0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
243478,United States,USA,Alaska,US_AK,STATE_TOTAL,2020-01-05,0.0,0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
288043,United States,USA,Wyoming,US_WY,STATE_TOTAL,2022-05-19,,,,,...,,,,,,,,,,
288044,United States,USA,Wyoming,US_WY,STATE_TOTAL,2022-05-20,,,,,...,,,,,,,,,,
288045,United States,USA,Wyoming,US_WY,STATE_TOTAL,2022-05-21,,,,,...,,,,,,,,,,
288046,United States,USA,Wyoming,US_WY,STATE_TOTAL,2022-05-22,,,,,...,,,,,,,,,,


In [35]:
# Left-hand side of the later join: the four policy index columns of the
# USA government-response rows, keyed by (RegionCode, Date).
df_left = df_usa.set_index(["RegionCode", "Date"])[
    [
        "StringencyIndex",
        "GovernmentResponseIndex",
        "ContainmentHealthIndex",
        "EconomicSupportIndex",
    ]
]

In [21]:
# Load the vaccination dataset and discard the rows whose FIPS is the "UNK"
# placeholder.
# NOTE: this rebinds `df`, which an earlier cell bound to the
# GovernmentResponse data; cells that expect that frame must not be re-run
# after this one.
df = Vaccinations().load().loc[lambda frame: frame["FIPS"] != "UNK"]

{'AK',
 'AL',
 'AR',
 'AZ',
 'CA',
 'CO',
 'CT',
 'DC',
 'DE',
 'FL',
 'GA',
 'GU',
 'HI',
 'IA',
 'ID',
 'IL',
 'IN',
 'KS',
 'KY',
 'LA',
 'MA',
 'MD',
 'ME',
 'MI',
 'MN',
 'MO',
 'MS',
 'MT',
 'NC',
 'ND',
 'NE',
 'NH',
 'NJ',
 'NM',
 'NV',
 'NY',
 'OH',
 'OK',
 'OR',
 'PA',
 'PR',
 'RI',
 'SC',
 'SD',
 'TN',
 'TX',
 'UT',
 'VA',
 'VI',
 'VT',
 'WA',
 'WI',
 'WV',
 'WY'}

In [39]:
import numpy as np


def _add_derived_columns(data: pd.DataFrame) -> pd.DataFrame:
    data["RegionCode"] = ("US_" + data["Recip_State"]).astype("string")
    data["Under5_Pop_Pct"] = (data["Census2019"] - data["Census2019_5PlusPop"]) / data[
        "Census2019"
    ]
    data["Between5to17_Pop_Pct"] = data["Census2019_5to17Pop"] / data["Census2019"]
    data["Between18to65_Pop_Pct"] = (
        data["Census2019_18PlusPop"] - data["Census2019_65PlusPop"]
    ) / data["Census2019"]
    data["Plus65_Pop_Pct"] = data["Census2019_65PlusPop"] / data["Census2019"]
    data["Is_Metro"] = np.where(data["Metro_status"] == "Metro", 1, 0)
    data["SVI_A"] = np.where(data["SVI_CTGY"] == "A", 1, 0)
    data["SVI_B"] = np.where(data["SVI_CTGY"] == "B", 1, 0)
    data["SVI_C"] = np.where(data["SVI_CTGY"] == "C", 1, 0)
    data["SVI_D"] = np.where(data["SVI_CTGY"] == "D", 1, 0)
    return data

In [42]:
# Enrich the vaccination frame with the derived columns, then build the
# right-hand side of the join: one row per county (FIPS) and date, keyed and
# sorted by (RegionCode, Date).
df_ex = _add_derived_columns(df)

df_right = (
    df_ex.loc[
        :,
        [
            # join keys (become the index below)
            "RegionCode",
            "Date",
            # county identifier
            "FIPS",
            # reported vaccination figures
            "Completeness_pct",
            "Administered_Dose1_Pop_Pct",
            "Administered_Dose1_Recip_18PlusPop_Pct",
            "Administered_Dose1_Recip_65PlusPop_Pct",
            "Series_Complete_Pop_Pct",
            "Series_Complete_18PlusPop_Pct",
            "Series_Complete_65PlusPop_Pct",
            "Booster_Doses_Vax_Pct",
            "Booster_Doses_18Plus_Vax_Pct",
            "Booster_Doses_50Plus_Vax_Pct",
            "Booster_Doses_65Plus_Vax_Pct",
            # columns added by _add_derived_columns
            "Under5_Pop_Pct",
            "Between5to17_Pop_Pct",
            "Between18to65_Pop_Pct",
            "Plus65_Pop_Pct",
            "Is_Metro",
            "SVI_A",
            "SVI_B",
            "SVI_C",
            "SVI_D",
        ],
    ]
    .set_index(["RegionCode", "Date"])
    .sort_index()
)

In [43]:
# Inspect the county-level frame; the rendered output below is truncated by
# pandas to the first and last rows.
df_right

Unnamed: 0_level_0,Unnamed: 1_level_0,FIPS,Completeness_pct,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65PlusPop_Pct,Series_Complete_Pop_Pct,Series_Complete_18PlusPop_Pct,Series_Complete_65PlusPop_Pct,Booster_Doses_Vax_Pct,Booster_Doses_18Plus_Vax_Pct,...,Booster_Doses_65Plus_Vax_Pct,Under5_Pop_Pct,Between5to17_Pop_Pct,Between18to65_Pop_Pct,Plus65_Pop_Pct,Is_Metro,SVI_A,SVI_B,SVI_C,SVI_D
RegionCode,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
US_AK,2020-12-13,02130,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,...,,,,,,0,0,0,1,0
US_AK,2020-12-13,02122,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,...,,,,,,0,0,1,0,0
US_AK,2020-12-13,02090,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,...,,,,,,1,0,1,0,0
US_AK,2020-12-13,02100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,...,,,,,,0,1,0,0,0
US_AK,2020-12-13,02170,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,...,,,,,,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
US_WY,2022-05-21,56027,97.099998,34.099998,40.400002,67.099998,31.5,37.5,62.700001,40.799999,41.299999,...,57.700001,0.060696,0.114601,0.591256,0.233447,0,0,1,0,0
US_WY,2022-05-21,56021,97.099998,64.699997,76.800003,95.0,56.799999,67.400002,89.199997,38.200001,40.599998,...,63.599998,0.062744,0.167668,0.604864,0.164724,1,0,1,0,0
US_WY,2022-05-21,56017,97.099998,52.299999,62.400002,85.0,49.799999,59.400002,81.400002,54.599998,56.900002,...,71.900002,0.048946,0.156583,0.517788,0.276683,0,1,0,0,0
US_WY,2022-05-21,56037,97.099998,59.200001,73.0,95.0,50.299999,62.099998,93.199997,40.599998,43.400002,...,68.099998,0.066056,0.192972,0.611341,0.129632,0,0,1,0,0


In [47]:
# Left-join the state-level policy indices with the county-level vaccination
# rows on (RegionCode, Date). df_right holds several FIPS rows per key, so a
# matching state/date row is repeated once per county; keys without a match
# in df_right keep NaN in the right-hand columns.
df_joined = df_left.join(
    other=df_right,
    on=["RegionCode", "Date"],
    how="left",
)

In [62]:
# Show the joined data per county: keep only the rows that matched a county
# (non-null FIPS), discard the state-level RegionCode key and re-key the
# frame by (FIPS, Date).
(
    df_joined.dropna(subset=["FIPS"])
    .droplevel("RegionCode")
    .reset_index()
    .set_index(["FIPS", "Date"])
    .sort_index()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex,Completeness_pct,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65PlusPop_Pct,Series_Complete_Pop_Pct,Series_Complete_18PlusPop_Pct,...,Booster_Doses_65Plus_Vax_Pct,Under5_Pop_Pct,Between5to17_Pop_Pct,Between18to65_Pop_Pct,Plus65_Pop_Pct,Is_Metro,SVI_A,SVI_B,SVI_C,SVI_D
FIPS,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
01001,2020-12-13,43.52,47.40,48.81,37.5,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,1.0,0.0,1.0,0.0,0.0
01001,2020-12-14,43.52,47.40,48.81,37.5,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,1.0,0.0,1.0,0.0,0.0
01001,2020-12-15,43.52,48.65,50.24,37.5,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,1.0,0.0,1.0,0.0,0.0
01001,2020-12-16,43.52,48.65,50.24,37.5,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,1.0,0.0,1.0,0.0,0.0
01001,2020-12-17,43.52,48.65,50.24,37.5,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,1.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56045,2022-05-17,27.78,,35.71,,97.099998,41.299999,49.599998,76.199997,38.099998,45.900002,...,67.599998,0.050527,0.153746,0.582359,0.213368,0.0,0.0,1.0,0.0,0.0
56045,2022-05-18,,,,,97.099998,41.299999,49.599998,76.199997,38.099998,45.900002,...,67.599998,0.050527,0.153746,0.582359,0.213368,0.0,0.0,1.0,0.0,0.0
56045,2022-05-19,,,,,97.099998,41.299999,49.599998,76.199997,38.099998,45.900002,...,67.599998,0.050527,0.153746,0.582359,0.213368,0.0,0.0,1.0,0.0,0.0
56045,2022-05-20,,,,,97.099998,41.299999,49.599998,76.199997,38.099998,45.900002,...,67.599998,0.050527,0.153746,0.582359,0.213368,0.0,0.0,1.0,0.0,0.0
