In [125]:
import pandas as pd
import requests

# ZIP code -> city lookup table.
# Only the two columns used downstream are loaded, so pandas never has to infer
# a dtype for the unused mixed-type column that triggered the DtypeWarning.
# `zip` is read as text rather than as a number and then left-padded to five
# characters so it matches the zero-padded keys used in the other frames.
zip_map = (
    pd.read_csv('uszips.csv', usecols=['zip', 'city'], dtype={'zip': str})
    .loc[:, ['zip', 'city']]  # usecols keeps file order; pin the column order explicitly
    .assign(zip=lambda df: df['zip'].str.zfill(5))
)



Columns (17) have mixed types. Specify dtype option on import or set low_memory=False.



In [126]:
# Vaccination rollout by ZIP: keep the Los Angeles county rows and expose the
# ZCTA column under the shorter name `zip`.
rollout_df = (
    pd.read_csv('vaccination_by_zip_2021.csv')
    .loc[lambda df: df['county'] == 'Los Angeles']
    .rename(columns={'zip_code_tabulation_area': 'zip'})
)
# Normalise the key to a 5-character, zero-padded string.
rollout_df['zip'] = rollout_df['zip'].astype(str).str.zfill(5)

# Rescale the rollout proportion columns to percentages (multiply by 100).
cols_to_convert = [
    'percent_of_population_fully_vaccinated',
    'percent_of_population_partially_vaccinated',
    'percent_of_population_with_1_plus_dose',
]
rollout_df[cols_to_convert] = rollout_df[cols_to_convert] * 100

In [127]:
# ACS 2020 median household income, one row per ZCTA.
url = "https://api.census.gov/data/2020/acs/acs5"
params = {
    "get": "NAME,B19013_001E",            # B19013_001E is renamed to median_income below
    "for": "zip code tabulation area:*",  # every ZCTA
}

# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting it surface
# later as a confusing JSON decoding failure.
resp = requests.get(url, params=params, timeout=60)
resp.raise_for_status()
raw = resp.json()  # first row holds the column names, the rest are data rows
income_df = pd.DataFrame(raw[1:], columns=raw[0]).rename(columns={
    "B19013_001E": "median_income",
    "zip code tabulation area": "zip"
})

income_df["median_income"] = pd.to_numeric(income_df["median_income"],
                                            errors="coerce")
# A median income cannot be negative. Negative values (the Census API uses large
# negative codes such as -666666666 for unavailable estimates) are treated as
# missing so they cannot be mistaken for real observations.
income_df["median_income"] = income_df["median_income"].where(
    income_df["median_income"] >= 0
)
income_df['zip'] = income_df['zip'].astype(str).str.zfill(5)

# ACS 2020 poverty rate (subject table), one row per ZCTA.
url_poverty = "https://api.census.gov/data/2020/acs/acs5/subject"
params_poverty = {
    "get": "NAME,S1701_C03_001E",         # S1701_C03_001E is renamed to poverty_rate below
    "for": "zip code tabulation area:*"
}

resp_poverty = requests.get(url_poverty, params=params_poverty, timeout=60)
resp_poverty.raise_for_status()
data_poverty = resp_poverty.json()

poverty_df = pd.DataFrame(data_poverty[1:], columns=data_poverty[0]).rename(columns={
    "S1701_C03_001E": "poverty_rate",
    "zip code tabulation area": "zip"
})
poverty_df["zip"] = poverty_df["zip"].astype(str).str.zfill(5)
poverty_df["poverty_rate"] = pd.to_numeric(poverty_df["poverty_rate"], errors="coerce")
# Same treatment as for income: a negative rate is a placeholder, not data.
# poverty_rate is not range-filtered anywhere else in this notebook, so a
# placeholder left here would feed straight into the per-city averages.
poverty_df["poverty_rate"] = poverty_df["poverty_rate"].where(
    poverty_df["poverty_rate"] >= 0
)

# Combine both indicators: every income row is kept and gains its poverty rate
# (NaN when the poverty table has no row for that ZIP).
econ_df = pd.merge(income_df, poverty_df[['zip', 'poverty_rate']], on='zip', how='left')
econ_df.head()

Unnamed: 0,NAME,median_income,zip,poverty_rate
0,ZCTA5 29590,30985,29590,37.1
1,ZCTA5 93306,54450,93306,21.5
2,ZCTA5 93660,39625,93660,25.9
3,ZCTA5 93110,93264,93110,9.1
4,ZCTA5 93212,42983,93212,28.9


In [128]:
# Attach the ACS income/poverty columns to every rollout row (left join on zip),
# then keep only the rows whose as_of_date is exactly '2021-03-30'.
merged_df = pd.merge(rollout_df, econ_df, how='left', on='zip')
merged_2103_df = merged_df.loc[merged_df['as_of_date'].eq('2021-03-30')]

In [129]:
# Add the city name for each ZIP, keep only the analysis columns, then drop
# incomplete rows and rows whose median income is not strictly positive.
city_2103_df = (
    merged_2103_df
    .merge(zip_map, how='left', on='zip')
    .loc[:, [
        'zip',
        'city',
        'median_income',
        'poverty_rate',
        'percent_of_population_fully_vaccinated',
        'percent_of_population_partially_vaccinated',
        'percent_of_population_with_1_plus_dose',
    ]]
    .dropna()
    .loc[lambda df: df['median_income'] > 0]
)

In [130]:
# Preview the first five rows of the per-ZIP table.
city_2103_df.head(5)

Unnamed: 0,zip,city,median_income,poverty_rate,percent_of_population_fully_vaccinated,percent_of_population_partially_vaccinated,percent_of_population_with_1_plus_dose
0,91773,San Dimas,89669,8.8,27.6266,16.0076,43.6342
1,91204,Glendale,55847,19.4,17.1616,11.8067,28.9683
2,91303,Canoga Park,62848,18.0,13.8763,12.6083,26.4846
3,91384,Castaic,111990,4.8,15.482,11.4619,26.9439
4,91702,Azusa,68058,13.3,17.0288,14.8524,31.8812


In [131]:
# Number of distinct (non-missing) city names in the per-ZIP table.
city_2103_df['city'].nunique(dropna=True)

125

In [132]:
# Collapse the ZIP-level rows to one row per city by taking the plain
# (unweighted) mean of every numeric column; `city` is restored as a regular
# column afterwards.
city_grouped_df = (
    city_2103_df
    .groupby('city')
    .mean(numeric_only=True)
    .reset_index()
)

city_grouped_df.head()


Unnamed: 0,city,median_income,poverty_rate,percent_of_population_fully_vaccinated,percent_of_population_partially_vaccinated,percent_of_population_with_1_plus_dose
0,Acton,101750.0,11.5,15.9858,11.0308,27.0166
1,Agoura Hills,130315.0,3.7,28.7131,17.5654,46.2785
2,Alhambra,66486.5,12.95,27.07145,18.3807,45.45215
3,Altadena,104577.0,7.3,28.9673,16.2284,45.1957
4,Arcadia,94922.0,8.55,28.58715,18.064,46.65115


In [133]:
import plotly.express as px

# Scatter of city-level median household income against the share of the
# population that is fully vaccinated.
# To plot first-dose coverage instead, use the column
# "percent_of_population_with_1_plus_dose" for y, hover_data and labels
# (label: "More Than 1 Dose (%)").
income_fig = px.scatter(
    data_frame=city_grouped_df,
    x="median_income",
    y="percent_of_population_fully_vaccinated",
    hover_name="city",
    hover_data=dict(
        median_income=":,.0f",
        percent_of_population_fully_vaccinated=":.2f",
    ),
    labels=dict(
        median_income="Median Household Income ($)",
        percent_of_population_fully_vaccinated="Fully Vaccinated (%)",
    ),
    title="Income vs. Vaccination Rate by City (March 2021)",
)

# Fixed canvas size.
income_fig.update_layout(width=800, height=600)
income_fig.show()


In [134]:
# Scatter of city-level poverty rate against the share of the population that
# is fully vaccinated.
# To plot first-dose coverage instead, use the column
# "percent_of_population_with_1_plus_dose" for y, hover_data and labels
# (label: "More Than 1 Dose (%)").
poverty_fig = px.scatter(
    data_frame=city_grouped_df,
    x="poverty_rate",
    y="percent_of_population_fully_vaccinated",
    hover_name="city",
    hover_data=dict(
        poverty_rate=":.2f",
        percent_of_population_fully_vaccinated=":.2f",
    ),
    labels=dict(
        poverty_rate="Poverty Rate (%)",
        percent_of_population_fully_vaccinated="Fully Vaccinated (%)",
    ),
    title="Poverty Rate vs. Vaccination Rate by City (March 2021)",
)

# Fixed canvas size.
poverty_fig.update_layout(width=800, height=600)
poverty_fig.show()


In [135]:
# Save each figure as an HTML page; include_plotlyjs="cdn" makes the page load
# plotly.js from the CDN rather than embedding the library in the file.
for _fig, _html_path in (
    (income_fig, "income_vs_vaccination.html"),
    (poverty_fig, "poverty_vs_vaccination.html"),
):
    _fig.write_html(_html_path, include_plotlyjs="cdn")