In [26]:
# Dependencies
import numpy as np
import pandas as pd
import requests
from census import Census
import gmaps

# Census & gmaps API Keys
from config import (api_key, gkey)
c = Census(api_key, year=2020)

# Configure gmaps
gmaps.configure(api_key=gkey)

In [82]:
# https://api.census.gov/data/2020/acs/acs1?get=NAME,B01001_001E&for=county:*
# https://api.census.gov/data/2020/acs/acs1?get=NAME,B01001_001E&for=county:*&in=state:*
# Run Census Search to retrieve data on all states
# Note the addition of "B23025_005E" for unemployment count
census_data = c.acs5.get(("NAME", "B01003_001E"), {'for': 'state:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

census_pd.head()

Unnamed: 0,NAME,B01003_001E,state
0,Pennsylvania,12794885.0,42
1,California,39346023.0,6
2,West Virginia,1807426.0,54
3,Utah,3151239.0,49
4,New York,19514849.0,36


In [84]:
# Remove column name 'state'
census_pd = census_pd.drop(['state'], axis=1)
census_pd.head()

Unnamed: 0,NAME,B01003_001E
0,Pennsylvania,12794885.0
1,California,39346023.0
2,West Virginia,1807426.0
3,Utah,3151239.0
4,New York,19514849.0


In [85]:
census_pd.dtypes

NAME            object
B01003_001E    float64
dtype: object

In [86]:
census_pd.columns

Index(['NAME', 'B01003_001E'], dtype='object')

In [87]:
# Rename the column names

census_pd.rename(columns = {'NAME':'State', 'B01003_001E':'Population'}, inplace = True)
census_pd.head()

Unnamed: 0,State,Population
0,Pennsylvania,12794885.0
1,California,39346023.0
2,West Virginia,1807426.0
3,Utah,3151239.0
4,New York,19514849.0


In [88]:
# To convert from float to integer

census_pd['Population'] = census_pd['Population'].astype(int)
census_pd.head()

Unnamed: 0,State,Population
0,Pennsylvania,12794885
1,California,39346023
2,West Virginia,1807426
3,Utah,3151239
4,New York,19514849


In [91]:
census_pd[['State']] = census_pd.State.str.split(",",expand=True,)
census_pd.head()

Unnamed: 0,State,Population
0,Pennsylvania,12794885
1,California,39346023
2,West Virginia,1807426
3,Utah,3151239
4,New York,19514849


In [92]:
# Importing mortality data file

csv_path = 'https://raw.githubusercontent.com/uzmabb182/Data602_Assignments/main/Final_Project_Data_602/resources/posgres_modality_df.csv'
modality_df = pd.read_csv(csv_path, encoding="utf-8")
modality_df.head(5)

Unnamed: 0,countyname,fips,learning_modality,state,abbreviation,year,student_count
0,Abbeville County,45001,Hybrid,South Carolina,SC,2022,11890
1,Abbeville County,45001,In Person,South Carolina,SC,2021,95710
2,Abbeville County,45001,In Person,South Carolina,SC,2022,180362
3,Acadia Parish,22001,In Person,Louisiana,LA,2021,77220
4,Acadia Parish,22001,In Person,Louisiana,LA,2022,154440


In [80]:
# Merging the two dataframes on state

#merge DataFrames
modality_df = pd.merge(modality_df, census_pd, left_on='countyname', right_on='County')
modality_df.head()

Unnamed: 0,countyname,fips,learning_modality,state,abbreviation,year,student_count,State,Population,County
0,Abbeville County,45001,Hybrid,South Carolina,SC,2022,11890,South Carolina,24582,Abbeville County
1,Abbeville County,45001,In Person,South Carolina,SC,2021,95710,South Carolina,24582,Abbeville County
2,Abbeville County,45001,In Person,South Carolina,SC,2022,180362,South Carolina,24582,Abbeville County
3,Acadia Parish,22001,In Person,Louisiana,LA,2021,77220,Louisiana,62371,Acadia Parish
4,Acadia Parish,22001,In Person,Louisiana,LA,2022,154440,Louisiana,62371,Acadia Parish


In [81]:
modality_df["percentage_student_count"] = modality_df["student_count"] / modality_df["Population"] 
modality_df.head()

Unnamed: 0,countyname,fips,learning_modality,state,abbreviation,year,student_count,State,Population,County,percentage_student_count
0,Abbeville County,45001,Hybrid,South Carolina,SC,2022,11890,South Carolina,24582,Abbeville County,0.483687
1,Abbeville County,45001,In Person,South Carolina,SC,2021,95710,South Carolina,24582,Abbeville County,3.893499
2,Abbeville County,45001,In Person,South Carolina,SC,2022,180362,South Carolina,24582,Abbeville County,7.337157
3,Acadia Parish,22001,In Person,Louisiana,LA,2021,77220,Louisiana,62371,Acadia Parish,1.238075
4,Acadia Parish,22001,In Person,Louisiana,LA,2022,154440,Louisiana,62371,Acadia Parish,2.476151


In [46]:
modality_df.dtypes

countyname                   object
fips                          int64
learning_modality            object
state                        object
abbreviation                 object
year                          int64
student_count                 int64
State                        object
Population                    int32
percentage_student_count    float64
dtype: object

In [47]:
# Save as a csv for Tableau dashboard
# Note to avoid any issues later, use encoding="utf-8"
modality_df.to_csv("resources/tableau_modality_df.csv", encoding="utf-8", index=False)

In [57]:
# Importing covid data file

csv_path = 'https://raw.githubusercontent.com/uzmabb182/Data602_Assignments/main/Final_Project_Data_602/resources/posgres_covid_df.csv'
covid_df = pd.read_csv(csv_path, encoding="utf-8")
covid_df.head(5)

Unnamed: 0,countyname,fips,state,year,cases_count,deaths_count
0,Autauga County,1001,Alabama,2021,2871893,41785
1,Autauga County,1001,Alabama,2022,5301518,67890
2,Baldwin County,1003,Alabama,2021,9751057,136369
3,Baldwin County,1003,Alabama,2022,18880887,218429
4,Barbour County,1005,Alabama,2021,983791,22337


In [51]:
df_grouped = covid_df.groupby(by="state")["cases_count"].sum()
df_grouped.head()

state
Alabama        665159605
Alaska         111768901
Arizona       1021174110
Arkansas       412626556
California    4665910952
Name: cases_count, dtype: int64

In [61]:
# Merging the two dataframes on state

#merge DataFrames
covid_df = pd.merge(df_grouped, census_pd, left_on='state', right_on='State')
covid_df.head()

Unnamed: 0,cases_count,State,Population
0,665159605,Alabama,4893186
1,111768901,Alaska,736990
2,1021174110,Arizona,7174064
3,412626556,Arkansas,3011873
4,4665910952,California,39346023


In [50]:
modality_df["percentage_cases_count"] = modality_df["cases_count"] / modality_df["Population"] 
modality_df.head()

KeyError: 'cases_count'