In [5]:
# Dependencies
import numpy as np
import pandas as pd
import requests
from census import Census
import gmaps

# Census & gmaps API keys.
# Both keys are imported from a local `config` module instead of being written into the notebook.
from config import (api_key, gkey)
# Census API client constructed with year=2020; reused below as `c` for the ACS query.
c = Census(api_key, year=2020)

# Configure gmaps with its API key.
# NOTE(review): no gmaps call appears in the visible cells after this one — confirm it is still needed.
gmaps.configure(api_key=gkey)

In [6]:
# Query the ACS 5-year endpoint (`c.acs5`) for every state ('for': 'state:*'),
# requesting the state NAME plus variable B19013_001E.
# NOTE(review): the B19013_001E values in the output below (roughly 48k-79k per state)
# look like a dollar amount such as median household income rather than a head count —
# confirm against the Census variable list before treating this column as population.
# The example URLs and notes that used to sit here referred to the ACS 1-year endpoint and to
# B01001_001E / B01003_001E / B23025_005E; none of those variables are requested by this call.
census_data = c.acs5.get(("NAME", "B19013_001E"), {'for': 'state:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Preview the first rows; columns still carry the raw API names at this point.
census_pd.head()

Unnamed: 0,NAME,B19013_001E,state
0,Pennsylvania,63627.0,42
1,California,78672.0,6
2,West Virginia,48037.0,54
3,Utah,74197.0,49
4,New York,71117.0,36


In [7]:
# Discard the `state` code column returned by the API; no later visible cell reads it.
census_pd = census_pd.drop(columns=['state'])
census_pd.head()

Unnamed: 0,NAME,B19013_001E
0,Pennsylvania,63627.0
1,California,78672.0
2,West Virginia,48037.0
3,Utah,74197.0
4,New York,71117.0


In [8]:
# Inspect the column dtypes before renaming and casting.
census_pd.dtypes

NAME            object
B19013_001E    float64
dtype: object

In [9]:
# List the current column labels (still the raw API names).
census_pd.columns

Index(['NAME', 'B19013_001E'], dtype='object')

In [10]:
# Replace the raw API column labels with readable ones.
# NOTE(review): 'B19013_001E' is relabelled 'Population' here, but its values (e.g. 63627 for
# Pennsylvania) are far too small to be state populations — verify which ACS variable was
# intended before relying on this column as a population count.
census_pd = census_pd.rename(columns={'NAME': 'State', 'B19013_001E': 'Population'})
census_pd.head()

Unnamed: 0,State,Population
0,Pennsylvania,63627.0
1,California,78672.0
2,West Virginia,48037.0
3,Utah,74197.0
4,New York,71117.0


In [11]:
# Cast only the 'Population' column from float to integer; other columns keep their dtype.
census_pd = census_pd.astype({'Population': int})
census_pd.head()

Unnamed: 0,State,Population
0,Pennsylvania,63627
1,California,78672
2,West Virginia,48037
3,Utah,74197
4,New York,71117


In [17]:
# Load the learning-modality dataset (one row per county / modality / year with a student
# count) from the project's GitHub repository.
csv_path = 'https://raw.githubusercontent.com/uzmabb182/Data602_Assignments/main/Final_Project_Data_602/resources/posgres_modality_df.csv'
modality_df = pd.read_csv(filepath_or_buffer=csv_path, encoding="utf-8")
modality_df.head()

Unnamed: 0,countyname,fips,learning_modality,state,abbreviation,year,student_count
0,Abbeville County,45001,Hybrid,South Carolina,SC,2022,11890
1,Abbeville County,45001,In Person,South Carolina,SC,2021,95710
2,Abbeville County,45001,In Person,South Carolina,SC,2022,180362
3,Acadia Parish,22001,In Person,Louisiana,LA,2021,77220
4,Acadia Parish,22001,In Person,Louisiana,LA,2022,154440


In [None]:
# Load the COVID dataset from the project's GitHub repository.
# NOTE(review): this cell has no execution count and `covid_df` is not referenced by any
# other visible cell — confirm whether it is still required.
csv_path = 'https://raw.githubusercontent.com/uzmabb182/Data602_Assignments/main/Final_Project_Data_602/resources/posgres_covid_df.csv'
covid_df = pd.read_csv(filepath_or_buffer=csv_path, encoding="utf-8")
covid_df.head()

In [13]:
# Attach the state-level census columns to every county-level modality row.
#
# The result is written back to `modality_df`, so the cell must be safe to re-run:
# any 'State' / 'Population' columns left over from a previous run are dropped first.
# Without that, a second execution would produce suffixed duplicates (State_x, Population_x, ...)
# and the later lookup of modality_df["Population"] would fail.
#
# validate='many_to_one' makes pandas raise a MergeError if `census_pd` ever contains the same
# state more than once, instead of silently multiplying the modality rows.
#
# NOTE(review): this is an inner join, so modality rows whose `state` has no exact match in
# census_pd['State'] are dropped — confirm that is intended.
modality_df = pd.merge(
    modality_df.drop(columns=['State', 'Population'], errors='ignore'),
    census_pd,
    how='inner',
    left_on='state',
    right_on='State',
    validate='many_to_one',
)
modality_df.head()

Unnamed: 0,countyname,fips,learning_modality,state,abbreviation,year,student_count,State,Population
0,Abbeville County,45001,Hybrid,South Carolina,SC,2022,11890,South Carolina,54864
1,Abbeville County,45001,In Person,South Carolina,SC,2021,95710,South Carolina,54864
2,Abbeville County,45001,In Person,South Carolina,SC,2022,180362,South Carolina,54864
3,Aiken County,45003,Hybrid,South Carolina,SC,2021,24184,South Carolina,54864
4,Aiken County,45003,Hybrid,South Carolina,SC,2022,5387,South Carolina,54864


In [14]:
# Add the ratio of each row's student count to the merged state-level 'Population' value.
# NOTE(review): the result is a plain ratio (not multiplied by 100) despite the column name,
# and the sample rows already exceed 1 (e.g. 1.74, 3.29) — the denominator is a per-state
# figure while the numerator is per county, so check both the scale and the 'Population' source.
modality_df = modality_df.assign(
    percentage_student_count=modality_df["student_count"] / modality_df["Population"]
)
modality_df.head()

Unnamed: 0,countyname,fips,learning_modality,state,abbreviation,year,student_count,State,Population,percentage_student_count
0,Abbeville County,45001,Hybrid,South Carolina,SC,2022,11890,South Carolina,54864,0.216718
1,Abbeville County,45001,In Person,South Carolina,SC,2021,95710,South Carolina,54864,1.744495
2,Abbeville County,45001,In Person,South Carolina,SC,2022,180362,South Carolina,54864,3.287438
3,Aiken County,45003,Hybrid,South Carolina,SC,2021,24184,South Carolina,54864,0.440799
4,Aiken County,45003,Hybrid,South Carolina,SC,2022,5387,South Carolina,54864,0.098188


In [15]:
# Inspect the column dtypes after the merge and the ratio computation.
modality_df.dtypes

countyname                   object
fips                          int64
learning_modality            object
state                        object
abbreviation                 object
year                          int64
student_count                 int64
State                        object
Population                    int32
percentage_student_count    float64
dtype: object

In [16]:
# Save as a CSV for the Tableau dashboard (the export line below is currently commented out)
# Note: pass encoding="utf-8" explicitly to avoid encoding issues when the file is read later
#modality_df.to_csv("tableau_modality_df.csv", encoding="utf-8", index=False)