In [201]:
# Dependencies
import numpy as np
import pandas as pd
import requests
from census import Census
import gmaps

# Census & gmaps API Keys
from config import (api_key, gkey)
c = Census(api_key, year=2020)

# Configure gmaps
gmaps.configure(api_key=gkey)

In [203]:
# https://api.census.gov/data/2020/acs/acs1?get=NAME,B01001_001E&for=county:*
# https://api.census.gov/data/2020/acs/acs1?get=NAME,B01001_001E&for=county:*&in=state:*
# Run Census Search to retrieve data on all states
# Note the addition of "B23025_005E" for unemployment count
census_data = c.acs5.get(("NAME", "B01003_001E"), {'for': 'state:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

census_pd.head()

Unnamed: 0,NAME,B01003_001E,state
0,Pennsylvania,12794885.0,42
1,California,39346023.0,6
2,West Virginia,1807426.0,54
3,Utah,3151239.0,49
4,New York,19514849.0,36


In [204]:
# Remove column name 'state'
census_pd = census_pd.drop(['state'], axis=1)
census_pd.head()

Unnamed: 0,NAME,B01003_001E
0,Pennsylvania,12794885.0
1,California,39346023.0
2,West Virginia,1807426.0
3,Utah,3151239.0
4,New York,19514849.0


In [205]:
census_pd.dtypes

NAME            object
B01003_001E    float64
dtype: object

In [206]:
census_pd.columns

Index(['NAME', 'B01003_001E'], dtype='object')

In [207]:
# Rename the column names

census_pd.rename(columns = {'NAME':'State', 'B01003_001E':'Population'}, inplace = True)
census_pd.head()

Unnamed: 0,State,Population
0,Pennsylvania,12794885.0
1,California,39346023.0
2,West Virginia,1807426.0
3,Utah,3151239.0
4,New York,19514849.0


In [208]:
# To convert from float to integer

census_pd['Population'] = census_pd['Population'].astype(int)
census_pd.head()

Unnamed: 0,State,Population
0,Pennsylvania,12794885
1,California,39346023
2,West Virginia,1807426
3,Utah,3151239
4,New York,19514849


In [210]:
# Importing mortality data file

csv_path = 'https://raw.githubusercontent.com/uzmabb182/Data602_Assignments/main/Final_Project_Data_602/resources/posgres_states_df.csv'
states_df = pd.read_csv(csv_path, encoding="utf-8")
states_df.head(5)

Unnamed: 0,State,Abbreviation
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA


In [214]:
# When column names are different
df=pd.merge(census_pd,states_df, left_on='State', right_on='State', how='left')
df.head()

Unnamed: 0,State,Population,Abbreviation
0,Pennsylvania,12794885,PA
1,California,39346023,CA
2,West Virginia,1807426,WV
3,Utah,3151239,UT
4,New York,19514849,NY


In [211]:
# Importing mortality data file

csv_path = 'https://raw.githubusercontent.com/uzmabb182/Data602_Assignments/main/Final_Project_Data_602/resources/posgres_modality_df.csv'
modality_df = pd.read_csv(csv_path, encoding="utf-8")
modality_df

Unnamed: 0,learning_modality,year,state,student_count
0,Hybrid,2021,AK,63105
1,Hybrid,2021,AL,374899
2,Hybrid,2021,AR,188044
3,Hybrid,2021,AZ,158363
4,Hybrid,2021,BI,19912
...,...,...,...,...
275,Remote,2022,SD,4270
276,Remote,2022,TX,69317
277,Remote,2022,UT,42084
278,Remote,2022,WA,2320


In [216]:
# When column names are different
population_df=pd.merge(modality_df,df, left_on='state', right_on='Abbreviation', how='left')
population_df

Unnamed: 0,learning_modality,year,state,student_count,State,Population,Abbreviation
0,Hybrid,2021,AK,63105,Alaska,736990.0,AK
1,Hybrid,2021,AL,374899,Alabama,4893186.0,AL
2,Hybrid,2021,AR,188044,Arkansas,3011873.0,AR
3,Hybrid,2021,AZ,158363,Arizona,7174064.0,AZ
4,Hybrid,2021,BI,19912,,,
...,...,...,...,...,...,...,...
275,Remote,2022,SD,4270,South Dakota,879336.0,SD
276,Remote,2022,TX,69317,Texas,28635442.0,TX
277,Remote,2022,UT,42084,Utah,3151239.0,UT
278,Remote,2022,WA,2320,Washington,7512465.0,WA


In [193]:
modality_df["student_count_per_population"] = modality_df["student_count"] / 1000
modality_df

KeyError: 'Population'

In [144]:
modality_df.dtypes

State                        object
Abbreviation                 object
learning_modality            object
year                          int64
student_count                 int64
Population                    int32
percentage_student_count    float64
student_count_per_capita    float64
dtype: object

In [119]:
# Save as a csv for Tableau dashboard
# Note to avoid any issues later, use encoding="utf-8"
modality_df.to_csv("resources/tableau_modality_df.csv", encoding="utf-8", index=False)

In [120]:
# Importing covid data file

csv_path = 'https://raw.githubusercontent.com/uzmabb182/Data602_Assignments/main/Final_Project_Data_602/resources/posgres_covid_df.csv'
covid_df = pd.read_csv(csv_path, encoding="utf-8")
covid_df.head(5)

Unnamed: 0,state,fips,cases,deaths,year
0,Alabama,1,365747,4872,2021
1,Alaska,2,46740,198,2021
2,Arizona,4,530267,9015,2021
3,Arkansas,5,229442,3711,2021
4,California,6,2345811,26236,2021


In [121]:
df_grouped = covid_df.groupby(by="state")["cases_count"].sum()
df_grouped.head()

KeyError: 'Column not found: cases_count'

In [None]:
# Merging the two dataframes on state

#merge DataFrames
covid_df = pd.merge(df_grouped, census_pd, left_on='state', right_on='State')
covid_df.head()

In [None]:
modality_df["percentage_cases_count"] = modality_df["cases_count"] / modality_df["Population"] 
modality_df.head()