In [14]:
# Dependencies
import numpy as np
import pandas as pd
import requests
from census import Census
import gmaps

# API keys are imported from the config module rather than written inline
from config import api_key, gkey

# Hand the Google key to gmaps
gmaps.configure(api_key=gkey)

# Census client, constructed with year=2019
c = Census(api_key, year=2019)

In [15]:
# Raw REST examples of a county-level query, kept for reference. Note that
# these URLs target the acs1 endpoint and variable B01001_001E, whereas the
# call below goes through the client's acs5 interface with other variables:
# https://api.census.gov/data/2019/acs/acs1?get=NAME,B01001_001E&for=county:*
# https://api.census.gov/data/2019/acs/acs1?get=NAME,B01001_001E&for=county:*&in=state:*
#
# Request every county in every state. The trailing comments give the label
# each variable code is renamed to further down in this cell.
census_data = c.acs5.get(
    (
        "NAME",
        "B19013_001E",  # Household Income
        "B01003_001E",  # Population
        "B01002_001E",  # Median Age
        "B19301_001E",  # Per Capita Income
        "B17001_002E",  # Poverty Count
        "B23025_005E",  # Unemployment Count
    ),
    {"for": "county:*", "in": "state:*"},
)

# Convert the API records to a DataFrame
census_pd = pd.DataFrame(census_data)

# Column renaming: replace Census variable codes with readable labels
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name", "county": "County"})


def _rate_percent(count, population):
    """Return ``100 * count / population`` as a float Series.

    Both inputs are coerced with ``pd.to_numeric(errors="coerce")`` so a
    missing or non-numeric value produces NaN for that row instead of raising
    (which a plain ``.astype(int)`` does). Rows whose population is not
    positive also produce NaN rather than a division-by-zero ``inf``.
    """
    count = pd.to_numeric(count, errors="coerce")
    population = pd.to_numeric(population, errors="coerce")
    # .where() keeps positive populations and masks everything else to NaN
    return 100 * count / population.where(population > 0)


# Add in Poverty Rate (Poverty Count / Population), expressed in percent
census_pd["Poverty Rate"] = _rate_percent(
    census_pd["Poverty Count"], census_pd["Population"])

# Add in Unemployment Rate (Unemployment Count / Population), in percent.
# NOTE(review): the denominator here is total population, not the labor
# force -- confirm that this is the intended definition.
census_pd["Unemployment Rate"] = _rate_percent(
    census_pd["Unemployment Count"], census_pd["Population"])

# Final DataFrame: select and order the reported columns. The raw
# "Unemployment Count" and the API's "state" field are not carried over.
census_pd = census_pd[["County", "Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate", "Unemployment Rate"]]

census_pd.head()

Unnamed: 0,County,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate
0,51,"Fayette County, Illinois",21565.0,41.9,46650.0,23194.0,3421.0,15.863668,2.4345
1,107,"Logan County, Illinois",29003.0,40.1,57308.0,27546.0,2323.0,8.009516,2.544564
2,165,"Saline County, Illinois",23994.0,42.2,44090.0,25342.0,4936.0,20.57181,3.40085
3,97,"Lake County, Illinois",701473.0,38.4,89427.0,45766.0,54273.0,7.737005,2.759479
4,127,"Massac County, Illinois",14219.0,43.5,47481.0,23539.0,2331.0,16.393558,1.821506


In [16]:
# Print a summary of the frame: index range, per-column non-null counts and dtypes
census_pd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3220 entries, 0 to 3219
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   County             3220 non-null   object 
 1   Name               3220 non-null   object 
 2   Population         3220 non-null   float64
 3   Median Age         3220 non-null   float64
 4   Household Income   3220 non-null   float64
 5   Per Capita Income  3220 non-null   float64
 6   Poverty Count      3220 non-null   float64
 7   Poverty Rate       3220 non-null   float64
 8   Unemployment Rate  3220 non-null   float64
dtypes: float64(7), object(2)
memory usage: 226.5+ KB


In [17]:
# Write the county table to a CSV file.
# The encoding is set to UTF-8 explicitly to avoid issues when the file is
# read back later; the row index is left out of the file.
census_pd.to_csv(
    "census_us_county_output.csv",
    index=False,
    encoding="utf-8",
)