# Make API calls to retrieve the education level of counties

In [1]:
from us import states

import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import requests

# Census API key taken from the environment; os.getenv returns None when the
# CENSUS_API_KEY variable is not set.
CENSUS_API_KEY = os.getenv('CENSUS_API_KEY')

from IPython.core.display import display

# Plot theme: dark background with thin, half-transparent grid lines.
plt.style.use('dark_background')
plt.rcParams["grid.linewidth"] = 0.5
plt.rcParams["grid.alpha"] = 0.5

# --- ACS variable codes requested from the Census API ---

# Population estimate.
ESTIMATE_POP = "B01003_001E"

# Educational attainment: overall total, then one code per attainment level.
ESTIMATE_TOT = "B06009_001E"
ESTIMATE_TOT_LESS_HS = "B06009_002E"
ESTIMATE_TOT_HS = "B06009_003E"
ESTIMATE_TOT_COL_OR_ASSOC = "B06009_004E"
ESTIMATE_TOT_BACH = "B06009_005E"
ESTIMATE_TOT_GRAD = "B06009_006E"

# Commuting: vehicles (car, van, truck) used in commuting, and the walking total.
ESTIMATE_AGG_VEHICLE = "B08015_001E"
ESTIMATE_TOT_WALK = "B08126_068E"

# Retrieve education and vehicle data from the 2019 ACS 1-year dataset (acs1)

In [2]:
# Fetch the 2019 ACS 1-year county-level estimates from the Census API.

HOST = 'https://api.census.gov/data'
year = '2019'
dataset = 'acs/acs1'
base_url = '/'.join([HOST, year, dataset])

# Variables to request: the county name plus the population, education and
# commuting variable codes.
get_vars = ['NAME', ESTIMATE_POP, ESTIMATE_TOT, ESTIMATE_TOT_LESS_HS, ESTIMATE_TOT_HS,
            ESTIMATE_TOT_COL_OR_ASSOC, ESTIMATE_TOT_BACH, ESTIMATE_TOT_GRAD,
            ESTIMATE_AGG_VEHICLE, ESTIMATE_TOT_WALK]

predicates = {
    'get': ','.join(get_vars),
    # NOTE(review): requests sends this list as two separate `for=` parameters.
    # The Census API examples use `for=county:*&in=state:*` -- confirm the
    # duplicate-`for` form is intended before changing it.
    'for': ['county:*', 'state:*'],
}

# Send the API key only when one is configured, so the cell still runs
# without the environment variable.
if CENSUS_API_KEY:
    predicates['key'] = CENSUS_API_KEY

# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() reports an HTTP error here rather than as a JSON decoding
# failure in a later cell.
r = requests.get(base_url, params=predicates, timeout=60)
r.raise_for_status()
json = r.text  # raw response body as text

* The estimate total refers to the sum of the educational attainment totals: less than high school, high school, college or associate degree, bachelor's degree, and graduate degree.


In [3]:
# Build the educational-attainment / commuting frame for all US counties.
# The first row of the JSON payload holds the column names; every following
# row is one record.
payload = r.json()  # parse the response body once instead of twice
col_names = payload[0]
df = pd.DataFrame(data=payload[1:], columns=col_names)

In [4]:
# Swap the Census variable codes for readable column names. The keys reuse the
# code constants that built the request, so the two cannot drift apart.
column_labels = {
    ESTIMATE_POP: 'POP',
    ESTIMATE_TOT: 'EDU_TOT',
    ESTIMATE_TOT_LESS_HS: 'LESS_HS_TOT',
    ESTIMATE_TOT_HS: 'HS_TOT',
    ESTIMATE_TOT_COL_OR_ASSOC: 'COL_OR_ASSOC_TOT',
    ESTIMATE_TOT_BACH: 'BACH_TOT',
    ESTIMATE_TOT_GRAD: 'GRAD_TOT',
    ESTIMATE_AGG_VEHICLE: 'VEHICLE_TOT',
    ESTIMATE_TOT_WALK: 'WALK_TOT',
}
# Reassign instead of mutating in place so re-running the cell stays predictable.
df = df.rename(columns=column_labels)

In [5]:
# Put the identifying columns first, followed by the estimates, in a fixed order.
ordered_columns = [
    'NAME', 'state', 'county',
    'POP', 'EDU_TOT',
    'LESS_HS_TOT', 'HS_TOT', 'COL_OR_ASSOC_TOT', 'BACH_TOT', 'GRAD_TOT',
    'VEHICLE_TOT', 'WALK_TOT',
]
df = df[ordered_columns]

In [6]:
# Preview the 2019 frame and write it to the processed-data folder.
# NOTE: the export runs every time this cell executes; comment out the
# to_csv line to avoid overwriting the file.
data_path = r'../../data/processed/acs1_edu_vehicle2019.csv'
display(df)
df.to_csv(path_or_buf=data_path)


Unnamed: 0,NAME,state,county,POP,EDU_TOT,LESS_HS_TOT,HS_TOT,COL_OR_ASSOC_TOT,BACH_TOT,GRAD_TOT,VEHICLE_TOT,WALK_TOT
0,"Baldwin County, Alabama",01,003,223234,159717,15158,41884,51204,32781,18690,85170,
1,"Calhoun County, Alabama",01,015,113605,79084,12195,27277,24355,9082,6175,41330,
2,"Cullman County, Alabama",01,043,83768,58795,11014,17924,20616,5935,3306,31840,
3,"DeKalb County, Alabama",01,049,71513,47007,10043,17881,13084,3693,2306,28065,
4,"Elmore County, Alabama",01,051,81209,57553,5940,19992,17311,9418,4892,33005,
...,...,...,...,...,...,...,...,...,...,...,...,...
835,"Ponce Municipio, Puerto Rico",72,113,131881,,,,,,,31635,
836,"San Juan Municipio, Puerto Rico",72,127,318441,,,,,,,92055,171
837,"Toa Alta Municipio, Puerto Rico",72,135,72025,,,,,,,24230,
838,"Toa Baja Municipio, Puerto Rico",72,137,74271,,,,,,,22900,


# Repeat the steps for year 2018


In [7]:
# Fetch the 2018 ACS 1-year county-level estimates. HOST and dataset are
# reused from the 2019 request cell.
year = '2018'
base_url = '/'.join([HOST, year, dataset])

# Same variable list as the 2019 request.
get_vars = ['NAME', ESTIMATE_POP, ESTIMATE_TOT, ESTIMATE_TOT_LESS_HS, ESTIMATE_TOT_HS,
            ESTIMATE_TOT_COL_OR_ASSOC, ESTIMATE_TOT_BACH, ESTIMATE_TOT_GRAD,
            ESTIMATE_AGG_VEHICLE, ESTIMATE_TOT_WALK]

predicates = {
    'get': ','.join(get_vars),
    # NOTE(review): requests sends this list as two separate `for=` parameters.
    # The Census API examples use `for=county:*&in=state:*` -- confirm the
    # duplicate-`for` form is intended before changing it.
    'for': ['county:*', 'state:*'],
}

# Send the API key only when one is configured.
if CENSUS_API_KEY:
    predicates['key'] = CENSUS_API_KEY

# Bound the wait and fail fast on HTTP errors instead of passing an error body
# to the JSON parsing in the next cell.
r = requests.get(base_url, params=predicates, timeout=60)
r.raise_for_status()
json = r.text  # raw response body as text

In [8]:
# Build the 2018 frame: the first payload row is the header, the remaining
# rows are the records.
payload = r.json()  # parse the response body once instead of twice
col_names = payload[0]
df = pd.DataFrame(data=payload[1:], columns=col_names)

In [9]:
# Swap the Census variable codes for readable column names. The keys reuse the
# code constants that built the request, so the two cannot drift apart.
column_labels = {
    ESTIMATE_POP: 'POP',
    ESTIMATE_TOT: 'EDU_TOT',
    ESTIMATE_TOT_LESS_HS: 'LESS_HS_TOT',
    ESTIMATE_TOT_HS: 'HS_TOT',
    ESTIMATE_TOT_COL_OR_ASSOC: 'COL_OR_ASSOC_TOT',
    ESTIMATE_TOT_BACH: 'BACH_TOT',
    ESTIMATE_TOT_GRAD: 'GRAD_TOT',
    ESTIMATE_AGG_VEHICLE: 'VEHICLE_TOT',
    ESTIMATE_TOT_WALK: 'WALK_TOT',
}
# Reassign instead of mutating in place so re-running the cell stays predictable.
df = df.rename(columns=column_labels)

In [10]:
# Put the identifying columns first, followed by the estimates, in a fixed order.
ordered_columns = [
    'NAME', 'state', 'county',
    'POP', 'EDU_TOT',
    'LESS_HS_TOT', 'HS_TOT', 'COL_OR_ASSOC_TOT', 'BACH_TOT', 'GRAD_TOT',
    'VEHICLE_TOT', 'WALK_TOT',
]
df = df[ordered_columns]


In [11]:
# Preview the 2018 frame and write it to the processed-data folder.
# NOTE: the export runs every time this cell executes; comment out the
# to_csv line to avoid overwriting the file.
data_path = r'../../data/processed/acs1_edu_vehicle2018.csv'
display(df)
df.to_csv(path_or_buf=data_path)

Unnamed: 0,NAME,state,county,POP,EDU_TOT,LESS_HS_TOT,HS_TOT,COL_OR_ASSOC_TOT,BACH_TOT,GRAD_TOT,VEHICLE_TOT,WALK_TOT
0,"Baldwin County, Alabama",01,003,218022,155691,15282,44013,47573,32210,16613,87745,
1,"Calhoun County, Alabama",01,015,114277,79172,12121,25761,27039,7914,6337,42760,
2,"Cullman County, Alabama",01,043,83442,58319,10228,18149,22883,4916,2143,33065,
3,"DeKalb County, Alabama",01,049,71385,48169,11681,14333,14995,3931,3229,26150,
4,"Elmore County, Alabama",01,051,81887,56793,7490,18457,16505,10146,4195,31600,
...,...,...,...,...,...,...,...,...,...,...,...,...
833,"Ponce Municipio, Puerto Rico",72,113,133191,,,,,,,32475,
834,"San Juan Municipio, Puerto Rico",72,127,320967,,,,,,,94245,0
835,"Toa Alta Municipio, Puerto Rico",72,135,71094,,,,,,,24755,
836,"Toa Baja Municipio, Puerto Rico",72,137,74623,,,,,,,23700,


# Repeat the steps for year 2017


In [12]:
# Fetch the 2017 ACS 1-year county-level estimates. HOST and dataset are
# reused from the 2019 request cell.
year = '2017'
base_url = '/'.join([HOST, year, dataset])

# Same variable list as the 2019 request.
get_vars = ['NAME', ESTIMATE_POP, ESTIMATE_TOT, ESTIMATE_TOT_LESS_HS, ESTIMATE_TOT_HS,
            ESTIMATE_TOT_COL_OR_ASSOC, ESTIMATE_TOT_BACH, ESTIMATE_TOT_GRAD,
            ESTIMATE_AGG_VEHICLE, ESTIMATE_TOT_WALK]

predicates = {
    'get': ','.join(get_vars),
    # NOTE(review): requests sends this list as two separate `for=` parameters.
    # The Census API examples use `for=county:*&in=state:*` -- confirm the
    # duplicate-`for` form is intended before changing it.
    'for': ['county:*', 'state:*'],
}

# Send the API key only when one is configured.
if CENSUS_API_KEY:
    predicates['key'] = CENSUS_API_KEY

# Bound the wait and fail fast on HTTP errors instead of passing an error body
# to the JSON parsing in the next cell.
r = requests.get(base_url, params=predicates, timeout=60)
r.raise_for_status()
json = r.text  # raw response body as text

In [13]:
# Build the 2017 frame: the first payload row is the header, the remaining
# rows are the records.
payload = r.json()  # parse the response body once instead of twice
col_names = payload[0]
df = pd.DataFrame(data=payload[1:], columns=col_names)

In [14]:
# Swap the Census variable codes for readable column names. The keys reuse the
# code constants that built the request, so the two cannot drift apart.
column_labels = {
    ESTIMATE_POP: 'POP',
    ESTIMATE_TOT: 'EDU_TOT',
    ESTIMATE_TOT_LESS_HS: 'LESS_HS_TOT',
    ESTIMATE_TOT_HS: 'HS_TOT',
    ESTIMATE_TOT_COL_OR_ASSOC: 'COL_OR_ASSOC_TOT',
    ESTIMATE_TOT_BACH: 'BACH_TOT',
    ESTIMATE_TOT_GRAD: 'GRAD_TOT',
    ESTIMATE_AGG_VEHICLE: 'VEHICLE_TOT',
    ESTIMATE_TOT_WALK: 'WALK_TOT',
}
# Reassign instead of mutating in place so re-running the cell stays predictable.
df = df.rename(columns=column_labels)

In [15]:
# Put the identifying columns first, followed by the estimates, in a fixed order.
ordered_columns = [
    'NAME', 'state', 'county',
    'POP', 'EDU_TOT',
    'LESS_HS_TOT', 'HS_TOT', 'COL_OR_ASSOC_TOT', 'BACH_TOT', 'GRAD_TOT',
    'VEHICLE_TOT', 'WALK_TOT',
]
df = df[ordered_columns]


In [16]:
# Preview the 2017 frame and write it to the processed-data folder.
# NOTE: the export runs every time this cell executes; comment out the
# to_csv line to avoid overwriting the file.
data_path = r'../../data/processed/acs1_edu_vehicle2017.csv'
display(df)
df.to_csv(path_or_buf=data_path)



Unnamed: 0,NAME,state,county,POP,EDU_TOT,LESS_HS_TOT,HS_TOT,COL_OR_ASSOC_TOT,BACH_TOT,GRAD_TOT,VEHICLE_TOT,WALK_TOT
0,"St. Lawrence County, New York",36,089,109623,72555,9164,25858,20091,8279,9163,35920,
1,"Saratoga County, New York",36,091,229869,164414,10656,39033,45561,37886,31278,105705,
2,"Schenectady County, New York",36,093,155565,107453,10085,32144,35263,17210,12751,60290,
3,"Steuben County, New York",36,101,96281,68011,6510,25920,20685,7451,7445,35585,
4,"Suffolk County, New York",36,103,1492953,1036946,102621,277080,279213,208064,169968,617715,107
...,...,...,...,...,...,...,...,...,...,...,...,...
832,"Putnam County, New York",36,079,99323,71447,5090,20597,18095,14929,12736,42395,
833,"Queens County, New York",36,081,2358582,1695302,304984,493794,372885,335990,187649,391980,629
834,"Rensselaer County, New York",36,083,159722,111047,9095,31730,34954,20143,15125,65720,
835,"Richmond County, New York",36,085,479458,334079,39031,100730,83679,66108,44531,127985,41
