# Census Data

The demographics data is sourced from the Census Bureau's Population Estimates Program (PEP) and covers 2010 to 2019. The data is collected at the state and county levels. The API endpoints are listed at <https://www.census.gov/data/developers/data-sets/popest-popproj/popest.html>, and the variable descriptions are documented at <https://www.census.gov/data/developers/data-sets/popest-popproj/popest/popest-vars.Vintage_2019.html>.

In [1]:
# Import libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
from os import *
import sys
sys.path.append("../")
import urllib.request
import requests
import json

In [15]:
# Change directory
def change_dir(newpath):
    """Switch the working directory to a sibling of ``sys.path[0]``.

    The target is built by dropping the last component of ``sys.path[0]``
    and appending ``newpath`` in its place.
    NOTE(review): ``sys.path[0]`` is presumably the folder the notebook was
    launched from -- confirm for the kernel in use.

    Parameters
    ----------
    newpath : str
        Name of the directory (relative to the parent of ``sys.path[0]``)
        to change into, e.g. ``'input'`` or ``'output'``.

    Raises
    ------
    OSError
        Propagated from ``os.chdir`` when the target cannot be entered.
    """
    # os.path uses the platform's own separator; splitting and joining on a
    # hard-coded "\\" only resolved correctly for Windows-style paths.
    parent_dir = os.path.dirname(sys.path[0])
    os.chdir(os.path.join(parent_dir, newpath))

In [2]:
# Source census data
def get_census_data(api_call, fields, level, timeout=None):
    """Query a 2019 Population Estimates Program endpoint of the Census API.

    Parameters
    ----------
    api_call : str
        Dataset name appended to ``https://api.census.gov/data/2019/pep/``
        (the notebook uses ``'charagegroups'`` and ``'components'``).
    fields : iterable of str
        Extra variables to request. ``NAME`` and ``STATE`` are always
        requested first.
    level : str
        Value placed after ``&for=``. Any additional predicates are passed
        through verbatim (e.g. ``'county&AGEGROUP=0,30,25,26'``).
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. The default ``None`` waits
        indefinitely, as before.

    Returns
    -------
    The decoded JSON body of the response. The calling cells treat it as a
    list of rows whose first row holds the column names.

    Raises
    ------
    requests.HTTPError
        When the API answers with a 4xx/5xx status, instead of failing later
        with an opaque JSON decoding error on the error page.
    """
    requested = ",".join(["NAME", "STATE", *fields])
    api_query = (
        'https://api.census.gov/data/2019/pep/' + str(api_call)
        + '?get=' + requested
        + '&for=' + str(level)
    )
    response = requests.get(api_query, timeout=timeout)
    # Fail loudly on HTTP errors before attempting to parse the body.
    response.raise_for_status()
    return response.json()

In [3]:
# Request county-level population counts (POP) together with the RACE, HISP,
# SEX and DATE_CODE classifiers, restricted to the listed AGEGROUP codes
demographics_api_call = 'charagegroups'
demographics_fields = ['POP','RACE','HISP','SEX','DATE_CODE']
demographics_level = 'county&AGEGROUP=0,30,25,26'
demographics = pd.DataFrame(
    get_census_data(
        demographics_api_call,
        demographics_fields,
        demographics_level,
    )
)
# The first row of the response holds the column names: promote it to the
# header, then keep only the data rows beneath it.
demographics.columns = demographics.iloc[0]
demographics = demographics.iloc[1:]
demographics

Unnamed: 0,NAME,STATE,POP,RACE,HISP,SEX,DATE_CODE,AGEGROUP,state,county
1,"Autauga County, Alabama",01,54571,0,0,0,1,0,01,001
2,"Autauga County, Alabama",01,53261,0,1,0,1,0,01,001
3,"Autauga County, Alabama",01,1310,0,2,0,1,0,01,001
4,"Autauga County, Alabama",01,43297,1,0,0,1,0,01,001
5,"Autauga County, Alabama",01,42194,1,1,0,1,0,01,001
...,...,...,...,...,...,...,...,...,...,...
4422596,"Yauco Municipio, Puerto Rico",72,5805,0,0,1,11,30,72,153
4422597,"Yauco Municipio, Puerto Rico",72,5984,0,0,2,11,30,72,153
4422598,"Yauco Municipio, Puerto Rico",72,11681,0,0,0,12,30,72,153
4422599,"Yauco Municipio, Puerto Rico",72,5755,0,0,1,12,30,72,153


In [4]:
# Inspect the column dtypes as delivered by the API; the conversion to
# integers happens in the next cell.
demographics.dtypes

0
NAME         object
STATE        object
POP          object
RACE         object
HISP         object
SEX          object
DATE_CODE    object
AGEGROUP     object
state        object
county       object
dtype: object

In [5]:
# Cast every numeric column in a single pass. This replaces ten copy-pasted
# per-column assignments on a sliced frame (which can raise chained-assignment
# warnings) and yields a fresh frame, so the cell is safe to re-run.
demographics_int_columns = [
    'STATE', 'POP', 'RACE', 'HISP', 'SEX',
    'DATE_CODE', 'state', 'county', 'AGEGROUP',
]
demographics = demographics.astype(
    {'NAME': str, **{column: int for column in demographics_int_columns}}
)
demographics.dtypes

0
NAME         object
STATE         int32
POP           int32
RACE          int32
HISP          int32
SEX           int32
DATE_CODE     int32
AGEGROUP      int32
state         int32
county        int32
dtype: object

In [6]:
# Row filter applied in one combined mask:
#   STATE < 57     -> drops Puerto Rico (state code 72 in the raw pull)
#   DATE_CODE > 7  -> NOTE(review): presumably keeps only the later estimate
#                     dates; confirm the cut-off against the variable docs
#   RACE < 7       -> NOTE(review): presumably drops the higher race codes;
#                     confirm the intended categories
rows_to_keep = (
    (demographics['STATE'] < 57)
    & (demographics['DATE_CODE'] > 7)
    & (demographics['RACE'] < 7)
)
demographics = demographics[rows_to_keep]
demographics

Unnamed: 0,NAME,STATE,POP,RACE,HISP,SEX,DATE_CODE,AGEGROUP,state,county
757,"Autauga County, Alabama",1,54864,0,0,0,8,0,1,1
758,"Autauga County, Alabama",1,53391,0,1,0,8,0,1,1
759,"Autauga County, Alabama",1,1473,0,2,0,8,0,1,1
760,"Autauga County, Alabama",1,42650,1,0,0,8,0,1,1
761,"Autauga County, Alabama",1,41448,1,1,0,8,0,1,1
...,...,...,...,...,...,...,...,...,...,...
4419788,"Weston County, Wyoming",56,1275,0,0,1,11,30,56,45
4419789,"Weston County, Wyoming",56,1061,0,0,2,11,30,56,45
4419790,"Weston County, Wyoming",56,2363,0,0,0,12,30,56,45
4419791,"Weston County, Wyoming",56,1296,0,0,1,12,30,56,45


In [7]:
# Request the county-level migration components (net, domestic and
# international) together with their PERIOD_CODE
migration_api_call = 'components'
migration_fields = ['NETMIG','DOMESTICMIG','INTERNATIONALMIG','PERIOD_CODE']
migration_level = 'county'
migration = pd.DataFrame(
    get_census_data(
        migration_api_call,
        migration_fields,
        migration_level,
    )
)
# The first row of the response holds the column names: promote it to the
# header, then keep only the data rows beneath it.
migration.columns = migration.iloc[0]
migration = migration.iloc[1:]
migration

Unnamed: 0,NAME,STATE,NETMIG,DOMESTICMIG,INTERNATIONALMIG,PERIOD_CODE,state,county
1,"Cape Girardeau County, Missouri",29,134,115,19,1,29,031
2,"Cape Girardeau County, Missouri",29,598,458,140,2,29,031
3,"Cape Girardeau County, Missouri",29,231,117,114,3,29,031
4,"Cape Girardeau County, Missouri",29,242,151,91,4,29,031
5,"Cape Girardeau County, Missouri",29,287,158,129,5,29,031
...,...,...,...,...,...,...,...,...
31416,"Murray County, Georgia",13,80,12,68,6,13,213
31417,"Murray County, Georgia",13,-189,-250,61,7,13,213
31418,"Murray County, Georgia",13,344,317,27,8,13,213
31419,"Murray County, Georgia",13,31,10,21,9,13,213


In [8]:
# Build the dtype mapping programmatically: NAME stays text, every other
# column is cast to integer.
migration_dtypes = {'NAME': str}
for int_column in ['STATE', 'NETMIG', 'DOMESTICMIG', 'INTERNATIONALMIG', 'PERIOD_CODE', 'state', 'county']:
    migration_dtypes[int_column] = int
migration = migration.astype(migration_dtypes)
migration.dtypes

0
NAME                object
STATE                int32
NETMIG               int32
DOMESTICMIG          int32
INTERNATIONALMIG     int32
PERIOD_CODE          int32
state                int32
county               int32
dtype: object

In [9]:
# Join the demographics and migration frames on the four columns they share.
# NOTE(review): DATE_CODE (demographics) and PERIOD_CODE (migration) are not
# part of the key, so every demographics row is paired with every migration
# period of the same county -- confirm this cross-pairing is intended.
merged = demographics.merge(
    migration,
    how='inner',
    on=['NAME', 'STATE', 'state', 'county'],
)
merged

Unnamed: 0,NAME,STATE,POP,RACE,HISP,SEX,DATE_CODE,AGEGROUP,state,county,NETMIG,DOMESTICMIG,INTERNATIONALMIG,PERIOD_CODE
0,"Autauga County, Alabama",1,54864,0,0,0,8,0,1,1,172,147,25,1
1,"Autauga County, Alabama",1,54864,0,0,0,8,0,1,1,331,327,4,2
2,"Autauga County, Alabama",1,54864,0,0,0,8,0,1,1,-343,-329,-14,3
3,"Autauga County, Alabama",1,54864,0,0,0,8,0,1,1,-214,-226,12,4
4,"Autauga County, Alabama",1,54864,0,0,0,8,0,1,1,108,101,7,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11311195,"Weston County, Wyoming",56,1067,0,0,2,12,30,56,45,66,24,42,6
11311196,"Weston County, Wyoming",56,1067,0,0,2,12,30,56,45,5,-9,14,7
11311197,"Weston County, Wyoming",56,1067,0,0,2,12,30,56,45,-257,-275,18,8
11311198,"Weston County, Wyoming",56,1067,0,0,2,12,30,56,45,-48,-65,17,9


In [10]:
# Change directory
# NOTE(review): this function is defined twice in the notebook; consider
# keeping a single definition near the top.
def change_dir(newpath):
    """Switch the working directory to a sibling of ``sys.path[0]``.

    The target is built by dropping the last component of ``sys.path[0]``
    and appending ``newpath`` in its place.
    NOTE(review): ``sys.path[0]`` is presumably the folder the notebook was
    launched from -- confirm for the kernel in use.

    Parameters
    ----------
    newpath : str
        Name of the directory (relative to the parent of ``sys.path[0]``)
        to change into, e.g. ``'input'`` or ``'output'``.

    Raises
    ------
    OSError
        Propagated from ``os.chdir`` when the target cannot be entered.
    """
    # os.path uses the platform's own separator; splitting and joining on a
    # hard-coded "\\" only resolved correctly for Windows-style paths.
    parent_dir = os.path.dirname(sys.path[0])
    os.chdir(os.path.join(parent_dir, newpath))

In [11]:
# Make sure both FIPS key columns are integers before joining the crosswalk
for fips_column in ('state', 'county'):
    merged[fips_column] = merged[fips_column].astype(int)

In [12]:
# Load the CBSA delineation file from the input folder, rename its FIPS
# columns to the names used by the census frames, and cast them to integers
change_dir('input')
add_CBSA_codes = (
    pd.read_csv('delineation_files.csv')
    .rename(columns={"FIPS State Code": "state", "FIPS County Code": "county"})
    .astype({'state': int, 'county': int})
)

In [13]:
# Attach the CBSA crosswalk. No key is given, so pandas joins on every column
# the two frames have in common (presumably state and county -- confirm).
merged = merged.merge(add_CBSA_codes, how='inner')

In [14]:
# Write the merged frame, including its index, to the output folder.
# NOTE(review): the stage 2 section at the end of this notebook writes to the
# same output/demographics.csv, so a full run overwrites this file -- confirm.
change_dir('output')
merged.to_csv('demographics.csv', index=True)

In [15]:
# Number of distinct CBSA codes present after the crosswalk merge
merged.loc[:, 'CBSA Code'].nunique(dropna=True)

984

## Demographics for stage 2 analysis

In [67]:
# Get demographics data: county population by RACE and HISP for DATE_CODE 12
api_query = "https://api.census.gov/data/2019/pep/charagegroups?get=POP,NAME,RACE,HISP,STATE&for=county&DATE_CODE=12"
response = requests.get(api_query)
# Stop on HTTP errors instead of failing later with an opaque JSON error
response.raise_for_status()
formattedResponse = response.json()
dg = pd.DataFrame(formattedResponse)
# The first row of the response holds the column names
dg.columns = dg.iloc[0]
dg = dg.iloc[1:,:]
dg

Unnamed: 0,POP,NAME,RACE,HISP,STATE,DATE_CODE,state,county
1,22124,"Coahoma County, Mississippi",0,0,28,12,28,027
2,21757,"Coahoma County, Mississippi",0,1,28,12,28,027
3,367,"Coahoma County, Mississippi",0,2,28,12,28,027
4,4626,"Coahoma County, Mississippi",1,0,28,12,28,027
5,4412,"Coahoma County, Mississippi",1,1,28,12,28,027
...,...,...,...,...,...,...,...,...
113186,22,"Stanton County, Kansas",10,1,20,12,20,187
113187,3,"Stanton County, Kansas",10,2,20,12,20,187
113188,4,"Stanton County, Kansas",11,0,20,12,20,187
113189,0,"Stanton County, Kansas",11,1,20,12,20,187


In [68]:
# Get age group data: county population by AGEGROUP for DATE_CODE 12
api_query = "https://api.census.gov/data/2019/pep/charagegroups?get=POP,NAME,AGEGROUP&for=county&DATE_CODE=12"
response = requests.get(api_query)
# Stop on HTTP errors instead of failing later with an opaque JSON error
response.raise_for_status()
formattedResponse = response.json()
age = pd.DataFrame(formattedResponse)
# The first row of the response holds the column names
age.columns = age.iloc[0]
age = age.iloc[1:,:]
# Keep only AGEGROUP 26, compared as a string because the API values have not
# been converted. Its POP becomes the `above_65` column in the merge cell that
# follows (presumably the 65-and-over bracket -- confirm in the variable docs).
age = age[age['AGEGROUP']=='26']
age

Unnamed: 0,POP,NAME,AGEGROUP,DATE_CODE,state,county
27,3541,"Coahoma County, Mississippi",26,12,28,027
59,3399,"Jasper County, Mississippi",26,12,28,061
91,11827,"Jones County, Mississippi",26,12,28,067
123,2846,"Walthall County, Mississippi",26,12,28,147
155,6954,"Monroe County, Mississippi",26,12,28,095
...,...,...,...,...,...,...
102907,28886,"Scott County, Iowa",26,12,19,163
102939,11351,"Dallas County, Iowa",26,12,19,049
102971,2840,"O'Brien County, Iowa",26,12,19,141
103003,1152,"Sherman County, Kansas",26,12,20,181


In [69]:
# Attach the AGEGROUP 26 population to each demographics row, name the two
# POP columns by meaning, and drop the columns that are no longer needed
add_age_group = (
    dg.merge(age, on=['NAME','state','county','DATE_CODE'])
    .rename(columns={'POP_x':'population','POP_y':'above_65'})
    .drop(columns=['AGEGROUP','DATE_CODE'])
)
dg = add_age_group

In [79]:
# Merge the CBSA crosswalk
# Cast the FIPS key columns to integers so they match the crosswalk's keys
for fips_column in ('state', 'county'):
    dg[fips_column] = dg[fips_column].astype(int)
change_dir('input')
add_CBSA_codes = (
    pd.read_csv('delineation_files.csv')
    .rename(columns={"FIPS State Code": "state", "FIPS County Code": "county"})
    .astype({'state': int, 'county': int})
)
# No key is given, so pandas joins on every column the two frames share
# (presumably state and county -- confirm).
dg = dg.merge(add_CBSA_codes, how='inner')
dg

Unnamed: 0,population,NAME,RACE,HISP,STATE,state,county,above_65,CBSA Code,CBSA Title,County/County Equivalent,State Name
0,22124,"Coahoma County, Mississippi",0,0,28,28,27,3541,17260,"Clarksdale, MS",Coahoma County,Mississippi
1,21757,"Coahoma County, Mississippi",0,1,28,28,27,3541,17260,"Clarksdale, MS",Coahoma County,Mississippi
2,367,"Coahoma County, Mississippi",0,2,28,28,27,3541,17260,"Clarksdale, MS",Coahoma County,Mississippi
3,4626,"Coahoma County, Mississippi",1,0,28,28,27,3541,17260,"Clarksdale, MS",Coahoma County,Mississippi
4,4412,"Coahoma County, Mississippi",1,1,28,28,27,3541,17260,"Clarksdale, MS",Coahoma County,Mississippi
...,...,...,...,...,...,...,...,...,...,...,...,...
96549,5182,"Dallas County, Iowa",10,1,19,19,49,11351,19780,"Des Moines-West Des Moines, IA",Dallas County,Iowa
96550,88,"Dallas County, Iowa",10,2,19,19,49,11351,19780,"Des Moines-West Des Moines, IA",Dallas County,Iowa
96551,129,"Dallas County, Iowa",11,0,19,19,49,11351,19780,"Des Moines-West Des Moines, IA",Dallas County,Iowa
96552,105,"Dallas County, Iowa",11,1,19,19,49,11351,19780,"Des Moines-West Des Moines, IA",Dallas County,Iowa


In [80]:
# Get education data - ACS 5 year estimates
api_query = "https://api.census.gov/data/2019/acs/acs5/cprofile?get=NAME,CP02_2019_065E,CP02_2019_063E,CP02_2019_064E,CP02_2019_062E&for=metropolitan%20statistical%20area/micropolitan%20statistical%20area:*"
response = requests.get(api_query)
# Stop on HTTP errors instead of failing later with an opaque JSON error
response.raise_for_status()
formattedResponse = response.json()
df = pd.DataFrame(formattedResponse)
# The first row of the response holds the column names
df.columns = df.iloc[0]
df = df.iloc[1:,:]
# Give the ACS variable codes readable names and rename the geography column
# to the 'CBSA Code' key used by the crosswalk.
# NOTE(review): confirm each CP02 code matches its label in the ACS docs.
df = df.rename(columns={'metropolitan statistical area/micropolitan statistical area':'CBSA Code','CP02_2019_065E':'bachelors','CP02_2019_063E':'college_no_degree','CP02_2019_064E':'associates','CP02_2019_062E':'ged'})
df['CBSA Code'] = df['CBSA Code'].astype(int)
df

Unnamed: 0,NAME,bachelors,college_no_degree,associates,ged,CBSA Code
1,"Big Stone Gap, VA Micro Area",9.8,22.6,9.2,29.1,13720
2,"Billings, MT Metro Area",21.9,22.8,9.1,30.6,13740
3,"Binghamton, NY Metro Area",15.6,17.8,12.7,32.4,13780
4,"Birmingham-Hoover, AL Metro Area",19.7,21.6,8.7,27.4,13820
5,"Bismarck, ND Metro Area",24.3,21.2,14.6,24.5,13900
...,...,...,...,...,...,...
903,"Opelousas, LA Micro Area",10.0,15.5,5.3,41.2,36660
904,"Orangeburg, SC Micro Area",11.6,20.8,14.0,31.2,36700
905,"Orlando-Kissimmee-Sanford, FL Metro Area",21.3,19.9,11.5,25.8,36740
906,"Fort Dodge, IA Micro Area",14.5,22.9,13.8,33.9,22700


In [81]:
# Join the CBSA-level education shares onto the county demographics, then
# drop the duplicated name, state and county label columns
m1 = (
    df.merge(dg, on='CBSA Code')
    .drop(columns=['NAME_y','STATE','County/County Equivalent','State Name'])
)
m1

Unnamed: 0,NAME_x,bachelors,college_no_degree,associates,ged,CBSA Code,population,RACE,HISP,state,county,above_65,CBSA Title
0,"Big Stone Gap, VA Micro Area",9.8,22.6,9.2,29.1,13720,14318,0,0,51,51,3290,"Big Stone Gap, VA"
1,"Big Stone Gap, VA Micro Area",9.8,22.6,9.2,29.1,13720,14167,0,1,51,51,3290,"Big Stone Gap, VA"
2,"Big Stone Gap, VA Micro Area",9.8,22.6,9.2,29.1,13720,151,0,2,51,51,3290,"Big Stone Gap, VA"
3,"Big Stone Gap, VA Micro Area",9.8,22.6,9.2,29.1,13720,14063,1,0,51,51,3290,"Big Stone Gap, VA"
4,"Big Stone Gap, VA Micro Area",9.8,22.6,9.2,29.1,13720,13938,1,1,51,51,3290,"Big Stone Gap, VA"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
60044,"Fort Collins, CO Metro Area",28.0,20.4,9.2,19.0,22660,11505,10,1,8,69,57774,"Fort Collins, CO"
60045,"Fort Collins, CO Metro Area",28.0,20.4,9.2,19.0,22660,843,10,2,8,69,57774,"Fort Collins, CO"
60046,"Fort Collins, CO Metro Area",28.0,20.4,9.2,19.0,22660,940,11,0,8,69,57774,"Fort Collins, CO"
60047,"Fort Collins, CO Metro Area",28.0,20.4,9.2,19.0,22660,716,11,1,8,69,57774,"Fort Collins, CO"


In [89]:
# Number of distinct CBSA codes left after joining the education data
m1.loc[:, 'CBSA Code'].nunique(dropna=True)

896

In [91]:
# Write the stage 2 frame to the output folder without the index column.
# NOTE(review): an earlier cell also writes output/demographics.csv from
# `merged`; this export replaces that file -- confirm that is intended.
change_dir('output')
m1.to_csv(path_or_buf='demographics.csv', index=False)