# ACS Data
This notebook provides an example of how the American Community Survey can be accessed.

NOTE: You need a Census API key to run this notebook; you can request one online [here](https://api.census.gov/data/key_signup.html).

You'll also need a shapefile that contains the geography in lat/lon coordinates, so that you can later obtain the matching Weather Underground data and analyze it. You can find the shapefile for your state/county [here](https://www2.census.gov/geo/tiger/TIGER2020/).

In [10]:
import os
import requests
import json
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import geopandas as gpd

In [5]:
# The API key is stored in a local JSON file that is kept out of GitHub.

# Parse that file, then pull out the value stored under "key".
with open('../acs_key.json') as key_file:
    data = json.load(key_file)

key = data['key']

[output redacted: the saved output of this cell exposed the Census API key]


In [6]:
# Query the ACS 5-year (2022) endpoint for the variables of interest.
# Variables
#     -- income     : B19013_001E
#     -- population : B01003_001E
base_url = "https://api.census.gov/data/2022/acs/acs5?get=NAME,B19013_001E,B01003_001E"

# Geography filter (every tract within state 37) plus the API key loaded earlier.
params = {
    "for": "tract:*",
    "in": "state:37",
    "key": key
}

# A timeout keeps the cell from hanging forever if the API does not answer.
response = requests.get(base_url, params=params, timeout=60)
# Stop on an HTTP error status instead of trying to parse an error body as data.
response.raise_for_status()

# Parse the body once: the first row holds the column names, the rest are data rows.
rows = response.json()
acs_tract = pd.DataFrame(rows[1:], columns=rows[0])

In [15]:
# Keep only the rows (tracts) that belong to county 119.
charlotte_acs = acs_tract[acs_tract["county"] == "119"]
print(len(charlotte_acs))

charlotte_acs = (
    charlotte_acs
    # Rename B19013_001E to income and B01003_001E to population
    .rename(columns={
        "B19013_001E": "income",
        "B01003_001E": "population"
    })
    # Drop the name column and the state/county codes, which are constant after filtering
    .drop(columns=["NAME", "state", "county"])
    # The API returns its values as JSON strings, so convert the estimates to
    # numbers (a no-op if they are already numeric); unparseable or missing
    # entries become NaN instead of raising. "tract" stays a string because it
    # is an identifier used later as a join key.
    # NOTE(review): the Census API is understood to use large negative
    # placeholder values (e.g. -666666666) for estimates it could not compute —
    # confirm whether those need masking before analysis.
    .assign(
        income=lambda df: pd.to_numeric(df["income"], errors="coerce"),
        population=lambda df: pd.to_numeric(df["population"], errors="coerce"),
    )
)

charlotte_acs.head()

305


Unnamed: 0,income,population,tract
1396,101587,1148,101
1397,123650,2741,102
1398,131398,2042,103
1399,109896,1619,104
1400,82500,954,301


In [16]:
# Census-tract geometries, read from a TIGER/Line shapefile published by the US Census Bureau.
census_tracts = gpd.read_file('../data/raw/census/tl_2020_37_tract/tl_2020_37_tract.shp')
# Restrict the tracts to the county code of interest.
in_county = census_tracts['COUNTYFP'].eq('119')
charlotte = census_tracts.loc[in_county]

In [17]:
# Attach the ACS values to the tract geometries by matching tract codes,
# then discard the identifier columns that are no longer needed.
charlotte_acs = (
    charlotte
    .merge(charlotte_acs, left_on="TRACTCE", right_on="tract")
    .drop(columns=["STATEFP", "COUNTYFP", "TRACTCE", "NAME", "tract"])
)
charlotte_acs.head()

Unnamed: 0,GEOID,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,income,population
0,37119002701,Census Tract 27.01,G5020,S,1671003,0,35.2004703,-80.8309136,"POLYGON ((-80.83839 35.19628, -80.83831 35.196...",236115,2925
1,37119005404,Census Tract 54.04,G5020,S,5366655,53444,35.286058,-80.8140714,"POLYGON ((-80.83657 35.27296, -80.83650 35.273...",52321,4439
2,37119005403,Census Tract 54.03,G5020,S,6132898,33162,35.2993564,-80.8204775,"POLYGON ((-80.84000 35.29855, -80.83999 35.299...",58333,6003
3,37119005519,Census Tract 55.19,G5020,S,6060922,47091,35.36782,-80.7394872,"POLYGON ((-80.76510 35.37745, -80.76509 35.377...",71194,5653
4,37119005516,Census Tract 55.16,G5020,S,1210608,14870,35.3843829,-80.766735,"POLYGON ((-80.77800 35.39118, -80.77596 35.392...",114018,1950


In [18]:
# Save the formatted result for future analysis.
output_path = "../data/preprocessed/census/charlotte.shp"
# Create the destination folder first so the write also works on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
charlotte_acs.to_file(output_path)