In [1]:
# Import requisite packages
import numpy as np
import pandas as pd
import requests

# Create a function for querying data from Census ACS API
def query_census_api_at_block_group(api_key, year, var_dict):
    '''
    Create a dataframe with Census Block Group level data for a given year and variable dictionary.
    api_key = Census API key
    year = Census year
    var_dict = Dictionary of variables to query from Census API
    '''
    
    # Create a basic data frame with Census Block Group level info
    url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,B01001_001E&for=block%20group:*&in=state:48%20county:113&key={api_key}'
    response = requests.request("GET", url)
    df = pd.DataFrame(response.json()[1:], columns=response.json()[0])[['NAME','state','county','tract','block group']]
    
    # Query Census Block Group Level Dataset
    for key, item in var_dict.items():
        url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,{key}&for=block%20group:*&in=state:48%20county:113&key={api_key}'
        response = requests.request("GET", url)
        df[item] = pd.DataFrame(response.json()[1:], columns=response.json()[0])[key]

    return df    



In [2]:
# Set census API key, year, and variable dictionary
census_api_key = 'b32d020a89fe8d18a9aad261a0b94e3725748774'
year = '2019'


In [3]:
key = 'B25002_003E'
url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,{key}&for=block%20group:*&in=state:48%20county:113&key={census_api_key}'
response = requests.request("GET", url)
pd.DataFrame(response.json()[1:], columns=response.json()[0])[key]

0       23
1        0
2        0
3       73
4       89
        ..
1664    81
1665     0
1666    23
1667    21
1668     0
Name: B25002_003E, Length: 1669, dtype: object

In [4]:
key = 'B25004_002E'
url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,{key}&for=block%20group:*&in=state:48%20county:113&key={census_api_key}'
response = requests.request("GET", url)
pd.DataFrame(response.json()[1:], columns=response.json()[0])[key]

0       37
1        0
2        0
3        0
4        0
        ..
1565    26
1566     0
1567     0
1568     0
1569    54
Name: B25004_002E, Length: 1570, dtype: object

In [4]:
key = 'B11001_001E'
url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,{key}&for=block%20group:*&in=state:48%20county:113&key={census_api_key}'
response = requests.request("GET", url)
pd.DataFrame(response.json()[1:], columns=response.json()[0])[key]

0       491
1       298
2       317
3       474
4       331
       ... 
1664    382
1665    376
1666    667
1667    269
1668    321
Name: B11001_001E, Length: 1669, dtype: object

In [12]:
key = 'B25081_009E'
url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,{key}&for=block%20group:*&in=state:48%20county:113&key={census_api_key}'
response = requests.request("GET", url)
pd.DataFrame(response.json()[1:], columns=response.json()[0])[key]

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [5]:
key = 'B25077_001E'
url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,{key}&for=block%20group:*&in=state:48%20county:113&key={census_api_key}'
response = requests.request("GET", url)
pd.DataFrame(response.json()[1:], columns=response.json()[0])[key]

0       117300
1       122200
2       144400
3       142500
4        19500
         ...  
1664     75500
1665     67800
1666     86100
1667     79400
1668     77600
Name: B25077_001E, Length: 1669, dtype: object

In [6]:
var_dict = {
    # Demographic variables
    'B01001_001E':'total_population',
    'B01001_026E':'female_population',
    'B01002_001E':'median_age',
    'B02001_002E':'race_white',
    'B02001_003E':'race_black',
    'B02001_004E':'race_amerindian',
    'B02001_005E':'race_asian',
    'B03002_012E':'race_hispanic',
    'B09002_001E':'hh_w_child_total',
    'B09002_009E':'hh_w_child_male_hh_wo_spouse',
    'B09002_015E':'hh_w_child_female_hh_wo_spouse',
    'B11001_001E':'hh_total',
    'B11001_007E':'hh_nonfamily',
    'B11001_008E':'hh_living_alone',
    'B15003_002E':'edu_att_over25_no_schooling_completed',
    'B15003_017E':'edu_att_over25_highschool_graduate',
    'B15003_021E':'edu_att_over25_associate_degree',
    'B15003_022E':'edu_att_over25_bachelor_degree',
    'B15003_023E':'edu_att_over25_master_degree',
    'B15003_024E':'edu_att_over25_professional_degree',
    'B15003_025E':'edu_att_over25_doctorate_degree',    
    
    # Built environment variables
    'B08303_001E':'time_to_work_total',
    'B08303_002E':'time_to_work_lt5',
    'B08303_003E':'time_to_work_5to9',
    'B08303_004E':'time_to_work_10to14',
    'B08303_005E':'time_to_work_15to19',
    'B08303_006E':'time_to_work_20to24',
    'B08303_007E':'time_to_work_25to29',
    'B08303_008E':'time_to_work_30to34',
    'B08303_009E':'time_to_work_35to39',
    'B08303_010E':'time_to_work_40to44',
    'B08303_011E':'time_to_work_45to59',
    'B08303_012E':'time_to_work_60to89',
    'B08303_013E':'time_to_work_mt90',
    'B28002_013E':'no_internet_access',
    
    # Housing variables"medrent_samples"
    'B25001_001E':'housing_unit',
    'B25002_003E':'housing_vacant',
    'B25003_002E':'tenure_owner_occupied',
    'B25003_003E':'tenure_renter_occupied',
    'B25004_002E':'vacancy_for_rent',
    'B25010_003E':'hh_average_size_renter_occupied',
    'B25024_002E':'1unit_detached_structure',
    'B25024_003E':'1unit_attached_structure',
    'B25035_001E':'median_year_built',
    'B25070_001E':'gross_rent_percent_hhincome_total',
    'B25070_002E':'gross_rent_percent_hhincome_lt10',
    'B25070_003E':'gross_rent_percent_hhincome_10to15',
    'B25070_004E':'gross_rent_percent_hhincome_15to20',
    'B25070_005E':'gross_rent_percent_hhincome_20to25',
    'B25070_006E':'gross_rent_percent_hhincome_25to29',
    'B25070_007E':'gross_rent_percent_hhincome_30to35',
    'B25070_008E':'gross_rent_percent_hhincome_35to39',
    'B25070_009E':'gross_rent_percent_hhincome_40to50',
    'B25070_010E':'gross_rent_percent_hhincome_50ormore',
    'B25070_011E':'gross_rent_percent_hhincome_notcomputed',
    'B25071_001E':'gross_rent_percent_hhincome_median',
    'B25077_001E':'housing_median_value',
    'B25081_001E':'mortgage_status_total',
    'B25081_002E':'mortgage_status_w_mortgage',
    'B25081_008E':'mortgage_status_wo_mortgage',
    B25081_009E
    'B25088_001E':'median_monthly_owner_costs',
    'B25088_002E':'median_monthly_owner_costs_w_mortgage',
    'B25088_003E':'median_monthly_owner_costs_wo_mortgage',

    # Economic variables
    'B23025_002E':'ind_labor_force',
    'B23025_005E':'ind_labor_force_civilian_unemployed',
    'B17021_002E':'ind_poverty',
    'B19013_001E':'hh_median_income',
    'B19056_002E':'hh_w_ssi', #number of households with supplemental security income (SSI)
    'B19057_002E':'hh_w_pai', #number of households with public assistance income (PAI)
    'B19058_002E':'hh_w_foodstamp_SNAP', #number of households with PAI and Food Stamps / SNAP
    'B25058_001E':'median_contract_rent',
    'B25064_001E':'median_gross_rent', # contract rent plus monthly cost of utilities
}

df = query_census_api_at_block_group(census_api_key, year, var_dict)

# Save dataframe as csv
df.to_csv('data/acs_data_5y_2019_revised.csv', index=False)


# Query the change in median gross rent between 2016 and 2021
# Set census API key, year, and variable dictionary
# year_2016 = '2016'
# var_dict_2016 = {'B25064_001E':'median_gross_rent'}
# df_2016 = query_census_api_at_block_group(census_api_key, year_2016, var_dict_2016)
# df_2016.to_csv('data/acs_data_5y_2016.csv', index=False)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [8]:
for key in var_dict:
    print(key)

B01001_001E
B01001_026E
B01002_001E
B02001_002E
B02001_003E
B02001_004E
B02001_005E
B03002_012E
B09002_001E
B09002_009E
B09002_015E
B11001_001E
B11001_007E
B11001_008E
B15003_002E
B15003_017E
B15003_021E
B15003_022E
B15003_023E
B15003_024E
B15003_025E
B08303_001E
B08303_002E
B08303_003E
B08303_004E
B08303_005E
B08303_006E
B08303_007E
B08303_008E
B08303_009E
B08303_010E
B08303_011E
B08303_012E
B08303_013E
B28002_013E
B25001_001E
B25002_003E
B25003_002E
B25003_003E
B25004_002E
B25010_003E
B25024_002E
B25024_003E
B25035_001E
B25070_001E
B25070_002E
B25070_003E
B25070_004E
B25070_005E
B25070_006E
B25070_007E
B25070_008E
B25070_009E
B25070_010E
B25070_011E
B25071_001E
B25077_001E
B25081_001E
B25081_002E
B25081_009E
B25088_001E
B25088_002E
B25088_003E
B23025_002E
B23025_005E
B17021_002E
B19013_001E
B19056_002E
B19057_002E
B19058_002E
B25058_001E
B25064_001E


In [11]:
from tqdm import tqdm
year = 2019
for key in tqdm(var_dict):
    print(key)
    url = f'https://api.census.gov/data/{year}/acs/acs5?get=NAME,{key}&for=block%20group:*&in=state:48%20county:113&key={census_api_key}'
    response = requests.request("GET", url)
    pd.DataFrame(response.json()[1:], columns=response.json()[0])[key]

  0%|          | 0/72 [00:00<?, ?it/s]

B01001_001E


  1%|▏         | 1/72 [00:01<01:39,  1.40s/it]

B01001_026E


  3%|▎         | 2/72 [00:02<01:31,  1.30s/it]

B01002_001E


  4%|▍         | 3/72 [00:03<01:25,  1.23s/it]

B02001_002E


  6%|▌         | 4/72 [00:04<01:16,  1.12s/it]

B02001_003E


  7%|▋         | 5/72 [00:05<01:13,  1.10s/it]

B02001_004E


  8%|▊         | 6/72 [00:06<01:09,  1.06s/it]

B02001_005E


 10%|▉         | 7/72 [00:07<01:06,  1.02s/it]

B03002_012E


 11%|█         | 8/72 [00:08<01:03,  1.00it/s]

B09002_001E


 12%|█▎        | 9/72 [00:11<01:34,  1.50s/it]

B09002_009E


 14%|█▍        | 10/72 [00:12<01:35,  1.54s/it]

B09002_015E


 15%|█▌        | 11/72 [00:13<01:24,  1.39s/it]

B11001_001E


 17%|█▋        | 12/72 [00:15<01:23,  1.39s/it]

B11001_007E


 18%|█▊        | 13/72 [00:16<01:16,  1.30s/it]

B11001_008E


 19%|█▉        | 14/72 [00:17<01:11,  1.23s/it]

B15003_002E


 21%|██        | 15/72 [00:18<01:11,  1.26s/it]

B15003_017E


 22%|██▏       | 16/72 [00:19<01:08,  1.22s/it]

B15003_021E


 24%|██▎       | 17/72 [00:21<01:05,  1.19s/it]

B15003_022E


 25%|██▌       | 18/72 [00:22<01:01,  1.14s/it]

B15003_023E


 26%|██▋       | 19/72 [00:23<00:57,  1.09s/it]

B15003_024E


 28%|██▊       | 20/72 [00:24<00:55,  1.06s/it]

B15003_025E


 29%|██▉       | 21/72 [00:25<00:54,  1.07s/it]

B08303_001E


 31%|███       | 22/72 [00:26<00:54,  1.09s/it]

B08303_002E


 32%|███▏      | 23/72 [00:27<00:52,  1.07s/it]

B08303_003E


 33%|███▎      | 24/72 [00:28<00:51,  1.07s/it]

B08303_004E


 35%|███▍      | 25/72 [00:29<00:50,  1.07s/it]

B08303_005E


 36%|███▌      | 26/72 [00:32<01:18,  1.70s/it]

B08303_006E


 38%|███▊      | 27/72 [00:33<01:08,  1.52s/it]

B08303_007E


 39%|███▉      | 28/72 [00:34<01:00,  1.37s/it]

B08303_008E


 40%|████      | 29/72 [00:36<00:57,  1.34s/it]

B08303_009E


 42%|████▏     | 30/72 [00:37<00:54,  1.31s/it]

B08303_010E


 43%|████▎     | 31/72 [00:38<00:50,  1.23s/it]

B08303_011E


 44%|████▍     | 32/72 [00:39<00:45,  1.15s/it]

B08303_012E


 46%|████▌     | 33/72 [00:40<00:43,  1.12s/it]

B08303_013E


 47%|████▋     | 34/72 [00:41<00:42,  1.12s/it]

B28002_013E


 49%|████▊     | 35/72 [00:42<00:41,  1.13s/it]

B25001_001E


 50%|█████     | 36/72 [00:43<00:39,  1.10s/it]

B25002_003E


 51%|█████▏    | 37/72 [00:44<00:37,  1.07s/it]

B25003_002E


 53%|█████▎    | 38/72 [00:45<00:35,  1.05s/it]

B25003_003E


 54%|█████▍    | 39/72 [00:46<00:34,  1.05s/it]

B25004_002E


 56%|█████▌    | 40/72 [00:48<00:41,  1.30s/it]

B25010_003E


 57%|█████▋    | 41/72 [00:49<00:38,  1.24s/it]

B25024_002E


 58%|█████▊    | 42/72 [00:50<00:37,  1.27s/it]

B25024_003E


 60%|█████▉    | 43/72 [00:52<00:35,  1.22s/it]

B25035_001E


 61%|██████    | 44/72 [00:53<00:32,  1.17s/it]

B25070_001E


 62%|██████▎   | 45/72 [00:54<00:33,  1.23s/it]

B25070_002E


 64%|██████▍   | 46/72 [00:55<00:30,  1.17s/it]

B25070_003E


 65%|██████▌   | 47/72 [00:56<00:29,  1.19s/it]

B25070_004E


 67%|██████▋   | 48/72 [00:57<00:27,  1.15s/it]

B25070_005E


 68%|██████▊   | 49/72 [00:58<00:24,  1.08s/it]

B25070_006E


 69%|██████▉   | 50/72 [00:59<00:23,  1.06s/it]

B25070_007E


 71%|███████   | 51/72 [01:00<00:21,  1.05s/it]

B25070_008E


 72%|███████▏  | 52/72 [01:01<00:21,  1.08s/it]

B25070_009E


 74%|███████▎  | 53/72 [01:02<00:20,  1.06s/it]

B25070_010E


 75%|███████▌  | 54/72 [01:03<00:18,  1.05s/it]

B25070_011E


 76%|███████▋  | 55/72 [01:05<00:18,  1.09s/it]

B25071_001E


 78%|███████▊  | 56/72 [01:06<00:18,  1.13s/it]

B25077_001E


 79%|███████▉  | 57/72 [01:07<00:17,  1.20s/it]

B25081_001E


 81%|████████  | 58/72 [01:08<00:16,  1.16s/it]

B25081_002E


 82%|████████▏ | 59/72 [01:09<00:15,  1.16s/it]

B25081_009E


 82%|████████▏ | 59/72 [01:10<00:15,  1.19s/it]


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [22]:
# Read census tract relationship file between 2010 and 2020
file_path = 'https://www2.census.gov/geo/docs/maps-data/data/rel2020/tract/tab20_tract20_tract10_natl.txt'
df_tract_rel = pd.read_csv(file_path, sep='|')

# Select only Dallas County, Texas
df_tract_rel['GEOID_TRACT_20'] = df_tract_rel['GEOID_TRACT_20'].astype(str)
df_tract_rel['GEOID_TRACT_10'] = df_tract_rel['GEOID_TRACT_10'].astype(str)
df_tract_rel = df_tract_rel[df_tract_rel['GEOID_TRACT_20'].str.startswith('48113')]


In [23]:
df_tract_rel

Unnamed: 0,OID_TRACT_20,GEOID_TRACT_20,NAMELSAD_TRACT_20,AREALAND_TRACT_20,AREAWATER_TRACT_20,MTFCC_TRACT_20,FUNCSTAT_TRACT_20,OID_TRACT_10,GEOID_TRACT_10,NAMELSAD_TRACT_10,AREALAND_TRACT_10,AREAWATER_TRACT_10,MTFCC_TRACT_10,FUNCSTAT_TRACT_10,AREALAND_PART,AREAWATER_PART
105024,20790555940585,48113000100,Census Tract 1,3077392,511268,G5020,S,20740555940585,48113000100,Census Tract 1,3064914,511268,G5020,S,3064269,511268
105025,20790555940585,48113000100,Census Tract 1,3077392,511268,G5020,S,20740555940601,48113001202,Census Tract 12.02,3208547,0,G5020,S,4325,0
105026,20790555940585,48113000100,Census Tract 1,3077392,511268,G5020,S,20740555940653,48113008100,Census Tract 81,5349487,792325,G5020,S,8798,0
105027,20790555940586,48113000201,Census Tract 2.01,1762502,0,G5020,S,20740555940586,48113000201,Census Tract 2.01,1762502,0,G5020,S,1762502,0
105028,20790555940588,48113000202,Census Tract 2.02,1571509,30513,G5020,S,20740555940588,48113000202,Census Tract 2.02,1571509,30513,G5020,S,1571509,30513
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105949,2079015922093404,48113021200,Census Tract 212,18522258,2824814,G5020,S,20740555945159,48113015800,Census Tract 158,13182509,624991,G5020,S,13182509,624991
105950,207903717245367,48113980000,Census Tract 9800,26609569,0,G5020,S,207403717245367,48113980000,Census Tract 9800,26680183,0,G5020,S,26609569,0
105951,207903717245264,48113980100,Census Tract 9801,5549416,0,G5020,S,207403717245267,48113000406,Census Tract 4.06,6032764,530047,G5020,S,55983,0
105952,207903717245264,48113980100,Census Tract 9801,5549416,0,G5020,S,207403717245264,48113980100,Census Tract 9801,5493433,0,G5020,S,5493433,0


In [None]:
# Merge df_acs_tract_2021 and df_acs_tract_2016 with df_tract_rel
df_tract_temp = pd.merge(df_acs_tract_2021, df_tract_rel[['GEOID_TRACT_20', 'GEOID_TRACT_10']], left_on='GEOID_tract', right_on='GEOID_TRACT_20')
df_tract_temp2 = pd.merge(df_tract_temp, df_acs_tract_2016, left_on='GEOID_TRACT_10', right_on='GEOID_tract')
df_tract_temp2 = df_tract_temp2[['GEOID_tract_x','median_gross_rent','median_gross_rent_2016']].groupby('GEOID_tract_x').mean()

# Change the column name
df_tract_temp2 = df_tract_temp2.rename(columns={'median_gross_rent':'median_gross_rent_tract_2021', 'median_gross_rent_2016':'median_gross_rent_tract_2016'})

# Merge df_acs and df_tract_temp2
df_acs = pd.merge(df_acs, df_tract_temp2, left_on='GEOID_tract', right_on='GEOID_tract_x', how='left')