In [None]:
import h5py
import osmnx as ox
import pandas as pd
import numpy as np
import networkx as nx
from tqdm import tqdm
from itertools import product
from functools import cache
from scipy.spatial import KDTree
from haversine import haversine
from shapely.geometry import Point

In [None]:
# County FIPS codes (2-digit state + 3-digit county). Every tract GEOID in a
# county starts with that county's 5-character code, so these are used as
# prefixes when filtering the national tract table.
houston_countyfips = [
    '48291',
    '48039',
    '48071',
    '48157',
    '48167',
    '48201',
    '48339',
    '48473',
]
hamiltoncounty_fips = '39061'   # county filter for the Cincinnati tracts
kingscounty_fips = '53033'      # county filter for the Seattle tracts
sf_fips = '06075'               # county filter for the San Francisco tracts
fultoncounty_fips = '13121'     # county filter for the Atlanta tracts

# Tract internal-point coordinates from the 2019 Census Gazetteer tract file.
# source https://www.census.gov/geographies/reference-files/time-series/geo/gazetteer-files.2019.html
# Everything is read as str so GEOIDs keep their leading zeros (e.g. '06075...').
tractcenters = pd.read_csv("data/2019_Gaz_tracts_national.txt", sep='\t', dtype=str)

# The file's header pads the INTPTLONG column name with trailing whitespace.
# Strip every header instead of matching one exact run of spaces, so a change
# in the padding cannot silently skip the rename and break the selection below.
tractcenters = tractcenters.rename(columns=str.strip)

# The first five characters of a tract GEOID are the state + county FIPS code.
tractcenters['COUNTYFP'] = tractcenters['GEOID'].str[:5]
tractcenters = tractcenters[['GEOID', 'COUNTYFP', 'INTPTLAT', 'INTPTLONG']]

In [None]:
def coords_in_city(place, lats, lons):
    """Flag which (lat, lon) points fall inside the boundary geocoded for `place`.

    The boundary is the geometry in row 0 of the GeoDataFrame returned by
    `ox.geocode_to_gdf(place)`.
    NOTE(review): that lookup presumably queries an external geocoding
    service on every call -- confirm before calling this in a loop.

    Parameters
    ----------
    place : str
        Place query passed straight to `ox.geocode_to_gdf`.
    lats, lons : iterable of float
        Latitudes and longitudes, paired positionally. If the two differ in
        length, the extra values of the longer one are ignored (`zip`).

    Returns
    -------
    list
        One entry per (lat, lon) pair, in input order: the result of
        `geom.intersects(point)`, so points on the boundary count as inside.
    """
    gdf = ox.geocode_to_gdf(place)
    geom = gdf.loc[0, 'geometry']

    # shapely points are (x, y), i.e. (lon, lat) -- note the swap from the
    # argument order.
    return [geom.intersects(Point((lon, lat))) for lat, lon in zip(lats, lons)]

In [None]:
# Cincinnati: take the county's tracts, then keep only those whose internal
# point lies inside the geocoded city boundary.
place = 'Cincinnati, Ohio'
in_hamilton_county = tractcenters['COUNTYFP'] == hamiltoncounty_fips
tractcenters_cinci = tractcenters.loc[in_hamilton_county].copy()

# Coordinates were read as strings; convert before the point-in-polygon test.
lats = tractcenters_cinci['INTPTLAT'].astype(float).to_numpy()
lons = tractcenters_cinci['INTPTLONG'].astype(float).to_numpy()
mask = coords_in_city(place, lats, lons)

tractcenters_cinci.loc[mask].to_csv('data/tract_centers/tractcenters_cinci.csv', index=False)

In [None]:
# Houston: keep every tract in the listed counties (no city-boundary filter).
in_houston_counties = tractcenters['COUNTYFP'].isin(houston_countyfips)
tractcenters_hou = tractcenters.loc[in_houston_counties].copy()
tractcenters_hou.to_csv('data/tract_centers/tractcenters_hou.csv', index=False)

In [None]:
# Seattle: take the county's tracts, then keep only those whose internal
# point lies inside the geocoded city boundary.
place = 'Seattle, Washington'
in_kings_county = tractcenters['COUNTYFP'] == kingscounty_fips
tractcenters_sea = tractcenters.loc[in_kings_county].copy()

# Coordinates were read as strings; convert before the point-in-polygon test.
lats = tractcenters_sea['INTPTLAT'].astype(float).to_numpy()
lons = tractcenters_sea['INTPTLONG'].astype(float).to_numpy()
mask = coords_in_city(place, lats, lons)

tractcenters_sea.loc[mask].to_csv('data/tract_centers/tractcenters_seattle.csv', index=False)

In [None]:
# San Francisco: keep every tract carrying the sf_fips county code
# (no city-boundary filter is applied here).
in_sf_county = tractcenters['COUNTYFP'] == sf_fips
tractcenters_sf = tractcenters.loc[in_sf_county].copy()
tractcenters_sf.to_csv('data/tract_centers/tractcenters_sf.csv', index=False)

In [None]:
# NYC tracts come from a separate file that stores the FIPS pieces in separate
# columns; read as str so the pieces concatenate as text.
tractcenters_nyc = pd.read_csv("data/ny_tracts.csv", dtype=str)
tractcenters_nyc['CountyFIPS'] = tractcenters_nyc['STATEFP'] + tractcenters_nyc['COUNTYFP']
tractcenters_nyc['TractFIPS'] = tractcenters_nyc['CountyFIPS'] + tractcenters_nyc['TRACTCE']

# Keep only the five county codes that make up the NYC selection.
nyc_countyfips = ['36081', '36061', '36005', '36047', '36085']
tractcenters_nyc = tractcenters_nyc[tractcenters_nyc['CountyFIPS'].isin(nyc_countyfips)]

# Cast on the NYC frame. The original cast targeted `tractcenters`, which has
# no TractFIPS column (KeyError) and is still needed, unmodified, by later cells.
# int64 is explicit because an 11-digit tract FIPS does not fit in 32 bits.
tractcenters_nyc = tractcenters_nyc.astype({'TractFIPS': 'int64'})
tracts = tractcenters_nyc['TractFIPS'].to_numpy()

# The component columns are redundant once CountyFIPS/TractFIPS exist.
tractcenters_nyc = tractcenters_nyc.drop(columns=['STATEFP', 'COUNTYFP', 'TRACTCE'])
tractcenters_nyc.to_csv('data/tract_centers/tractcenters_nyc.csv', index=False)

In [None]:
# Atlanta: take the county's tracts, then keep only those whose internal
# point lies inside the geocoded city boundary.
place = 'Atlanta, Georgia'
tractcenters_atl = tractcenters.loc[tractcenters['COUNTYFP'] == fultoncounty_fips].copy()

# Coordinates were read as strings; convert before the point-in-polygon test.
lats = tractcenters_atl['INTPTLAT'].astype(float).to_numpy()
lons = tractcenters_atl['INTPTLONG'].astype(float).to_numpy()
mask = coords_in_city(place, lats, lons)

# Apply the mask before writing, as the Cincinnati and Seattle cells do. The
# original computed it but wrote every tract in the county.
tractcenters_atl.loc[mask].to_csv('data/tract_centers/tractcenters_atlanta.csv', index=False)

In [None]:
# Extract the tract-level income indicators from the Food Access Research
# Atlas workbook and save them keyed by TractFIPS.
food_atlas = pd.read_excel(
    'data/FoodAccessResearchAtlasData2019.xlsx',
    sheet_name='Food Access Research Atlas',
    dtype=str,
)

income_columns = ['CensusTract', 'LowIncomeTracts', 'PovertyRate', 'MedianFamilyIncome']
# Everything was read as str; only the numeric indicators are cast, so the
# tract identifier stays a string.
income_dtypes = {'LowIncomeTracts': int, 'MedianFamilyIncome': 'float', 'PovertyRate': 'float'}

income_vars = (
    food_atlas[income_columns]
    .rename(columns={'CensusTract': 'TractFIPS'})
    .astype(income_dtypes)
)
income_vars.to_csv('data/tract_incomevars.csv', index=False)