In [1]:
import h5py
import osmnx as ox
import pandas as pd
import numpy as np
import networkx as nx
from tqdm import tqdm
from itertools import product
from functools import cache
from scipy.spatial import KDTree

In [2]:
#The Bronx is Bronx County (ANSI / FIPS 36005)
#Brooklyn is Kings County (ANSI / FIPS 36047)
#Manhattan is New York County (ANSI / FIPS 36061)
#Queens is Queens County (ANSI / FIPS 36081)
#Staten Island is Richmond County (ANSI / FIPS 36085)

health_df = pd.read_csv(r'PLACES__Census_Tract_Data__GIS_Friendly_Format___2021_release.csv')
nyc_health_df = health_df[health_df['CountyFIPS'].isin([36081, 36061, 36005, 36047, 36085])]
nyc_health_df.describe()

Unnamed: 0,CountyFIPS,TractFIPS,TotalPopulation,ACCESS2_CrudePrev,ARTHRITIS_CrudePrev,BINGE_CrudePrev,BPHIGH_CrudePrev,BPMED_CrudePrev,CANCER_CrudePrev,CASTHMA_CrudePrev,...,HIGHCHOL_CrudePrev,KIDNEY_CrudePrev,LPA_CrudePrev,MAMMOUSE_CrudePrev,MHLTH_CrudePrev,OBESITY_CrudePrev,PHLTH_CrudePrev,SLEEP_CrudePrev,STROKE_CrudePrev,TEETHLOST_CrudePrev
count,2117.0,2117.0,2117.0,2117.0,2117.0,2117.0,2117.0,2117.0,2117.0,2117.0,...,2117.0,2117.0,2117.0,2116.0,2117.0,2117.0,2117.0,2117.0,2117.0,2115.0
mean,36054.565423,36054610000.0,3861.491261,17.710061,18.548843,17.82872,28.489041,73.327256,5.564431,9.608975,...,29.10085,2.946197,30.218706,80.810775,13.852952,26.282617,12.565848,40.807227,3.244969,16.804397
std,25.942063,25948420.0,2123.428421,7.493863,3.883314,3.703147,6.139204,5.139485,1.575517,1.698424,...,3.780349,0.876414,8.417267,3.041739,2.890194,6.254969,3.5498,4.616626,1.16834,7.275074
min,36005.0,36005000000.0,56.0,4.4,6.6,5.0,9.3,40.2,1.6,6.5,...,13.7,0.8,11.1,69.0,6.7,13.1,4.4,25.7,0.6,3.2
25%,36047.0,36047020000.0,2333.0,12.2,16.5,15.4,25.0,71.0,4.5,8.2,...,26.9,2.4,25.1,78.4,11.9,20.9,10.5,37.0,2.6,11.8
50%,36047.0,36047120000.0,3487.0,16.3,18.5,17.3,28.2,74.1,5.3,9.4,...,29.3,2.8,30.1,80.9,13.4,25.9,12.2,41.2,3.1,16.0
75%,36081.0,36081030000.0,4869.0,22.6,20.8,19.3,32.6,76.5,6.4,10.7,...,31.2,3.4,35.6,83.2,15.4,31.2,14.4,44.4,3.9,21.05
max,36085.0,36085030000.0,26588.0,40.7,49.1,32.9,72.3,91.9,19.5,15.9,...,50.9,14.3,63.7,88.3,28.6,45.5,37.6,55.3,20.7,67.9


In [3]:
tractcenters = pd.read_csv("ny_tracts.csv", dtype=str)
#tractcenters['CountyFIPS'] = tractcenters['STATEFP']+tractcenters['COUNTYFP']
tractcenters['TractFIPS'] = tractcenters['STATEFP']+tractcenters['COUNTYFP']+tractcenters['TRACTCE']
tractcenters = tractcenters.astype({'TractFIPS':'int'})
#tracts = tractcenters['TractFIPS'].to_numpy()
#tractcenters.drop(['STATEFP','COUNTYFP', 'TRACTCE'], inplace=True, axis=1)
#tractcenters.head()
nyc_health = pd.merge(nyc_health_df, tractcenters, on='TractFIPS')

In [36]:
lats = nyc_health.copy()['LATITUDE'].astype(float).to_numpy()
lons = nyc_health.copy()['LONGITUDE'].astype(float).to_numpy()
tractfips = nyc_health.copy()['TractFIPS'].to_numpy()

In [5]:
# Referenced:
# https://towardsdatascience.com/finding-time-dependent-travel-times-between-every-pair-of-locations-in-manhattan-c3c48b0db7ba
# https://towardsdatascience.com/shortest-path-algorithm-with-osm-walking-network-6d2863ae96be
# https://osmnx.readthedocs.io/en/stable/osmnx.html and https://github.com/gboeing/osmnx
# https://movement.uber.com/?lang=en-US

In [6]:
# Bounding box covering the greater NYC region
bbox = (41.015, 40.497, -73.452, -74.745)

In [7]:
# Get the graph and the speeds associated with all edges
graph = ox.graph_from_bbox(*bbox, network_type='drive')  # Could also use ox.graph_from_place('New York, New York')
#graph = ox.graph_from_place('New York, New York', network_type='drive')
graph = ox.add_edge_speeds(graph)
graph = ox.add_edge_travel_times(graph)
ox.save_graphml(graph, r'new_york_metro_area.graphml')
graph = ox.utils_graph.get_largest_component(graph, strongly=True)
ox.save_graphml(graph, r'new_york_metro_area_cleaned.graphml')
graph = ox.load_graphml(r'new_york_metro_area_cleaned.graphml')

In [8]:
# Find all food stores
food_tags = {'shop': 'supermarket', 'amenity': 'marketplace'}
food_places = ox.geometries_from_bbox(*bbox, food_tags)

In [9]:
# Replace Polygons with a single point
food_places.loc[food_places['geometry'].type == 'Polygon', 'geometry'] = food_places.loc[food_places['geometry'].type == 'Polygon', 'geometry'].representative_point()

In [10]:
# Find all major green places or recreational areas
physical_tags = {'leisure': 'park', 'leisure': 'recreation_ground', 'leisure': 'playground', 'leisure': 'fitness_station',
                'leisure': 'sports_centre', 'leisure': 'nature_reserve', 'leisure': 'pitch'}
physical_places = ox.geometries_from_bbox(*bbox, physical_tags)
physical_places.loc[physical_places['geometry'].type == 'Polygon', 'geometry'] = physical_places.loc[physical_places['geometry'].type == 'Polygon', 'geometry'].representative_point()

  for merged_outer_linestring in list(merged_outer_linestrings):
  for merged_outer_linestring in list(merged_outer_linestrings):


In [11]:
# Find public transit
transport_tags = {'public_transport': 'platform', 'public_transport': 'stop_position', 'highway': 'bus_stop', 'highway': 'platform',
                 'railway': 'subway_entrance', 'railway': 'station', 'railway': 'tram', 'railway': 'tram_stop', 'station': 'subway'}
transport_places = ox.geometries_from_bbox(*bbox, transport_tags)
transport_places.loc[transport_places['geometry'].type == 'Polygon', 'geometry'] = transport_places.loc[transport_places['geometry'].type == 'Polygon', 'geometry'].representative_point()

In [12]:
# Find libraries and schools
education_tags = {'amenity': 'library', 'amenity': 'school', 'amenity': 'kindergarten'}
education_places = ox.geometries_from_bbox(*bbox, transport_tags)
education_places.loc[education_places['geometry'].type == 'Polygon', 'geometry'] = education_places.loc[education_places['geometry'].type == 'Polygon', 'geometry'].representative_point()

In [13]:
# Find places of worship
worship_tags = {'amenity': 'place_of_worship'}
worship_places = ox.geometries_from_bbox(*bbox, transport_tags)
worship_places.loc[worship_places['geometry'].type == 'Polygon', 'geometry'] = worship_places.loc[worship_places['geometry'].type == 'Polygon', 'geometry'].representative_point()

In [14]:
# Simplify everything
food_places = food_places['geometry'].droplevel(0)
physical_places = physical_places['geometry'].droplevel(0)
transport_places = transport_places['geometry'].droplevel(0)
education_places = education_places['geometry'].droplevel(0)
worship_places = worship_places['geometry'].droplevel(0)
food_places = food_places[food_places.type == 'Point']
physical_places = physical_places[physical_places.type == 'Point']
transport_places = transport_places[transport_places.type == 'Point']
education_places = education_places[education_places.type == 'Point']
worship_places = worship_places[worship_places.type == 'Point']

In [15]:
speed_raw = pd.read_csv(r'2020_speeds_new_york.csv')
speed_raw.head()

Unnamed: 0,year,quarter,hour_of_day,segment_id,start_junction_id,end_junction_id,osm_way_id,osm_start_node_id,osm_end_node_id,speed_mph_mean,speed_mph_stddev,speed_mph_p50,speed_mph_p85
0,2020,1,22,ffd5b0eb2ebe47dc55977417d7b9de8a2453fbef,33022b4b089929847e9047cbf4d911149b15eb1b,61e031cf1bb3c398a0b67a985cf50d6f4b553d89,627639479.0,5924963000.0,5924963000.0,23.573,4.144,23.914,27.289
1,2020,1,4,ffd5b0eb2ebe47dc55977417d7b9de8a2453fbef,33022b4b089929847e9047cbf4d911149b15eb1b,61e031cf1bb3c398a0b67a985cf50d6f4b553d89,627639479.0,5924963000.0,5924963000.0,25.239,4.322,25.24,29.869
2,2020,1,16,ffd5b0eb2ebe47dc55977417d7b9de8a2453fbef,33022b4b089929847e9047cbf4d911149b15eb1b,61e031cf1bb3c398a0b67a985cf50d6f4b553d89,627639479.0,5924963000.0,5924963000.0,24.005,3.759,24.012,27.575
3,2020,1,1,ffd5b0eb2ebe47dc55977417d7b9de8a2453fbef,33022b4b089929847e9047cbf4d911149b15eb1b,61e031cf1bb3c398a0b67a985cf50d6f4b553d89,627639479.0,5924963000.0,5924963000.0,23.531,3.605,24.045,26.941
4,2020,1,3,ffd5b0eb2ebe47dc55977417d7b9de8a2453fbef,33022b4b089929847e9047cbf4d911149b15eb1b,61e031cf1bb3c398a0b67a985cf50d6f4b553d89,627639479.0,5924963000.0,5924963000.0,24.126,3.927,24.966,27.389


In [16]:
# Remove unnecessary columns
speed_raw.drop(columns=['quarter', 'year', 'segment_id', 'start_junction_id', 'end_junction_id'], inplace=True)
# Assume people shop around 6pm after work?
speed_raw = speed_raw[speed_raw['hour_of_day'] == 18]
speed_raw.set_index('osm_way_id', drop=True, inplace=True)

In [17]:
#for edge in tqdm(graph.edges):
    # For some reason the travel times get loaded in as strings
#    e = graph[edge[0]][edge[1]][edge[2]]
#    print(e)
#    graph[edge[0]][edge[1]][edge[2]]['actual_travel_time'] = float(e['actual_travel_time'])
#Actual travel time should be saved in the graph now
#real_calculation = 0
#for edge in tqdm(graph.edges):
    # length is meters, speed_kph is kph (duh), maxspeed has units in string, and travel_time is seconds
#    e = graph[edge[0]][edge[1]][edge[2]]
#    if isinstance(e['osmid'], list):
        # Some graph edges are made up of multiple OSM ways apparently
#        for osmid in e['osmid']:
#            try:
#                meters_per_second = speed_raw.at[osmid, 'speed_mph_mean']*0.44704    # Convert to meters/sec
#                time = e['length']/meters_per_second
#                real_calculation += 1
#                break
#            except (KeyError, ZeroDivisionError):
#                time = e['travel_time']     # Backup (i.e. length/speed limit) if Uber data isn't available
#    else:
#        try:
#            meters_per_second = speed_raw.at[e['osmid'], 'speed_mph_mean']*0.44704
#            time = e['length']/meters_per_second
#            real_calculation += 1
#        except (KeyError, ZeroDivisionError):
#            time = e['travel_time']
#    if isinstance(time, pd.Series):
#        # TODO Bug check why this is happening, but not late at night
#        time = time.mean()
#    graph[edge[0]][edge[1]][edge[2]]['actual_travel_time'] = time

In [18]:
# The bounding box is around 60x30 miles, so 250 per side gives ~1 point per quarter mile on the longer side
#lats = np.linspace(bbox[1], bbox[0], num=250, dtype=np.float32)
#lons = np.linspace(bbox[3], bbox[2], num=lats.shape[0], dtype=np.float32)

In [25]:
all_nearest_nodes, dists = ox.distance.nearest_nodes(graph, lons, lats, return_dist=True)
print('Number farther than one kilometer:', np.count_nonzero(np.asarray(dists) < 1609), 'Number overall:', len(all_nearest_nodes))
#If it's more than a kilometer from an actual point then we're probably in the water/somewhere no one lives
#all_nearest_nodes = np.asarray(all_nearest_nodes)[np.asarray(dists) < 1609]
np.save(r'nodes.npy', all_nearest_nodes)
all_nearest_nodes = np.load(r'nodes.npy')
food_closest_nodes = np.full(all_nearest_nodes.shape[0], -1, dtype=np.int64)
food_closest_travel_times = np.full(all_nearest_nodes.shape[0], np.nan, dtype=np.float32)
physical_closest_nodes = food_closest_nodes.copy()
physical_closest_travel_times = food_closest_travel_times.copy()
transport_closest_nodes = food_closest_nodes.copy()
transport_closest_travel_times = food_closest_travel_times.copy()
education_closest_nodes = food_closest_nodes.copy()
education_closest_travel_times = food_closest_travel_times.copy()
worship_closest_nodes = food_closest_nodes.copy()
worship_closest_travel_times = food_closest_travel_times.copy()

Number farther than one kilometer: 2117 Number overall: 2117


In [26]:
max_dist = 160900000000
food_nodes, dists = ox.distance.nearest_nodes(graph, [x.x for x in food_places], [x.y for x in food_places], return_dist=True)
print('Found food', 'Number within 1 mile:', np.count_nonzero(np.asarray(dists) < max_dist), 'Number total:', len(food_nodes))
food_nodes = np.asarray(food_nodes)[np.asarray(dists) < max_dist]
physical_nodes, dists = ox.distance.nearest_nodes(graph, [x.x for x in physical_places], [x.y for x in physical_places], return_dist=True)
print('Found physical', 'Number within 1 mile:', np.count_nonzero(np.asarray(dists) < max_dist), 'Number total:', len(physical_nodes))
physical_nodes = np.asarray(physical_nodes)[np.asarray(dists) < max_dist]
transport_nodes, dists = ox.distance.nearest_nodes(graph, [x.x for x in transport_places], [x.y for x in transport_places], return_dist=True)
print('Found transport', 'Number within 1mi:', np.count_nonzero(np.asarray(dists) < max_dist), 'Number total:', len(transport_nodes))
transport_nodes = np.asarray(transport_nodes)[np.asarray(dists) < max_dist]
education_nodes, dists = ox.distance.nearest_nodes(graph, [x.x for x in education_places], [x.y for x in education_places], return_dist=True)
print('Found education', 'Number within 1mi:', np.count_nonzero(np.asarray(dists) < max_dist), 'Number total:', len(education_nodes))
education_nodes = np.asarray(education_nodes)[np.asarray(dists) < max_dist]
worship_nodes, dists = ox.distance.nearest_nodes(graph, [x.x for x in worship_places], [x.y for x in worship_places], return_dist=True)
print('Found worship', 'Number within 1mi:', np.count_nonzero(np.asarray(dists) < max_dist), 'Number total:', len(worship_nodes))
worship_nodes = np.asarray(worship_nodes)[np.asarray(dists) < max_dist]
np.save(r'food_nodes.npy', food_nodes)
np.save(r'physical_nodes.npy', physical_nodes)
np.save(r'transport_nodes.npy', transport_nodes)
np.save(r'education_nodes.npy', education_nodes)
np.save(r'worship_nodes.npy', worship_nodes)

Found food Number within 1 mile: 1411 Number total: 1411
Found physical Number within 1 mile: 12981 Number total: 12981
Found transport Number within 1mi: 4421 Number total: 4421
Found education Number within 1mi: 4421 Number total: 4421
Found worship Number within 1mi: 4421 Number total: 4421


In [27]:
#food_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\food_nodes.npy')
#physical_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\physical_nodes.npy')
#transport_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\transport_nodes.npy')
#education_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\education_nodes.npy')
#worship_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\food_nodes.npy')

In [28]:
food_lat_lons = np.array([[graph.nodes[x]['x'] for x in food_nodes], [graph.nodes[x]['y'] for x in food_nodes]], dtype=np.float32).T
physical_lat_lons = np.array([[graph.nodes[x]['x'] for x in physical_nodes], [graph.nodes[x]['y'] for x in physical_nodes]], dtype=np.float32).T
transport_lat_lons = np.array([[graph.nodes[x]['x'] for x in transport_nodes], [graph.nodes[x]['y'] for x in transport_nodes]], dtype=np.float32).T
education_lat_lons = np.array([[graph.nodes[x]['x'] for x in education_nodes], [graph.nodes[x]['y'] for x in education_nodes]], dtype=np.float32).T
worship_lat_lons = np.array([[graph.nodes[x]['x'] for x in worship_nodes], [graph.nodes[x]['y'] for x in worship_nodes]], dtype=np.float32).T

In [29]:
food_tree = KDTree(food_lat_lons)
physical_tree = KDTree(physical_lat_lons)
transport_tree = KDTree(transport_lat_lons)
education_tree = KDTree(education_lat_lons)
worship_tree = KDTree(worship_lat_lons)

In [30]:
closest_food_nodes = []
closest_physical_nodes = []
closest_transport_nodes = []
closest_education_nodes = []
closest_worship_nodes = []
for sample_node in tqdm(all_nearest_nodes):
    idxes = food_tree.query((graph.nodes[sample_node]['x'], graph.nodes[sample_node]['y']), k=5)[1]
    closest_food_nodes.append(food_nodes[idxes])
    idxes = physical_tree.query((graph.nodes[sample_node]['x'], graph.nodes[sample_node]['y']), k=5)[1]
    closest_physical_nodes.append(physical_nodes[idxes])
    idxes = transport_tree.query((graph.nodes[sample_node]['x'], graph.nodes[sample_node]['y']), k=5)[1]
    closest_transport_nodes.append(transport_nodes[idxes])
    idxes = education_tree.query((graph.nodes[sample_node]['x'], graph.nodes[sample_node]['y']), k=5)[1]
    closest_education_nodes.append(education_nodes[idxes])
    idxes = worship_tree.query((graph.nodes[sample_node]['x'], graph.nodes[sample_node]['y']), k=5)[1]
    closest_worship_nodes.append(worship_nodes[idxes])

100%|█████████████████████████████████████| 2117/2117 [00:00<00:00, 7786.46it/s]


In [31]:
@cache    # Trying to speed things up a little
def shortest_path(source, target):
    return nx.shortest_path_length(graph, source=source, target=target, weight='actual_travel_time')

In [32]:
for i, sample_node in enumerate(tqdm(all_nearest_nodes)):
    current_shortest = np.inf
    shortest_node = 0
    for food_node in closest_food_nodes[i]:
        #food_node = ox.distance.nearest_nodes(graph, food_places.iat[k].x, food_places.iat[k].y)
        shortest = shortest_path(sample_node, food_node)
        if shortest < current_shortest:
            current_shortest = shortest
            shortest_node = food_node
    food_closest_nodes[i] = shortest_node
    food_closest_travel_times[i] = current_shortest
    
    current_shortest = np.inf
    shortest_node = 0
    for physical_node in closest_physical_nodes[i]:
        shortest = shortest_path(sample_node, physical_node)
        if shortest < current_shortest:
            current_shortest = shortest
            shortest_node = physical_node
    physical_closest_nodes[i] = shortest_node
    physical_closest_travel_times[i] = current_shortest
    
    current_shortest = np.inf
    shortest_node = 0
    for transport_node in closest_transport_nodes[i]:
        shortest = shortest_path(sample_node, transport_node)
        if shortest < current_shortest:
            current_shortest = shortest
            shortest_node = transport_node
    transport_closest_nodes[i] = shortest_node
    transport_closest_travel_times[i] = current_shortest
    
    current_shortest = np.inf
    shortest_node = 0
    for education_node in closest_education_nodes[i]:
        shortest = shortest_path(sample_node, education_node)
        if shortest < current_shortest:
            current_shortest = shortest
            shortest_node = education_node
    education_closest_nodes[i] = shortest_node
    education_closest_travel_times[i] = current_shortest
    
    current_shortest = np.inf
    shortest_node = 0
    for worship_node in closest_worship_nodes[i]:
        shortest = shortest_path(sample_node, worship_node)
        if shortest < current_shortest:
            current_shortest = shortest
            shortest_node = worship_node
    worship_closest_nodes[i] = shortest_node
    worship_closest_travel_times[i] = current_shortest

100%|██████████████████████████████████████| 2117/2117 [00:17<00:00, 122.00it/s]


In [37]:
with h5py.File('nearest_EM.hdf5', 'w') as h5:
    h5.create_dataset('nodes', data=all_nearest_nodes)
    h5.create_dataset('nodes_x', data=np.array([graph.nodes[x]['x'] for x in all_nearest_nodes], dtype=np.float32))
    h5.create_dataset('nodes_y', data=np.array([graph.nodes[x]['y'] for x in all_nearest_nodes], dtype=np.float32))
    h5.create_dataset('closest_food_nodes', data=food_closest_nodes)
    h5.create_dataset('closest_food_nodes_travel_time', data=food_closest_travel_times)
    h5.create_dataset('closest_food_nodes_x', data=np.array([graph.nodes[x]['x'] for x in food_closest_nodes], dtype=np.float32))
    h5.create_dataset('closest_food_nodes_y', data=np.array([graph.nodes[x]['y'] for x in food_closest_nodes], dtype=np.float32))
    h5.create_dataset('closest_physical_nodes', data=physical_closest_nodes)
    h5.create_dataset('closest_physical_nodes_travel_time', data=physical_closest_travel_times)
    h5.create_dataset('closest_physical_nodes_x', data=np.array([graph.nodes[x]['x'] for x in physical_closest_nodes], dtype=np.float32))
    h5.create_dataset('closest_physical_nodes_y', data=np.array([graph.nodes[x]['y'] for x in physical_closest_nodes], dtype=np.float32))
    h5.create_dataset('closest_transport_nodes', data=transport_closest_nodes)
    h5.create_dataset('closest_transport_nodes_travel_time', data=transport_closest_travel_times)
    h5.create_dataset('closest_transport_nodes_x', data=np.array([graph.nodes[x]['x'] for x in transport_closest_nodes], dtype=np.float32))
    h5.create_dataset('closest_transport_nodes_y', data=np.array([graph.nodes[x]['y'] for x in transport_closest_nodes], dtype=np.float32))
    h5.create_dataset('closest_education_nodes', data=education_closest_nodes)
    h5.create_dataset('closest_education_nodes_travel_time', data=education_closest_travel_times)
    h5.create_dataset('closest_education_nodes_x', data=np.array([graph.nodes[x]['x'] for x in education_closest_nodes], dtype=np.float32))
    h5.create_dataset('closest_education_nodes_y', data=np.array([graph.nodes[x]['y'] for x in education_closest_nodes], dtype=np.float32))
    h5.create_dataset('closest_worship_nodes', data=worship_closest_nodes)
    h5.create_dataset('closest_worship_nodes_travel_time', data=worship_closest_travel_times)
    h5.create_dataset('closest_worship_nodes_x', data=np.array([graph.nodes[x]['x'] for x in worship_closest_nodes], dtype=np.float32))
    h5.create_dataset('closest_worship_nodes_y', data=np.array([graph.nodes[x]['y'] for x in worship_closest_nodes], dtype=np.float32))
    h5.create_dataset('TractFIPS', data=tractfips)

In [2]:
d = {}
l = []
cols = []
with h5py.File('nearest_EM.hdf5', 'r') as h5:
    for k in h5.keys():
        d[k] = h5[k][:]
        l.append(h5[k][:])
        cols.append(k)

In [39]:
desert_df = pd.DataFrame(np.asarray(l).T, columns = cols)
desert_df.head()

Unnamed: 0,TractFIPS,closest_education_nodes,closest_education_nodes_travel_time,closest_education_nodes_x,closest_education_nodes_y,closest_food_nodes,closest_food_nodes_travel_time,closest_food_nodes_x,closest_food_nodes_y,closest_physical_nodes,...,closest_transport_nodes_travel_time,closest_transport_nodes_x,closest_transport_nodes_y,closest_worship_nodes,closest_worship_nodes_travel_time,closest_worship_nodes_x,closest_worship_nodes_y,nodes,nodes_x,nodes_y
0,36047010000.0,42521189.0,4.0,-74.008987,40.649776,42465444.0,3.0,-74.006821,40.648464,42477073.0,...,4.0,-74.008987,40.649776,42521189.0,4.0,-74.008987,40.649776,42477077.0,-74.005783,40.646015
1,36047010000.0,42486091.0,7.0,-74.010742,40.634357,42490251.0,3.0,-74.00721,40.637756,42460936.0,...,7.0,-74.010742,40.634357,42486091.0,7.0,-74.010742,40.634357,42473465.0,-74.005005,40.636425
2,36047050000.0,42485802.0,9.0,-73.963348,40.635517,42514420.0,7.0,-73.974998,40.629978,42501950.0,...,9.0,-73.963348,40.635517,42485802.0,9.0,-73.963348,40.635517,42517167.0,-73.968407,40.631508
3,36005030000.0,470207824.0,4.0,-73.846474,40.85804,42734727.0,11.0,-73.83519,40.862591,42744568.0,...,4.0,-73.846474,40.85804,470207824.0,4.0,-73.846474,40.85804,42745688.0,-73.845444,40.860939
4,36005040000.0,42739327.0,5.0,-73.872543,40.878746,42731099.0,8.0,-73.879456,40.874977,42757892.0,...,5.0,-73.872543,40.878746,42739327.0,5.0,-73.872543,40.878746,42757892.0,-73.876549,40.876602


In [42]:
nyc_health_deserts = pd.merge(nyc_health, desert_df, on='TractFIPS')
nyc_health_deserts.to_csv('nychealth.csv', index=False)