In [1]:
import h5py
import osmnx as ox
import pandas as pd
import numpy as np
import networkx as nx
from tqdm import tqdm
from itertools import product
from functools import cache
from scipy.spatial import KDTree

In [2]:
# Referenced:
# https://towardsdatascience.com/finding-time-dependent-travel-times-between-every-pair-of-locations-in-manhattan-c3c48b0db7ba
# https://towardsdatascience.com/shortest-path-algorithm-with-osm-walking-network-6d2863ae96be
# https://osmnx.readthedocs.io/en/stable/osmnx.html and https://github.com/gboeing/osmnx
# https://movement.uber.com/?lang=en-US

In [3]:
# Bounding box covering the greater NYC region.
# Tuple order is (north, south, east, west): indices 0/1 are the max/min latitude and
# indices 2/3 are the max/min longitude, which is how the sampling grid further down reads them.
bbox = (41.015, 40.497, -73.452, -74.745)

In [4]:
# Get the graph and the speeds associated with all edges.
# The commented-out lines are the one-time build that produced the cached GraphML files:
# download the drivable network for the bounding box, add edge speeds and travel times,
# save it, keep only the largest strongly connected component, and save the cleaned copy.
#graph = ox.graph_from_bbox(*bbox, network_type='drive')  # Could also use ox.graph_from_place('New York, New York')
#graph = ox.add_edge_speeds(graph)
#graph = ox.add_edge_travel_times(graph)
#ox.save_graphml(graph, r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\new_york_metro_area.graphml')
#graph = ox.utils_graph.get_largest_component(graph, strongly=True)
#ox.save_graphml(graph, r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\new_york_metro_area_cleaned.graphml')
# Normal path: load the cleaned graph from disk instead of rebuilding it.
# NOTE(review): absolute, machine-specific path -- TODO move to a configurable data directory.
graph = ox.load_graphml(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\new_york_metro_area_cleaned.graphml')

In [5]:
# Food sources: anything tagged shop=supermarket or amenity=marketplace inside the bounding box
food_tags = dict(shop='supermarket', amenity='marketplace')
food_places = ox.geometries_from_bbox(*bbox, tags=food_tags)

In [6]:
# Collapse every Polygon footprint to one representative point; other geometry types are untouched here
is_polygon = food_places['geometry'].type == 'Polygon'
food_places.loc[is_polygon, 'geometry'] = food_places.loc[is_polygon, 'geometry'].representative_point()

In [7]:
# Find all major green places or recreational areas.
# A dict literal keeps only the LAST value given for a repeated key, so writing 'leisure'
# seven times queried leisure=pitch alone. All accepted values for a key go in one list.
# NOTE(review): the cached physical_nodes.npy loaded further down appears to derive from this
# query (see the commented-out nearest_nodes cell) -- regenerate it for the fix to take effect.
physical_tags = {'leisure': ['park', 'recreation_ground', 'playground', 'fitness_station',
                             'sports_centre', 'nature_reserve', 'pitch']}
physical_places = ox.geometries_from_bbox(*bbox, physical_tags)
# Collapse Polygon footprints to a single representative point each
is_polygon = physical_places['geometry'].type == 'Polygon'
physical_places.loc[is_polygon, 'geometry'] = physical_places.loc[is_polygon, 'geometry'].representative_point()

  for merged_outer_linestring in list(merged_outer_linestrings):
  for merged_outer_linestring in list(merged_outer_linestrings):


In [8]:
# Find public transit.
# Repeated keys in a dict literal overwrite each other (only the last value survives), so the
# values for each OSM key are grouped into a list instead of being written as separate entries.
# NOTE(review): the cached transport_nodes.npy loaded further down appears to derive from this
# query (see the commented-out nearest_nodes cell) -- regenerate it for the fix to take effect.
transport_tags = {'public_transport': ['platform', 'stop_position'],
                  'highway': ['bus_stop', 'platform'],
                  'railway': ['subway_entrance', 'station', 'tram', 'tram_stop'],
                  'station': 'subway'}
transport_places = ox.geometries_from_bbox(*bbox, transport_tags)
# Collapse Polygon footprints to a single representative point each
is_polygon = transport_places['geometry'].type == 'Polygon'
transport_places.loc[is_polygon, 'geometry'] = transport_places.loc[is_polygon, 'geometry'].representative_point()

In [9]:
# Find libraries and schools.
# Two fixes: (1) the three 'amenity' values are grouped in a list, because repeated keys in a
# dict literal leave only the last one ('kindergarten'); (2) the query now actually uses
# education_tags -- it previously passed transport_tags and so returned transit features.
# NOTE(review): the cached education_nodes.npy loaded further down appears to derive from this
# query (see the commented-out nearest_nodes cell) -- regenerate it for the fix to take effect.
education_tags = {'amenity': ['library', 'school', 'kindergarten']}
education_places = ox.geometries_from_bbox(*bbox, education_tags)
# Collapse Polygon footprints to a single representative point each
is_polygon = education_places['geometry'].type == 'Polygon'
education_places.loc[is_polygon, 'geometry'] = education_places.loc[is_polygon, 'geometry'].representative_point()

In [10]:
# Find places of worship.
# The query now uses worship_tags; it previously passed transport_tags, so worship_places
# actually contained transit features.
# NOTE(review): a worship_nodes.npy cache appears to derive from this query (see the
# commented-out nearest_nodes cell) -- regenerate it for the fix to take effect.
worship_tags = {'amenity': 'place_of_worship'}
worship_places = ox.geometries_from_bbox(*bbox, worship_tags)
# Collapse Polygon footprints to a single representative point each
is_polygon = worship_places['geometry'].type == 'Polygon'
worship_places.loc[is_polygon, 'geometry'] = worship_places.loc[is_polygon, 'geometry'].representative_point()

In [11]:
def _points_only(places):
    """Return the 'geometry' column of ``places`` with index level 0 dropped, restricted to Point geometries."""
    geometry = places['geometry'].droplevel(0)
    return geometry[geometry.type == 'Point']


# Simplify everything: from here on each *_places variable is just a series of Points
food_places = _points_only(food_places)
physical_places = _points_only(physical_places)
transport_places = _points_only(transport_places)
education_places = _points_only(education_places)
worship_places = _points_only(worship_places)

In [12]:
#speed_raw = pd.read_csv(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\2020_speeds_new_york.csv')

In [13]:
# Remove unnecessary columns
#speed_raw.drop(columns=['quarter', 'year', 'segment_id', 'start_junction_id', 'end_junction_id'], inplace=True)
# Assume people shop around 6pm after work?
#speed_raw = speed_raw[speed_raw['hour_of_day'] == 18]
#speed_raw.set_index('osm_way_id', drop=True, inplace=True)

In [14]:
for u, v, key in tqdm(graph.edges):
    # The travel times come back from the saved graph as strings, so cast them to float in place
    edge_attrs = graph[u][v][key]
    edge_attrs['actual_travel_time'] = float(edge_attrs['actual_travel_time'])
# Actual travel time should be saved in the graph now
#real_calculation = 0
#for edge in tqdm(graph.edges):
#    # length is meters, speed_kph is kph (duh), maxspeed has units in string, and travel_time is seconds
#    e = graph[edge[0]][edge[1]][edge[2]]
#    if isinstance(e['osmid'], list):
#        # Some graph edges are made up of multiple OSM ways apparently
#        for osmid in e['osmid']:
#            try:
#                meters_per_second = speed_raw.at[osmid, 'speed_mph_mean']*0.44704    # Convert to meters/sec
#                time = e['length']/meters_per_second
#                real_calculation += 1
#                break
#            except (KeyError, ZeroDivisionError):
#                time = e['travel_time']     # Backup (i.e. length/speed limit) if Uber data isn't available
#    else:
#        try:
#            meters_per_second = speed_raw.at[e['osmid'], 'speed_mph_mean']*0.44704
#            time = e['length']/meters_per_second
#            real_calculation += 1
#        except (KeyError, ZeroDivisionError):
#            time = e['travel_time']
#    if isinstance(time, pd.Series):
#        # TODO Bug check why this is happening, but not late at night
#        time = time.mean()
#    graph[edge[0]][edge[1]][edge[2]]['actual_travel_time'] = time

100%|█████████████████████████████████████████████████████████████████████| 1904630/1904630 [01:14<00:00, 25621.39it/s]


In [16]:
# The bounding box is around 60x30 miles, so 250 samples per axis gives ~1 point per quarter mile on the longer side
north, south, east, west = bbox
lats = np.linspace(south, north, num=250, dtype=np.float32)
# Use the same number of longitude samples as latitude samples
lons = np.linspace(west, east, num=lats.shape[0], dtype=np.float32)

In [17]:
#all_nearest_nodes, dists = ox.distance.nearest_nodes(graph, np.repeat(lons, lats.shape[0]), np.tile(lats, lats.shape[0]), return_dist=True)
#print('Number within one kilometer:', np.count_nonzero(np.asarray(dists) < 1000), 'Number overall:', len(all_nearest_nodes))
# If it's more than a kilometer from an actual point then we're probably in the water/somewhere no one lives
#all_nearest_nodes = np.asarray(all_nearest_nodes)[np.asarray(dists) < 1000]
#np.save(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\nodes.npy', all_nearest_nodes)
all_nearest_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\nodes.npy')


def _empty_results(count):
    """Return a fresh (node ids, travel times) buffer pair: int64 filled with -1 and float32 filled with NaN."""
    return np.full(count, -1, dtype=np.int64), np.full(count, np.nan, dtype=np.float32)


# One independent buffer pair per amenity category, one slot per sampled node
n_samples = all_nearest_nodes.shape[0]
food_closest_nodes, food_closest_travel_times = _empty_results(n_samples)
physical_closest_nodes, physical_closest_travel_times = _empty_results(n_samples)
transport_closest_nodes, transport_closest_travel_times = _empty_results(n_samples)
education_closest_nodes, education_closest_travel_times = _empty_results(n_samples)
worship_closest_nodes, worship_closest_travel_times = _empty_results(n_samples)

Number farther than one kilometer: 54601 Number overall: 62500


In [19]:
#food_nodes, dists = ox.distance.nearest_nodes(graph, [x.x for x in food_places], [x.y for x in food_places], return_dist=True)
#print('Found food', 'Number within 1km:', np.count_nonzero(np.asarray(dists) < 1000), 'Number total:', len(food_nodes))
#food_nodes = np.asarray(food_nodes)[np.asarray(dists) < 1000]
#physical_nodes, dists = ox.distance.nearest_nodes(graph, [x.x for x in physical_places], [x.y for x in physical_places], return_dist=True)
#print('Found physical', 'Number within 1km:', np.count_nonzero(np.asarray(dists) < 1000), 'Number total:', len(physical_nodes))
#physical_nodes = np.asarray(physical_nodes)[np.asarray(dists) < 1000]
#transport_nodes, dists = ox.distance.nearest_nodes(graph, [x.x for x in transport_places], [x.y for x in transport_places], return_dist=True)
#print('Found transport', 'Number within 1km:', np.count_nonzero(np.asarray(dists) < 1000), 'Number total:', len(transport_nodes))
#transport_nodes = np.asarray(transport_nodes)[np.asarray(dists) < 1000]
#education_nodes, dists = ox.distance.nearest_nodes(graph, [x.x for x in education_places], [x.y for x in education_places], return_dist=True)
#print('Found education', 'Number within 1km:', np.count_nonzero(np.asarray(dists) < 1000), 'Number total:', len(education_nodes))
#education_nodes = np.asarray(education_nodes)[np.asarray(dists) < 1000]
#worship_nodes, dists = ox.distance.nearest_nodes(graph, [x.x for x in worship_places], [x.y for x in worship_places], return_dist=True)
#print('Found worship', 'Number within 1km:', np.count_nonzero(np.asarray(dists) < 1000), 'Number total:', len(worship_nodes))
#worship_nodes = np.asarray(worship_nodes)[np.asarray(dists) < 1000]
#np.save(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\food_nodes.npy', food_nodes)
#np.save(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\physical_nodes.npy', physical_nodes)
#np.save(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\transport_nodes.npy', transport_nodes)
#np.save(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\education_nodes.npy', education_nodes)
#np.save(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\worship_nodes.npy', worship_nodes)

In [20]:
# Load the cached graph-node ids for each amenity category (one .npy file per category).
# NOTE(review): absolute, machine-specific paths -- TODO move to a configurable data directory.
food_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\food_nodes.npy')
physical_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\physical_nodes.npy')
transport_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\transport_nodes.npy')
education_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\education_nodes.npy')
# Fixed: this previously re-loaded food_nodes.npy, making every worship result a copy of the food result
worship_nodes = np.load(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\worship_nodes.npy')

In [21]:
def _node_xy(node_ids):
    """Return an (N, 2) float32 array of each node's graph 'x' and 'y' attributes, in that column order."""
    pairs = [(graph.nodes[node]['x'], graph.nodes[node]['y']) for node in node_ids]
    # reshape keeps the two-column layout even when node_ids is empty
    return np.array(pairs, dtype=np.float32).reshape(-1, 2)


# Despite the *_lat_lons names, column 0 holds the 'x' attribute and column 1 the 'y' attribute
food_lat_lons = _node_xy(food_nodes)
physical_lat_lons = _node_xy(physical_nodes)
transport_lat_lons = _node_xy(transport_nodes)
education_lat_lons = _node_xy(education_nodes)
worship_lat_lons = _node_xy(worship_nodes)

In [22]:
# One KD-tree per amenity category, built over that category's node coordinates
food_tree, physical_tree, transport_tree, education_tree, worship_tree = map(
    KDTree,
    (food_lat_lons, physical_lat_lons, transport_lat_lons, education_lat_lons, worship_lat_lons),
)

In [23]:
# For every sampled node, collect the 5 nearest amenity nodes per category (by KD-tree distance
# over the x/y coordinates); these are the candidates for the travel-time search that follows.
closest_food_nodes = []
closest_physical_nodes = []
closest_transport_nodes = []
closest_education_nodes = []
closest_worship_nodes = []
for sample_node in tqdm(all_nearest_nodes):
    node_attrs = graph.nodes[sample_node]
    query_point = (node_attrs['x'], node_attrs['y'])

    _, neighbour_idx = food_tree.query(query_point, k=5)
    closest_food_nodes.append(food_nodes[neighbour_idx])

    _, neighbour_idx = physical_tree.query(query_point, k=5)
    closest_physical_nodes.append(physical_nodes[neighbour_idx])

    _, neighbour_idx = transport_tree.query(query_point, k=5)
    closest_transport_nodes.append(transport_nodes[neighbour_idx])

    _, neighbour_idx = education_tree.query(query_point, k=5)
    closest_education_nodes.append(education_nodes[neighbour_idx])

    _, neighbour_idx = worship_tree.query(query_point, k=5)
    closest_worship_nodes.append(worship_nodes[neighbour_idx])

100%|██████████████████████████████████████████████████████████████████████████| 54601/54601 [00:13<00:00, 4179.34it/s]


In [24]:
@cache    # Memoise results so a repeated (source, target) pair is only searched once
def shortest_path(source, target):
    """Return the shortest-path length from ``source`` to ``target`` on the module-level ``graph``.

    Edges are weighted by their 'actual_travel_time' attribute. Results are cached per
    (source, target) pair, so both arguments must be hashable.
    """
    travel_time = nx.shortest_path_length(graph, source=source, target=target, weight='actual_travel_time')
    return travel_time

In [25]:
def _nearest_by_travel_time(origin, candidates):
    """Return ``(node, travel_time)`` for the candidate with the smallest shortest_path() value from ``origin``.

    Ties keep the earliest candidate. If no candidate beats infinity (including an empty
    ``candidates``), the result is ``(0, np.inf)``, matching the original inline loops.
    """
    best_time = np.inf
    best_node = 0
    for candidate in candidates:
        travel_time = shortest_path(origin, candidate)
        if travel_time < best_time:
            best_time = travel_time
            best_node = candidate
    return best_node, best_time


# (per-sample candidate lists, output node buffer, output travel-time buffer) for each category.
# The five categories were previously handled by five copy-pasted stanzas; processing order is unchanged.
_category_buffers = (
    (closest_food_nodes, food_closest_nodes, food_closest_travel_times),
    (closest_physical_nodes, physical_closest_nodes, physical_closest_travel_times),
    (closest_transport_nodes, transport_closest_nodes, transport_closest_travel_times),
    (closest_education_nodes, education_closest_nodes, education_closest_travel_times),
    (closest_worship_nodes, worship_closest_nodes, worship_closest_travel_times),
)

for i, sample_node in enumerate(tqdm(all_nearest_nodes)):
    for candidates, out_nodes, out_times in _category_buffers:
        out_nodes[i], out_times[i] = _nearest_by_travel_time(sample_node, candidates[i])

100%|██████████████████████████████████████████████████████████████████████████| 54601/54601 [4:14:06<00:00,  3.58it/s]


In [26]:
def _node_attr(node_ids, attr):
    """Return a float32 array holding graph node attribute ``attr`` ('x' or 'y') for every id in ``node_ids``."""
    return np.array([graph.nodes[node][attr] for node in node_ids], dtype=np.float32)


# (category name, chosen node ids, travel times) in the order the datasets are written
_category_results = (
    ('food', food_closest_nodes, food_closest_travel_times),
    ('physical', physical_closest_nodes, physical_closest_travel_times),
    ('transport', transport_closest_nodes, transport_closest_travel_times),
    ('education', education_closest_nodes, education_closest_travel_times),
    ('worship', worship_closest_nodes, worship_closest_travel_times),
)

# Dataset names and creation order are the same as the original hand-written calls:
# nodes / nodes_x / nodes_y, then closest_<category>_nodes{,_travel_time,_x,_y} per category.
with h5py.File(r'C:\Users\willd\Documents\Georgia Tech\CSE6424\Project\nearest.hdf5', 'w') as h5:
    h5.create_dataset('nodes', data=all_nearest_nodes)
    h5.create_dataset('nodes_x', data=_node_attr(all_nearest_nodes, 'x'))
    h5.create_dataset('nodes_y', data=_node_attr(all_nearest_nodes, 'y'))
    for category, chosen_nodes, travel_times in _category_results:
        prefix = f'closest_{category}_nodes'
        h5.create_dataset(prefix, data=chosen_nodes)
        h5.create_dataset(f'{prefix}_travel_time', data=travel_times)
        h5.create_dataset(f'{prefix}_x', data=_node_attr(chosen_nodes, 'x'))
        h5.create_dataset(f'{prefix}_y', data=_node_attr(chosen_nodes, 'y'))