In [None]:
import h5py
import osmnx as ox
import pandas as pd
import numpy as np
import networkx as nx
from tqdm import tqdm
from itertools import product
from functools import cache
from scipy.spatial import KDTree
from haversine import haversine
from shapely.geometry import Point

In [None]:
# Referenced:
# https://towardsdatascience.com/finding-time-dependent-travel-times-between-every-pair-of-locations-in-manhattan-c3c48b0db7ba
# https://towardsdatascience.com/shortest-path-algorithm-with-osm-walking-network-6d2863ae96be
# https://osmnx.readthedocs.io/en/stable/osmnx.html and https://github.com/gboeing/osmnx
# https://movement.uber.com/?lang=en-US

# Set City and File Variables:

In [None]:
####################################### SET VARS HERE ###########################################################

# Keep exactly one `place` line uncommented to find desert measures for the city of interest.

place = 'Cincinnati, Ohio'
#place = 'Atlanta, Georgia'
#place = 'Seattle, Washington'
#place = 'Houston, Texas'
#place = 'San Francisco, California'
#place = 'New York, New York'

# place_abbr is used to name files read and saved by this notebook. It is looked up from `place`
# so the city name and the file names can never drift apart. To support another city, add an
# entry here (the strings below are the ones used to name files in the /data folder).
PLACE_ABBRS = {
    'Cincinnati, Ohio': 'cinci',
    'Atlanta, Georgia': 'atlanta',
    'Seattle, Washington': 'seattle',
    'Houston, Texas': 'houston',
    'San Francisco, California': 'sf',
    'New York, New York': 'nyc',
}
place_abbr = PLACE_ABBRS[place]   # a KeyError here means `place` has no abbreviation registered above

# File path for the city's tract centers, derived from place_abbr so it follows `place`.
# NOTE(review): assumes every city's file is named tractcenters_<place_abbr>.csv -- confirm for
# cities other than Cincinnati, or override tracts_path by hand.
tracts_path = 'data/tract_centers/tractcenters_' + place_abbr + '.csv'

# File path for Uber Movement Data (must be updated by hand when `place` changes).
# If a speed file is not available for the city of interest, input the path to any other speed file;
# edges without a matching Uber speed fall back to the OSM speed-limit travel time.
speed_path = 'data/2020_speeds_cincinnati.csv' # download this file from Uber Movement yourself

# Name and directory of the output file
savename = 'data/tract_desert_measures/' + place_abbr + '_desert_tracts.csv'

# Find Desert Measures

In [None]:
# Load the tract-center table and pull out the three columns used downstream as
# independent NumPy copies: latitude, longitude and the tract identifier.
tractcenters = pd.read_csv(tracts_path)
lats, lons, tracts = (
    tractcenters[column].to_numpy(copy=True)
    for column in ('INTPTLAT', 'INTPTLONG', 'GEOID')
)

In [None]:
# Build the drivable street network for `place` and attach per-edge speeds and travel times.
# Per the notes in the travel-time cell further down: edge `length` is in meters, `speed_kph`
# in km/h and `travel_time` in seconds.
graph = ox.graph_from_place(place, network_type='drive')
print('Got graph')
graph = ox.add_edge_speeds(graph)
print('Got speeds')
graph = ox.add_edge_travel_times(graph)
print('Got travel times')
# Keep only the largest strongly connected component so that every node can reach every other
# node; the later shortest-path searches rely on a path always existing.
# NOTE(review): `ox.utils_graph.get_largest_component` is the older OSMnx spelling -- confirm it
# exists in the installed OSMnx version.
graph = ox.utils_graph.get_largest_component(graph, strongly=True)
print('Got largest connected component')
# Optional: cache the cleaned graph to avoid re-downloading it. Use a path relative to the
# project rather than a machine-specific absolute path, e.g.
#ox.save_graphml(graph, 'data/' + place_abbr + '_graph_cleaned.graphml')
#graph = ox.load_graphml('data/' + place_abbr + '_graph_cleaned.graphml')

In [None]:
# Food sources: OSM features tagged shop=supermarket or amenity=marketplace within `place`.
food_tags = dict(shop='supermarket', amenity='marketplace')
food_places = ox.geometries_from_place(place, food_tags)

In [None]:
# Collapse every Polygon footprint to one point guaranteed to lie inside it,
# so that each food place is described by a single coordinate.
food_is_polygon = food_places['geometry'].type == 'Polygon'
food_places.loc[food_is_polygon, 'geometry'] = (
    food_places.loc[food_is_polygon, 'geometry'].representative_point()
)

In [None]:
# Green / recreational places: any OSM feature whose `leisure` tag is one of the values below.
physical_tags = {
    'leisure': [
        'park',
        'recreation_ground',
        'playground',
        'fitness_station',
        'sports_centre',
        'nature_reserve',
        'pitch',
    ],
}
physical_places = ox.geometries_from_place(place, physical_tags)
# Collapse Polygon footprints to a single interior point each.
physical_is_polygon = physical_places['geometry'].type == 'Polygon'
physical_places.loc[physical_is_polygon, 'geometry'] = (
    physical_places.loc[physical_is_polygon, 'geometry'].representative_point()
)

In [None]:
# Public transit access points: platforms/stops, bus stops, rail and subway stations.
transport_tags = {
    'public_transport': ['platform', 'stop_position'],
    'highway': ['bus_stop', 'platform'],
    'railway': ['subway_entrance', 'station', 'tram', 'tram_stop'],
    'station': 'subway',
}
transport_places = ox.geometries_from_place(place, transport_tags)
# Collapse Polygon footprints to a single interior point each.
transport_is_polygon = transport_places['geometry'].type == 'Polygon'
transport_places.loc[transport_is_polygon, 'geometry'] = (
    transport_places.loc[transport_is_polygon, 'geometry'].representative_point()
)

In [None]:
# Education: libraries, schools and kindergartens (all carried on the `amenity` tag).
education_tags = dict(amenity=['library', 'school', 'kindergarten'])
education_places = ox.geometries_from_place(place, education_tags)
# Collapse Polygon footprints to a single interior point each.
education_is_polygon = education_places['geometry'].type == 'Polygon'
education_places.loc[education_is_polygon, 'geometry'] = (
    education_places.loc[education_is_polygon, 'geometry'].representative_point()
)

In [None]:
# Places of worship (amenity=place_of_worship).
worship_tags = dict(amenity='place_of_worship')
worship_places = ox.geometries_from_place(place, worship_tags)
# Collapse Polygon footprints to a single interior point each.
worship_is_polygon = worship_places['geometry'].type == 'Polygon'
worship_places.loc[worship_is_polygon, 'geometry'] = (
    worship_places.loc[worship_is_polygon, 'geometry'].representative_point()
)

In [None]:
# Simplify everything: reduce each category to a flat series of Point geometries.
AREAL_GEOM_TYPES = ['Polygon', 'MultiPolygon']


def _as_point_series(places):
    """Return the Point geometries of `places` with the first index level dropped.

    `places` is either the table returned by the OSM query (with a 'geometry' column) or a
    geometry series already produced by this function, so re-running this cell is harmless.

    Polygon *and* MultiPolygon geometries are replaced by a representative interior point.
    Previously MultiPolygons (e.g. large parks mapped as relations) were never converted and
    were then silently discarded by the Point filter. Geometries of any other type (such as
    lines) are still dropped.
    """
    geoms = places['geometry'].copy() if hasattr(places, 'columns') else places.copy()
    areal = geoms.geom_type.isin(AREAL_GEOM_TYPES)
    if areal.any():
        # Convert while the index is still the full (unique) multi-level index so the
        # assignment aligns row-for-row.
        geoms.loc[areal] = geoms.loc[areal].representative_point()
    if geoms.index.nlevels > 1:
        geoms = geoms.droplevel(0)
    return geoms[geoms.geom_type == 'Point']


food_places = _as_point_series(food_places)
physical_places = _as_point_series(physical_places)
transport_places = _as_point_series(transport_places)
education_places = _as_point_series(education_places)
worship_places = _as_point_series(worship_places)

In [None]:
# Load the Uber Movement speed table from `speed_path`. Later cells read its
# 'hour_of_day', 'osm_way_id' and 'speed_mph_mean' columns.
speed_raw = pd.read_csv(speed_path)

In [None]:
# Trim the speed table in one pass: discard bookkeeping columns, keep only the
# hour-12 rows (noon seems like a fair time), and index the result by OSM way id.
speed_raw = (
    speed_raw
    .drop(columns=['quarter', 'year', 'segment_id', 'start_junction_id', 'end_junction_id'])
    .loc[lambda table: table['hour_of_day'] == 12]
    .set_index('osm_way_id', drop=True)
)

In [None]:
# Attach an 'actual_travel_time' (seconds) to every edge, preferring observed Uber speeds and
# falling back to the speed-limit based 'travel_time' when no usable observation exists.
# Edge 'length' is meters, 'travel_time' is seconds, Uber speeds are mph.
MPH_TO_MPS = 0.44704    # miles/hour -> meters/second

# One OSM way can appear in several rows of speed_raw (the osm_way_id index is not unique), which
# is why a plain `.at[...]` lookup sometimes returned a Series. Collapse the table once into
# "mean seconds per meter" per way, i.e. the mean of the per-row travel times for a unit length.
# Rows with zero, negative or missing speed are ignored: dividing by a NumPy zero yields inf
# rather than raising ZeroDivisionError, so such rows used to produce infinite (or NaN) edge
# times instead of triggering the fallback.
usable_mph = speed_raw.loc[speed_raw['speed_mph_mean'] > 0, 'speed_mph_mean']
seconds_per_meter = (1.0 / (usable_mph * MPH_TO_MPS)).groupby(level=0).mean().to_dict()

real_calculation = 0    # number of edges whose time came from Uber data
for _, _, _, edge_data in tqdm(graph.edges(keys=True, data=True), total=graph.number_of_edges()):
    # Some graph edges are made up of multiple OSM ways; treat the single-way case the same way.
    osmids = edge_data['osmid'] if isinstance(edge_data['osmid'], list) else [edge_data['osmid']]
    # Use the first constituent way that has Uber data for the whole edge length.
    pace = next((seconds_per_meter[osmid] for osmid in osmids if osmid in seconds_per_meter), None)
    if pace is None:
        # Backup (i.e. length/speed limit) if Uber data isn't available
        travel_seconds = edge_data['travel_time']
    else:
        travel_seconds = edge_data['length'] * pace
        real_calculation += 1
    edge_data['actual_travel_time'] = travel_seconds

In [None]:
# Snap every tract center to its nearest graph node, then allocate one NaN-filled
# float32 result vector per desert measure (one slot per tract).
snapped_nodes, dists = ox.distance.nearest_nodes(graph, lons, lats, return_dist=True)
all_nearest_nodes = np.asarray(snapped_nodes)
n_tracts = all_nearest_nodes.shape[0]
print('Number:', n_tracts)
(
    food_closest_travel_times,
    physical_closest_dist,
    transport_closest_dist,
    education_closest_travel_times,
    worship_closest_travel_times,
) = (np.full(n_tracts, np.nan, dtype=np.float32) for _ in range(5))

In [None]:
def _snap_to_graph(points):
    """Return (nearest graph node ids as an ndarray, snap distances) for a series of points.

    Each element of `points` must expose `.x` and `.y`, which are passed to the nearest-node
    search as the X and Y coordinates respectively.
    """
    xs = [point.x for point in points]
    ys = [point.y for point in points]
    nodes, snap_dists = ox.distance.nearest_nodes(graph, xs, ys, return_dist=True)
    return np.asarray(nodes), snap_dists


# `dists` is overwritten by every call; only the node arrays are used afterwards.
food_nodes, dists = _snap_to_graph(food_places)
physical_nodes, dists = _snap_to_graph(physical_places)
transport_nodes, dists = _snap_to_graph(transport_places)
education_nodes, dists = _snap_to_graph(education_places)
worship_nodes, dists = _snap_to_graph(worship_places)

In [None]:
def _node_coords(nodes):
    """Return an (n, 2) float32 array of graph-node coordinates for `nodes`.

    Despite the `*_lat_lons` names used by the callers, column 0 holds the node's 'x'
    attribute and column 1 its 'y' attribute.
    """
    xs = [graph.nodes[node]['x'] for node in nodes]
    ys = [graph.nodes[node]['y'] for node in nodes]
    return np.array([xs, ys], dtype=np.float32).T


food_lat_lons = _node_coords(food_nodes)
physical_lat_lons = _node_coords(physical_nodes)
transport_lat_lons = _node_coords(transport_nodes)
education_lat_lons = _node_coords(education_nodes)
worship_lat_lons = _node_coords(worship_nodes)

In [None]:
# One KD-tree per category over the snapped node coordinates, for fast nearest-neighbour lookups.
(
    food_tree,
    physical_tree,
    transport_tree,
    education_tree,
    worship_tree,
) = (
    KDTree(coords)
    for coords in (
        food_lat_lons,
        physical_lat_lons,
        transport_lat_lons,
        education_lat_lons,
        worship_lat_lons,
    )
)

In [None]:
def haversine_wrapper(lat1, lon1, lat2, lon2):
    """Distance between two points given as four separate latitude/longitude scalars.

    Thin adapter around `haversine`, which expects each point as a (lat, lon) pair.
    The result is in whatever unit `haversine` uses by default
    (presumably kilometres -- confirm against the haversine package).
    """
    origin = (lat1, lon1)
    destination = (lat2, lon2)
    return haversine(origin, destination)

In [None]:
# For every tract node: collect the straight-line nearest candidates per category (to be ranked by
# network travel time later) and record the straight-line distance to the nearest physical and
# transport place.
N_CANDIDATES = 5    # candidates kept per category for the network search


def _candidate_nodes(tree, nodes, xy, k=N_CANDIDATES):
    """Return the graph nodes of the (up to) `k` tree points nearest to `xy`.

    `k` is clamped to the number of available places: asking a KD-tree for more neighbours than
    it holds returns an out-of-range index for the missing ones, which previously made
    `nodes[idxes]` raise IndexError in cities with fewer than five places of a category.
    Returns an empty array when there are no places at all.
    """
    k = min(k, len(nodes))
    if k == 0:
        return nodes[:0]
    # The query returns a bare scalar index when k == 1; always hand back a 1-D array.
    return nodes[np.atleast_1d(tree.query(xy, k=k)[1])]


def _nearest_straight_line_dist(tree, coords, node_lat, node_lon):
    """Return the haversine distance from the node to its nearest point in `tree`.

    `coords` holds the tree's points with 'x' in column 0 and 'y' in column 1.
    Returns NaN when the category has no places.
    """
    if len(coords) == 0:
        return np.nan
    idx = tree.query((node_lon, node_lat), k=1)[1]
    return haversine_wrapper(node_lat, node_lon, coords[idx, 1], coords[idx, 0])


closest_food_nodes = []
closest_education_nodes = []
closest_worship_nodes = []
for i, sample_node in tqdm(enumerate(all_nearest_nodes), total=len(all_nearest_nodes)):
    node_lon = graph.nodes[sample_node]['x']
    node_lat = graph.nodes[sample_node]['y']
    node_xy = (node_lon, node_lat)

    closest_food_nodes.append(_candidate_nodes(food_tree, food_nodes, node_xy))
    physical_closest_dist[i] = _nearest_straight_line_dist(physical_tree, physical_lat_lons, node_lat, node_lon)
    transport_closest_dist[i] = _nearest_straight_line_dist(transport_tree, transport_lat_lons, node_lat, node_lon)
    closest_education_nodes.append(_candidate_nodes(education_tree, education_nodes, node_xy))
    closest_worship_nodes.append(_candidate_nodes(worship_tree, worship_nodes, node_xy))

In [None]:
@cache    # memoised per (source, target) pair to avoid repeating identical searches
def shortest_path(source, target):
    """Return the minimum total 'actual_travel_time' of any path from `source` to `target`.

    Both arguments are node ids of the module-level `graph`; they must be hashable
    because results are cached.
    """
    total_travel_time = nx.shortest_path_length(
        graph, source, target, weight='actual_travel_time'
    )
    return total_travel_time

In [None]:
def _fastest_travel_time(source, candidate_nodes):
    """Return the smallest network travel time from `source` to any of `candidate_nodes`.

    Travel times come from `shortest_path`. Returns infinity when `candidate_nodes` is empty,
    matching the value the hand-written search loops started from.
    """
    return min((shortest_path(source, target) for target in candidate_nodes), default=np.inf)


# For each tract node, keep the best network travel time among its straight-line candidates.
# This replaces three copies of the same search loop, each of which also tracked an unused
# `shortest_node`.
for i, sample_node in enumerate(tqdm(all_nearest_nodes)):
    food_closest_travel_times[i] = _fastest_travel_time(sample_node, closest_food_nodes[i])
    education_closest_travel_times[i] = _fastest_travel_time(sample_node, closest_education_nodes[i])
    worship_closest_travel_times[i] = _fastest_travel_time(sample_node, closest_worship_nodes[i])

In [None]:
# Assemble the five per-tract measures into one table, one column per measure.
columns = [
    'food_closest_travel_times',
    'physical_closest_dist',
    'transport_closest_dist',
    'education_closest_travel_times',
    'worship_closest_travel_times',
]
deserts_wtracts = np.column_stack((
    food_closest_travel_times,
    physical_closest_dist,
    transport_closest_dist,
    education_closest_travel_times,
    worship_closest_travel_times,
))
desert_measures = pd.DataFrame(deserts_wtracts, columns=columns)

# GEOIDs are written as strings; San Francisco ids get a '0' put in front.
# NOTE(review): presumably this restores a leading zero lost when GEOID was parsed as a
# number -- confirm against the tract-centers file.
geoid_prefix = '0' if place == 'San Francisco, California' else ''
tracts = [geoid_prefix + str(geoid) for geoid in tracts.tolist()]
desert_measures['GEOID'] = tracts

In [None]:
# Write the per-tract desert measures (including the GEOID column) to `savename`,
# omitting the DataFrame index.
# NOTE(review): assumes the output directory in `savename` already exists -- confirm.
desert_measures.to_csv(savename, index=False)