In [None]:
!pip install osmium

In [1]:
import osmium as osm
import pandas as pd
import folium
import copy

In [2]:
from osm_handler import OSMHandler

In [47]:
# OSMHandler builds on osmium's SimpleHandler. A handler is necessary because
# most OSM files are very large and require special consideration
# to be read properly without filling up your RAM. The code here
# lets me pick out only the information I need and save it in a form
# that I can use later.

class OSMHandler(osm.SimpleHandler):
    """Collect nodes, ways and NFTA relations while an OSM file is read.

    Note: this definition replaces the ``OSMHandler`` name imported from
    ``osm_handler`` earlier in the notebook.

    Attributes filled in by the callbacks:
        nodes:     ``["node", id, location]`` for every node.
        ways:      ``[id, [node refs], {tag key: tag value}]`` for every way.
        relations: ``[id, [member refs], {tag key: tag value}]`` for every
                   relation whose ``name`` tag contains ``"NFTA"``.
        data:      metadata rows written by ``tag_inventory``.
    """

    def __init__(self):
        super().__init__()
        self.data = []
        self.relations = []
        self.ways = []
        self.nodes = []

    def tag_inventory(self, elem, elem_type):
        """Append one metadata row to ``self.data`` for each tag on ``elem``.

        The tag's own key and value are not part of the row, so an element
        with ``k`` tags contributes ``k`` identical rows and an untagged
        element contributes none.
        """
        self.data.extend(
            [
                elem_type,
                elem.id,
                elem.version,
                elem.visible,
                pd.Timestamp(elem.timestamp),
                elem.uid,
                elem.user,
                elem.changeset,
                len(elem.tags),
            ]
            for _ in elem.tags
        )

    def node(self, n):
        """Record the node's metadata rows and its id/location entry."""
        self.tag_inventory(n, "node")
        entry = ["node", n.id, n.location]
        self.nodes.append(entry)

    def way(self, w):
        """Record the way's metadata rows, its tags and its ordered node refs."""
        self.tag_inventory(w, "way")
        tag_map = {tag.k: tag.v for tag in w.tags}
        node_refs = [member.ref for member in w.nodes]
        self.ways.append([w.id, node_refs, tag_map])

    def relation(self, r):
        """Record metadata rows; keep the relation only if it is an NFTA one.

        A relation is kept when it has a ``name`` tag containing ``"NFTA"``.
        Member refs are stored without their member type (node/way/relation),
        and the name of every kept relation is printed.
        """
        self.tag_inventory(r, "relation")
        if 'name' not in r.tags:
            return
        route_name = r.tags['name']
        if 'NFTA' not in route_name:
            return
        tag_map = {tag.k: tag.v for tag in r.tags}
        member_refs = [member.ref for member in r.members]
        self.relations.append([r.id, member_refs, tag_map])
        print(route_name)
# Make sure to get an osm that includes all parts of NFTA or problems will arise later
OSM_FILE = "largest_map.osm"
osmhandler = OSMHandler()
osmhandler.apply_file(OSM_FILE)

NFTA 40A Buffalo-Niagara Falls via Grand Island
NFTA 23A Fillmore-Hertel (to Black Rock Riverside Transit Hub)
NFTA 15A Seneca
NFTA 23A Fillmore-Hertel (to Bailey & Abbott)
NFTA Metro Rail: Erie Canal Harbor → University
NFTA 2B Clinton (inbound)
NFTA 2B Clinton (outbound)
NFTA 2A Clinton (outbound)
NFTA 2A Clinton (inbound)
NFTA 1B William (outbound)
NFTA 1B William (inbound)
NFTA 11A Colvin (outbound)
NFTA 11A Colvin (inbound)
NFTA 4B Broadway (outbound)
NFTA 4B Broadway (inbound)
NFTA 4A Broadway (outbound)
NFTA 4A Broadway (inbound)
NFTA 20A Elmwood
NFTA 11D Colvin (outbound)
NFTA 11D Colvin (inbound)
NFTA 1A William (outbound)
NFTA 1A William (inbound)
NFTA 14A Abbott
NFTA 14B Abbott
NFTA 14C Abbott
NFTA 13A Kensington
NFTA 12A Utica
NFTA 8A Main
NFTA 7A Baynes-Richmond
NFTA 6A Sycamore
NFTA 5A Niagara-Kenmore
NFTA 3A Grant
NFTA 22A Porter-Best
NFTA 19A Bailey
NFTA 18A Jefferson
NFTA 16H South Park
NFTA 16A South Park
NFTA 16B South Park
NFTA 32A Amherst
NFTA 29A Wohlers
NFTA 26A 

The following cells are required because the OSM handler restricts how its data can be used afterwards, owing to the way osmium manages memory. They copy the information into ordinary Python data structures that I can then use however I want.

In [48]:
# Copy every node's coordinates out of the handler: two parallel lists plus
# a lookup table mapping node id -> [lat, lon].
node_table = {}
latitudes = []
longitudes = []
for _kind, node_id, location in osmhandler.nodes:
    lat = location.lat
    lon = location.lon
    latitudes.append(lat)
    longitudes.append(lon)
    node_table[node_id] = [lat, lon]
print(len(latitudes))

3538763


In [49]:
# Copy the handler's way records ([id, node refs, tags]) into a plain list.
ways = list(osmhandler.ways)

In [50]:
# Copy the handler's relation records ([id, member refs, tags]) into a plain list.
relations = list(osmhandler.relations)

In [51]:
# Lookup table: way id -> ordered list of the node refs that make up the way.
way_table = {way_id: node_refs for way_id, node_refs, _tags in ways}

In [52]:
# The OSMHandler only kept relations with NFTA in the name, but it had to keep
# every node and way because they carry nothing that says which ones matter.
# Here the set of nodes and ways is narrowed down to those referenced by the
# saved relations: a member that is a way contributes itself and all of its
# nodes, a member that is a node contributes just itself.
used_nodes = set()
used_ways = set()

for _relation_id, member_refs, _tags in relations:
    for ref in member_refs:
        if ref in way_table:
            used_ways.add(ref)
            used_nodes.update(way_table[ref])
        elif ref in node_table:
            used_nodes.add(ref)

print(len(used_nodes))
print(len(used_ways))

20124
2796


In [None]:
# Debugging code used to plot the relations saved and show they are working properly.
# To reenable output, just uncomment m in the next cell.
m = folium.Map(location=[42.89, -78.74], tiles="OpenStreetMap", zoom_start=10)

for relation in relations[0:5]:
    print(relation[2]['name'])
    for ref in relation[1]:
        # A member is either a way (expanded to its nodes) or a single node;
        # anything else contributes nothing to the map.
        if ref in way_table:
            member_nodes = way_table[ref]
        elif ref in node_table:
            member_nodes = [ref]
        else:
            member_nodes = []

        points = []
        for node_id in member_nodes:
            lat, lon = node_table[node_id]
            folium.Marker([lat, lon]).add_to(m)
            points.append((lat, lon))

        if points:
            folium.PolyLine(points).add_to(m)

In [54]:
#m

In [55]:
# Keep only the nodes and ways that the saved relations actually use, and
# store nodes, ways and relations as dictionaries keyed by their id.

final_node_table = {
    node_id: coords
    for node_id, coords in node_table.items()
    if node_id in used_nodes
}

final_way_table = {
    way_id: node_refs
    for way_id, node_refs in way_table.items()
    if way_id in used_ways
}

# relation id -> [member refs, tags]
final_relation_table = {
    relation_id: [member_refs, tags]
    for relation_id, member_refs, tags in relations
}

print(len(final_node_table))
print(len(final_way_table))
print(len(final_relation_table))

20124
2796
90


In [56]:
# For debugging purposes, I plot all of the routes I saved, showing that I have all of the NFTA bus routes.
# Each relation is drawn in its own randomly chosen colour.

import random

m = folium.Map(location=[42.89, -78.74], tiles="OpenStreetMap", zoom_start=10)

for relation in relations:
    # Zero-pad to six hex digits: hex() drops leading zeros, so small random
    # values used to yield strings such as '#abc12' that are not valid
    # '#rrggbb' colours.
    line_color = '#{:06x}'.format(random.randint(0, 0xFFFFFF))
    print(relation[2]['name'])
    for index in relation[1]:
        points = []
        if index in final_way_table:
            # A way member: one polyline through all of its nodes.
            for waypoint in final_way_table[index]:
                lat, lon = final_node_table[waypoint]
                points.append((lat, lon))
        elif index in final_node_table:
            # A single node member.
            lat, lon = final_node_table[index]
            points.append((lat, lon))
        if points:
            folium.PolyLine(points, color=line_color).add_to(m)
m

NFTA 40A Buffalo-Niagara Falls via Grand Island
NFTA 23A Fillmore-Hertel (to Black Rock Riverside Transit Hub)
NFTA 15A Seneca
NFTA 23A Fillmore-Hertel (to Bailey & Abbott)
NFTA Metro Rail: Erie Canal Harbor → University
NFTA 2B Clinton (inbound)
NFTA 2B Clinton (outbound)
NFTA 2A Clinton (outbound)
NFTA 2A Clinton (inbound)
NFTA 1B William (outbound)
NFTA 1B William (inbound)
NFTA 11A Colvin (outbound)
NFTA 11A Colvin (inbound)
NFTA 4B Broadway (outbound)
NFTA 4B Broadway (inbound)
NFTA 4A Broadway (outbound)
NFTA 4A Broadway (inbound)
NFTA 20A Elmwood
NFTA 11D Colvin (outbound)
NFTA 11D Colvin (inbound)
NFTA 1A William (outbound)
NFTA 1A William (inbound)
NFTA 14A Abbott
NFTA 14B Abbott
NFTA 14C Abbott
NFTA 13A Kensington
NFTA 12A Utica
NFTA 8A Main
NFTA 7A Baynes-Richmond
NFTA 6A Sycamore
NFTA 5A Niagara-Kenmore
NFTA 3A Grant
NFTA 22A Porter-Best
NFTA 19A Bailey
NFTA 18A Jefferson
NFTA 16H South Park
NFTA 16A South Park
NFTA 16B South Park
NFTA 32A Amherst
NFTA 29A Wohlers
NFTA 26A 

In [57]:
# Using pickle, this code saves the data structures I've created.
import pickle
from pathlib import Path

# Create the output directory first; open(..., 'wb') does not create missing
# parent directories and would otherwise raise FileNotFoundError.
graph_dir = Path('graph')
graph_dir.mkdir(parents=True, exist_ok=True)

# File name inside graph/ -> object to store under that name.
tables_to_save = {
    'final_node_table': final_node_table,
    'final_way_table': final_way_table,
    'relations': relations,
    'final_relation_table': final_relation_table,
}

for file_name, table in tables_to_save.items():
    with open(graph_dir / file_name, 'wb') as f:
        pickle.dump(table, f)

In [46]:
# This code is how pickle loads the data structures that have been saved.
# I've added them here for convenience in copy-pasting in other cells.
# The import is repeated so the cell also works on a fresh kernel or when
# pasted into another notebook.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
import pickle

with open('graph/final_node_table', 'rb') as f:
    final_node_table = pickle.load(f)
with open('graph/final_way_table', 'rb') as f:
    final_way_table = pickle.load(f)
with open('graph/relations', 'rb') as f:
    relations = pickle.load(f)
with open('graph/final_relation_table', 'rb') as f:
    final_relation_table = pickle.load(f)