# Testing extracting OSM data using Osmium

In [1]:
import os, sys, time, importlib
import osmnx

import geopandas as gpd
import pandas as pd
import networkx as nx
import numpy as np
sys.path.append("../../../GOSTNets")
import GOSTnets as gn

# pip install osmium
import osmium, logging
import shapely.wkb as wkblib

from shapely.geometry import LineString, Point

import time

In [2]:
# set file
some_file = './colombo.osm.pbf'

## simplest example of using Osmium

In [3]:
class HotelHandler(osmium.SimpleHandler):
    def __init__(self):
        super(HotelHandler, self).__init__()
        self.hotels = []

    def node(self, o):
        if o.tags.get('tourism') == 'hotel' and 'name' in o.tags:
            self.hotels.append(o.tags['name'])


h = HotelHandler()
h.apply_file(some_file)

print(sorted(h.hotels))

['5th Lane House', 'Adhikaram Sea View Hotel', 'Airport Reach', 'Amaliya Reach Holiday Resort', 'Amaya Plam Garden Hotel', 'Ambalama Leisure Lounge', 'Aqua Marine Holiday Homes', 'Aqua Marine Holiday Homes', 'Aqua Pearl Lake Resort', 'Araliya Blue Beach View Hotel', 'Asiri Hotel and Reception Hall', 'Aurora Holiday House', 'Avenra Garden Hotel', 'Ayurveda Spring of Life', 'Beach Bungalow', 'Beach Rest', 'Beach Villa Guest House', 'Benchmark Holidays', 'Berjaya Mount Royal Beach Hotel', 'Big John', 'Biyagama Village', 'Bliss Hotel', 'Blue Horizone', 'Blue Lotus Villa', 'Blue Moon', 'Bolgoda Lake Villa', 'Bolgoda Park Hotel', 'Bon Bon Hotel', 'Boutique Colombo', 'Brighten Rest', 'C J Villas', 'Casa Colombo Collection Colombo', 'Cashew Hotel', 'Ceylonica Beach Hotel', 'Chimes', 'Choys Waterfront Residence', 'Cinnamon Cove', 'Cinnamon Lakeside Hotel', 'Cinnamon Village Eco Tourism Hotel & Lodge', 'City Beds - The Regent', 'City Rest Fort', 'Clarion Hotel', 'Clock Inn Colombo', 'Colombo Cou

## Extracting highways with their nodes using Osmium

In [9]:
start_time = time.time()

wkbfab = osmium.geom.WKBFactory()

# extract highways
class HighwayExtractor(osmium.SimpleHandler):
    def __init__(self):
        osmium.SimpleHandler.__init__(self) 
        #self.nodes = []
        #self.raw_h = []
        self.highways = []
        self.broken_highways = []
        #self.num_nodes = 0

      #do not think that we need to extract nodes, because we can get the nodes from the ways
    # this makes it more than two times faster
#     def node(self, n):
#         wkb = wkbfab.create_point(n)
#         shp = wkblib.loads(wkb, hex = True)
#         self.nodes.append([n.id, shp, shp.x, shp.y])
        #self.num_nodes += 1
        #self.nodes.append(shp)
    
    def way(self, w):
        #self.raw_h.append(w)
        try:
            nodes = [x.ref for x in w.nodes]
            wkb = wkbfab.create_linestring(w)
            shp = wkblib.loads(wkb, hex=True)
            if 'highway' in w.tags:
                info = [w.id, nodes, shp, w.tags['highway']]
                self.highways.append(info)

        except:
            print('hit exception')
            nodes = [x for x in w.nodes if x.location.valid()]
            if len(nodes) > 1:
                shp = LineString([Point(x.location.x, x.location.y) for x in nodes])
                info = [w.id, nodes, shp, w.tags['highway']]
                self.highways.append(info)
            else:
                self.broken_highways.append(w)
            logging.warning("Error Processing OSM Way %s" % w.id)
            
        
h = HighwayExtractor()
h.apply_file(some_file, locations=True)
#print(len(h.nodes))
print(len(h.highways))
print(len(h.broken_highways))

end_time = time.time()
print(end_time - start_time)

54063
0
46.99775695800781


### results: 
Extracting just the highways with their nodes took about 44 seconds for Colombo, where as extracting the highway nodes and highways seperately took about 185 seconds.

In [10]:
h.highways

[[8111662,
  [60796641, 2419600689, 3830183348],
  <shapely.geometry.linestring.LineString at 0x120fdff10>,
  'tertiary'],
 [8111663,
  [91030247,
   3825202158,
   3825202157,
   6537163557,
   6537163559,
   6537163558,
   127658543,
   3164258586],
  <shapely.geometry.linestring.LineString at 0x120fdf4d0>,
  'trunk'],
 [8111669,
  [678343035, 621993848],
  <shapely.geometry.linestring.LineString at 0x120fdf7d0>,
  'tertiary'],
 [8111671,
  [60796691, 4324781730, 60796694],
  <shapely.geometry.linestring.LineString at 0x121004b90>,
  'tertiary'],
 [8111672,
  [60796688, 3799979973, 1447088864],
  <shapely.geometry.linestring.LineString at 0x121004e10>,
  'tertiary'],
 [8111673,
  [60796648,
   5133502747,
   4047124003,
   5133502738,
   60796650,
   60796653,
   5133502775,
   645092082,
   4047124002,
   60796657,
   4047124004,
   4047124001,
   60796673,
   4324745329,
   60796676,
   4567967244,
   60796679,
   60796682,
   644617048,
   60796685,
   60796688],
  <shapely.geomet

In [11]:
h.highways[1]

[8111663,
 [91030247,
  3825202158,
  3825202157,
  6537163557,
  6537163559,
  6537163558,
  127658543,
  3164258586],
 <shapely.geometry.linestring.LineString at 0x120fdf4d0>,
 'trunk']

In [12]:
h.highways[2][2].length

0.00046395638802746445

In [13]:
list(h.highways[0][2].coords)

[(79.8661687, 6.9095744), (79.8665945, 6.9095545), (79.866881, 6.9095501)]

In [22]:
list(h.highways[2][2].coords)[1]

(79.8610658, 6.891542)

## Split up the highways into seperate edges between each node. At the same time create the node list from the highway edges.

In [24]:
start_time = time.time()

all_nodes = []
all_edges = []

for x in h.highways:
    for n_idx in range(0, (len(x[1]) - 1)):
        try:
            osm_id_from = x[1][n_idx].ref
        except:
            osm_id_from = x[1][n_idx]
        try:
            osm_id_to   = x[1][n_idx+1].ref
        except:
            osm_id_to   = x[1][n_idx+1]
        try:
            osm_coords_from = list(x[2].coords)[n_idx]
            #print(osm_coords_from[0])
            #create a node
            all_nodes.append([osm_id_from, { 'x' : osm_coords_from[0], 'y' : osm_coords_from[1] }])
            osm_coords_to = list(x[2].coords)[n_idx+1]
            #print(n_idx)
            #print(len(x[1]) - 1)
            if n_idx == (len(x[1]) - 2):
                #print('last element')
                #print(osm_coords_to)
                #create a node
                all_nodes.append([osm_id_to, { 'x' : osm_coords_to[0], 'y' : osm_coords_to[1]} ])
            edge = LineString([osm_coords_from, osm_coords_to])
            attr = {'osm_id':x[0], 'Wkt':edge, 'length':edge.length, 'infra_type':x[3]}
            #Create an edge from the list of nodes in both directions
            all_edges.append([osm_id_from, osm_id_to, attr])
            all_edges.append([osm_id_to, osm_id_from, attr])
        except:
            logging.warning(f"Error adding edge between nodes {osm_id_from} and {osm_id_to}")

end_time = time.time()
print(end_time - start_time)

23.299654006958008


### results: 
Took about 23 seconds to run

In [25]:
all_nodes[:10]

[[60796641, {'x': 79.8661687, 'y': 6.9095744}],
 [2419600689, {'x': 79.8665945, 'y': 6.9095545}],
 [3830183348, {'x': 79.866881, 'y': 6.9095501}],
 [91030247, {'x': 79.8423145, 'y': 6.931318}],
 [3825202158, {'x': 79.8423064, 'y': 6.9312566}],
 [3825202157, {'x': 79.8423018, 'y': 6.9312118}],
 [6537163557, {'x': 79.8423041, 'y': 6.9311676}],
 [6537163559, {'x': 79.8423108, 'y': 6.9311204}],
 [6537163558, {'x': 79.8423263, 'y': 6.9310452}],
 [127658543, {'x': 79.8423472, 'y': 6.9309658}]]

In [27]:
all_edges[:5]

[[60796641,
  2419600689,
  {'osm_id': 8111662,
   'Wkt': <shapely.geometry.linestring.LineString at 0x1256f2e50>,
   'length': 0.00042626476514233825,
   'infra_type': 'tertiary'}],
 [2419600689,
  60796641,
  {'osm_id': 8111662,
   'Wkt': <shapely.geometry.linestring.LineString at 0x1256f2e50>,
   'length': 0.00042626476514233825,
   'infra_type': 'tertiary'}],
 [2419600689,
  3830183348,
  {'osm_id': 8111662,
   'Wkt': <shapely.geometry.linestring.LineString at 0x1210048d0>,
   'length': 0.00028653378509489497,
   'infra_type': 'tertiary'}],
 [3830183348,
  2419600689,
  {'osm_id': 8111662,
   'Wkt': <shapely.geometry.linestring.LineString at 0x1210048d0>,
   'length': 0.00028653378509489497,
   'infra_type': 'tertiary'}],
 [91030247,
  3825202158,
  {'osm_id': 8111663,
   'Wkt': <shapely.geometry.linestring.LineString at 0x121004810>,
   'length': 6.193197881564978e-05,
   'infra_type': 'trunk'}]]

In [28]:
start_time = time.time()

G = nx.MultiDiGraph()
G.add_nodes_from(all_nodes)
G.add_edges_from(all_edges)

end_time = time.time()
print(end_time - start_time)

8.772705078125


### results: 
Took about 2404 seconds to run

In [29]:
len(G.edges)

954374