# Testing extracting OSM data using Osmium

In [1]:
import os, sys, time, importlib
import osmnx

import geopandas as gpd
import pandas as pd
import networkx as nx
import numpy as np
sys.path.append("../../../GOSTNets")
import GOSTnets as gn

# pip install osmium
import osmium, logging
import shapely.wkb as wkblib

from shapely.geometry import LineString, Point

import time

vavuniya is a city in northern Sri Lanka, and this will be a small area for testing

In [2]:
# set file
some_file = './vavuniya.osm.pbf'

## Extracting highways and nodes using Osmium

In [3]:
start_time = time.time()

wkbfab = osmium.geom.WKBFactory()

# extract highways
class HighwayExtractor(osmium.SimpleHandler):
    def __init__(self):
        osmium.SimpleHandler.__init__(self) 
        self.nodes = []
        #self.raw_h = []
        self.highways = []
        self.broken_highways = []
        self.total = 0
        self.num_nodes = 0
    
    def way(self, w):
        #self.raw_h.append(w)
        try:
            nodes = [x.ref for x in w.nodes]
            wkb = wkbfab.create_linestring(w)
            shp = wkblib.loads(wkb, hex=True)
            if 'highway' in w.tags:
                info = [w.id, nodes, shp, w.tags['highway']]
                self.highways.append(info)
        except:
            print('hit exception')
            nodes = [x for x in w.nodes if x.location.valid()]
            if len(nodes) > 1:
                shp = LineString([Point(x.location.x, x.location.y) for x in nodes])
                info = [w.id, nodes, shp, w.tags['highway']]
                self.highways.append(info)
            else:
                self.broken_highways.append(w)
            logging.warning("Error Processing OSM Way %s" % w.id)
            
        
h = HighwayExtractor()
h.apply_file(some_file, locations=True)
print(len(h.highways))
print(len(h.broken_highways))

end_time = time.time()
print(end_time - start_time)

2501
0
2.680011749267578


In [4]:
#h.nodes

In [5]:
#h.highways

In [6]:
h.highways[1]

[160900791,
 [1728748185,
  5968497940,
  5698422172,
  5968202466,
  5698422568,
  5968203109,
  1728748181,
  5968203110,
  5968497941,
  5968497942,
  1728748173,
  5968065525,
  1728748201,
  1728748228,
  1728748157,
  1728748122,
  1728748133,
  5971148451,
  3765441939,
  1728748180,
  1728748204,
  3221485407,
  1728748189,
  5107013353,
  3819809979,
  5971148452,
  1728748214,
  5971148453,
  1728748138,
  5971148454,
  2408604811,
  1728748211,
  5971148455,
  1728748135,
  5971148456,
  1728748197,
  1728748192,
  3819814360,
  2407759972,
  2407759961,
  5971148457,
  5971148458,
  2407759962,
  2407759901,
  2407760025,
  2407760027,
  5107012887,
  2407760055,
  2407759940,
  5968252774,
  5968252775,
  2407760153,
  5107013328,
  5107013327,
  2407760020,
  5104901156,
  2407759985,
  2407760048,
  2407760026,
  2407760033,
  2407759913,
  2407760073,
  2407760070],
 <shapely.geometry.linestring.LineString at 0x120b8da50>,
 'tertiary']

In [7]:
#h.highways[2][2].length

In [8]:
#list(h.highways[0][2].coords)

In [9]:
#list(h.highways[2][2].coords)[2]

In [10]:
start_time = time.time()

all_nodes = []
all_edges = []

for x in h.highways:
    for n_idx in range(0, (len(x[1]) - 1)):
        try:
            osm_id_from = x[1][n_idx].ref
        except:
            osm_id_from = x[1][n_idx]
        try:
            osm_id_to   = x[1][n_idx+1].ref
        except:
            osm_id_to   = x[1][n_idx+1]
        try:
            osm_coords_from = list(x[2].coords)[n_idx]
            #print(osm_coords_from[0])
            #create a node
            all_nodes.append([osm_id_from, { 'x' : osm_coords_from[0], 'y' : osm_coords_from[1] }])
            osm_coords_to = list(x[2].coords)[n_idx+1]
            #print(n_idx)
            #print(len(x[1]) - 1)
            if n_idx == (len(x[1]) - 2):
                #print('last element')
                #print(osm_coords_to)
                #create a node
                all_nodes.append([osm_id_to, { 'x' : osm_coords_to[0], 'y' : osm_coords_to[1]} ])
            edge = LineString([osm_coords_from, osm_coords_to])
            attr = {'osm_id':x[0], 'Wkt':edge, 'length':edge.length, 'infra_type':x[3]}
            #Create an edge from the list of nodes in both directions
            all_edges.append([osm_id_from, osm_id_to, attr])
            all_edges.append([osm_id_to, osm_id_from, attr])
        except:
            logging.warning(f"Error adding edge between nodes {osm_id_from} and {osm_id_to}")

end_time = time.time()
print(end_time - start_time)

1.2098300457000732


In [11]:
#all_nodes[:10]

In [12]:
#all_edges[:10]

In [13]:
G = nx.MultiDiGraph()
G.add_nodes_from(all_nodes)
G.add_edges_from(all_edges)

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [14]:
gn.example_edge(G)

(3753591216, 3237363040, {'osm_id': 160900790, 'Wkt': <shapely.geometry.linestring.LineString object at 0x120e1f690>, 'length': 0.0002047956298348534, 'infra_type': 'trunk'})


In [15]:
gn.example_node(G)

(3753591216, {'x': 80.5002662, 'y': 8.7589551})


In [16]:
edges_gdf = gn.edge_gdf_from_graph(G)

In [17]:
len(edges_gdf)

55662

In [18]:
nodes_gdf = gn.node_gdf_from_graph(G)

## Mapbox Traffic Data

In [19]:
in_folder = "./osm"
traffic_csv = os.path.join(in_folder, "1233300-Asia-Colombo.csv")

In [20]:
# read in the traffic_csv
traffic = pd.read_csv(traffic_csv, header = None)
traffic.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,1148494884,4177608798,37,37,37,37,37,37,37,37,...,37,37,37,37,37,37,37,37,37,37
1,1148495298,4137314867,57,57,57,57,57,57,57,57,...,57,57,57,57,57,57,57,57,57,57
2,1242700523,6537570627,60,60,60,60,60,60,60,60,...,60,60,60,60,60,60,60,60,60,60
3,1242730766,3377418986,46,46,46,46,46,46,46,46,...,46,46,46,46,46,46,46,46,46,46
4,1243299175,3805435746,40,40,40,40,40,40,40,40,...,40,40,40,40,40,40,40,40,40,40


In [21]:
traffic

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,1148494884,4177608798,37,37,37,37,37,37,37,37,...,37,37,37,37,37,37,37,37,37,37
1,1148495298,4137314867,57,57,57,57,57,57,57,57,...,57,57,57,57,57,57,57,57,57,57
2,1242700523,6537570627,60,60,60,60,60,60,60,60,...,60,60,60,60,60,60,60,60,60,60
3,1242730766,3377418986,46,46,46,46,46,46,46,46,...,46,46,46,46,46,46,46,46,46,46
4,1243299175,3805435746,40,40,40,40,40,40,40,40,...,40,40,40,40,40,40,40,40,40,40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24246,849231590,2920467254,17,17,17,17,17,17,17,17,...,17,17,17,17,17,17,17,17,17,17
24247,901010011,2058634444,47,47,47,47,47,47,47,47,...,47,47,47,47,47,47,47,47,47,47
24248,901010134,3377392482,61,61,61,61,61,61,61,61,...,61,61,61,61,61,61,61,61,61,61
24249,904233206,1242795139,24,24,24,24,24,24,24,24,...,24,24,24,24,24,24,24,24,24,24


In [22]:
def get_speeds(x):
    ''' Return Min, Max, and Mean speed '''
    x_vals = x[2:]
    return([min(x_vals), max(x_vals), np.mean(x_vals)]) #, np.argmax(x_vals)
    
traffic_vals = traffic.apply(lambda x: get_speeds(x), axis = 1, result_type = "expand")
traffic_vals.columns = ['min_speed','max_speed','mean_speed']

In [23]:
traffic_simplified = traffic.loc[:,[0,1]]
traffic_simplified.columns = ['FROM_NODE', "TO_NODE"]
traffic_simplified = traffic_simplified.join(traffic_vals)

In [24]:
traffic_simplified.head()

Unnamed: 0,FROM_NODE,TO_NODE,min_speed,max_speed,mean_speed
0,1148494884,4177608798,31.0,38.0,36.866071
1,1148495298,4137314867,57.0,57.0,57.0
2,1242700523,6537570627,60.0,60.0,60.0
3,1242730766,3377418986,46.0,46.0,46.0
4,1243299175,3805435746,40.0,40.0,40.0


# Add the traffic speeds to the edges

In [25]:
edges_gdf.loc[edges_gdf['stnode'] == 4965331097]

Unnamed: 0,stnode,endnode,Wkt,infra_type,osm_id,length,geometry


In [26]:
traffic_simplified.loc[traffic_simplified['FROM_NODE'] == 4965331097]

Unnamed: 0,FROM_NODE,TO_NODE,min_speed,max_speed,mean_speed


In [27]:
attributed_edges = edges_gdf.merge(traffic_simplified, left_on=['stnode','endnode'], right_on=['FROM_NODE','TO_NODE'])

In [28]:
attributed_edges

Unnamed: 0,stnode,endnode,Wkt,infra_type,osm_id,length,geometry,FROM_NODE,TO_NODE,min_speed,max_speed,mean_speed
0,3753591216,3237363040,"LINESTRING (80.5002662 8.7589551, 80.5003347 8...",trunk,160900790,0.000205,"LINESTRING (80.50027 8.75896, 80.50033 8.75876)",3753591216,3237363040,26.0,26.0,26.000000
1,3237363040,3753591216,"LINESTRING (80.5002662 8.7589551, 80.5003347 8...",trunk,160900790,0.000205,"LINESTRING (80.50033 8.75876, 80.50027 8.75896)",3237363040,3753591216,24.0,24.0,24.000000
2,3237363040,1728748209,"LINESTRING (80.5003347 8.7587621, 80.5008218 8...",trunk,160900790,0.001516,"LINESTRING (80.50033 8.75876, 80.50082 8.75733)",3237363040,1728748209,26.0,26.0,26.000000
3,1728748209,3237363040,"LINESTRING (80.5003347 8.7587621, 80.5008218 8...",trunk,160900790,0.001516,"LINESTRING (80.50082 8.75733, 80.50033 8.75876)",1728748209,3237363040,24.0,24.0,24.000000
4,1728748209,1728780068,"LINESTRING (80.5008218 8.757326000000001, 80.5...",trunk,160900790,0.000199,"LINESTRING (80.50082 8.75733, 80.50088 8.75713)",1728748209,1728780068,26.0,26.0,26.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
1155,7085697361,7085697360,"LINESTRING (80.4981901 8.7538576, 80.498038500...",trunk,758655926,0.000406,"LINESTRING (80.49819 8.75386, 80.49804 8.75348)",7085697361,7085697360,16.0,27.0,19.472222
1156,7085697354,7085697355,"LINESTRING (80.4975168 8.751997899999999, 80.4...",trunk,758655926,0.001154,"LINESTRING (80.49752 8.75200, 80.49711 8.75092)",7085697354,7085697355,35.0,43.0,38.049603
1157,3753593912,1728748120,"LINESTRING (80.49811889999999 8.7538819, 80.49...",trunk,758655927,0.000339,"LINESTRING (80.49812 8.75388, 80.49821 8.75421)",3753593912,1728748120,9.0,34.0,21.283730
1158,3753591553,1729992676,"LINESTRING (80.4990105 8.7565451, 80.4998964 8...",trunk,758655928,0.002517,"LINESTRING (80.49990 8.75890, 80.49901 8.75655)",3753591553,1729992676,27.0,48.0,36.921627


In [30]:
# For a nx.MultiGraph you need an extra index going from 0 to n where n is the number of edges between the two nodes.
#G[1723379500][1723379502][0]['mapbox_traffic_speed'] = 55

In [31]:
#G[1723379502][1723379500]

In [32]:
#G[1723379500][1723379502]

In [33]:
def add_edge_attribute(x):
    #print(x['osm_id'])
    #print(int(x['mean_speed']))
    G[x['stnode']][x['endnode']][0]['mapbox_traffic_speed'] = int(x['mean_speed'])
    G[x['endnode']][x['stnode']][0]['mapbox_traffic_speed'] = int(x['mean_speed'])
    
attributed_edges.apply(lambda x: add_edge_attribute(x), axis = 1)

0       None
1       None
2       None
3       None
4       None
        ... 
1155    None
1156    None
1157    None
1158    None
1159    None
Length: 1160, dtype: object

In [34]:
#G[1723379500][1723379502]

# Export edges as shapefile to visualize

In [35]:
edge_gdf_w_traffic = gn.edge_gdf_from_graph(G)

In [36]:
edge_gdf_w_traffic

Unnamed: 0,stnode,endnode,infra_type,length,mapbox_traffic_speed,Wkt,osm_id,geometry
0,3753591216,3237363040,trunk,0.000205,24.0,"LINESTRING (80.5002662 8.7589551, 80.5003347 8...",160900790,"LINESTRING (80.50027 8.75896, 80.50033 8.75876)"
1,3753591216,6266383668,trunk,0.000226,,"LINESTRING (80.5002662 8.7589551, 80.5001539 8...",412190916,"LINESTRING (80.50027 8.75896, 80.50015 8.75915)"
2,3753591216,3237363045,trunk,0.000519,17.0,"LINESTRING (80.5001501 8.759460499999999, 80.5...",412190918,"LINESTRING (80.50027 8.75896, 80.50015 8.75946)"
3,3237363040,3753591216,trunk,0.000205,24.0,"LINESTRING (80.5002662 8.7589551, 80.5003347 8...",160900790,"LINESTRING (80.50033 8.75876, 80.50027 8.75896)"
4,3237363040,1728748209,trunk,0.001516,24.0,"LINESTRING (80.5003347 8.7587621, 80.5008218 8...",160900790,"LINESTRING (80.50033 8.75876, 80.50082 8.75733)"
...,...,...,...,...,...,...,...,...
55657,7299237383,7299237382,residential,0.000102,,"LINESTRING (80.4404419 8.7595008, 80.4404888 8...",781796386,"LINESTRING (80.44044 8.75950, 80.44049 8.75959)"
55658,7299237382,7299237383,residential,0.000102,,"LINESTRING (80.4404419 8.7595008, 80.4404888 8...",781796386,"LINESTRING (80.44049 8.75959, 80.44044 8.75950)"
55659,7299237382,7299237381,residential,0.000122,,"LINESTRING (80.4404888 8.7595913, 80.440584599...",781796386,"LINESTRING (80.44049 8.75959, 80.44058 8.75967)"
55660,7299237381,7299237382,residential,0.000122,,"LINESTRING (80.4404888 8.7595913, 80.440584599...",781796386,"LINESTRING (80.44058 8.75967, 80.44049 8.75959)"


In [37]:
# Therefore reduce number of columns, cannot have more than one column with geometry if saving to shapefile
edge_gdf_w_traffic_export = edge_gdf_w_traffic[['stnode','endnode','length','infra_type','osm_id','mapbox_traffic_speed','geometry']]

In [38]:
edge_gdf_w_traffic_export.to_file(driver = 'ESRI Shapefile', filename = './vavuniya_hwy_w_traffic.shp')