# Step 2: Assigning Speed to Traffic Data

In [33]:
import os, sys, time, importlib
import osmnx

import geopandas as gpd
import pandas as pd
import networkx as nx
import numpy as np
sys.path.append("../../../GOSTnets")
import GOSTnets as gn

# pip install osmium
# import osmium, logging
# import shapely.wkb as wkblib

from shapely.geometry import LineString, Point

In [34]:
# This is a Jupyter Notebook extension which reloads all of the modules whenever you run the code
# This is optional but good if you are modifying and testing source code
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
from GOSTnets.load_traffic2 import *

In [36]:
#read_nodes = pd.read_csv('./vavuniya_unclean_nodes.csv')

In [37]:
#read_nodes[:5]

In [38]:
# Read the pickled road-network graph produced earlier in the workflow.
# NOTE: unpickling can execute arbitrary code - only load pickle files from a trusted source.
# NOTE(review): nx.read_gpickle is only available in NetworkX 2.x (it was removed in 3.0);
# this notebook targets NetworkX 2.4 - confirm the environment before re-running.
G = nx.read_gpickle('./sri_lanka_unclean2.pickle')

In [39]:
len(G.edges)

1930664

In [40]:
G[1650104033][6236632580]

AtlasView({0: {'osm_id': 4860427, 'geometry': <shapely.geometry.linestring.LineString object at 0x7fcd85de0d90>, 'infra_type': 'trunk', 'min_speed': 23.0, 'max_speed': 37.0, 'mean_speed': 26.793650793650794, 'length': 0.009757789924575978}})

In [41]:
gn.example_edge(G, 5)

(1650104033, 6236632580, {'osm_id': 4860427, 'geometry': <shapely.geometry.linestring.LineString object at 0x7fcd85de0d90>, 'infra_type': 'trunk', 'min_speed': 23.0, 'max_speed': 37.0, 'mean_speed': 26.793650793650794, 'length': 0.009757789924575978})
(1650104033, 970058024, {'osm_id': 152177791, 'geometry': <shapely.geometry.linestring.LineString object at 0x7fcd85de0f50>, 'infra_type': 'trunk', 'min_speed': 23.0, 'max_speed': 37.0, 'mean_speed': 26.793650793650794, 'length': 0.0160020254611234})
(6236632580, 6236632579, {'osm_id': 4860427, 'geometry': <shapely.geometry.linestring.LineString object at 0x7fcd85de0fd0>, 'infra_type': 'trunk', 'min_speed': 23.0, 'max_speed': 37.0, 'mean_speed': 26.793650793650794, 'length': 0.005235256075996137})
(6236632580, 1650104033, {'osm_id': 4860427, 'geometry': <shapely.geometry.linestring.LineString object at 0x7fcd85de0d90>, 'infra_type': 'trunk', 'min_speed': 23.0, 'max_speed': 37.0, 'mean_speed': 26.793650793650794, 'length': 0.00975778992457

## Find out average speeds for highway classes with Mapbox data

In [42]:
def find_traffic_hwy_avg_speeds_by_class(G, speed_tag='mean_speed'):
    """
    Compute the average traffic speed of each highway class found in the graph.

    Only edges that carry the ``speed_tag`` attribute are considered. A class is
    listed as soon as one of its edges has that attribute; its average is taken
    over the strictly positive speed values only, and is 0 when it has none.
    When an edge's 'infra_type' is a list, its first element is used as the class.

    :param G: a networkx DiGraph or MultiDiGraph whose edges have an 'infra_type' attribute
    :param speed_tag: specifies which edge attribute represents traffic speed
    :returns: a dictionary mapping each highway class to its average speed
              (classes appear in the order they are first encountered)
    :raises ValueError: if G is not a DiGraph or MultiDiGraph
    """

    if not isinstance(G, (nx.MultiDiGraph, nx.DiGraph)):
        raise ValueError('Expecting a networkx DiGraph or MultiDiGraph for G!')

    # Running sum / count per class, filled in a single pass over the edges.
    # The graph is only read, so no copy is needed.
    speed_totals = {}
    speed_counts = {}

    for _, _, data in G.edges(data=True):
        # Honour speed_tag everywhere (previously 'mean_speed' was hard-coded here).
        if speed_tag not in data:
            continue

        road_class = data['infra_type']
        if isinstance(road_class, list):
            road_class = road_class[0]

        # Register the class even if this edge's speed is not usable,
        # so that it is reported with an average of 0.
        speed_totals.setdefault(road_class, 0)
        speed_counts.setdefault(road_class, 0)

        speed = data[speed_tag]
        # NaN and non-positive speeds are excluded from the average
        if speed > 0:
            speed_totals[road_class] += speed
            speed_counts[road_class] += 1

    print(f'print traffic_class_list: {list(speed_totals)}')
    # The per-class list of every speed value is no longer printed: on a
    # country-sized graph that dump contained millions of numbers.

    return {
        road_class: (speed_totals[road_class] / speed_counts[road_class]
                     if speed_counts[road_class] > 0 else 0)
        for road_class in speed_totals
    }

In [43]:
#average_speeds_per_class_dict = find_traffic_hwy_avg_speeds_by_class(G, speed_tag='mean_speed')

In [44]:
#average_speeds_per_class_dict

In [45]:
# Fallback driving speed per OSM highway class, based on existing speed limit tags.
# Values are presumably in km/h (the speed-dictionary example further down is annotated "kmph").
speed_dict_sri_lanka_max_speeds = {
    'motorway': 100,
    'motorway_link': 35,
    'trunk': 60,
    'trunk_link': 50,
    'primary': 60,
    'primary_link': 50,
    'secondary': 50,
    'secondary_link': 45,
    'tertiary': 40,
    'tertiary_link': 40,
    'residential': 30,
    'unclassified': 25,
    'track': 25,
    'service': 20,
}

In [46]:
# Fallback driving speed per OSM highway class, based on the mean of the
# real-world Mapbox traffic speeds observed for each class.
# Values are presumably in km/h (the speed-dictionary example further down is annotated "kmph").
speed_dict_sri_lanka_mapbox_mean_speeds = {
    'motorway': 85,
    'motorway_link': 30,
    'trunk': 40,
    'trunk_link': 23,
    'primary': 35,
    'primary_link': 14,
    'secondary': 34,
    'secondary_link': 9,
    'tertiary': 25,
    'tertiary_link': 13,
    'residential': 20,
    'unclassified': 20,
    'track': 20,
    'service': 10,
}

In [47]:
# The function below was modified to compute travel time from the Mapbox traffic speed when an edge has one.
# It also writes a new 'speed' edge attribute: the Mapbox traffic speed where it exists, otherwise the
# class-based speed taken from the input speed dictionary.

In [48]:
def convert_network_to_time_w_traffic(G, distance_tag, graph_type = 'drive', road_col = 'highway', traffic_col = 'mean_speed', speed_dict = speed_dict_sri_lanka_max_speeds, walk_speed = 4.5, factor = 1, default = None, improvement_cost_per_km = None):
    """
    Function for adding a travel-time value (in seconds) to edge dictionaries,
    preferring an observed traffic speed over a class-based speed where one exists.

    For 'drive' graphs every edge also receives a 'speed' attribute: the value of
    ``traffic_col`` when it is present and positive, otherwise the ``speed_dict``
    entry for the edge's road class. Every edge receives 'length', 'time' and 'mode'.

    DEFAULT SPEEDS (used only when speed_dict is explicitly passed as None):

               speed_dict = {
               'residential': 20,  # kmph
               'primary': 40, # kmph
               'primary_link':35,
               'motorway':50,
               'motorway_link': 45,
               'trunk': 40,
               'trunk_link':35,
               'secondary': 30,
               'secondary_link':25,
               'tertiary':30,
               'tertiary_link': 25,
               'unclassified':20
               }

    :param G: a networkx DiGraph or MultiDiGraph
    :param distance_tag: the key in the edge dictionary for the field currently
               containing the distance. Time is computed as distance / speed (in hours),
               so with speeds in km/h the distance is expected to be in km.
    :param graph_type: set to either 'drive' or 'walk'. IF walk - will set time = walking time across all segment, using the supplied walk_speed. IF drive - will use the traffic speed or the speed dictionary, as described above.
    :param road_col: key for the road type in the edge data dictionary. If the value is a list, its first element is used.
    :param traffic_col: key for the observed traffic speed in the edge data dictionary. Edges where it is missing, None, NaN or not positive fall back to speed_dict.
    :param speed_dict: speed dictionary to use. If None, reverts to the defaults above.
    :param walk_speed: specify a walkspeed in km/h
    :param factor: multiplier applied to the distance before it is stored under 'length'.
               NOTE(review): the travel time and improvement cost are computed from the
               unscaled distance, so factor does not affect them - confirm this is intended.
    :param default: if highway type not in the speed_dict, use this road class as an in-fill value for speed. If None, 20 is used.
    :param improvement_cost_per_km: if given, edges of class secondary or lower get an 'imp_cost' attribute equal to distance * improvement_cost_per_km. If None, no 'imp_cost' is written.
    :returns: A copy of the graph with the new edge attributes; the input graph is not modified.
    :raises ValueError: if G is not a DiGraph / MultiDiGraph, or graph_type is not 'walk' or 'drive'
    """

    import warnings

    if not isinstance(G, (nx.MultiDiGraph, nx.DiGraph)):
        raise ValueError('Expecting a networkx DiGraph or MultiDiGraph for G!')

    # Fail fast, before copying a potentially very large graph
    if graph_type not in ('walk', 'drive'):
        raise ValueError('Expecting either a graph_type of "walk" or "drive"!')

    # Check only the first edge to see if a (non-zero) 'time' attribute already exists.
    # next(iter(...)) avoids building a list of every edge just to look at one.
    first_edge = next(iter(G.edges(data = True)), None)
    if first_edge is not None and first_edge[2].get('time'):
        warnings.warn('Aree you sure you want to convert length to time? This graph already has a time attribute')

    if speed_dict is None:
        speed_dict = {
        'residential': 20,  # kmph
        'primary': 40, # kmph
        'primary_link':35,
        'motorway':50,
        'motorway_link': 45,
        'trunk': 40,
        'trunk_link':35,
        'secondary': 30,
        'secondary_link':25,
        'tertiary':30,
        'tertiary_link': 25,
        'unclassified':20
        }

    # road classes (secondary or lower) for which an improvement cost is calculated
    improvable_classes = ('secondary', 'secondary_link', 'tertiary', 'tertiary_link', 'unclassified', 'residential')

    G_adj = G.copy()

    for u, v, data in G_adj.edges(data=True):

        # the default length in the graph should be in km
        orig_len = data[distance_tag]
        data['length'] = orig_len * factor

        # get appropriate speed
        if graph_type == 'walk':
            speed = walk_speed

        else:
            highwayclass = data[road_col]
            # Normalise list-valued classes up front so that both the speed lookup
            # and the improvement-cost check below see a single class name.
            if isinstance(highwayclass, list):
                highwayclass = highwayclass[0]

            # Edges without traffic data simply fall back to the class-based speed.
            # A NaN speed also falls back, because NaN > 0 is False.
            traffic_speed = data.get(traffic_col)

            if traffic_speed is not None and traffic_speed > 0:
                speed = traffic_speed
            elif highwayclass in speed_dict:
                speed = speed_dict[highwayclass]
            elif default is None:
                speed = 20
            else:
                speed = speed_dict[default]
            data['speed'] = speed

            # calculate road improvement costs for secondary or lower highways
            # (skipped when no unit cost was supplied)
            if improvement_cost_per_km is not None and highwayclass in improvable_classes:
                data['imp_cost'] = orig_len * improvement_cost_per_km

        # perform conversion: distance / speed gives hours, then convert to seconds
        hours = orig_len / speed
        in_seconds = hours * 60 * 60
        data['time'] = in_seconds

        # And state the mode, too
        data['mode'] = graph_type

    return G_adj

In [49]:
# Build a copy of G whose edges carry 'speed', 'time' (seconds) and 'mode', plus 'imp_cost' on
# secondary-and-lower road classes. Edges with a positive Mapbox 'mean_speed' use that speed;
# the others fall back to the per-class Mapbox mean speeds defined above.
# NOTE(review): 186800 is the road improvement cost per km - currency and source are not stated here; confirm.
G_time = convert_network_to_time_w_traffic(G, distance_tag = 'length', road_col = 'infra_type', speed_dict = speed_dict_sri_lanka_mapbox_mean_speeds, factor = 1, improvement_cost_per_km = 186800)

In [50]:
gn.example_edge(G_time, 15)

(1650104033, 6236632580, {'osm_id': 4860427, 'geometry': <shapely.geometry.linestring.LineString object at 0x7fcd85de0d90>, 'infra_type': 'trunk', 'min_speed': 23.0, 'max_speed': 37.0, 'mean_speed': 26.793650793650794, 'length': 0.009757789924575978, 'speed': 26.793650793650794, 'time': 1.3110585040840237, 'mode': 'drive'})
(1650104033, 970058024, {'osm_id': 152177791, 'geometry': <shapely.geometry.linestring.LineString object at 0x7fcd85de0f50>, 'infra_type': 'trunk', 'min_speed': 23.0, 'max_speed': 37.0, 'mean_speed': 26.793650793650794, 'length': 0.0160020254611234, 'speed': 26.793650793650794, 'time': 2.150035174515869, 'mode': 'drive'})
(6236632580, 6236632579, {'osm_id': 4860427, 'geometry': <shapely.geometry.linestring.LineString object at 0x7fcd85de0fd0>, 'infra_type': 'trunk', 'min_speed': 23.0, 'max_speed': 37.0, 'mean_speed': 26.793650793650794, 'length': 0.005235256075996137, 'speed': 26.793650793650794, 'time': 0.7034099988364477, 'mode': 'drive'})
(6236632580, 1650104033,

In [51]:
#save graph again
#gn.save(G_time,'sri_lanka_unclean2_w_time','./', pickle = True, edges = True, nodes = True)

## Export edges as shapefile to visualize

In [52]:
# Turn the edges of the time-annotated graph into a GeoDataFrame (one row per edge,
# with start/end node, edge attributes and geometry) for inspection and export.
edge_gdf_w_traffic = gn.edge_gdf_from_graph(G_time)

In [53]:
edge_gdf_w_traffic.sort_values(by=['speed'])

Unnamed: 0,stnode,endnode,infra_type,osm_id,max_speed,speed,mean_speed,mode,imp_cost,length,min_speed,time,geometry
1111201,6524681180,3993490674,tertiary,396537099,1.0,1.0,1.0,drive,5796.270951,0.031029,1.0,111.705436,"LINESTRING (79.85570 6.91071, 79.85598 6.91075)"
1576860,5289709063,5289709062,unclassified,547442169,1.0,1.0,1.0,drive,999.132426,0.005349,1.0,19.255229,"LINESTRING (81.01944 6.77668, 81.01947 6.77664)"
1576859,5289709063,5289709064,unclassified,547442169,1.0,1.0,1.0,drive,663.055052,0.003550,1.0,12.778363,"LINESTRING (81.01947 6.77664, 81.01949 6.77662)"
1137421,3290651566,5564028576,residential,401386826,1.0,1.0,1.0,drive,947.782809,0.005074,1.0,18.265622,"LINESTRING (80.60600 7.26626, 80.60597 7.26630)"
1576858,5289709062,5289709061,unclassified,547442169,1.0,1.0,1.0,drive,2212.282617,0.011843,1.0,42.634997,"LINESTRING (81.01938 6.77677, 81.01944 6.77668)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1468672,5029174238,5029174237,residential,514866679,101.0,101.0,101.0,drive,3733.043235,0.019984,101.0,0.712307,"LINESTRING (80.20443 6.14638, 80.20432 6.14652)"
1468673,5029174239,5029174240,residential,514866679,101.0,101.0,101.0,drive,6529.925700,0.034957,101.0,1.245984,"LINESTRING (80.20414 6.14679, 80.20396 6.14705)"
1468674,5029174239,5029174238,residential,514866679,101.0,101.0,101.0,drive,6717.207052,0.035959,101.0,1.281719,"LINESTRING (80.20432 6.14652, 80.20414 6.14679)"
1468661,5029174233,5029174234,residential,514866679,101.0,101.0,101.0,drive,6478.568752,0.034682,101.0,1.236185,"LINESTRING (80.20513 6.14578, 80.20488 6.14597)"


In [54]:
#edge_gdf_w_traffic.to_file(driver = 'ESRI Shapefile', filename = './sri_lanka_hwy_w_traffic_and_time2.shp')

### Now extract the largest strongly connected sub-graph and compare it with the full graph

In [55]:
# Before: print summary info for the full graph (with travel times),
# prior to extracting the largest sub-graph
print(nx.info(G_time))

Name: 
Type: MultiDiGraph
Number of nodes: 1052094
Number of edges: 1930664
Average in degree:   1.8351
Average out degree:   1.8351


In [56]:
# Identify only the largest strongly connected sub-graph (largest = most edges)

# compatible with NetworkX 2.4
# Scan the components as lightweight subgraph *views* and copy only the winner.
# Copying every component into a list up front keeps a second copy of the whole
# network in memory, which is very costly on a graph of this size.
max_graph = None
max_edges = 0
for component_nodes in nx.strongly_connected_components(G_time):
    component_view = G_time.subgraph(component_nodes)
    component_edge_count = component_view.number_of_edges()
    if component_edge_count > max_edges:
        max_edges = component_edge_count
        max_graph = component_view

# detach the winning view from G_time so that it is an independent graph
if max_graph is not None:
    max_graph = max_graph.copy()

# set your graph equal to the largest sub-graph
G_largest = max_graph

In [57]:
# print info about the largest sub-graph
print(nx.info(G_largest))

Name: 
Type: MultiDiGraph
Number of nodes: 820346
Number of edges: 1675316
Average in degree:   2.0422
Average out degree:   2.0422


In [58]:
# re-save
#edge_gdf_w_traffic_largest = gn.edge_gdf_from_graph(G_largest)

In [59]:
#edge_gdf_w_traffic_largest.to_file(driver = 'ESRI Shapefile', filename = './sri_lanka_hwy_w_traffic_and_time2_largest.shp')

In [60]:
# Save the largest strongly connected sub-graph (with travel times) to the current directory.
# Only the pickle output is requested; the edge and node exports are switched off.
gn.save(G_largest,'sri_lanka_unclean2_w_time_largest_20200616_traffic_mean_speeds','./', pickle = True, edges = False, nodes = False)