# Step 2: Assigning Speed to Traffic Data

## This notebook takes in the processed graph that was projected in Step 1, and it will add assign speeds to edges not covered with Mapbox traffic based on an input dictionary, add time values to edges, and add a potential improvement cost to edges.

In [12]:
import os, sys, time, importlib
import osmnx

import geopandas as gpd
import pandas as pd
import networkx as nx
import numpy as np
sys.path.append("../../../GOSTnets")
import GOSTnets as gn

from shapely.geometry import LineString, Point

In [13]:
# This is a Jupyter Notebook extension which reloads all of the modules whenever you run the code
# This is optional but good if you are modifying and testing source code
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# read graph
G = nx.read_gpickle('../mapbox_traffic/sri_lanka_processed_graph_cleaned_part1_proj.pickle')

In [15]:
len(G.edges)

1342262

In [41]:
gn.example_edge(G, 25)

(150994974, 255476965, {'osmid': 8095866, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.103734, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46908>})
(150994974, 102757766, {'osmid': 23592598, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.260001, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46988>})
(150994974, 102760034, {'osmid': 775561264, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.48581299999999994, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46A48>})
(150994974, 31363884, {'osmid': 775561264, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.259606, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46B48>})
(150994986, 102758211, {'osmid': 11528635, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.248351, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46B88>})
(150994986, 1027510

In [38]:
gn.example_node(G, 5)

(150994974, {'x': 412759.88663883344, 'y': 669542.0902890813, 'lon': 80.211648, 'lat': 6.0567583})
(150994986, {'x': 413334.91795846715, 'y': 669824.7266039154, 'lon': 80.2168403, 'lat': 6.0593224})
(4211081678, {'x': 391406.65301630425, 'y': 787673.9289258035, 'lon': 80.0166023, 'lat': 7.1249196})
(4211081685, {'x': 391249.6302768491, 'y': 787584.79544537, 'lon': 80.0151822, 'lat': 7.1241104})
(5049942656, {'x': 379847.1382586097, 'y': 758570.4046335119, 'lon': 79.9125431, 'lat': 6.8614634})


### Optional step: Find out average speeds for highway classes with Mapbox data
This function below will calculate the average speeds for all highways that have traffic information per highway classes

In [48]:
def find_traffic_hwy_avg_speeds_by_class(G, speed_tag='traffic_mean_speed'):
    """
    Function for finding out the different highway classes in the graph and their respective lengths

    :param G: a graph object
    :param speed_tag: specifies which edge attribute represents traffic speed
    :returns: a dictionary that has each class and the total distance per class
    """

    if type(G) == nx.classes.multidigraph.MultiDiGraph or type(G) == nx.classes.digraph.DiGraph:
        pass
    else:
        raise ValueError('Expecting a graph or geodataframe for G!')

    G_adj = G.copy()

    traffic_class_list = []

    for u, v, data in G_adj.edges(data=True):
        #print(data['infra_type'])
        if speed_tag in data:
            if type(data['infra_type']) == list:
                    if data['infra_type'][0] not in traffic_class_list:
                        traffic_class_list.append(data['infra_type'][0])
            else:
                if data['infra_type'] not in traffic_class_list:
                    traffic_class_list.append(data['infra_type'])
    
    class_dict = { i : [] for i in traffic_class_list }
    
    print(f'print traffic_class_list: {traffic_class_list}')

    for i in traffic_class_list:
        for u, v, data in G_adj.edges(data=True):
            if speed_tag in data:
                if data[speed_tag] > 0:
                    if type(data['infra_type']) == list:
                        if data['infra_type'][0] == i:
                            class_dict[i].append(data[speed_tag])
                    else:
                        if data['infra_type'] == i:
                            class_dict[i].append(data[speed_tag])
                        
    print(f'print class_dict: {class_dict}')
    
    def Average(lst): 
        if len(lst) > 0:
            return sum(lst) / len(lst)
        else:
            return 0
    
    class_dict_copy = class_dict.copy()
    
    for key in class_dict_copy:
        class_dict_copy[key] = Average(class_dict_copy[key])

    return class_dict_copy

In [49]:
average_speeds_per_class_dict = find_traffic_hwy_avg_speeds_by_class(G, speed_tag='traffic_mean_speed')

print traffic_class_list: ['residential', 'primary', 'trunk', 'secondary', 'secondary_link', 'trunk_link', 'tertiary', 'motorway', 'unclassified', 'primary_link', 'motorway_link', 'living_street', 'path', 'service', 'footway', 'track', 'tertiary_link', 'road']


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [50]:
average_speeds_per_class_dict

{'residential': 19.719399798800318,
 'primary': 35.22479615651459,
 'trunk': 40.47037202657783,
 'secondary': 34.267525102442306,
 'secondary_link': 8.711783717679946,
 'trunk_link': 23.067515119834503,
 'tertiary': 25.51102660110717,
 'motorway': 85.68174309720709,
 'unclassified': 22.34259858136339,
 'primary_link': 14.266634674086603,
 'motorway_link': 31.09769383036527,
 'living_street': 8.122896424194643,
 'path': 12.259244227994229,
 'service': 10.27858925385402,
 'footway': 5.268601190476191,
 'track': 18.02662037037037,
 'tertiary_link': 12.891975308641976,
 'road': 16.0}

## convert from meters to km

In [20]:
for u, v, data in G.edges(data=True):
    data['length'] = data['length'] / 1000

In [21]:
gn.example_edge(G, 5)

(150994974, 255476965, {'osmid': 8095866, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.103734, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46908>})
(150994974, 102757766, {'osmid': 23592598, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.260001, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46988>})
(150994974, 102760034, {'osmid': 775561264, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.48581299999999994, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46A48>})
(150994974, 31363884, {'osmid': 775561264, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.259606, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46B48>})
(150994986, 102758211, {'osmid': 11528635, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.248351, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46B88>})


## Speed Dictionaries
The mapbox speed dictionary was generated using the function above. The max speeds dictionary was generated by opening the edges in ArcGIS. The analyst can choose which dictionary to use.

In [22]:
# speed dict based on mean of mapbox real-world speeds per class
speed_dict_sri_lanka_mapbox_mean_speeds = {
'motorway':85 ,
'motorway_link':30 ,
'trunk': 40,
'trunk_link': 23,
'primary': 35,
'primary_link': 14,
'secondary': 34,
'secondary_link': 9,
'tertiary':25,
'tertiary_link': 13,
'residential': 20,
'unclassified': 20,
'track': 20,
'service': 10
}

In [23]:
# speed dict based on existing speed limit tags
speed_dict_sri_lanka_max_speeds = {
'motorway':100 ,
'motorway_link':35 ,
'trunk': 60,
'trunk_link': 50,
'primary': 60,
'primary_link': 50,
'secondary': 50,
'secondary_link': 45,
'tertiary':40,
'tertiary_link': 40,
'residential': 30,
'unclassified': 25,
'track': 25,
'service': 20
}

## Function to add time to edges
The function below was based off of a GOSTnets function. It was modified to calculate the correct time based on whether a Mapbox speed exists or not. Also, it creates a new 'speed' attribute that is either based on the mapbox traffic speed, or the speed provided by the input dictionary if the mapbox traffic speed does not exist.

In [24]:
def convert_network_to_time_w_traffic(G, distance_tag, graph_type = 'drive', road_col = 'highway', traffic_col = 'traffic_mean_speed', speed_dict = speed_dict_sri_lanka_max_speeds, walk_speed = 4.5, factor = 1, default = None, improvement_cost_per_km = None):
    """
    Function for adding a time value to edge dictionaries. Ensure any GeoDataFrames / graphs are in the same projection before using function, or pass a crs.

    DEFAULT SPEEDS:

               speed_dict = {
               'residential': 20,  # kmph
               'primary': 40, # kmph
               'primary_link':35,
               'motorway':50,
               'motorway_link': 45,
               'trunk': 40,
               'trunk_link':35,
               'secondary': 30,
               'secondary_link':25,
               'tertiary':30,
               'tertiary_link': 25,
               'unclassified':20
               }

    :param G: a graph containing one or more nodes
    :param distance_tag: the key in the dictionary for the field currently
               containing a distance in km
    :param road_col: key for the road type in the edge data dictionary
    :param graph_type: set to either 'drive' or 'walk'. IF walk - will set time = walking time across all segment, using the supplied walk_speed. IF drive - will use a speed dictionary for each road type, or defaults as per the note below.
    :param speed_dict: speed dictionary to use. If not supplied, reverts to
               defaults
    :param walk_speed: specify a walkspeed in km/h
    :param factor: allows you to scale up / down distances if saved in a unit other than meters. Set to 1000 if length in km.
    :param default: if highway type not in the speed_dict, use this road class as an in-fill value for time.
    :returns: The original graph with a new data property for the edges called 'time'
    """

    if type(G) == nx.classes.multidigraph.MultiDiGraph or type(G) == nx.classes.digraph.DiGraph:
        pass
    else:
        raise ValueError('Expecting a graph or geodataframe for G!')

    import warnings

    try:
        # checks the first edge to see if the 'time' attribute already exists
        if list(G.edges(data = True))[0][2]['time']:
          warnings.warn('Are you sure you want to convert length to time? This graph already has a time attribute')
    except:
        pass

    G_adj = G.copy()

    for u, v, data in G_adj.edges(data=True):

        # the default length in the graph should be in km
        orig_len = data[distance_tag]

        # Note that this is a MultiDiGraph so there could
        # be multiple indices here, I naively assume this is not
        # the case
        data['length'] = orig_len * factor
        

        # get appropriate speed limit
        if graph_type == 'walk':
            speed = walk_speed

        elif graph_type == 'drive':

            if speed_dict == None:
                speed_dict = {
                'residential': 20,  # kmph
                'primary': 40, # kmph
                'primary_link':35,
                'motorway':50,
                'motorway_link': 45,
                'trunk': 40,
                'trunk_link':35,
                'secondary': 30,
                'secondary_link':25,
                'tertiary':30,
                'tertiary_link': 25,
                'unclassified':20
                }

            highwayclass = data[road_col]
            
            trafficclass = data.get(traffic_col)

            if trafficclass:
                speed = data[traffic_col]
                data['speed'] = speed
            else:
                if type(highwayclass) == list:
                    highwayclass = highwayclass[0]

                if highwayclass in speed_dict.keys():
                    speed = speed_dict[highwayclass]
                else:
                    if default == None:
                        speed = 20
                    else:
                        speed = speed_dict[default]
                data['speed'] = speed
            
            #calculate road improvement costs for secondary or lower highways
            if highwayclass in ['secondary', 'secondary_link', 'tertiary', 'tertiary_link', 'unclassified', 'residential']:
                data['imp_cost'] = orig_len * improvement_cost_per_km

        else:
            raise ValueError('Expecting either a graph_type of "walk" or "drive"!')

        # perform conversion
        hours = orig_len / speed
        in_seconds = hours * 60 * 60
        data['time'] = in_seconds

        # And state the mode, too
        data['mode'] = graph_type

    return G_adj

### Note: we are using a cost of $186,800 to model improvements per km

In [25]:
G_time = convert_network_to_time_w_traffic(G, distance_tag = 'length', road_col = 'infra_type', speed_dict = speed_dict_sri_lanka_mapbox_mean_speeds, factor = 1, improvement_cost_per_km = 186800)

In [26]:
gn.example_edge(G_time, 15)

(150994974, 255476965, {'osmid': 8095866, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.103734, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46908>, 'speed': 20, 'imp_cost': 19377.5112, 'time': 18.672120000000003, 'mode': 'drive'})
(150994974, 102757766, {'osmid': 23592598, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.260001, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46988>, 'speed': 20, 'imp_cost': 48568.186799999996, 'time': 46.80018, 'mode': 'drive'})
(150994974, 102760034, {'osmid': 775561264, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.48581299999999994, 'geometry': <shapely.geometry.linestring.LineString object at 0x0000016EF8A46A48>, 'speed': 20, 'imp_cost': 90749.86839999999, 'time': 87.44633999999999, 'mode': 'drive'})
(150994974, 31363884, {'osmid': 775561264, 'infra_type': 'residential', 'maxspeed': '30', 'length': 0.259606, 'geometry': <shapely.geometry.linestring.LineSt

In [27]:
#save graph again
#gn.save(G_time,'sri_lanka_unclean2_w_time','./', pickle = True, edges = True, nodes = True)

## Export edges as shapefile to visualize

In [28]:
#edge_gdf_w_traffic = gn.edge_gdf_from_graph(G_time)

In [30]:
#edge_gdf_w_traffic.to_file(driver = 'ESRI Shapefile', filename = './sri_lanka_hwy_w_traffic_and_time2.shp')

### now take the biggest sub-graph and compare

In [31]:
# before
# let's print info on our clean version
print(nx.info(G_time))

Name: 
Type: MultiDiGraph
Number of nodes: 603358
Number of edges: 1342262
Average in degree:   2.2247
Average out degree:   2.2247


In [32]:
# Identify only the largest graph

# compatible with NetworkX 2.4
list_of_subgraphs = list(G_time.subgraph(c).copy() for c in nx.strongly_connected_components(G_time))
max_graph = None
max_edges = 0
for i in list_of_subgraphs:
    if i.number_of_edges() > max_edges:
        max_edges = i.number_of_edges()
        max_graph = i

# set your graph equal to the largest sub-graph
G_largest = max_graph

In [33]:
# print info about the largest sub-graph
print(nx.info(G_largest))

Name: 
Type: MultiDiGraph
Number of nodes: 599739
Number of edges: 1334561
Average in degree:   2.2252
Average out degree:   2.2252


In [34]:
# re-save
#edge_gdf_w_traffic_largest = gn.edge_gdf_from_graph(G_largest, crs = 'EPSG:32644')

In [35]:
#edge_gdf_w_traffic_largest.to_file(driver = 'ESRI Shapefile', filename = './sri_lanka_hwy_w_traffic_and_time2_largest.shp')

## Save your graph

In [36]:
#save graph again
gn.save(G_largest,'../mapbox_traffic/sri_lanka_clean_w_time_largest_mean_speeds','./', pickle = True, edges = False, nodes = False)