### Prepare road network, snap origins and destination points to road network nodes, and compute OD
general steps:
1. add time to edges
2. take only biggest subgraph
3. snap origins and destinations to the road network
4. calculate the OD (origin-destination matrix)

In [88]:
import geopandas as gpd
import os, sys, time
import pandas as pd
sys.path.append(r'../../../GOSTNets/GOSTNets')
import GOSTnet as gn
import importlib
import networkx as nx
import osmnx as ox
from shapely.ops import unary_union
from shapely.wkt import loads
from shapely.geometry import LineString, MultiLineString, Point

Prepare data for the OD matrix:
    1. Create your origins (point) .csv file - be sure to name the column of the unique identifier with "O_ID" and save the coordinates into columns "Lat" / "Lon" 
    2. Create your destinations (point) .csv file - be sure to save the coordinates in columns named "Lat" / "Lon" 

In [89]:
# Folder holding the input origins / destinations .csv files
pth = r'../../Peru_Lima_Optimization/test_data'
# Folder where the snapped points and the OD matrix are written
OD_path = r'../../../../lima_optimization_output'
# Folder and file name of the pickled road network graph
graph_path = r'../../../../lima_optimization_output'
graph_file = r'Lima_processed.pickle'
origins_file = r'origins_VES_CentroidUrbanBlocks.csv' #change with your .csv file name
destinations_file = r'destinations_VES_HealthFacilities.csv'  #change with your .csv file name

#### 1. Read in road network

In [90]:
# Load the pickled road network graph.
# NOTE(review): unpickling can execute arbitrary code - only load graph files from a trusted source.
G = nx.read_gpickle(os.path.join(graph_path, graph_file))

In [91]:
# Sanity check: number of edges in the loaded graph
G.number_of_edges()

21362

In [92]:
# Sanity check: number of nodes in the loaded graph
G.number_of_nodes()

6758

The first cell below contains (commented out) the standard speed limits for each OSM road type. It is recommended to adjust these values if the AOI has specific travel conditions; the second cell below defines the values used for Lima, Peru.

In [93]:
# default speed limits from OSM (kept commented out for reference; not executed)

#sd = {          'residential': 20,  # kmph
#                'primary': 40, # kmph
#                'primary_link':35,
#                'motorway':35,
#                'motorway_link': 25,
#                'trunk': 20,
#                'trunk_link':15,
#                'secondary': 10, # kmph
#                'secondary_link':5,
#                'tertiary':5,
#                'tertiary_link': 5,
#                'unclassified':5
#                }

In [94]:
# Speed limits (km/h) per OSM road type, adjusted for Lima
sd = dict(
    residential=10,
    primary=25,
    primary_link=20,
    motorway=35,
    motorway_link=25,
    trunk=20,
    trunk_link=15,
    secondary=10,
    secondary_link=5,
    tertiary=5,
    tertiary_link=5,
    unclassified=5,
)

In [95]:
# Show one edge to inspect its attributes before travel times are added
gn.example_edge(G)

(0, 765, {'Wkt': 'LINESTRING (-76.9368475 -12.2335545, -76.9371123 -12.2339199)', 'id': 7537, 'infra_type': 'residential', 'osm_id': '372661129', 'key': 'edge_7537', 'length': 0.04963964227802029, 'Type': 'legitimate'})


### 1.) add time to edges
Here you can change the graph_type from "drive" to "walk" if needed. Instead of driving time this will compute walking time using the standard walk_speed = 4.5 km/h (this can be modified in the GOSTnets.py file) 

In [96]:
# Add a travel 'time' attribute to every edge, using the per-road-type speeds in sd.
# NOTE(review): factor = 1000 appears to convert edge 'length' from km to m
# (compare the example edge shown before and after this cell) - confirm in GOSTnet.
G_time = gn.convert_network_to_time(
    G,
    distance_tag='length',
    graph_type='drive',
    road_col='infra_type',
    speed_dict=sd,
    factor=1000,
)

In [97]:
# Show one edge again to check the new 'time' and 'mode' attributes
gn.example_edge(G_time)

(0, 765, {'Wkt': 'LINESTRING (-76.9368475 -12.2335545, -76.9371123 -12.2339199)', 'id': 7537, 'infra_type': 'residential', 'osm_id': '372661129', 'key': 'edge_7537', 'length': 49.63964227802029, 'Type': 'legitimate', 'time': 17.870271220087307, 'mode': 'drive'})


### 2.) take only biggest routable subgraph from the road network
This is done because disconnected networks are not routable

In [98]:
# Split the network into its strongly connected components and order them
# largest-first (by node count), so that D[0] really is the biggest routable subgraph.
# The components come back in no guaranteed size order, so an unsorted list could put
# a tiny fragment first. nx.strongly_connected_components is also available in
# networkx versions where strongly_connected_component_subgraphs has been removed.
D = sorted(
    (G_time.subgraph(nodes).copy() for nodes in nx.strongly_connected_components(G_time)),
    key=len,
    reverse=True,
)

In [99]:
# Pick the component with the most nodes explicitly instead of relying on the order of D
G = max(D, key=len)

In [100]:
# Relabel the nodes of the selected subgraph with integer IDs
G = nx.convert_node_labels_to_integers(G)

In [101]:
# Save the biggest subgraph next to the input graph.
# (The previous argument, G_path, is never defined in this notebook; graph_path is
# the graph folder set in the configuration cell.)
gn.save(G, 'biggest_subg', graph_path)

### 3.) snap origins and destinations to the road network
origins and destination nodes need to be snapped directly to the road network nodes in order to do routing

In [125]:
# Read the origin (O) and destination (D) point tables with identical parsing options
csv_options = {'sep': ',', 'encoding': "ISO-8859-1"}
O = pd.read_csv(os.path.join(pth, origins_file), **csv_options)
D = pd.read_csv(os.path.join(pth, destinations_file), **csv_options)

In [126]:
# Preview the first three origin rows
O[:3]

Unnamed: 0,O_ID,NC_CLASS,Shape_Leng,Shape_Area,ORIG_FID,Population,Lat,Lon
0,1,3.0,0.003205,2.8608e-07,0,1078.0,-12.24839,-76.91749
1,2,3.0,0.00801,2.5657e-06,1,2374.0,-12.24319,-76.92767
2,3,3.0,0.010808,2.14171e-06,2,367.0,-12.24444,-76.91685


In [127]:
# Preview the first three destination rows
D[:3]

Unnamed: 0,Field1,departamen,provincia,distrito,categoria,Lat,Lon,O_ID,geometry,NN
0,337,LIMA,LIMA,VILLA EL SALVADOR,12,-12.248749,-76.930702,337,POINT (-76.93070221000001 -12.24874878),26684
1,338,LIMA,LIMA,VILLA EL SALVADOR,12,-12.208811,-76.955727,338,POINT (-76.95572661999999 -12.20881081),102330
2,339,LIMA,LIMA,VILLA EL SALVADOR,14,-12.230375,-76.923637,339,POINT (-76.92363739 -12.23037529),66722


#### The function pandana_snap maps each origin and destination point to the nearest node on the road graph. It produces a GeoDataFrame with an added column titled 'NN' that holds the nearest road network node ID for each snapped point. If 'add_dist_to_node_col = True', another column titled 'NN_dist' is added that holds the distance from each point to its nearest road network node.

In [128]:
# Build a point geometry (x = Lon, y = Lat) for every origin, then wrap the table
# in a GeoDataFrame declared as EPSG:4326
O['geometry'] = [Point(lon, lat) for lon, lat in zip(O['Lon'], O['Lat'])]
O_gdf = gpd.GeoDataFrame(O, crs = {'init':'epsg:4326'}, geometry = 'geometry')

In [136]:
# Preview the origins GeoDataFrame.
# NOTE(review): the saved output of this cell already shows the 'NN' / 'NN_dist' columns,
# which are only added by the pandana_snap cell below - the cell was executed out of order.
O_gdf[:5]

Unnamed: 0,O_ID,NC_CLASS,Shape_Leng,Shape_Area,ORIG_FID,Population,Lat,Lon,geometry,NN,NN_dist
0,1,3.0,0.003205,2.8608e-07,0,1078.0,-12.24839,-76.91749,POINT (-76.91749 -12.24839),6528,21.347245
1,2,3.0,0.00801,2.5657e-06,1,2374.0,-12.24319,-76.92767,POINT (-76.92766999999999 -12.24319),5270,37.50692
2,3,3.0,0.010808,2.14171e-06,2,367.0,-12.24444,-76.91685,POINT (-76.91685 -12.24444),1921,6.260416
3,4,3.0,0.004083,7.9102e-07,3,1230.0,-12.24269,-76.93012,POINT (-76.93012 -12.24269),3047,34.625022
4,5,3.0,0.006058,1.06581e-06,4,169.0,-12.24249,-76.92808,POINT (-76.92808000000001 -12.24249),4378,27.773978


In [133]:
# Snap every origin to its nearest road network node ('NN') and record the snapping
# distance ('NN_dist'). Be sure target_crs is the correct local UTM zone so that the
# distance is computed correctly (in meters).
O_gdf = gn.pandana_snap(G, O_gdf, target_crs = 'epsg:32718', add_dist_to_node_col = True)
# Deduplicated list of the snapped origin node IDs
origins = list(set(O_gdf.NN))

In [134]:
# Preview the snapped origins (note the 'NN' and 'NN_dist' columns)
O_gdf[:3]

Unnamed: 0,O_ID,NC_CLASS,Shape_Leng,Shape_Area,ORIG_FID,Population,Lat,Lon,geometry,NN,NN_dist
0,1,3.0,0.003205,2.8608e-07,0,1078.0,-12.24839,-76.91749,POINT (-76.91749 -12.24839),6528,21.347245
1,2,3.0,0.00801,2.5657e-06,1,2374.0,-12.24319,-76.92767,POINT (-76.92766999999999 -12.24319),5270,37.50692
2,3,3.0,0.010808,2.14171e-06,2,367.0,-12.24444,-76.91685,POINT (-76.91685 -12.24444),1921,6.260416


In [111]:
# Build a point geometry (x = Lon, y = Lat) for every destination and wrap the table
# in a GeoDataFrame declared as EPSG:4326
D['geometry'] = list(zip(D['Lon'],D['Lat']))
D['geometry'] = D['geometry'].apply(Point)
D_gdf = gpd.GeoDataFrame(D, crs = {'init':'epsg:4326'}, geometry = 'geometry')
# Snap every destination to its nearest road network node ('NN'); target_crs must be
# the local UTM zone so the distance is computed in meters
D_gdf = gn.pandana_snap(G, D_gdf, target_crs = 'epsg:32718', add_dist_to_node_col = True)
destinations = list(D_gdf.NN)
# Deduplicate the snapped node IDs (previously referenced the undefined name
# `destination`, which raised a NameError)
destinations = list(set(destinations))

In [112]:
# Number of unique road network nodes the origins were snapped to
print("length of origins is %s" % len(origins))

length of origins is 678


In [113]:
# Number of unique road network nodes the destinations were snapped to
print("length of destinations is %s" % len(destinations))

length of destinations is 17


Write the origins_snapped and destinations_snapped files.

In [114]:
# Save the snapped origins and destinations (including the 'NN' node IDs) to the output folder
O_gdf.to_csv(os.path.join(OD_path, 'origins_snapped.csv'))
D_gdf.to_csv(os.path.join(OD_path, 'destinations_snapped.csv'))

### 4.) calculate the OD (origin-destination matrix)

Run a simulation of the OD generation below with a subset of 10 origins and 10 destinations - a good way to estimate the time it will take to run the entire OD.

In [115]:
# Small test sample: the first 10 snapped origin node IDs
o_test = origins[:10]
print(o_test)

[6147, 2052, 3, 6154, 6162, 4115, 6165, 21, 4125, 32]


In [116]:
# Small test sample: the first 10 snapped destination node IDs
d_test = destinations[:10]
print(d_test)

[6048, 2048, 6691, 4154, 4198, 4647, 4233, 3914, 2959, 175]


***The gn.calculate_OD function lets users choose whether "time" or "distance" is computed between each origin and destination point.***

In [117]:
# Timed trial run of the OD computation on the small test samples, weighted by edge 'time'.
# NOTE(review): fail_value is presumably the value stored for pairs with no route - confirm in GOSTnet.
%time OD_test = gn.calculate_OD(G, o_test, d_test, fail_value = 9999999999999, weight = 'time')

CPU times: user 1.19 s, sys: 0 ns, total: 1.19 s
Wall time: 1.36 s


These are the matrix dimensions of your whole dataset.

In [119]:
# (number of unique origin nodes, number of unique destination nodes)
len(origins),len(destinations)

(678, 17)

In [120]:
# Count how large your OD matrix is: the number of origin-destination pairs
len(origins) * len(destinations)

11526

In [121]:
# Do the real OD calculation: all unique origin nodes against all unique destination
# nodes, weighted by edge 'time' (timed with %time)
%time OD = gn.calculate_OD(G, origins, destinations, fail_value = 9999999999999, weight = 'time')

CPU times: user 1.59 s, sys: 0 ns, total: 1.59 s
Wall time: 1.89 s


In [122]:
# Label the OD matrix: one row per origin node ID, one column per destination node ID
OD_df = pd.DataFrame(data=OD, index=origins, columns=destinations)

In [123]:
# Preview the first five rows (origin nodes) of the OD matrix
OD_df[:5]

Unnamed: 0,6048,2048,6691,4154,4198,4647,4233,3914,2959,175,3409,367,1556,917,4919,474,6107
6147,1968.655016,1020.721567,363.676689,819.749517,1322.002788,1578.076932,1803.247252,806.455001,2049.990821,1517.705535,1879.396717,803.156991,1542.402922,1657.102815,583.090841,1736.154601,1584.512721
2052,525.817013,558.842693,1633.650766,1591.881656,274.594031,839.743363,1166.553316,1578.626774,872.524426,1600.311304,1019.7201,1744.22869,875.417373,694.618499,1785.150976,666.975509,544.361984
3,1330.771618,448.775456,1230.200991,1380.165021,873.074022,1498.688595,1962.518259,1307.025267,1668.335516,2097.397326,1815.601711,1546.804731,1534.362605,1386.775967,1520.879916,1471.197652,1236.519452
6154,153.55021,1138.24779,1658.305553,1700.636369,853.999129,1304.180223,723.313264,1988.980369,429.130522,1197.635321,576.396716,1787.841266,1304.052071,1079.897504,1828.763553,817.834594,822.268208
6162,202.318076,907.367997,1799.378687,1663.935343,623.119336,1074.262074,845.305328,1927.152078,551.122585,1319.627384,698.38878,1751.14024,1074.133922,849.979356,1792.062527,653.254488,592.35006


In [124]:
# Write the labelled OD matrix to the output folder
OD_df.to_csv(os.path.join(OD_path,'saved_OD.csv'))

***Depending on the size of the problem you are trying to solve, you might consider running the OD computation using the graph-tool Python module in a virtual environment.***