# Optimization Example
## This Python notebook is a complete, end-to-end example of solving a facility location optimization problem, from importing OSM data to running the optimization functions
### load libraries

In [2]:
import geopandas as gpd
import pandas as pd
import os, sys
# add to your system path the location of the LoadOSM.py and GOSTnet.py scripts
sys.path.append(r'/home/vagrant/repos/GOST_PublicGoods/GOSTNets/GOSTNets')
import GOSTnet as gn
import LoadOSM as losm
import importlib
from pulp import LpInteger,LpVariable, LpProblem, lpSum, LpMinimize

peartree version: 0.6.1 
networkx version: 2.2 
matplotlib version: 3.0.3 
osmnx version: 0.9 


### load OSM pbf file into a geodataframe containing only roads

In [3]:
osmNetwork = losm.OSM_to_network('./sampleData/nouakchott/mauritania-latest.osm.pbf')

In [4]:
osmNetwork.roads_raw.infra_type.value_counts()

residential       37459
track              5467
unclassified       4021
service            2028
path               1488
tertiary            891
footway             563
secondary           310
primary             228
construction        105
trunk                83
trunk_link           29
pedestrian           29
tertiary_link        14
primary_link          5
secondary_link        3
proposed              2
road                  2
raceway               2
cycleway              1
Name: infra_type, dtype: int64

### define and filter road types

In [5]:
# OSM highway classes handed to filterRoads as the accepted set
accepted_road_types = [
    'residential',
    'unclassified',
    'track',
    'service',
    'tertiary',
    'road',
    'secondary',
    'primary',
    'trunk',
    'primary_link',
    'trunk_link',
    'tertiary_link',
    'secondary_link',
]
osmNetwork.filterRoads(acceptedRoads = accepted_road_types)

### clip road network using an input polygon shapefile

In [7]:
# Read the clipping polygon and reproject it to EPSG:4326 before the intersection test
shp = gpd.read_file('./sampleData/nouakchott/layers/POLYGON.shp').to_crs({'init':'epsg:4326'})

# Only the first geometry in the shapefile is used as the clip shape
shp_obj = shp.geometry.iloc[0]

# Keep the roads whose geometry intersects the clip shape
osmNetwork.roads_raw = osmNetwork.roads_raw[osmNetwork.roads_raw.geometry.intersects(shp_obj)]

### additional LoadOSM functions to convert roads into a graph

In [9]:
osmNetwork.generateRoadsGDF(verbose = False)

In [10]:
osmNetwork.initialReadIn()

<networkx.classes.multidigraph.MultiDiGraph at 0x7f9a6633e080>

In [11]:
gn.save(osmNetwork.network,'nouakchott','./sampleData/nouakchott/')

### clean network

In [12]:
import os, sys, time
sys.path.append(r'/home/vagrant/repos/GOST_PublicGoods/GOSTNets/GOSTNets')
import GOSTnet as gn
import importlib
import networkx as nx
import osmnx as ox
from shapely.ops import unary_union
from shapely.wkt import loads
from shapely.geometry import LineString, MultiLineString, Point

In [13]:
def CleanNetwork(G, wpath, country, UTM, WGS = {'init': 'epsg:4326'}, junctdist = 50, verbose = False):
    """Topologically simplify a road graph and save the result.

    Junctions are collapsed, interstitial nodes are removed, duplicated edges are
    dropped, and finally every remaining edge is mirrored so that all streets are
    two-way. The final graph is saved as '<country>_processed' in wpath.

    Parameters
    ----------
    G : graph object containing nodes and edges. Edges should have a property
        called 'Wkt' containing geometry objects describing the roads.
    wpath : the write path - a directory for intermediate and final outputs.
    country : label used to name the saved output; allows the sequential
        processing of multiple countries.
    UTM : the projection, in metres, in which junctdist is applied. Passed
        straight to gn.simplify_junctions.
    WGS : geographic projection passed to gn.simplify_junctions alongside UTM.
        The default dict is not modified by this function.
    junctdist : distance within which to collapse neighbouring nodes (simplifies
        junctions). Set to 0.1 if no simplification is desired. 50m is good for
        national (primary / secondary) networks.
    verbose : if truthy, saves the intermediate stages 'a' and 'b' to wpath
        for dissection.

    Returns
    -------
    The cleaned graph.

    Notes
    -----
    The printed "edge reduction" compares the edge count of G with that of the
    final graph. Because the last step adds the missing reflected edges, the
    final graph can hold more edges than the input, in which case the reported
    percentage is negative.
    """

    # Squeezes clusters of nodes down to a single node if they are within the snapping tolerance
    a = gn.simplify_junctions(G, UTM, WGS, junctdist)

    # ensures all streets are two-way
    a = gn.add_missing_reflected_edges(a)

    # save progress
    if verbose:
        gn.save(a, 'a', wpath)

    # Finds and deletes interstitial nodes based on node degree
    b = gn.custom_simplify(a)

    # rectify geometry: edges whose 'Wkt' is a list of geometries are unbundled into one
    for _, _, data in b.edges(data = True):
        if isinstance(data['Wkt'], list):
            data['Wkt'] = gn.unbundle_geometry(data['Wkt'])

    # save progress
    if verbose:
        gn.save(b, 'b', wpath)

    # For some reason custom_simplify doesn't return a MultiDiGraph. Fix that here
    c = gn.convert_to_MultiDiGraph(b)

    # This is the most controversial function - removes duplicated edges. This takes care of two-lane but separate highways, BUT
    # destroys internal loops within roads. Can be run with or without this line
    c = gn.remove_duplicate_edges(c)

    # Run this again after removing duplicated edges
    c = gn.custom_simplify(c)

    # Ensure all remaining edges are duplicated (two-way streets)
    c = gn.add_missing_reflected_edges(c)

    # save final
    gn.save(c, '%s_processed' % country, wpath)

    # Report the change in edge count. The counts are read here, at the end, exactly
    # as before; an empty input graph no longer raises ZeroDivisionError.
    edges_before = G.number_of_edges()
    edges_after = c.number_of_edges()
    if edges_before:
        percent = (edges_before - edges_after) / edges_before * 100
    else:
        percent = 0
    print('Edge reduction: %s to %s (%d percent)' % (edges_before, edges_after, percent))
    return c

In [14]:
# EPSG code used to build the UTM argument for each country
UTMZs = {'MRT': 32628}
WGS = {'init': 'epsg:4326'}
countries = ['MRT']
wpath = r'./sampleData/nouakchott/'

for country in countries:
    print('\n--- processing for: %s ---\n' % country)
    print('start: %s\n' % time.ctime())
    print('Outputs can be found at: %s\n' % (wpath))

    UTM = {'init': 'epsg:%d' % UTMZs[country]}

    # Load the graph saved earlier and clean it with a junction distance of 0.5
    G = CleanNetwork(nx.read_gpickle(os.path.join(wpath, 'nouakchott.pickle')),
                     wpath, country, UTM, WGS, 0.5, verbose = False)

    print('\nend: %s' % time.ctime())
    print('\n--- processing complete for: %s ---' % country)


--- processing for: MRT ---

start: Fri Mar 22 16:27:26 2019

Outputs can be found at: ./sampleData/nouakchott/

60139
120238
118392
59249
118388
Edge reduction: 60142 to 118388 (-96 percent)

end: Fri Mar 22 16:33:28 2019

--- processing complete for: MRT ---


## net prep phase

In [13]:
# Standard library first: `sys` has to be imported before sys.path is modified,
# so this cell does not depend on an earlier cell having imported it.
import importlib
import os, sys, time

# add to your system path the location of the LoadOSM.py and GOSTnet.py scripts
sys.path.append(r'/home/vagrant/repos/GOST_PublicGoods/GOSTNets/GOSTNets')

import geopandas as gpd
import networkx as nx
import osmnx as ox
import pandas as pd
from pulp import LpInteger,LpVariable, LpProblem, lpSum, LpMinimize
from shapely.geometry import LineString, MultiLineString, Point
from shapely.ops import unary_union
from shapely.wkt import loads

import GOSTnet as gn

In [14]:
#read back in processed graph
G = nx.read_gpickle('./sampleData/nouakchott/MRT_processed.pickle')

In [15]:
G.number_of_edges()

118388

In [16]:
G.number_of_nodes()

37305

In [17]:
# Speed per road class (infra_type), in kmph; passed as speed_dict when converting
# the network to travel time.
sd = dict([
    ('residential', 10),
    ('primary', 25),
    ('primary_link', 20),
    ('motorway', 35),
    ('motorway_link', 25),
    ('trunk', 20),
    ('trunk_link', 15),
    ('secondary', 10),
    ('secondary_link', 5),
    ('tertiary', 5),
    ('tertiary_link', 5),
    ('unclassified', 5),
])

In [18]:
gn.example_edge(G)

(0, 24505, {'Wkt': 'LINESTRING (-15.8975893 18.0394637, -15.8976921 18.039127)', 'id': 45978, 'infra_type': 'residential', 'osm_id': '667426151', 'key': 'edge_45978', 'length': 0.03882328422750167, 'Type': 'legitimate'})


### calculates travel time in seconds

In [19]:
# Adds 'time' and 'mode' attributes to every edge. With factor = 1000 the 'length'
# attribute comes back multiplied by 1000 (0.0388 -> 38.82 in the example edge),
# and speeds are looked up in sd by the edge's 'infra_type'.
G_time = gn.convert_network_to_time(
    G,
    distance_tag = 'length',
    graph_type = 'drive',
    road_col = 'infra_type',
    speed_dict = sd,
    factor = 1000,
)

In [20]:
gn.example_edge(G_time)

(0, 24505, {'Wkt': 'LINESTRING (-15.8975893 18.0394637, -15.8976921 18.039127)', 'id': 45978, 'infra_type': 'residential', 'osm_id': '667426151', 'key': 'edge_45978', 'length': 38.82328422750167, 'Type': 'legitimate', 'time': 13.976382321900601, 'mode': 'drive'})


In [21]:
# nx.strongly_connected_component_subgraphs is deprecated (removed in networkx 2.4)
# and yields components in no guaranteed size order. Build the subgraph copies
# explicitly, ordered from the largest component to the smallest.
D = [G_time.subgraph(nodes).copy()
     for nodes in sorted(nx.strongly_connected_components(G_time), key = len, reverse = True)]

In [22]:
len(D)

11

### get the largest subgraph

In [23]:
# Select the component with the most nodes explicitly instead of relying on
# whichever component happens to come first in D.
G = max(D, key = lambda subgraph: subgraph.number_of_nodes())

In [24]:
G = nx.convert_node_labels_to_integers(G)

In [25]:
gn.save(G, 'biggest_subg', './sampleData/nouakchott/')

In [26]:
# Make sure the origins and destinations files have been created: they are CSVs with 'Lat' and 'Lon' columns.
# In the future there will also need to be a way to add a 'demand' column to the origins CSV and a 'capacity' column to the destinations CSV.
origins = pd.read_csv('./sampleData/nouakchott/origins_test1.csv')


### import origins and snap to nearest node in graph

In [27]:
# Build a point geometry from each (Lon, Lat) pair
origins['geometry'] = [Point(lon_lat) for lon_lat in zip(origins['Lon'], origins['Lat'])]
origins_gdf = gpd.GeoDataFrame(origins, crs = {'init':'epsg:4326'}, geometry = 'geometry')

# Snap every origin to the graph; the result carries the 'NN' and 'NN_dist' columns
origins_gdf = gn.pandana_snap(G, origins_gdf, target_crs = 'epsg:32628', add_dist_to_node_col = True)

# From here on `origins` is the list of distinct nearest-node ids
origins = list(set(origins_gdf.NN))

  G_tree = spatial.KDTree(node_gdf[['x','y']].as_matrix())
  distances, indices = G_tree.query(in_df[['x','y']].as_matrix())


In [28]:
origins_gdf[:10]

Unnamed: 0,Lon,Lat,id,demand,geometry,NN,NN_dist
0,-15.920091,18.068748,0,492971,POINT (-15.9200905444876 18.068747698414),5343,38.598697
1,-15.970769,18.011527,1,455613,POINT (-15.9707686005727 18.0115274721116),15307,40.916068
2,-16.017149,18.101601,2,461385,POINT (-16.0171494105211 18.1016011324086),32797,13.793765
3,-16.011744,18.080487,3,253764,POINT (-16.0117439165805 18.0804867868114),19488,93.794886
4,-15.874072,18.065438,4,182095,POINT (-15.8740715454476 18.065437965025),11633,199.64193
5,-15.936539,18.142234,5,325801,POINT (-15.9365393885209 18.1422342764535),34556,33.226094
6,-15.936032,18.035973,6,216626,POINT (-15.9360316762384 18.0359726230182),34965,28.303928
7,-15.989495,18.010282,7,175802,POINT (-15.989494705491 18.0102817599499),5702,18.700289
8,-15.98844,18.0624,8,161223,POINT (-15.9884396394221 18.062399556345),11029,12.908252
9,-15.979721,17.992657,9,196191,POINT (-15.979721387056 17.9926573776181),12121,45.940805


In [29]:
destinations = pd.read_csv('./sampleData/nouakchott/destinations_test1.csv')

### import destinations and snap to nearest node in graph

In [30]:
# Build a point geometry from each (Lon, Lat) pair
destinations['geometry'] = [Point(lon_lat) for lon_lat in zip(destinations['Lon'], destinations['Lat'])]
destinations_gdf = gpd.GeoDataFrame(destinations, crs = {'init':'epsg:4326'}, geometry = 'geometry')

# Snap every destination to the graph; destinations_gdf.NN is the nearest node of the road network
destinations_gdf = gn.pandana_snap(G, destinations_gdf, target_crs = 'epsg:32628', add_dist_to_node_col = True)

# From here on `destinations` is the list of distinct nearest-node ids
destinations = list(set(destinations_gdf.NN))

In [31]:
destinations_gdf[:10]

Unnamed: 0,Lon,Lat,id,Unnamed: 3,geometry,NN,NN_dist
0,-16.005524,18.133726,1,,POINT (-16.0055244876789 18.1337264869769),25486,244.940598
1,-15.911223,18.146442,2,,POINT (-15.9112234904605 18.1464419959018),6724,91.625141
2,-15.894339,18.04651,3,,POINT (-15.8943385146072 18.0465095569873),1646,26.534798
3,-15.966657,17.979553,4,,POINT (-15.9666571847713 17.979553320176),8994,21.88853
4,-15.980675,18.028031,5,,POINT (-15.980674900574 18.0280309665427),15739,34.104235
5,-16.00202,18.067712,6,,POINT (-16.0020200587282 18.0677121043953),33112,31.65786
6,-16.00202,18.104962,7,,POINT (-16.0020200587282 18.1049618080371),4092,52.440472
7,-15.97717,18.084672,8,,POINT (-15.9771704716234 18.0846722997527),4965,54.837685
8,-15.941489,18.061654,9,,POINT (-15.9414890132164 18.0616544947466),2440,5.547694
9,-15.954232,18.11011,10,,POINT (-15.9542323912189 18.1101095187286),7011,6.335953


In [32]:
print("length of origins is %s" % len(origins))

length of origins is 50


In [33]:
print("length of destinations is %s" % len(destinations))

length of destinations is 10


In [34]:
len(origins) * len(destinations)

500

In [35]:
d_test = destinations[:50]
print(d_test)

[8994, 7011, 6724, 4965, 2440, 1646, 25486, 33112, 15739, 4092]


### make Origin Destination Matrix

In [36]:
%time OD = gn.calculate_OD(G, origins, destinations, fail_value = 9999999999999)

CPU times: user 2.45 s, sys: 0 ns, total: 2.45 s
Wall time: 2.89 s


In [53]:
OD_df = pd.DataFrame(OD, columns = destinations, index = origins)

In [54]:
facilities = OD_df.columns.values.tolist()

### facilities list

In [55]:
facilities

[8994, 7011, 6724, 4965, 2440, 1646, 25486, 33112, 15739, 4092]

### P-Median problem

In [56]:
result = gn.optimize_facility_locations(OD_df, facilities, 4, existing_facilities = None)

In [57]:
result

[15739, 2440, 4092, 7011]

In [42]:
# the selected nodes 15739, 2440, 4092, 7011 correspond to destination ids 5, 9, 7, 10

In [43]:
import importlib
importlib.reload(gn)

peartree version: 0.6.1 
networkx version: 2.2 
matplotlib version: 3.0.3 
osmnx version: 0.9 


<module 'GOSTnet' from '/home/vagrant/repos/GOST_PublicGoods/GOSTNets/GOSTNets/GOSTnet.py'>

## P-Median problem with two facility locations fixed beforehand

In [76]:
result2 = gn.optimize_facility_locations(OD_df, facilities, 4, existing_facilities = [8994,25486])

In [77]:
result2

[2440, 25486, 4965, 8994]

## P-Median problem with population weights

In [59]:
OD_df[:10]

Unnamed: 0,8994,7011,6724,4965,2440,1646,25486,33112,15739,4092
28291,3340.836168,1941.811792,2474.175924,1637.061354,1065.073847,2019.845992,2802.051017,2740.249661,2390.308863,2326.188681
15877,3719.561387,552.200337,1662.082592,1663.938809,2262.278821,3431.780571,2196.624544,2859.957422,2766.786612,1923.391222
33926,3774.268565,1292.827363,2257.56116,1196.118322,2316.985999,3486.487749,1490.138612,2222.673912,2819.961653,1017.160746
4101,2611.859344,2272.648285,2805.012416,1967.897847,575.529981,2532.306273,3132.887509,2695.357159,1661.431381,2657.025174
7178,2013.543292,1682.311487,2214.675618,1376.028912,1622.705275,2792.331903,2541.018574,1233.723334,1045.810811,2065.156239
15243,1622.658588,2097.960971,2630.325102,1791.678396,2038.35476,3207.981387,2956.668059,1927.828917,244.203522,2480.805723
14482,2911.656748,2009.77687,2542.141001,995.74662,2243.302682,3412.92931,1785.201938,366.536414,1938.141226,1309.339603
34965,2980.030795,3014.883868,3547.248,2710.133431,1363.870746,2203.531169,3875.123093,3153.667045,2034.871046,3399.260757
11029,2550.781031,2089.009361,2621.373492,1244.33164,2029.40315,3199.029777,2313.40178,733.0567,1577.265509,1837.539444
22552,2459.436717,1533.001302,2065.365433,1228.250864,929.352851,2489.316195,2393.240526,1977.578287,1509.008754,1917.378191


### create weighted OD matrix

In [60]:
# Total demand per snapped node. Origins that share a nearest node (NN) are summed,
# so the index is unique and can be aligned with the rows of OD_df, which is
# indexed by the distinct NN values.
origins_w_demands_series = origins_gdf.groupby('NN')['demand'].sum()

In [1]:
origins_w_demands_series

NameError: name 'origins_w_demands_series' is not defined

In [61]:
OD_weighted_df = OD_df.mul(origins_w_demands_series, axis='index')

In [62]:
OD_weighted_df

Unnamed: 0,8994,7011,6724,4965,2440,1646,25486,33112,15739,4092
2044,1543375000.0,535678800.0,216330500.0,712085000.0,954052600.0,1426997000.0,1182791000.0,1195753000.0,1158074000.0,990353000.0
4101,1104067000.0,960678000.0,1185715000.0,831856000.0,243284000.0,1070439000.0,1324312000.0,1139363000.0,702308600.0,1123159000.0
4798,981113500.0,517872300.0,705903800.0,119668900.0,560827500.0,973940800.0,595551200.0,400029600.0,643013800.0,427476200.0
4851,86941100.0,155480200.0,189545200.0,135881800.0,150256600.0,226508200.0,210427200.0,148075200.0,20368800.0,179977700.0
5048,193820200.0,325327900.0,371411000.0,298947800.0,239023100.0,256971500.0,399792800.0,315979500.0,179672700.0,358600700.0
5307,1493442000.0,1015979000.0,1197898000.0,911839600.0,714896100.0,68291290.0,1309939000.0,1288210000.0,1168629000.0,1147328000.0
5343,1511662000.0,823694300.0,1086134000.0,673461200.0,388515400.0,679884800.0,1247767000.0,1215590000.0,1043079000.0,1013181000.0
5702,281080900.0,516097700.0,609688400.0,462252600.0,501357500.0,710870200.0,667060200.0,495752900.0,167405700.0,583402600.0
7178,538340900.0,449782800.0,592115700.0,367895100.0,433846500.0,746557900.0,679366700.0,329848300.0,279608000.0,552140200.0
8638,1200892000.0,260719000.0,392437700.0,537246900.0,730417400.0,1107984000.0,909519800.0,923374300.0,893294700.0,759397200.0


In [68]:
result_w_demands = gn.optimize_facility_locations(OD_weighted_df, facilities, 4, existing_facilities = None)

In [69]:
result_w_demands

[15739, 2440, 4092, 7011]