# This python notebook has examples of the P-Median Problem for Lima
### load libraries

In [1]:
import geopandas as gpd
import pandas as pd
import os, sys, time
sys.path.append(r'C:\Code\Github\GOST_PublicGoods\GOSTNets\GOSTNets')
import GOSTnet as gn
import importlib
import networkx as nx
import osmnx as ox
from shapely.ops import unary_union
from shapely.wkt import loads
from shapely.geometry import LineString, MultiLineString, Point

peartree version: 0.6.1 
networkx version: 2.3 
matplotlib version: 3.1.0 
osmnx version: 0.10 


## load OD, origins_snapped, dest_snapped and prepare datasets for analysis

In [2]:
#read destinations snapped csv
dest_snapped_df = pd.read_csv(r'C:\Users\gost_\Desktop\lima\data\OD2\destinations_snapped.csv', header=0, index_col=0)
dest_snapped_df

FileNotFoundError: [Errno 2] File b'C:\\Users\\gost_\\Desktop\\lima\\data\\OD2\\destinations_snapped.csv' does not exist: b'C:\\Users\\gost_\\Desktop\\lima\\data\\OD2\\destinations_snapped.csv'

In [74]:
dest_series = pd.Series(dest_snapped_df.categoria.values,index=dest_snapped_df.NN)
dest_series

NN
47038    39
87562    13
24420    13
89721    12
29476    14
34559    13
dtype: int64

## split destination dataframe into 12 and 24 hour categories

In [92]:
#split dest file into 12 hour categories
#hour_12 = [11,12]
hour_12_dest_snapped_df = dest_snapped_df.loc[(dest_snapped_df['categoria'] == 11) | (dest_snapped_df['categoria'] == 12)]

#hour_12_dest_series = pd.Series(hour_12_dest_snapped_df.categoria.values,index=hour_12_dest_snapped_df.NN)
#hour_12_dest_series
hour_12_dest_list = hour_12_dest_snapped_df.NN.tolist()
hour_12_dest_list

[89721]

In [93]:
#split dest file into 24 hour categories

#hour_24 = [13,14,39]
hour_24_dest_snapped_df = dest_snapped_df.loc[(dest_snapped_df['categoria'] == 13) | (dest_snapped_df['categoria'] == 14) | (dest_snapped_df['categoria'] == 39)]

#hour_24_dest_series = pd.Series(hour_24_dest_snapped_df.categoria.values,index=hour_24_dest_snapped_df.NN)
#hour_24_dest_series

hour_24_dest_list = hour_24_dest_snapped_df.NN.tolist()
hour_24_dest_list

[47038, 87562, 24420, 29476, 34559]

In [94]:
#read origins snapped csv
origins_snapped_df = pd.read_csv(r'C:\Users\gost_\Desktop\lima\data\OD2\origins_snapped.csv', header=0, index_col=0)
origins_snapped_df

Unnamed: 0,ID,IDMANZANA,CODDPTO,CODPROV,CODDIST,CODZONA,SUFZONA,CODMZNA,SUFMZNA,UBIGEO,...,POB_TOTA_1,ID_ZONA_1,PERSONAS,PERSONAS_M,ORIG_FID,Lon,Lat,geometry,NN,NN_dist
0,256226,1.50115E+14,15,1,15,15,0,17,0,150115,...,0,0,120,48,78579,-77.026855,-12.078002,POINT (-77.02685507 -12.07800238),59955,41.540049
1,256227,1.50115E+14,15,1,15,15,0,25,0,150115,...,0,0,14,10,78580,-77.022800,-12.077652,POINT (-77.02280038000001 -12.07765234),58454,51.392559
2,256228,1.50115E+14,15,1,15,15,0,27,0,150115,...,0,0,332,116,78581,-77.024945,-12.077917,POINT (-77.02494490000001 -12.07791735),33477,46.644805
3,256229,1.50115E+14,15,1,15,15,0,18,0,150115,...,0,0,104,33,78582,-77.026209,-12.077764,POINT (-77.02620904 -12.07776402),53804,44.557279
4,256230,1.50115E+14,15,1,15,15,0,44,0,150115,...,0,0,613,287,78583,-77.025301,-12.081593,POINT (-77.02530102999999 -12.08159251),44488,57.277990
5,256231,1.50115E+14,15,1,15,15,0,3,0,150115,...,0,0,117,53,78584,-77.025864,-12.076272,POINT (-77.02586379 -12.07627247),13144,44.977708
6,256232,1.50115E+14,15,1,15,15,0,11,0,150115,...,0,0,151,60,78585,-77.025583,-12.076786,POINT (-77.02558263 -12.07678621),88366,48.094718
7,256234,1.50115E+14,15,1,15,15,0,52,0,150115,...,0,0,258,101,78586,-77.024556,-12.082498,POINT (-77.02455603999999 -12.08249849),39239,43.440377
8,256235,1.50115E+14,15,1,15,15,0,1,0,150115,...,0,0,66,17,78587,-77.027807,-12.077381,POINT (-77.02780717 -12.07738101),107790,36.785475
9,256236,1.50115E+14,15,1,15,15,0,35,0,150115,...,0,0,340,119,78588,-77.024638,-12.079659,POINT (-77.02463772999999 -12.07965858),114373,55.013238


### origins_w_demands_series will get used as an input for the P-Median problem with weight

In [95]:
origins_w_demands_series = pd.Series(origins_snapped_df.PERSONAS.values,index=origins_snapped_df.NN)
#origins_w_demands_series

### some origins end up snapping to the same nearest node, therefore below the code groups and sums origin populations

In [96]:
origins_no_dupl=origins_w_demands_series.groupby('NN').sum()
#origins_no_dupl

## load OD Matrix

In [97]:
#read small test OD
OD_df = pd.read_csv(r'C:\Users\gost_\Desktop\lima\data\OD2\output_gt_origins_destinations.csv', header=0, index_col=0)

In [98]:
#need to cast columns as int
OD_df.columns = OD_df.columns.astype(int)

In [99]:
OD_df[0:10]

Unnamed: 0,24420,29476,87562,89721,47038,34559
28677,627.217669,341.858199,624.218459,872.02033,398.81918,401.710043
96264,383.186848,536.593527,431.69476,627.989508,206.295481,157.679222
18441,220.886458,723.234229,840.315035,585.033768,614.915756,351.080618
88074,388.215409,646.804176,528.36296,633.01807,302.963681,117.647717
45067,546.88857,348.065098,620.223189,774.517526,394.82391,346.207651
47117,579.633664,304.136723,408.987755,807.26262,465.148029,486.806818
53262,894.980976,235.22849,548.998712,1122.609932,594.490106,802.15413
110611,251.173237,999.984788,992.888959,230.854705,767.489679,503.654542
40980,589.423449,709.964866,894.546913,920.954031,669.147634,605.169875
83989,329.891453,467.662111,649.930507,557.52041,424.531228,237.064607


In [100]:
OD_df.keys()

Int64Index([24420, 29476, 87562, 89721, 47038, 34559], dtype='int64')

In [101]:
OD_df.index

Int64Index([ 28677,  96264,  18441,  88074,  45067,  47117,  53262, 110611,
             40980,  83989,
            ...
             26590,  18396,  16350,  26588,  14308,  34789, 108522,  63467,
             59386,  83967],
           dtype='int64', length=634)

In [102]:
#filter OD by dest12 and dest24 and save as new files -> OD12  and OD24
OD_df_12_hr = OD_df[hour_12_dest_list]

Unnamed: 0,89721
28677,872.020330
96264,627.989508
18441,585.033768
88074,633.018070
45067,774.517526
47117,807.262620
53262,1122.609932
110611,230.854705
40980,920.954031
83989,557.520410


In [129]:
OD_df_24_hr = OD_df[hour_24_dest_list]

### some tests

In [7]:
#OD_df.iloc[2,2]

In [8]:
#OD_df.loc[88074]

In [11]:
#OD_df.loc[88074,'87562']
#test
#OD_df.loc[88074,87562]

528.3629602062092

### This finds the worst case travel time from an origin to a destination

In [13]:
#for each row get min value
OD_df_min = OD_df.min(axis=1)

In [14]:
OD_df_min[0:10]

28677     341.858199
96264     157.679222
18441     220.886458
88074     117.647717
45067     346.207651
47117     304.136723
53262     235.228490
110611    230.854705
40980     589.423449
83989     237.064607
dtype: float64

In [15]:
OD_df_min_max = OD_df_min.max(axis=0)

In [16]:
#This should be the worst service time for an origin in the whole dataset
OD_df_min_max

1036.6746220781097

In [125]:
import importlib
importlib.reload(gn)

peartree version: 0.6.1 
networkx version: 2.2 
matplotlib version: 3.0.3 
osmnx version: 0.9 


<module 'GOSTnet' from 'C:\\Code\\Github\\GOST_PublicGoods\\GOSTNets\\GOSTNets\\GOSTnet.py'>

## P-Median Problem
### Objective: minimize the avg distance

In [126]:
facilities = OD_df.columns.values.tolist()
facilities

[24420, 29476, 87562, 89721, 47038, 34559]

In [127]:
p_median_result1 = gn.optimize_facility_locations(OD_df, facilities, 4, existing_facilities = None, execute = True, verbose = True)

cpu count: 12
Setting up problem
Set up the problem
Processing time took:  1.5000035762786865
print objective value
234914.22140881655


In [118]:
p_median_result1

[24420, 29476, 34559, 87562]

## P-Median Problem with Weights

In [35]:
OD_df

Unnamed: 0,24420,29476,87562,89721,47038,34559
28677,627.217669,341.858199,624.218459,872.020330,398.819180,401.710043
96264,383.186848,536.593527,431.694760,627.989508,206.295481,157.679222
18441,220.886458,723.234229,840.315035,585.033768,614.915756,351.080618
88074,388.215409,646.804176,528.362960,633.018070,302.963681,117.647717
45067,546.888570,348.065098,620.223189,774.517526,394.823910,346.207651
47117,579.633664,304.136723,408.987755,807.262620,465.148029,486.806818
53262,894.980976,235.228490,548.998712,1122.609932,594.490106,802.154130
110611,251.173237,999.984788,992.888959,230.854705,767.489679,503.654542
40980,589.423449,709.964866,894.546913,920.954031,669.147634,605.169875
83989,329.891453,467.662111,649.930507,557.520410,424.531228,237.064607


In [36]:
OD_df.index

Int64Index([ 28677,  96264,  18441,  88074,  45067,  47117,  53262, 110611,
             40980,  83989,
            ...
             26590,  18396,  16350,  26588,  14308,  34789, 108522,  63467,
             59386,  83967],
           dtype='int64', length=634)

In [53]:
origins_no_dupl

NN
107        355
193       1278
356        281
670        474
860        286
1000       164
1051       245
1171      1102
1223       300
1561        92
1822       645
1896       390
1948       154
2234       145
2263         7
2464        48
2525       163
2734        99
2775       241
2834       319
2876       563
2877       351
2883       349
3463       307
3514       612
3519       127
3671       471
3996       106
4144       450
4476       255
          ... 
112083     334
112103     252
112271     146
112365      57
112743     453
113140     363
113277     369
113681     192
113896     306
114373     708
114377     155
114411      67
114535     235
114597     187
115231     446
115374      98
115376     586
115751     148
116238     249
116305     444
116516     447
117382     100
117714      18
117958     212
118921     255
118926     103
118984     174
119326     361
119356     308
119485     223
Length: 634, dtype: int64

In [50]:
origins_w_demands_series.duplicated()

NN
59955     False
58454     False
33477     False
53804     False
44488     False
13144     False
88366     False
39239     False
107790    False
114373    False
86535     False
113140    False
73478     False
95931     False
34789     False
56327     False
73478     False
47834     False
47834     False
107021    False
86466     False
78066     False
114373    False
24798     False
13953     False
27099     False
101889    False
9911      False
26590     False
108234    False
          ...  
54690      True
69079     False
3514       True
35322      True
3514       True
87315      True
21557     False
84133      True
62720      True
12476      True
92719      True
26211     False
80133     False
85068     False
7655       True
36403      True
95952      True
3514       True
64843      True
20762      True
95952      True
18332      True
105373     True
34014      True
32170     False
4476       True
96555      True
8656       True
98635      True
100664     True
Length: 999, dtype: b

In [54]:
OD_weighted_df = OD_df.mul(origins_no_dupl, axis=0)
OD_weighted_df

Unnamed: 0,24420,29476,87562,89721,47038,34559
107,216500.344549,86338.517590,208086.578701,2.973086e+05,206063.062794,183546.814236
193,848210.059558,487788.793781,363014.122435,1.161068e+06,160721.508984,560011.314055
356,224223.402079,147048.293467,16042.598580,2.930129e+05,73061.991507,160855.759288
670,269505.537541,176118.799248,141654.863224,3.855420e+05,34815.604842,162614.923012
860,201029.820654,39117.457551,192614.347681,2.661317e+05,132731.913276,174481.342712
1000,123864.073401,64883.943617,41655.131895,1.611952e+05,50547.754120,101786.821437
1051,83878.378358,140828.043723,173377.492433,1.396475e+05,118154.669008,61135.801099
1171,365372.462521,723529.316272,802213.605336,6.162196e+05,553823.599563,263077.278282
1223,169433.204472,89536.218498,129018.013601,2.371476e+05,145866.095684,142412.379862
1561,42048.558229,42571.236268,46511.286195,6.387223e+04,25774.552501,21301.856675


In [57]:
p_median_result2 = gn.optimize_facility_locations(OD_weighted_df, facilities, 4, existing_facilities = None)

In [58]:
p_median_result2

[24420, 29476, 34559, 47038]