### Slice OD as needed; compute weighted OD
In this notebook we have the opportunity to adjust the inputs for the optimization analysis:
- a choice can be made on the number and type of destinations that will be considered in the process
- a choice can be made between using a weighted or non-weighted OD matrix (in this example we compute a weighted OD by population )

In [52]:
import geopandas as gpd
import os, sys, time
import pandas as pd
#sys.path.append(r'C:\Users\gost_\Desktop\lima\GOSTNets\GOSTNets')
sys.path.append(r'../../../GOSTNets/GOSTNets')
import GOSTnet as gn
import importlib
import networkx as nx
import osmnx as ox
from shapely.ops import unary_union
from shapely.wkt import loads
from shapely.geometry import LineString, MultiLineString, Point

In [53]:
#define the paths to the origins, destinations files and the OD
#pth = r'C:\Users\gost_\Desktop\lima\data\OD'
pth = r'../../../../lima_optimization_output'

 Run this step if you need to filter the OD by a subset of the destination file

In [54]:
#read destinations snapped csv (from Step3)
dest_snapped_df = pd.read_csv(os.path.join(pth, 'destinations_snapped.csv'))
dest_snapped_df[:5]

Unnamed: 0.1,Unnamed: 0,Field1,departamen,provincia,distrito,categoria,Lat,Lon,O_ID,geometry,NN,NN_dist
0,0,337,LIMA,LIMA,VILLA EL SALVADOR,12,-12.248749,-76.930702,337,POINT (-76.93070221000001 -12.24874878),6691,17.668383
1,1,338,LIMA,LIMA,VILLA EL SALVADOR,12,-12.208811,-76.955727,338,POINT (-76.95572661999999 -12.20881081),6048,16.737332
2,2,339,LIMA,LIMA,VILLA EL SALVADOR,14,-12.230375,-76.923637,339,POINT (-76.92363739 -12.23037529),3914,46.775556
3,3,340,LIMA,LIMA,VILLA EL SALVADOR,13,-12.212154,-76.939438,340,POINT (-76.93943787000001 -12.21215439),917,38.959049
4,4,341,LIMA,LIMA,VILLA EL SALVADOR,12,-12.230355,-76.911362,341,POINT (-76.91136169000001 -12.23035526),4919,20.278953


In [55]:
#split destination file by hospital category 
hour_12_dest_snapped_df = dest_snapped_df.loc[(dest_snapped_df['categoria'] == 11) | (dest_snapped_df['categoria'] == 12)]
hour_12_dest_list = hour_12_dest_snapped_df.NN.tolist()
len(hour_12_dest_list)

13

In [56]:
#hour_12_dest_snapped_df

In [67]:
hour_12_dest_list[:5]

[6691, 6048, 4919, 4154, 1556]

In [58]:
#write new destiantion file to .csv
hour_12_dest_snapped_df.to_csv(os.path.join(pth,'hour_12_dest_snapped_df.csv'))

Run this step if you want to compute a weighted OD

In [66]:
#read origins snapped csv
origins_snapped_df =  pd.read_csv(os.path.join(pth, 'origins_snapped.csv'))
origins_snapped_df[:5]

Unnamed: 0.1,Unnamed: 0,O_ID,NC_CLASS,Shape_Leng,Shape_Area,ORIG_FID,Population,Lat,Lon,geometry,NN,NN_dist
0,0,1,3.0,0.003205,2.8608e-07,0,1078.0,-12.24839,-76.91749,POINT (-76.91749 -12.24839),6528,21.347245
1,1,2,3.0,0.00801,2.5657e-06,1,2374.0,-12.24319,-76.92767,POINT (-76.92766999999999 -12.24319),5270,37.50692
2,2,3,3.0,0.010808,2.14171e-06,2,367.0,-12.24444,-76.91685,POINT (-76.91685 -12.24444),1921,6.260416
3,3,4,3.0,0.004083,7.9102e-07,3,1230.0,-12.24269,-76.93012,POINT (-76.93012 -12.24269),3047,34.625022
4,4,5,3.0,0.006058,1.06581e-06,4,169.0,-12.24249,-76.92808,POINT (-76.92808000000001 -12.24249),4378,27.773978


In [65]:
#create a dictionary of the unique Nearest Node (NN) and its corresponding population value
origins_w_demands_series = pd.Series(origins_snapped_df.Population.values,index=origins_snapped_df.NN)
origins_w_demands_series[:5]

NN
6528    1078.0
5270    2374.0
1921     367.0
3047    1230.0
4378     169.0
dtype: float64

In [61]:
#some origins end up snapping to the same nearest node, therefore below the code groups and sums origin populations
origins_no_dupl=origins_w_demands_series.groupby('NN').sum()
origins_no_dupl [:3]

NN
3     1458.0
21    2232.0
32    2041.0
dtype: float64

In [None]:
#Check this below...

In [62]:
#load OD and multiply OD by population weight -> weighted OD
OD_df = pd.read_csv(os.path.join(pth, 'saved_OD.csv'), index_col=0, sep=',')
OD_df[:5]

Unnamed: 0,6048,2048,6691,4198,4647,4233,3914,2959,175,3409,474,367,1556,917,4919,4154,6107
6147,1968.655016,1020.721567,363.676689,1322.002788,1578.076932,1803.247252,806.455001,2049.990821,1517.705535,1879.396717,1736.154601,803.156991,1542.402922,1657.102815,583.090841,819.749517,1584.512721
2052,525.817013,558.842693,1633.650766,274.594031,839.743363,1166.553316,1578.626774,872.524426,1600.311304,1019.7201,666.975509,1744.22869,875.417373,694.618499,1785.150976,1591.881656,544.361984
3,1330.771618,448.775456,1230.200991,873.074022,1498.688595,1962.518259,1307.025267,1668.335516,2097.397326,1815.601711,1471.197652,1546.804731,1534.362605,1386.775967,1520.879916,1380.165021,1236.519452
6154,153.55021,1138.24779,1658.305553,853.999129,1304.180223,723.313264,1988.980369,429.130522,1197.635321,576.396716,817.834594,1787.841266,1304.052071,1079.897504,1828.763553,1700.636369,822.268208
6162,202.318076,907.367997,1799.378687,623.119336,1074.262074,845.305328,1927.152078,551.122585,1319.627384,698.38878,653.254488,1751.14024,1074.133922,849.979356,1792.062527,1663.935343,592.35006


In [63]:
#len(OD_weighted_df)
hour_12_dest_list

[6691, 6048, 4919, 4154, 1556, 3409, 474, 2959, 175, 2048, 367, 6107, 4647]

In [64]:
# filter the Od by the destination -category 12
#OD_df = OD_df[OD_df.index.isin(hour_12_dest_list)]
#len(OD_df)

0

In [51]:
#OD_df

Unnamed: 0,6048,2048,6691,4198,4647,4233,3914,2959,175,3409,474,367,1556,917,4919,4154,6107


In [41]:
OD_weighted_df = OD_df.mul(origins_no_dupl, axis=0)
OD_weighted_df[:100]

Unnamed: 0,6048,2048,6691,4198,4647,4233,3914,2959,175,3409,474,367,1556,917,4919,4154,6107
3,1.940265e+06,6.543146e+05,1.793633e+06,1.272942e+06,2.185088e+06,2.861352e+06,1.905643e+06,2.432433e+06,3.058005e+06,2.647147e+06,2.145006e+06,2.255241e+06,2.237101e+06,2.021919e+06,2.217443e+06,2.012281e+06,1.802845e+06
21,1.556951e+06,3.612186e+06,4.560601e+06,2.656107e+06,3.411629e+06,1.832438e+06,4.694381e+06,1.137778e+06,2.461096e+06,1.517866e+06,2.302923e+06,4.245439e+06,3.405823e+06,2.911030e+06,4.336777e+06,4.050797e+06,2.396085e+06
32,3.894547e+06,2.744122e+06,1.562129e+06,3.351121e+06,2.476515e+06,2.936088e+06,8.485914e+05,3.534769e+06,2.353297e+06,3.091509e+06,2.799151e+06,5.678146e+05,2.403704e+06,2.780753e+06,6.030392e+05,9.287687e+05,3.085438e+06
82,1.335045e+06,2.659143e+06,2.496666e+06,2.070588e+06,1.133101e+06,5.879733e+05,1.867506e+06,1.030311e+06,6.377000e+05,7.028067e+05,1.171972e+06,1.564188e+06,1.079305e+06,1.246888e+06,1.625899e+06,1.432683e+06,1.467688e+06
84,1.329234e+06,5.102372e+05,2.470234e+06,2.957873e+05,1.360474e+06,2.069029e+06,2.010214e+06,1.597096e+06,2.762749e+06,1.832945e+06,1.264477e+06,2.314202e+06,1.417909e+06,1.126823e+06,2.380087e+06,2.047780e+06,8.849099e+05
99,3.679446e+05,1.555826e+06,2.592952e+06,1.002620e+06,1.359253e+06,5.529742e+05,2.223321e+06,1.176814e+05,1.167221e+06,3.627173e+05,7.163231e+05,1.962846e+06,1.356223e+06,1.068807e+06,2.015840e+06,1.849916e+06,7.809081e+05
106,,,,,,,,,,,,,,,,,
114,,,,,,,,,,,,,,,,,
124,,,,,,,,,,,,,,,,,
130,,,,,,,,,,,,,,,,,


Filter weighted OD by destination type

In [42]:
#it might be necessary to transform the OD entries into type "int"
OD_weighted_df.dtypes
OD_weighted_df_int = OD_weighted_df.astype(int)
OD_weighted_df_int[:10]

ValueError: Cannot convert non-finite values (NA or inf) to integer

In [None]:
#transform the destination nodes into "string" type to be used as column headers
#hour_12_dest_list_s = [str(x) for x in hour_12_dest_list]
#len(hour_12_dest_list_s)

In [None]:
# filter the OD by the destination -category 12
OD_df_12_hr = OD_weighted_df_int[hour_12_dest_list]
OD_df_12_hr[0:10]

In [None]:
# save new OD as .csv = input into optimization
OD_df_12_hr.to_csv(os.path.join(pth,'OD_df_12_hr.csv'))