### Slice OD as needed; compute weighted OD
In this notebook we have the opportunity to adjust the inputs for the optimization analysis:
- a choice can be made on the number and type of destinations that will be considered in the process
- a choice can be made between using a weighted or non-weighted OD matrix (in this example we compute a weighted OD by population )

In [1]:
import geopandas as gpd
import os, sys, time
import pandas as pd
#sys.path.append(r'C:\Users\gost_\Desktop\lima\GOSTNets\GOSTNets')
sys.path.append(r'../../../GOSTNets/GOSTNets')
import GOSTnet as gn
import importlib
import networkx as nx
import osmnx as ox
from shapely.ops import unary_union
from shapely.wkt import loads
from shapely.geometry import LineString, MultiLineString, Point

networkx version: 2.2 
osmnx version: 0.9 


In [2]:
#define the paths to the origins, destinations files and the OD
#pth = r'C:\Users\gost_\Desktop\lima\data\OD'
pth = r'../../../../lima_optimization_output'

 Run this step if you need to filter the OD by a subset of the destination file

In [3]:
#read destinations snapped csv (from Step3)
dest_snapped_df = pd.read_csv(os.path.join(pth, 'destinations_snapped.csv'))
dest_snapped_df[:5]

Unnamed: 0.1,Unnamed: 0,Field1,departamen,provincia,distrito,categoria,Lat,Lon,O_ID,geometry,NN,NN_dist
0,0,337,LIMA,LIMA,VILLA EL SALVADOR,12,-12.248749,-76.930702,337,POINT (-76.93070221000001 -12.24874878),6691,17.668383
1,1,338,LIMA,LIMA,VILLA EL SALVADOR,12,-12.208811,-76.955727,338,POINT (-76.95572661999999 -12.20881081),6048,16.737332
2,2,339,LIMA,LIMA,VILLA EL SALVADOR,14,-12.230375,-76.923637,339,POINT (-76.92363739 -12.23037529),3914,46.775556
3,3,340,LIMA,LIMA,VILLA EL SALVADOR,13,-12.212154,-76.939438,340,POINT (-76.93943787000001 -12.21215439),917,38.959049
4,4,341,LIMA,LIMA,VILLA EL SALVADOR,12,-12.230355,-76.911362,341,POINT (-76.91136169000001 -12.23035526),4919,20.278953


In [4]:
#split destination file by hospital category 
hour_12_dest_snapped_df = dest_snapped_df.loc[(dest_snapped_df['categoria'] == 11) | (dest_snapped_df['categoria'] == 12)]
hour_12_dest_list = hour_12_dest_snapped_df.NN.tolist()
len(hour_12_dest_list)

13

In [5]:
hour_12_dest_list

[6691, 6048, 4919, 4154, 1556, 3409, 474, 2959, 175, 2048, 367, 6107, 4647]

### This list will eventually be used to only keep the columns in the OD matrix than match this list 

In [9]:
#write new destiantion file to .csv
hour_12_dest_snapped_df.to_csv(os.path.join(pth,'hour_12_dest_snapped_df.csv'))

Run this step if you want to compute a weighted OD

In [10]:
#read origins snapped csv
origins_snapped_df =  pd.read_csv(os.path.join(pth, 'origins_snapped.csv'))
origins_snapped_df[:5]

Unnamed: 0.1,Unnamed: 0,O_ID,NC_CLASS,Shape_Leng,Shape_Area,ORIG_FID,Population,Lat,Lon,geometry,NN,NN_dist
0,0,1,3.0,0.003205,2.8608e-07,0,1078.0,-12.24839,-76.91749,POINT (-76.91749 -12.24839),6528,21.347245
1,1,2,3.0,0.00801,2.5657e-06,1,2374.0,-12.24319,-76.92767,POINT (-76.92766999999999 -12.24319),5270,37.50692
2,2,3,3.0,0.010808,2.14171e-06,2,367.0,-12.24444,-76.91685,POINT (-76.91685 -12.24444),1921,6.260416
3,3,4,3.0,0.004083,7.9102e-07,3,1230.0,-12.24269,-76.93012,POINT (-76.93012 -12.24269),3047,34.625022
4,4,5,3.0,0.006058,1.06581e-06,4,169.0,-12.24249,-76.92808,POINT (-76.92808000000001 -12.24249),4378,27.773978


In [8]:
#create a dictionary of the unique Nearest Node (NN) and its corresponding population value
origins_w_demands_series = pd.Series(origins_snapped_df.Population.values,index=origins_snapped_df.NN)
origins_w_demands_series[:5]

NN
6528    1078.0
5270    2374.0
1921     367.0
3047    1230.0
4378     169.0
dtype: float64

In [12]:
#some origins end up snapping to the same nearest node, therefore the code below groups and sums origin populations
origins_no_dupl=origins_w_demands_series.groupby('NN').sum()
origins_no_dupl[:5]

NN
3     1458.0
21    2232.0
32    2041.0
82    1508.0
84    1610.0
dtype: float64

## Generate Weighted OD Matrix

In [13]:
#load OD and multiply OD by population weight -> weighted OD
OD_df = pd.read_csv(os.path.join(pth, 'saved_OD.csv'), index_col=0, sep=',')
OD_df[:5]

Unnamed: 0,6048,2048,6691,4154,4198,4647,4233,3914,2959,175,3409,367,1556,917,4919,474,6107
6147,1968.655016,1020.721567,363.676689,819.749517,1322.002788,1578.076932,1803.247252,806.455001,2049.990821,1517.705535,1879.396717,803.156991,1542.402922,1657.102815,583.090841,1736.154601,1584.512721
2052,525.817013,558.842693,1633.650766,1591.881656,274.594031,839.743363,1166.553316,1578.626774,872.524426,1600.311304,1019.7201,1744.22869,875.417373,694.618499,1785.150976,666.975509,544.361984
3,1330.771618,448.775456,1230.200991,1380.165021,873.074022,1498.688595,1962.518259,1307.025267,1668.335516,2097.397326,1815.601711,1546.804731,1534.362605,1386.775967,1520.879916,1471.197652,1236.519452
6154,153.55021,1138.24779,1658.305553,1700.636369,853.999129,1304.180223,723.313264,1988.980369,429.130522,1197.635321,576.396716,1787.841266,1304.052071,1079.897504,1828.763553,817.834594,822.268208
6162,202.318076,907.367997,1799.378687,1663.935343,623.119336,1074.262074,845.305328,1927.152078,551.122585,1319.627384,698.38878,1751.14024,1074.133922,849.979356,1792.062527,653.254488,592.35006


In [16]:
OD_weighted_df = OD_df.mul(origins_no_dupl, axis=0)
OD_weighted_df[:5]

Unnamed: 0,6048,2048,6691,4154,4198,4647,4233,3914,2959,175,3409,367,1556,917,4919,474,6107
3,1940265.0,654314.6,1793633.0,2012281.0,1272942.0,2185088.0,2861352.0,1905643.0,2432433.0,3058005.0,2647147.0,2255241.0,2237101.0,2021919.0,2217443.0,2145006.0,1802845.0
21,1556951.0,3612186.0,4560601.0,4050797.0,2656107.0,3411629.0,1832438.0,4694381.0,1137778.0,2461096.0,1517866.0,4245439.0,3405823.0,2911030.0,4336777.0,2302923.0,2396085.0
32,3894547.0,2744122.0,1562129.0,928768.7,3351121.0,2476515.0,2936088.0,848591.4,3534769.0,2353297.0,3091509.0,567814.6,2403704.0,2780753.0,603039.2,2799151.0,3085438.0
82,1335045.0,2659143.0,2496666.0,1432683.0,2070588.0,1133101.0,587973.3,1867506.0,1030311.0,637700.0,702806.7,1564188.0,1079305.0,1246888.0,1625899.0,1171972.0,1467688.0
84,1329234.0,510237.2,2470234.0,2047780.0,295787.3,1360474.0,2069029.0,2010214.0,1597096.0,2762749.0,1832945.0,2314202.0,1417909.0,1126823.0,2380087.0,1264477.0,884909.9


## Filter weighted OD by destination type

In [24]:
hour_12_dest_list

[6691, 6048, 4919, 4154, 1556, 3409, 474, 2959, 175, 2048, 367, 6107, 4647]

In [19]:
#it might be necessary to transform the OD entries into type "int"
OD_weighted_df.dtypes
OD_weighted_df_int = OD_weighted_df.astype(int)
OD_weighted_df_int[:5]

Unnamed: 0,6048,2048,6691,4154,4198,4647,4233,3914,2959,175,3409,367,1556,917,4919,474,6107
3,1940265,654314,1793633,2012280,1272941,2185087,2861351,1905642,2432433,3058005,2647147,2255241,2237100,2021919,2217442,2145006,1802845
21,1556950,3612185,4560600,4050797,2656106,3411629,1832437,4694381,1137778,2461095,1517866,4245438,3405823,2911030,4336777,2302922,2396084
32,3894547,2744122,1562128,928768,3351120,2476514,2936087,848591,3534769,2353296,3091508,567814,2403704,2780752,603039,2799151,3085437
82,1335045,2659143,2496666,1432683,2070587,1133100,587973,1867506,1030311,637700,702806,1564188,1079304,1246887,1625899,1171972,1467687
84,1329233,510237,2470233,2047779,295787,1360473,2069029,2010214,1597095,2762748,1832944,2314202,1417909,1126822,2380086,1264476,884909


In [25]:
#transform the destination nodes into "string" type to be used as column headers
hour_12_dest_list_s = [str(x) for x in hour_12_dest_list]
hour_12_dest_list_s

['6691',
 '6048',
 '4919',
 '4154',
 '1556',
 '3409',
 '474',
 '2959',
 '175',
 '2048',
 '367',
 '6107',
 '4647']

In [26]:
# filter the OD by the destination -category 12
OD_df_12_hr = OD_weighted_df_int[hour_12_dest_list_s]
OD_df_12_hr[0:10]

Unnamed: 0,6691,6048,4919,4154,1556,3409,474,2959,175,2048,367,6107,4647
3,1793633,1940265,2217442,2012280,2237100,2647147,2145006,2432433,3058005,654314,2255241,1802845,2185087
21,4560600,1556950,4336777,4050797,3405823,1517866,2302922,1137778,2461095,3612185,4245438,2396084,3411629
32,1562128,3894547,603039,928768,2403704,3091508,2799151,3534769,2353296,2744122,567814,3085437,2476514
82,2496666,1335045,1625899,1432683,1079304,702806,1171972,1030311,637700,2659143,1564188,1467687,1133100
84,2470233,1329233,2380086,2047779,1417909,1832944,1264476,1597095,2762748,510237,2314202,884909,1360473
99,2592952,367944,2015840,1849915,1356222,362717,716323,117681,1167221,1555826,1962846,780908,1359252
106,1366589,1520003,1446383,1138235,1463721,1825800,1395620,1647828,2139295,262479,1340869,1109929,1420341
114,1317148,1007905,851528,681453,360702,820666,506703,935616,1048993,573485,817808,451453,331307
124,970021,442091,715951,659575,595896,286480,442378,304162,346193,845696,697945,483005,599142
130,989744,1483227,1310654,1106284,1675135,2003865,1613307,1842285,2015141,463765,1289548,1353929,1635751


In [27]:
# save new OD as .csv = input into optimization
OD_df_12_hr.to_csv(os.path.join(pth,'OD_df_12_hr.csv'))