# Horn of Africa initial basic accessibility

In [1]:
# Enable IPython's autoreload extension in mode 2, so edited modules are
# re-imported before each cell runs (useful while developing GOSTnets locally).
%load_ext autoreload
%autoreload 2

In [2]:
import importlib
import inspect
import logging
import os
import sys
import time

import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import osmnx as ox
import pandas as pd
from shapely.geometry import box, Point
from shapely.ops import split, unary_union

In [3]:
# Get reference to GOSTNets
sys.path.append(r'../../../../GOSTnets')
import GOSTnets as gn

In [4]:
from GOSTnets.load_osm import *

## Create road network
Countries for this analysis include Ethiopia, Kenya, Somalia, Djibouti, South Sudan, and Sudan. Initially the OSM pbf files were downloaded from Geofabrik for these countries and then merged together using the Osmium command line tool. Unfortunately, not all of the roads could be imported from the merged file. Instead, the whole OSM Planet file was downloaded, and Osmium was then used to extract only the roads within the Horn of Africa spatial extent.

In [101]:
# Path to the OSM extract read by OSM_to_network below.
# NOTE(review): absolute, machine-specific path -- adjust before re-running elsewhere.
f = r'D:\data\planet-210614-clipped-highways2.osm.pbf'

In [102]:
# create OSM_to_network object from the load_osm GOSTnets sub-module
horn_of_africa = OSM_to_network(f)

  return _prepare_from_string(" ".join(pjargs))


In [120]:
# show the different highway types and counts
horn_of_africa.roads_raw.infra_type.value_counts()

residential       887026
path              323267
unclassified      284615
track             278295
service            53861
tertiary           22731
footway            14141
secondary          10893
primary             7649
trunk               5882
road                1809
trunk_link          1016
primary_link         916
secondary_link       780
living_street        589
construction         524
tertiary_link        478
pedestrian           439
steps                188
motorway             115
motorway_link         83
bridleway             55
cycleway              42
yes                    7
platform               6
proposed               3
raceway                3
corridor               2
rural road             1
bus_guideway           1
rest_area              1
d                      1
Name: infra_type, dtype: int64

## Filter to the main road classes (trunk, primary, secondary, tertiary, their links, and `road`)

In [119]:
# OSM highway classes kept by filterRoads; every other class is dropped.
accepted_road_types = [
    'tertiary',
    'road',
    'secondary',
    'primary',
    'trunk',
    'primary_link',
    'trunk_link',
    'tertiary_link',
    'secondary_link',
]

In [121]:
horn_of_africa.filterRoads(acceptedRoads = accepted_road_types)

In [122]:
horn_of_africa.roads_raw.infra_type.value_counts()

tertiary          22731
secondary         10893
primary            7649
trunk              5882
road               1809
trunk_link         1016
primary_link        916
secondary_link      780
tertiary_link       478
Name: infra_type, dtype: int64

In [123]:
# load_osm GOSTnets sub-module intermediate step
horn_of_africa.generateRoadsGDF(verbose = False)

In [124]:
# load_osm GOSTnets sub-module final step, creates the graph
horn_of_africa.initialReadIn()

<networkx.classes.multidigraph.MultiDiGraph at 0x20ae55931f0>

In [125]:
print(nx.info(horn_of_africa.network))

Name: 
Type: MultiDiGraph
Number of nodes: 62233
Number of edges: 75760
Average in degree:   1.2174
Average out degree:   1.2174


### Save the graph. This creates a pickle which can be imported later; it also saves the nodes and the edges as CSVs, which can be opened in QGIS

In [2]:
gn.save(horn_of_africa.network,"horn_of_africa_unclean4",r"temp")

In [128]:
# open horn_of_africa_unclean
# Reloads the raw graph written by gn.save above, so the cleaning steps can be
# re-run without rebuilding the network from the OSM extract.
# NOTE(review): read_gpickle unpickles the file -- only load pickles produced by
# this notebook. nx.read_gpickle was removed in NetworkX 3.0, so this cell needs
# an older NetworkX.
G = nx.read_gpickle(os.path.join(r'temp', 'horn_of_africa_unclean4.pickle'))

In [None]:
# Projected CRS (metres) passed to clean_network as its UTM argument.
# EPSG:32638 is WGS 84 / UTM zone 38N.
# NOTE(review): gn.utm_of_graph later in this notebook reports UTM zone 37 for the
# largest cleaned graph -- confirm that zone 38 is the intended projection here.
horn_of_africa_UTMZ = {'init': 'epsg:32638'}

# Geographic CRS of the input data.
WGS = {'init': 'epsg:4326'} # do not adjust. OSM natively comes in EPSG 4326

### clean network processes, including simplifying edges between intersections

In [129]:
# Clean the raw graph with GOSTnets and print wall-clock start/end times; the
# recorded run took roughly eight minutes.
# `time` is imported explicitly in the notebook's import cell rather than relying
# on whatever the GOSTnets star import happens to expose.
print('start: %s\n' % time.ctime())
# Pass the WGS constant defined next to horn_of_africa_UTMZ instead of repeating
# the literal, so both CRS definitions live in one place.
G_clean = gn.clean_network(G, UTM = horn_of_africa_UTMZ, WGS = WGS, junctdist = 10, verbose = False)

print('\nend: %s' % time.ctime())
print('\n--- processing complete')

start: Tue Jun 22 23:02:12 2021



  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))

  juncs_gdf_unproj['centroid'] = juncs_gdf_unproj.centroid
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  juncs_gdf_bound = gpd.sjoin(juncs_gdf_unproj, gdfnodes, how='left', op='intersects', lsuffix='left', rsuffix='right')


63499
118068
32256
63614
Edge reduction: 75760 to 63614 (16 percent)

end: Tue Jun 22 23:10:11 2021

--- processing complete


In [131]:
# let's print info on our clean version
print(nx.info(G_clean))

Name: 
Type: MultiDiGraph
Number of nodes: 22629
Number of edges: 63614
Average in degree:   2.8112
Average out degree:   2.8112


In [132]:
# save and inspect graph
gn.save(G_clean,"G_cleaned_horn_of_africa4",r"temp")

### Keep only the largest strongly connected sub-graph

In [133]:
# compatible with NetworkX 2.4
list_of_subgraphs = list(G_clean.subgraph(c).copy() for c in nx.strongly_connected_components(G_clean))
max_graph = None
max_edges = 0
for i in list_of_subgraphs:
    if i.number_of_edges() > max_edges:
        max_edges = i.number_of_edges()
        max_graph = i

# set your graph equal to the largest sub-graph
G_largest = max_graph

In [134]:
# print info about the largest sub-graph
print(nx.info(G_largest))

Name: 
Type: MultiDiGraph
Number of nodes: 20005
Number of edges: 59515
Average in degree:   2.9750
Average out degree:   2.9750


In [135]:
# save and inspect graph
gn.save(G_largest,"G_largest_cleaned_horn_of_africa4",r"temp")

In [5]:
# load graph
# From here on G is the largest strongly connected, cleaned graph saved above
# (the name G was used earlier for the raw, uncleaned graph).
# NOTE(review): read_gpickle unpickles the file -- only load pickles produced by
# this notebook.
G = nx.read_gpickle(os.path.join(r'temp', 'G_largest_cleaned_horn_of_africa4.pickle'))

## insert origins
Origins are from the GHS SMOD rural categories, and they were converted to vector points in QGIS.

In [8]:
origins = gpd.read_file(r'C:\Users\war-machine\Documents\world_bank_work\horn_of_africa_analysis\horn_of_africa_ghs_rural_pts_4326.shp')

In [9]:
# Give every origin point a synthetic integer ID: its row index offset by 1110000000.
# NOTE(review): the offset is presumably chosen to avoid clashing with graph node
# IDs -- confirm.
origins['osmid'] = 1110000000 + origins.index
origins

Unnamed: 0,VALUE,geometry,osmid
0,11.0,POINT (35.60569 23.13432),1110000000
1,11.0,POINT (35.61628 23.13432),1110000001
2,11.0,POINT (35.59510 23.12619),1110000002
3,11.0,POINT (35.60569 23.12619),1110000003
4,11.0,POINT (35.61628 23.12619),1110000004
...,...,...,...
4783432,11.0,POINT (42.68058 -0.19391),1114783432
4783433,11.0,POINT (42.69117 -0.19391),1114783433
4783434,11.0,POINT (42.66999 -0.20204),1114783434
4783435,11.0,POINT (42.68058 -0.20204),1114783435


In [10]:
# find graph utm zone
G_utm = gn.utm_of_graph(G)
G_utm

'+proj=utm +zone=37 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'

### snap origins to the graph (snaps to the nearest node on the graph)

In [16]:
%%time
#no need to do advanced_snap at this extent
#G2, pois_meter, new_footway_edges = gn.advanced_snap(G_largest, origins, u_tag = 'stnode', v_tag = 'endnode', node_key_col='node_ID', poi_key_col='osmid', path=None, threshold=2000, measure_crs=G_utm)
# Adds two columns to the origins: 'NN' (ID of the nearest graph node) and
# 'NN_dist' (distance to it; presumably metres, since G_utm is a metre-based
# UTM projection -- confirm).
snapped_origins = gn.pandana_snap(G, origins, source_crs = 'epsg:4326', target_crs = G_utm)

Wall time: 3min 30s


In [37]:
snapped_origins

Unnamed: 0,VALUE,geometry,osmid,NN,NN_dist
0,11.0,POINT (35.60569 23.13432),1110000000,27063,7843.276336
1,11.0,POINT (35.61628 23.13432),1110000001,27063,8699.106774
2,11.0,POINT (35.59510 23.12619),1110000002,27063,6432.213991
3,11.0,POINT (35.60569 23.12619),1110000003,27063,7291.942411
4,11.0,POINT (35.61628 23.12619),1110000004,27063,8205.536694
...,...,...,...,...,...
4783432,11.0,POINT (42.68058 -0.19391),1114783432,37083,8386.955454
4783433,11.0,POINT (42.69117 -0.19391),1114783433,37083,9475.078814
4783434,11.0,POINT (42.66999 -0.20204),1114783434,37083,7793.697682
4783435,11.0,POINT (42.68058 -0.20204),1114783435,37083,8797.060067


In [23]:
# Distinct graph nodes that at least one origin snapped to, in order of first appearance.
originNodes = list(pd.unique(snapped_origins['NN']))

## Snap destinations to the road graph
Destinations are centroids created in QGIS of the GHS urban centers

In [18]:
# insert destinations
destinations = gpd.read_file(r'C:\Users\war-machine\Documents\world_bank_work\horn_of_africa_analysis\horn_of_africa_ghs_stat_centroids2.shp')

In [19]:
destinations

Unnamed: 0,fid,ID_HDC_G0,QA2_1V,AREA,BBX_LATMN,BBX_LONMN,BBX_LATMX,BBX_LONMX,GCPNT_LAT,GCPNT_LON,...,EX_SS_P00,EX_SS_P15,EX_EQ19PGA,EX_EQ19MMI,EX_EQ19_Q,EX_HW_IDX,SDG_LUE901,SDG_A2G14,SDG_OS15MX,geometry
0,3475.0,3475.0,1.0,27.0,13.415981,22.420601,13.473200,22.482918,13.441260,22.448188,...,0.0,0.0,0.000000,0.0,available,6.77122,24.7727,0.006136,64.07,POINT (22.44819 13.44126)
1,3478.0,3478.0,2.0,3.0,13.800267,22.493902,13.816625,22.514710,13.807082,22.502775,...,0.0,0.0,0.000000,0.0,available,7.84638,-29.389,0.000000,,POINT (22.50278 13.80708)
2,3484.0,3484.0,1.0,9.0,12.109492,22.581877,12.142124,22.613938,12.122635,22.599989,...,0.0,0.0,0.000000,0.0,available,10.27490,-5.2497,0.000000,78.56,POINT (22.59999 12.12264)
3,3487.0,3487.0,1.0,10.0,15.011973,22.783617,15.044757,22.825476,15.026726,22.801921,...,0.0,0.0,0.000000,0.0,available,5.91611,0.52879,0.000000,,POINT (22.80192 15.02673)
4,3494.0,3494.0,1.0,3.0,12.942079,22.870255,12.966583,22.881783,12.954331,22.876018,...,0.0,0.0,0.000000,0.0,available,8.35383,-16.2193,0.000000,,POINT (22.87602 12.95433)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
831,5839.0,5839.0,0.0,94.0,4.360873,47.389311,4.506618,47.512125,4.428748,47.448015,...,0.0,0.0,0.000000,0.0,available,5.27807,0.014611,0.685121,,POINT (47.44802 4.42875)
832,5849.0,5849.0,2.0,4.0,10.707730,48.324559,10.724015,48.346337,10.715872,48.335448,...,0.0,0.0,0.022302,3.0,available,10.93800,0.11286,0.000000,,POINT (48.33545 10.71587)
833,5861.0,5861.0,1.0,9.0,8.390332,48.463480,8.414696,48.505537,8.402514,48.486182,...,0.0,0.0,0.000000,0.0,available,7.56884,1.8773,0.000000,54.22,POINT (48.48618 8.40251)
834,5871.0,5871.0,1.0,16.0,11.245304,49.159866,11.294193,49.201985,11.275350,49.184212,...,0.0,0.0,0.033539,3.0,available,5.40898,0.05113,0.000000,62.94,POINT (49.18421 11.27535)


In [20]:
snapped_destinations = gn.pandana_snap(G, destinations, source_crs = 'epsg:4326', target_crs = G_utm)

In [21]:
snapped_destinations

Unnamed: 0,fid,ID_HDC_G0,QA2_1V,AREA,BBX_LATMN,BBX_LONMN,BBX_LATMX,BBX_LONMX,GCPNT_LAT,GCPNT_LON,...,EX_EQ19PGA,EX_EQ19MMI,EX_EQ19_Q,EX_HW_IDX,SDG_LUE901,SDG_A2G14,SDG_OS15MX,geometry,NN,NN_dist
0,3475.0,3475.0,1.0,27.0,13.415981,22.420601,13.473200,22.482918,13.441260,22.448188,...,0.000000,0.0,available,6.77122,24.7727,0.006136,64.07,POINT (22.44819 13.44126),28040,516.533804
1,3478.0,3478.0,2.0,3.0,13.800267,22.493902,13.816625,22.514710,13.807082,22.502775,...,0.000000,0.0,available,7.84638,-29.389,0.000000,,POINT (22.50278 13.80708),13220,38206.998667
2,3484.0,3484.0,1.0,9.0,12.109492,22.581877,12.142124,22.613938,12.122635,22.599989,...,0.000000,0.0,available,10.27490,-5.2497,0.000000,78.56,POINT (22.59999 12.12264),10421,102509.080601
3,3487.0,3487.0,1.0,10.0,15.011973,22.783617,15.044757,22.825476,15.026726,22.801921,...,0.000000,0.0,available,5.91611,0.52879,0.000000,,POINT (22.80192 15.02673),13494,62421.607169
4,3494.0,3494.0,1.0,3.0,12.942079,22.870255,12.966583,22.881783,12.954331,22.876018,...,0.000000,0.0,available,8.35383,-16.2193,0.000000,,POINT (22.87602 12.95433),10421,2424.978433
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
831,5839.0,5839.0,0.0,94.0,4.360873,47.389311,4.506618,47.512125,4.428748,47.448015,...,0.000000,0.0,available,5.27807,0.014611,0.685121,,POINT (47.44802 4.42875),62063,20278.871522
832,5849.0,5849.0,2.0,4.0,10.707730,48.324559,10.724015,48.346337,10.715872,48.335448,...,0.022302,3.0,available,10.93800,0.11286,0.000000,,POINT (48.33545 10.71587),39080,2648.297654
833,5861.0,5861.0,1.0,9.0,8.390332,48.463480,8.414696,48.505537,8.402514,48.486182,...,0.000000,0.0,available,7.56884,1.8773,0.000000,54.22,POINT (48.48618 8.40251),41047,537.654964
834,5871.0,5871.0,1.0,16.0,11.245304,49.159866,11.294193,49.201985,11.275350,49.184212,...,0.033539,3.0,available,5.40898,0.05113,0.000000,62.94,POINT (49.18421 11.27535),6606,114.766162


In [22]:
# Distinct graph nodes that at least one destination snapped to, in order of first appearance.
destinationNodes = [node_id for node_id in snapped_destinations['NN'].unique()]

In [24]:
destinationNodes

[28040,
 13220,
 10421,
 13494,
 10940,
 48865,
 60633,
 33675,
 49667,
 39514,
 10327,
 9601,
 52916,
 46902,
 57631,
 19316,
 6938,
 'new_obj_5581',
 10079,
 35250,
 8813,
 15928,
 6544,
 7575,
 28196,
 50687,
 16780,
 38408,
 30587,
 28066,
 45109,
 36815,
 10598,
 13053,
 2793,
 57718,
 8405,
 52408,
 46293,
 4873,
 48307,
 38283,
 11832,
 49645,
 39788,
 19407,
 34007,
 27173,
 1960,
 33094,
 40755,
 17486,
 58209,
 39230,
 5861,
 28683,
 'new_obj_5660',
 25439,
 30910,
 26893,
 'new_obj_4559',
 31069,
 48507,
 18954,
 2343,
 45875,
 46519,
 23287,
 32893,
 49009,
 'new_obj_4103',
 3127,
 31465,
 6604,
 34624,
 41382,
 56216,
 46719,
 24271,
 61673,
 35050,
 'new_obj_3971',
 57084,
 48860,
 13124,
 52792,
 22063,
 34675,
 13648,
 10080,
 51005,
 10333,
 20535,
 55227,
 61749,
 55438,
 46183,
 19355,
 45940,
 49970,
 60869,
 4218,
 8975,
 49167,
 54089,
 35856,
 58860,
 7600,
 13753,
 43047,
 28381,
 'new_obj_4686',
 45006,
 32733,
 18462,
 26144,
 16452,
 37275,
 'new_obj_4585',
 

In [46]:
gn.example_edge(G)

(0, 1931, {'Wkt': 'LINESTRING (39.8515588 -3.6338911, 39.8518512 -3.6339058, 39.8521341 -3.6339205, 39.8528878 -3.6339553, 39.853549 -3.6339901, 39.854587 -3.6340463, 39.8550886 -3.6340758, 39.8554708 -3.6340972, 39.8556653 -3.6340985, 39.8558651 -3.6340912)', 'id': 58914, 'infra_type': 'tertiary', 'osm_id': '674344730', 'key': 'edge_58914', 'length': 0.4790331899592404, 'Type': 'legitimate'})


### add time to the graph edges as an attribute

In [72]:
# note that the length above is in km, therefore set factor to 1000
# Adds 'time' and 'mode' attributes to every edge, using 'infra_type' as the road class.
# NOTE(review): G is rebound to the returned graph, whose 'length' has already been
# multiplied by the factor (compare the example edge before and after this cell).
# Re-running this cell without reloading G would presumably scale the lengths again.
G = gn.convert_network_to_time(G, 'length', road_col = 'infra_type', factor = 1000)

In [73]:
gn.example_edge(G)

(0, 1931, {'Wkt': 'LINESTRING (39.8515588 -3.6338911, 39.8518512 -3.6339058, 39.8521341 -3.6339205, 39.8528878 -3.6339553, 39.853549 -3.6339901, 39.854587 -3.6340463, 39.8550886 -3.6340758, 39.8554708 -3.6340972, 39.8556653 -3.6340985, 39.8558651 -3.6340912)', 'id': 58914, 'infra_type': 'tertiary', 'osm_id': '674344730', 'key': 'edge_58914', 'length': 479.03318995924036, 'Type': 'legitimate', 'time': 57.48398279510884, 'mode': 'drive'})


## calculate OD matrix

In [74]:
# Travel cost between every origin node and every destination node, using the
# 'time' edge attribute as the weight. Rows correspond to originNodes (the
# per-row minimum is paired with originNodes further down).
# NOTE(review): pairs with no route are presumably filled with fail_value (-1);
# treat negative entries as "unreachable", not as travel times.
OD_matrix = gn.calculate_OD(G, originNodes, destinationNodes, fail_value=-1, weight='time')

In [75]:
# calculate_OD was called with fail_value=-1, so negative entries mark pairs with
# no route. Exclude them, otherwise the sentinel drags the average down.
valid_trip_times = OD_matrix[OD_matrix >= 0]
# NaN (rather than a warning-laden empty mean) when nothing is routable.
avg_trip_time = valid_trip_times.mean() if valid_trip_times.size else np.nan

In [88]:
avg_trip_time

145201.97663007074

In [77]:
OD_matrix

array([[250407.70343749, 249496.96092042, 250729.15934233, ...,
        307980.38158554, 345390.67455627, 333099.1385897 ],
       [235775.36211456, 234864.61959749, 236096.8180194 , ...,
        293348.04026261, 330758.33323334, 318466.79726677],
       [235776.16598492, 234865.42346786, 236097.62188976, ...,
        293348.84413298, 330759.13710371, 318467.60113713],
       ...,
       [388785.37908259, 387874.63656552, 389106.83498743, ...,
        121208.66296477, 160696.58331995, 146327.41996893],
       [389242.54590987, 388331.80339281, 389564.00181471, ...,
        121665.82979205, 161153.75014723, 146784.58679621],
       [390832.41654727, 389921.6740302 , 391153.87245211, ...,
        123255.70042945, 162743.62078462, 148374.45743361]])

## calculate accessibility
### For each row, the closest facility is the smallest value in the row

In [78]:
# calculate_OD was called with fail_value=-1. A plain row minimum would pick that
# sentinel as the "closest" travel time whenever a single destination is
# unreachable, so mask failed pairs before taking the minimum.
reachable_times = np.where(OD_matrix >= 0, OD_matrix, np.inf)
closest_facility_per_origin = reachable_times.min(axis=1)
# Origins with no reachable destination keep the -1 sentinel, so the later
# int64 cast of this column still works.
closest_facility_per_origin[~np.isfinite(closest_facility_per_origin)] = -1

In [79]:
# Build the table column-wise so each column keeps its own dtype. Stacking the two
# sequences as rows and transposing coerced the node IDs to float (27063 -> 27063.0),
# or the whole frame to object when an ID is a string. 'NN' is stored as object to
# match snapped_origins['NN'], which it is merged against.
results = pd.DataFrame({
    'NN': pd.Series(originNodes, dtype=object),
    'travel_time_to_closest_facility': closest_facility_per_origin,
})

In [80]:
# Name the two columns: 'NN' is the key used to merge the travel times back onto
# the snapped origins, and colName holds the per-origin-node minimum travel time.
colName = "travel_time_to_closest_facility"
results.columns = ['NN', colName]

In [81]:
results[:5]

Unnamed: 0,NN,travel_time_to_closest_facility
0,27063.0,42233.268804
1,59426.0,27600.927481
2,41774.0,27601.731351
3,59244.0,43517.109667
4,33687.0,27598.353442


In [82]:
snapped_origins

Unnamed: 0,VALUE,geometry,osmid,NN,NN_dist
0,11.0,POINT (35.60569 23.13432),1110000000,27063,7843.276336
1,11.0,POINT (35.61628 23.13432),1110000001,27063,8699.106774
2,11.0,POINT (35.59510 23.12619),1110000002,27063,6432.213991
3,11.0,POINT (35.60569 23.12619),1110000003,27063,7291.942411
4,11.0,POINT (35.61628 23.12619),1110000004,27063,8205.536694
...,...,...,...,...,...
4783432,11.0,POINT (42.68058 -0.19391),1114783432,37083,8386.955454
4783433,11.0,POINT (42.69117 -0.19391),1114783433,37083,9475.078814
4783434,11.0,POINT (42.66999 -0.20204),1114783434,37083,7793.697682
4783435,11.0,POINT (42.68058 -0.20204),1114783435,37083,8797.060067


In [83]:
# Attach each origin's travel time by joining on its nearest graph node ('NN');
# origins that share a node receive the same value.
output2 = snapped_origins.merge(results, on="NN")

In [84]:
output2[:5]

Unnamed: 0,VALUE,geometry,osmid,NN,NN_dist,travel_time_to_closest_facility
0,11.0,POINT (35.60569 23.13432),1110000000,27063,7843.276336,42233.268804
1,11.0,POINT (35.61628 23.13432),1110000001,27063,8699.106774,42233.268804
2,11.0,POINT (35.59510 23.12619),1110000002,27063,6432.213991,42233.268804
3,11.0,POINT (35.60569 23.12619),1110000003,27063,7291.942411,42233.268804
4,11.0,POINT (35.61628 23.12619),1110000004,27063,8205.536694,42233.268804


In [85]:
# Store travel time as a whole number; the cast drops the fractional part
# (truncates rather than rounds). The cast fails on NaN or inf values.
output2['travel_time_to_closest_facility'] = output2['travel_time_to_closest_facility'].astype('int64')

In [86]:
output2.dtypes

VALUE                               float64
geometry                           geometry
osmid                                 int64
NN                                   object
NN_dist                             float64
travel_time_to_closest_facility       int64
dtype: object

## save a shapefile..
### It can then be opened in QGIS and symbolized with a color ramp based on the travel_time_to_closest_facility attribute

In [87]:
# Wrap the merged table as a GeoDataFrame in EPSG:4326 and write it to a shapefile.
# NOTE: despite its name, destinations_gpd holds the ORIGIN points (output2 is the
# snapped origins plus their travel time), not the destinations.
# NOTE(review): shapefile field names are limited to 10 characters, so
# 'travel_time_to_closest_facility' will presumably be truncated in the output --
# check the field name in QGIS.
destinations_gpd = gpd.GeoDataFrame(output2, crs = "epsg:4326", geometry = 'geometry')
destinations_gpd.to_file('horn_of_africa_rural_accessibility3.shp')

  destinations_gpd.to_file('horn_of_africa_rural_accessibility3.shp')
