# Importing shapefiles into GOSTnets

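This notebook experiments with converting a shapefile into a NetworkX graph (`G`) object for use with GOSTnets. The approach works because the input shapefile is derived from an existing OSM file and already carries a from-node and a to-node ID (`stnode`, `endnode`) for every road segment.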

In [2]:
import os, sys, importlib

import pandas as pd
import geopandas as gpd
import networkx as nx

from shapely.wkt import loads
from shapely.geometry import Point
from random import sample 

## Your GOSTnets path could be different
#sys.path.append(r"C:\Work\Code\GOSTnets")
sys.path.append("../../../GOSTNets")
import GOSTnets as gn

In [3]:
# Load the road shapefile and turn it into a directed graph.
input_shapefile = "cxb_roads_edit.shp"
inD = gpd.read_file(input_shapefile)

# Columns copied onto every edge; 'stnode' / 'endnode' hold the edge end points.
edge_columns = ['length', 'infra_type', 'osm_id', 'geometry']

# from_pandas_edgelist builds an undirected graph; to_directed() then yields
# a directed copy holding both (u, v) and (v, u) for every road.
G = nx.from_pandas_edgelist(
    inD,
    source="stnode",
    target="endnode",
    edge_attr=edge_columns,
).to_directed()

In [4]:
# The nodes in the dataset do not have coordinates, so attach x/y to each of them.
#
# Coordinates are taken from the road table (`inD`) rather than from the graph's
# edge list: after `to_directed()` every road exists as both (u, v) and (v, u)
# carrying the *same* geometry, so a node's position inside the edge tuple does
# not reliably say which end of the line it sits on. Reading the table also
# needs a single pass over the roads instead of a scan of the edge list per node.
#
# Assumes each geometry is a single LineString drawn from `stnode` to `endnode`
# -- TODO confirm against the source data.
# NOTE: this cell must run while `inD` still holds the road shapefile (a later
# cell reuses the name `inD` for the destinations layer).
node_coords = {}
for start_node, end_node, geom in zip(inD['stnode'], inD['endnode'], inD['geometry']):
    line_coords = list(geom.coords)
    # setdefault keeps the first coordinate seen for a node and ignores later ones
    node_coords.setdefault(start_node, line_coords[0])
    node_coords.setdefault(end_node, line_coords[-1])

# Fail loudly rather than silently leaving some nodes without x/y
missing_nodes = [n for n in G.nodes() if n not in node_coords]
if missing_nodes:
    raise ValueError(
        "No coordinates found for %d node(s), e.g. %r"
        % (len(missing_nodes), missing_nodes[:5])
    )

# Build [node_id, attribute_dict] pairs and merge them into the existing nodes
all_nodes = [
    [node_id, {'x': node_coords[node_id][0], 'y': node_coords[node_id][1]}]
    for node_id in G.nodes()
]
G.update(nodes=all_nodes)

In [5]:
# Inspect the resulting graph: size and first element of the node and edge lists
nodes = list(G.nodes(data=True))
edges = list(G.edges(data=True))
for collection in (nodes, edges):
    print(len(collection))
    print(collection[0])

7922
(0, {'x': 92.3205152, 'y': 20.827246})
18047
(0, 4690, {'length': 0.128829058995489, 'infra_type': 'unclassified', 'osm_id': '644427970', 'geometry': <shapely.geometry.linestring.LineString object at 0x130e34310>})


In [7]:
# Keep only the largest strongly connected component (networkX 2.4 approach),
# printing the edge count (G.size()) before and after.
print('print G before', G.size(), sep='\n')

components = nx.strongly_connected_components(G)
# Each component is a set of nodes, so key=len selects the one with the most nodes
largest = max(components, key=len)
# .copy() turns the subgraph view into an independent graph
G = nx.induced_subgraph(G, largest).copy()

print('print G after', G.size(), sep='\n')

print G before
18047
print G after
11843


In [8]:
# Identify only the largest graph
# NOTE: everything below is disabled -- it is wrapped in a string literal, so none of it
# runs and G is left unchanged; evaluating the cell only echoes the string.
# The disabled code relies on nx.strongly_connected_component_subgraphs and keeps the
# component with the most edges. The preceding cell does the selection instead, picking
# the strongly connected component with the most nodes (written for networkX 2.4).
'''
list_of_subgraphs = list(nx.strongly_connected_component_subgraphs(G))
max_graph = None
max_edges = 0
for i in list_of_subgraphs:
    if i.number_of_edges() > max_edges:
        max_edges = i.number_of_edges()
        max_graph = i
G = max_graph
'''

'\nlist_of_subgraphs = list(nx.strongly_connected_component_subgraphs(G))\nmax_graph = None\nmax_edges = 0\nfor i in list_of_subgraphs:\n    if i.number_of_edges() > max_edges:\n        max_edges = i.number_of_edges()\n        max_graph = i\n'

In [9]:
# Inspect the graph again: size and first element of the node and edge lists
nodes = list(G.nodes(data=True))
edges = list(G.edges(data=True))
for collection in (nodes, edges):
    print(len(collection))
    print(collection[0])

4993
(0, {'x': 92.3205152, 'y': 20.827246})
11843
(0, 4690, {'length': 0.128829058995489, 'infra_type': 'unclassified', 'osm_id': '644427970', 'geometry': <shapely.geometry.linestring.LineString object at 0x130e34310>})


In [36]:
# Read in origins (CSV) and destinations (shapefile)
input_folder = r"C:\Work\Code\GOST_PublicGoods\Implementations\RobertBanick_ShapefileTToG\SampleData"
origins_file, destinations_file = (
    os.path.join(input_folder, file_name)
    for file_name in ('hrsl_pts_admins.csv', 'education_cxb_lged.shp')
)

inO = pd.read_csv(origins_file)
# Note: `inD` is reused here -- from this point on it holds the destinations,
# no longer the road shapefile. Destinations are reprojected to EPSG:4326.
inD = gpd.read_file(destinations_file).to_crs({'init': 'epsg:4326'})

In [37]:
# Convert the input origins to a POINT GeoDataFrame.
# The 'geometry' column of the CSV holds WKT strings; each one is parsed and its
# first member geometry (x[0]) is rebuilt as a plain Point.
# NOTE(review): presumably the WKT values are MULTIPOINTs -- confirm against the CSV.
inO_geom = [loads(x) for x in inO['geometry']]
inO_geom = [Point(x[0].x, x[0].y) for x in inO_geom]

inO = gpd.GeoDataFrame(inO.drop(['geometry'], axis=1), geometry = inO_geom, crs = {'init': 'epsg:4326'})

# Sample origins for testing.
# A fixed seed makes the sample (and everything computed from it) reproducible
# across runs, and the size is capped so inputs with fewer rows do not raise.
N_SAMPLE_ORIGINS = 1000
SAMPLE_SEED = 42
inO = inO.sample(n=min(N_SAMPLE_ORIGINS, len(inO)), random_state=SAMPLE_SEED)

In [38]:
# Report shape and CRS of the origins, then of the destinations
for frame in (inO, inD):
    print(frame.shape)
    print(frame.crs)

(1000, 8)
{'init': 'epsg:4326'}
(788, 20)
{'init': 'epsg:4326'}


Unnamed: 0,hrsl_ID,VALUE,Upaz_name,Upz_UID,index_right,ADM4_EN,ADM4_PCODE,geometry
118877,120538,38.083674,Teknaf Upazila,202290,4693,Teknaf Paurashava,20229099,POINT (92.30028 20.86472)
97928,96110,35.005145,Cox's Bazar Sadar Upazila,202224,2295,Jhilwanja,20222447,POINT (92.03694 21.42944)
108805,108595,26.462021,Ukhia Upazila,202294,4081,Ratna Palong,20229463,POINT (92.16694 21.26833)
103630,103215,31.045279,Ramu Upazila,202266,2755,Khuniapalong,20226657,POINT (92.08639 21.34444)
103750,103387,31.045279,Ramu Upazila,202266,2755,Khuniapalong,20226657,POINT (92.09472 21.34167)


In [39]:
# Snap origins and destinations onto the graph with GOSTnets.
origins = gn.pandana_snap(G, inO)

# The destinations' CRS is passed explicitly, read from the 'init' entry of the CRS dict
destination_crs = inD.crs['init']
destinations = gn.pandana_snap(G, inD, source_crs=destination_crs)

# Unique values of the 'NN' column for each snapped layer
# (presumably the ID of the nearest graph node -- confirm in GOSTnets)
oNodes, dNodes = (snapped['NN'].unique() for snapped in (origins, destinations))

  G_tree = spatial.KDTree(node_gdf[['x','y']].as_matrix())
  distances, indices = G_tree.query(in_df[['x','y']].as_matrix())


In [40]:
# Compute the origin-destination matrix between the snapped nodes.
# NOTE(review): the meaning of the final argument (-1) and the edge attribute
# used as travel cost are defined by GOSTnets, not here -- confirm that the
# attribute it uses actually exists on these edges ('length', 'infra_type',
# 'osm_id' and 'geometry' are the only ones loaded).
od = gn.calculate_OD(
    G,
    oNodes,
    dNodes,
    -1,
)
od.shape

In [None]:
# Disabled export step: the whole cell is a single string literal, so none of it executes.
# If enabled, it would collect every edge's attribute dict (adding the edge's end nodes
# under the keys 'o' and 'd') into a GeoDataFrame, write it to a CSV whose name is
# input_shapefile with ".shp" replaced by "_big_boy.csv", and display the first rows.
'''# Write the largest graph back to file
all_data = {}
idx=0
for o, d, data in G.edges(data=True):
    data['o'] = o
    data['d'] = d
    all_data[idx] = data    
    idx = idx+1
big_graph = gpd.GeoDataFrame(all_data).transpose()
big_graph.to_csv(input_shapefile.replace(".shp", "_big_boy.csv"))
big_graph.head()'''