In [6]:
import geopandas as gpd
import os, sys, time
import pandas as pd
import numpy as np
from osgeo import ogr
from rtree import index
from shapely import speedups
import networkx as nx
import shapely.ops
from shapely.geometry import LineString, MultiLineString, MultiPoint, Point
from geopy.distance import vincenty
from boltons.iterutils import pairwise
import matplotlib.pyplot as plt
from shapely.wkt import loads,dumps

# Root folder that holds the raw ``<country>.osm.pbf`` extracts read by
# load_osm_data. Set the NETCLEAN_DATA_PATH environment variable to run the
# notebook on another machine; the original local folder stays the default.
data_path = os.environ.get('NETCLEAN_DATA_PATH', r'C:\Users\charl\Documents\GOST\NetClean')

In [18]:
def load_osm_data(data_path,country):
    """Open the OSM extract ``<country>.osm.pbf`` located in ``data_path``.

    Arguments:
        data_path {str} -- folder that contains the extract
        country {str} -- file stem of the extract

    Returns:
        Whatever the OGR 'OSM' driver's ``Open`` call returns for that file.
    """
    pbf_name = '{}.osm.pbf'.format(country)
    osm_driver = ogr.GetDriverByName('OSM')
    return osm_driver.Open(os.path.join(data_path, pbf_name))

def fetch_roads(data_path, country):
    """Read every OSM line tagged ``highway`` for one country into a GeoDataFrame.

    Arguments:
        data_path {str} -- folder that contains ``<country>.osm.pbf``
        country {str} -- file stem of the extract, also used in printed messages

    Returns:
        GeoDataFrame with the columns ``osm_id``, ``infra_type`` (the value of
        the ``highway`` tag) and ``geometry``, declared as EPSG:4326, or
        ``None`` when the extract contains no usable road.

    Raises:
        IOError -- when the extract could not be opened
    """
    data = load_osm_data(data_path,country)
    if data is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise IOError('Could not open OSM extract for {} in {}'.format(country, data_path))

    sql_lyr = data.ExecuteSQL("SELECT osm_id,highway FROM lines WHERE highway IS NOT NULL")

    roads = []
    try:
        for feature in sql_lyr:
            highway = feature.GetField('highway')
            ogr_geom = feature.geometry()
            # Skip features without a tag or without a geometry; the geometry
            # has to be checked *before* ExportToWkt(), which fails on None.
            if highway is None or ogr_geom is None:
                continue
            roads.append([feature.GetField('osm_id'), highway, loads(ogr_geom.ExportToWkt())])
    finally:
        # Layers created by ExecuteSQL must be handed back to the datasource.
        if sql_lyr is not None:
            data.ReleaseResultSet(sql_lyr)

    if not roads:
        print('No roads in {}'.format(country))
        return None

    road_gdf = gpd.GeoDataFrame(roads,columns=['osm_id','infra_type','geometry'],crs={'init': 'epsg:4326'})
    if 'residential' in road_gdf.infra_type.unique():
        print('residential included')
    else:
        print('residential excluded')
    return road_gdf
        
def line_length(line, ellipsoid='WGS-84'):
    """Length of a line in kilometers, given in geographic coordinates

    Adapted from https://gis.stackexchange.com/questions/4022/looking-for-a-pythonic-way-to-calculate-the-length-of-a-wkt-linestring#answer-115285

    Arguments:
        line {Shapely LineString} -- a shapely LineString (or MultiLineString)
            object with WGS-84 coordinates
        ellipsoid {String} -- string name of an ellipsoid that `geopy` understands (see
            http://geopy.readthedocs.io/en/latest/#module-geopy.distance)

    Returns:
        Length of line in kilometers (the sum of the ``.kilometers`` value of
        every consecutive coordinate pair)
    """
    if line.geom_type == 'MultiLineString':
        # Forward the caller's ellipsoid to every part; iterate over ``.geoms``
        # because multi-part geometries are not directly iterable in Shapely 2.
        return sum(line_length(segment, ellipsoid=ellipsoid) for segment in line.geoms)

    # Each coordinate tuple is reversed before being handed to vincenty:
    # shapely stores (x, y) = (lon, lat) while geopy expects (lat, lon).
    return sum(
        vincenty(tuple(reversed(a)), tuple(reversed(b)), ellipsoid=ellipsoid).kilometers
        for a, b in pairwise(line.coords)
    )

In [19]:
def get_all_intersections(shape_input, country=None):
    """Cut every line at the points where it meets another line.

    Arguments:
        shape_input {GeoDataFrame} -- lines with the columns ``osm_id``,
            ``infra_type`` and ``geometry``
        country {String} -- value written to the ``country`` column of the
            result. When omitted, the module-level ``country`` variable is
            used (if one exists), which is what earlier versions of this
            function silently relied on.

    Returns:
        GeoDataFrame (crs epsg:4326) with one row per resulting piece and the
        columns ``geometry``, ``osm_id``, ``infra_type``, ``id`` (running
        number starting at 1) and ``country``.
    """
    if country is None:
        # Backward compatibility with callers that only pass shape_input.
        country = globals().get('country')

    # R-tree collecting every intersection point found so far.
    idx_inters = index.Index()
    # Spatial index of the input, used to pre-select candidate neighbours.
    idx_osm = shape_input.sindex

    # osm_id -> geometry (for a repeated osm_id the last geometry wins).
    all_data = dict(zip(shape_input.osm_id, shape_input.geometry))

    # osm_id -> infra_type, first occurrence wins. Built once so the main loop
    # does not have to scan the whole frame for every single line.
    infra_types = {}
    for osm_id, infra_type in zip(shape_input.osm_id, shape_input.infra_type):
        infra_types.setdefault(osm_id, infra_type)

    n_points = 0
    inters_done = set()
    flat_list = []
    for key1, line in all_data.items():
        infra_line = infra_types[key1]

        # Candidates: lines whose bounding box overlaps this line's bounding box.
        nearby = shape_input.iloc[list(idx_osm.intersection(line.bounds))]
        nearby = dict(zip(nearby.osm_id, nearby.geometry))
        nearby.pop(key1, None)

        for key2, line2 in nearby.items():
            # Each unordered pair is intersected only once.
            if (key1, key2) in inters_done or (key2, key1) in inters_done:
                continue
            inters_done.add((key1, key2))

            if not line.intersects(line2):
                continue

            inter = line.intersection(line2)
            # Only point-like intersections become cut points; any other
            # intersection type is ignored.
            if inter.geom_type == 'Point':
                points = [inter]
            elif inter.geom_type == 'MultiPoint':
                points = list(inter.geoms)
            else:
                points = []

            for pt in points:
                idx_inters.insert(n_points, pt.bounds, pt)
                n_points += 1

        # Every stored point inside this line's bounding box is offered to
        # split(); pairs handled in earlier iterations are already in the index.
        hits = [n.object for n in idx_inters.intersection(line.bounds, objects=True)]
        if hits:
            pieces = [LineString(part) for part in shapely.ops.split(line, MultiPoint(hits)).geoms]
        else:
            pieces = [line]

        # Every piece is treated as its own line with a fresh running id.
        for piece in pieces:
            flat_list.append({'geometry': piece,
                              'osm_id': key1,
                              'infra_type': infra_line,
                              'id': len(flat_list) + 1})

    # Transform into a geodataframe and add the coordinate system.
    full_gpd = gpd.GeoDataFrame(flat_list,geometry ='geometry')
    full_gpd['country'] = country
    full_gpd.crs = {'init' :'epsg:4326'}
    return full_gpd

def get_nodes(x):
    """Return the first and the last coordinate of ``x.geometry`` as a pair."""
    vertices = list(x.geometry.coords)
    start, end = vertices[0], vertices[-1]
    return start, end

In [21]:
%%time
# Processed output is written to a 'processed' folder next to the raw
# extracts, derived from data_path instead of a second hard-coded path.
destfolder = os.path.join(data_path, 'processed')
# Create the folder up front so the long run cannot fail at the final write.
os.makedirs(destfolder, exist_ok=True)

country = 'YEM'
roads_raw = fetch_roads(data_path,country)
if roads_raw is None:
    # fetch_roads returns None when the extract holds no roads.
    raise ValueError('No roads found for {}'.format(country))

# Highway classes kept for the network. Every entry needs its own trailing
# comma: two adjacent string literals are silently concatenated by Python.
accepted_road_types = ['primary',
                      'primary_link',
                      'motorway',
                      'motorway_link',
                      'secondary',
                      'secondary_link',
                      'tertiary',
                      'tertiary_link',
                      'trunk',
                      'trunk_link',
                      'residential',
                      'unclassified',
                      'road',
                      'track',
                      'service',
                      'services',
                      ]

roads_raw = roads_raw.loc[roads_raw.infra_type.isin(accepted_road_types)]

# Cut the roads at their mutual intersections; one row per resulting edge.
roads = get_all_intersections(roads_raw)
roads['key'] = ['edge_'+str(x+1) for x in range(len(roads))]

# Start / end coordinate of every edge, stored as the columns u and v.
nodes = gpd.GeoDataFrame(roads.apply(get_nodes,axis=1).apply(pd.Series))
nodes.columns = ['u','v']

# line_length sums geopy `.kilometers` values, so this column is in km.
roads['length'] = roads.geometry.apply(line_length)

roads = roads.rename(columns={'geometry':'Wkt'})

roads = pd.concat([roads,nodes],axis=1)
roads.to_csv(os.path.join(destfolder, '%s_combo.csv' % country))

residential included




Wall time: 8min 42s


In [25]:
# Number of edges per highway class in the processed network.
roads['infra_type'].value_counts()

residential       77687
unclassified      28641
track             13696
tertiary           9521
secondary          6477
primary            6329
trunk              5193
service            2725
road                549
trunk_link          331
secondary_link      271
primary_link        271
tertiary_link       156
services              1
Name: infra_type, dtype: int64