In [5]:
import geopandas as gpd
import os, sys, time
import pandas as pd
sys.path.append(r'C:\Users\charl\Documents\GitHub\GOST_PublicGoods\GOSTNets\GOSTNets')
import GOSTnet as gn
import importlib
importlib.reload(gn)
import networkx as nx
import osmnx as ox
importlib.reload(ox)
from shapely.ops import unary_union
from shapely.wkt import loads
from shapely.geometry import LineString, MultiLineString, Point

peartree version: 0.6.0 
networkx version: 2.2 
matplotlib version: 2.2.2 
osmnx version: 0.8.2 


In [6]:
def InitialReadIn(fpath, country, wpath = None):
    """Build a MultiDiGraph from a country's combined edge CSV and dump it to disk.

    Reads ``<fpath>/processed/<country>_combo.csv``, turns every row into an
    edge (u, v, attribute dict), derives node coordinates from the node
    labels, relabels the nodes as integers and writes the resulting node and
    edge tables to ``wpath`` as CSV files.

    Parameters
    ----------
    fpath : str
        Root working folder; the input CSV is read from its ``processed``
        sub-folder.
    country : str
        Country code used to build the input and output file names.
    wpath : str, optional
        Folder that receives ``<country>_pre_processing_nodes.csv`` and
        ``<country>_pre_processing_edges.csv``. Defaults to
        ``<fpath>/output/<country>``. Previously this function read ``wpath``
        from a notebook-level global, which raised NameError whenever the
        caller had not defined it first.

    Returns
    -------
    networkx.MultiDiGraph
        Graph with integer node labels; each node carries ``x`` and ``y``.
    """
    if wpath is None:
        wpath = os.path.join(fpath, 'output', country)
    # Make sure the destination exists so the to_csv calls below cannot fail on it.
    os.makedirs(wpath, exist_ok = True)

    ffile = r'%s_combo.csv' % country

    edges = pd.read_csv(os.path.join(fpath, 'processed', ffile))

    # Every distinct endpoint label appearing in either the u or the v column.
    node_bunch = list(set(list(edges['u']) + list(edges['v'])))

    def convert(x):
        # One CSV row -> (u, v, attributes); the WKT text is parsed into a geometry object.
        u = x.u
        v = x.v
        data = {'Wkt':loads(x.Wkt),
               'id':x.id,
               'infra_type':x.infra_type, 
               'osm_id':x.osm_id,
               'country': x.country,
               'key': x.key, 
               'length':x.length}

        return (u, v, data)

    edge_bunch = edges.apply(convert, axis = 1).tolist()

    G = nx.MultiDiGraph()

    G.add_nodes_from(node_bunch)
    G.add_edges_from(edge_bunch)

    # Node labels are parsed as text: first and last characters are stripped and the
    # remainder split on ',' -- presumably labels look like '(x, y)'; verify against the CSV.
    for u, data in G.nodes(data = True):
        q = tuple(float(x) for x in u[1:-1].split(','))
        data['x'] = q[0]
        data['y'] = q[1]

    G = nx.convert_node_labels_to_integers(G)

    gdfnodes = gn.node_gdf_from_graph(G)
    gdfnodes.to_csv(os.path.join(wpath, '%s_pre_processing_nodes.csv' % country))
    gdfedges = gn.edge_gdf_from_graph(G, geom_col = 'Wkt')
    gdfedges.to_csv(os.path.join(wpath, '%s_pre_processing_edges.csv' % country))

    # Sanity check: building the graph should neither drop nor add edges.
    print('These two should equal: A) length of final df: %s | B) length of original df: %s' % (len(gdfedges), len(edges)))

    return G

### Full Process

In [7]:
def CleanNetwork(G, wpath, country, UTM, WGS = {'init': 'epsg:4326'}, junctdist = 50, verbose = False):
    """Run the GOSTnet cleaning pipeline on a graph and save the result.

    Steps, in order: simplify junctions, add missing reflected edges, custom
    simplify, flatten list-valued 'Wkt' attributes, convert to MultiDiGraph,
    remove duplicate edges, custom simplify again, add missing reflected
    edges again, then save.

    Parameters
    ----------
    G : networkx graph
        Input network.
    wpath : str
        Folder passed to ``gn.save`` for intermediate and final outputs.
    country : str
        Country code; the final output is saved as ``<country>_processed``.
    UTM, WGS : dict
        Projection definitions handed to ``gn.simplify_junctions``. WGS is
        only passed through here, never modified in this function.
    junctdist : number
        Snapping tolerance handed to ``gn.simplify_junctions``.
    verbose : bool
        When truthy, the intermediate graphs 'a' and 'b' are also saved.

    Returns
    -------
    The cleaned graph produced by the last pipeline step.
    """
    # Record the starting size before any processing, so the summary below stays
    # correct even if one of the gn steps were to modify G in place.
    edges_before = G.number_of_edges()

    # Squeezes clusters of nodes down to a single node if they are within the snapping tolerance
    a = gn.simplify_junctions(G, UTM, WGS, junctdist)

    # ensures all streets are two-way
    a = gn.add_missing_reflected_edges(a)

    # save progress
    if verbose:
        gn.save(a, 'a', wpath)

    # Finds and deletes interstitial nodes based on node degree
    b = gn.custom_simplify(a)

    # rectify geometry: edges whose 'Wkt' attribute is a list are passed through
    # gn.unbundle_geometry and the result stored back on the edge
    for u, v, data in b.edges(data = True):
        if isinstance(data['Wkt'], list):
            data['Wkt'] = gn.unbundle_geometry(data['Wkt'])

    # save progress
    if verbose:
        gn.save(b, 'b', wpath)

    # For some reason CustomSimplify doesn't return a MultiDiGraph. Fix that here
    c = gn.convert_to_MultiDiGraph(b)

    # This is the most controversial function - removes duplicated edges. This takes care of two-lane but separate highways, BUT
    # destroys internal loops within roads. Can be run with or without this line
    c = gn.remove_duplicate_edges(c)

    # Run this again after removing duplicated edges
    c = gn.custom_simplify(c)

    # Ensure all remaining edges are duplicated (two-way streets)
    c = gn.add_missing_reflected_edges(c)

    # save final
    gn.save(c, '%s_processed' % country, wpath)

    edges_after = c.number_of_edges()
    # An empty input graph would otherwise raise ZeroDivisionError here.
    if edges_before:
        reduction_pct = (edges_before - edges_after) / edges_before * 100
    else:
        reduction_pct = 0
    # NOTE: the figure can be negative -- reflected edges are added, so the output
    # may contain more edges than the input.
    print('Edge reduction: %s to %s (%d percent)' % (edges_before,
                                               edges_after,
                                               reduction_pct))
    return c

In [8]:
# EPSG code of the projected (metric) CRS used for each country.
# NOTE(review): NRU carries the same code as MAR (32629, WGS 84 / UTM zone 29N).
# Nauru lies near 167 deg E, which would normally fall in UTM zone 58S (EPSG:32758)
# -- confirm before processing NRU.
UTMZs = {'ABW':24819,
        'NRU':32629,
        'MAR':32629,
        'UGA':32736,
        'TZA':32737,
        'HUN':32634,
        'PNG':32756,
        'YEM':32638}

WGS = {'init': 'epsg:4326'}

# Countries to process in this run; each must have an entry in UTMZs.
countries = ['YEM']

importlib.reload(gn)

# Root working folder: inputs are read from <fpath>/processed, outputs go to <fpath>/output/<country>.
fpath = r'C:\Users\charl\Documents\GOST\NetClean'

for country in countries:

    print('\n--- processing for: %s ---\n' % country)
    print('start: %s\n' % time.ctime())
    # Build the path from components rather than embedding a backslash, and create
    # it together with any missing parent ('output' may not exist on a first run).
    wpath = os.path.join(fpath, 'output', country)
    os.makedirs(wpath, exist_ok = True)

    print('Outputs can be found at: %s\n' % (wpath))

    UTM = {'init': 'epsg:%d' % UTMZs[country]}

    G = InitialReadIn(fpath, country)

    # 0.5 is the junction snapping tolerance (junctdist) passed to CleanNetwork.
    G = CleanNetwork(G, wpath, country, UTM, WGS, 0.5, verbose = False)
    print('\nend: %s' % time.ctime())
    print('\n--- processing complete for: %s ---' % country)

peartree version: 0.6.0 
networkx version: 2.2 
matplotlib version: 2.2.2 
osmnx version: 0.8.2 

--- processing for: YEM ---

start: Wed Jan  9 08:55:32 2019

Outputs can be found at: C:\Users\charl\Documents\GOST\NetClean\output\YEM

These two should equal: A) length of final df: 151848 | B) length of original df: 151848
151836
302730
288602
287827
286665
Edge reduction: 151848 to 286665 (-88 percent)

end: Wed Jan  9 09:32:34 2019

--- processing complete for: YEM ---
